18 changes: 18 additions & 0 deletions redisvl/query/filter.py
@@ -73,6 +73,24 @@ def _set_value(
self._value = val
self._operator = operator

def is_missing(self) -> "FilterExpression":
"""Create a filter expression for documents missing this field.

Returns:
FilterExpression: A filter expression that matches documents where the field is missing.

.. code-block:: python

from redisvl.query.filter import Tag, Text, Num, Geo, Timestamp

f = Tag("brand").is_missing()
f = Text("title").is_missing()
f = Num("price").is_missing()
f = Geo("location").is_missing()
f = Timestamp("created_at").is_missing()
"""
return FilterExpression(f"ismissing(@{self._field})")


def check_operator_misuse(func: Callable) -> Callable:
@wraps(func)
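
A hedged usage sketch, not part of this diff: is_missing() returns a regular FilterExpression, so it composes with other filters and can be passed to a query. The field names and query parameters below are illustrative.

# Sketch only: assumes an index with "brand", "price", and "product_id" fields.
from redisvl.query import FilterQuery
from redisvl.query.filter import Num, Tag

# documents that have no "brand" tag and a price below 100
flt = Tag("brand").is_missing() & (Num("price") < 100)
query = FilterQuery(return_fields=["product_id", "price"], filter_expression=flt)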
32 changes: 20 additions & 12 deletions redisvl/redis/connection.py
@@ -20,7 +20,7 @@
from redisvl.version import __version__


def compare_versions(version1, version2):
def compare_versions(version1: str, version2: str):
"""
Compare two Redis version strings numerically.

@@ -105,19 +105,27 @@ def parse_attrs(attrs):
# TODO 'WITHSUFFIXTRIE' is another boolean attr, but is not returned by ft.info
original = attrs.copy()
parsed_attrs = {}
if "NOSTEM" in attrs:
parsed_attrs["no_stem"] = True
attrs.remove("NOSTEM")
if "CASESENSITIVE" in attrs:
parsed_attrs["case_sensitive"] = True
attrs.remove("CASESENSITIVE")
if "SORTABLE" in attrs:
parsed_attrs["sortable"] = True
attrs.remove("SORTABLE")
if "UNF" in attrs:
attrs.remove("UNF") # UNF present on sortable numeric fields only

# Handle all boolean attributes first, regardless of position
boolean_attrs = {
"NOSTEM": "no_stem",
"CASESENSITIVE": "case_sensitive",
"SORTABLE": "sortable",
"INDEXMISSING": "index_missing",
"INDEXEMPTY": "index_empty",
}

for redis_attr, python_attr in boolean_attrs.items():
if redis_attr in attrs:
parsed_attrs[python_attr] = True
attrs.remove(redis_attr)

# Handle UNF which is associated with SORTABLE
if "UNF" in attrs:
attrs.remove("UNF") # UNF present on sortable numeric fields only

try:
# Parse remaining attributes as key-value pairs starting from index 6
parsed_attrs.update(
{attrs[i].lower(): attrs[i + 1] for i in range(6, len(attrs), 2)}
)
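
A standalone sketch of the mapping-driven parsing above (illustrative only: the attribute list is a simplified stand-in for FT.INFO output, and parse_attrs itself is an internal helper, not public API):

# Simplified stand-in for an FT.INFO attribute list; not the exact server format.
BOOLEAN_ATTRS = {
    "NOSTEM": "no_stem",
    "CASESENSITIVE": "case_sensitive",
    "SORTABLE": "sortable",
    "INDEXMISSING": "index_missing",
    "INDEXEMPTY": "index_empty",
}

attrs = ["identifier", "title", "attribute", "title", "type", "TEXT",
         "WEIGHT", "1", "SORTABLE", "INDEXMISSING"]

# strip boolean flags first, wherever they appear in the list
parsed = {py_name: True for redis_name, py_name in BOOLEAN_ATTRS.items() if redis_name in attrs}
remaining = [a for a in attrs if a not in BOOLEAN_ATTRS]

# then read the remaining entries as key/value pairs starting from index 6
parsed.update({remaining[i].lower(): remaining[i + 1] for i in range(6, len(remaining), 2)})
# parsed == {"sortable": True, "index_missing": True, "weight": "1"}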
107 changes: 82 additions & 25 deletions redisvl/schema/fields.py
@@ -61,6 +61,8 @@ class BaseFieldAttributes(BaseModel):

sortable: bool = Field(default=False)
"""Enable faster result sorting on the field at runtime"""
index_missing: bool = Field(default=False)
"""Allow indexing and searching for missing values (documents without the field)"""


class TextFieldAttributes(BaseFieldAttributes):
Expand All @@ -74,6 +76,8 @@ class TextFieldAttributes(BaseFieldAttributes):
"""Keep a suffix trie with all terms which match the suffix to optimize certain queries"""
phonetic_matcher: Optional[str] = None
"""Used to perform phonetic matching during search"""
index_empty: bool = Field(default=False)
"""Allow indexing and searching for empty strings"""


class TagFieldAttributes(BaseFieldAttributes):
Expand All @@ -85,6 +89,8 @@ class TagFieldAttributes(BaseFieldAttributes):
"""Treat text as case sensitive or not. By default, tag characters are converted to lowercase"""
withsuffixtrie: bool = Field(default=False)
"""Keep a suffix trie with all terms which match the suffix to optimize certain queries"""
index_empty: bool = Field(default=False)
"""Allow indexing and searching for empty strings"""


class NumericFieldAttributes(BaseFieldAttributes):
Expand Down Expand Up @@ -112,6 +118,8 @@ class BaseVectorFieldAttributes(BaseModel):
"""The distance metric used to measure query relevance"""
initial_cap: Optional[int] = None
"""Initial vector capacity in the index affecting memory allocation size of the index"""
index_missing: bool = Field(default=False)
"""Allow indexing and searching for missing values (documents without the field)"""

@field_validator("algorithm", "datatype", "distance_metric", mode="before")
@classmethod
@@ -129,6 +137,8 @@ def field_data(self) -> Dict[str, Any]:
}
if self.initial_cap is not None: # Only include it if it's set
field_data["INITIAL_CAP"] = self.initial_cap
if self.index_missing: # Only include it if it's set
field_data["INDEXMISSING"] = True
return field_data


@@ -190,14 +200,30 @@ class TextField(BaseField):

def as_redis_field(self) -> RedisField:
name, as_name = self._handle_names()
return RedisTextField(
name,
as_name=as_name,
weight=self.attrs.weight, # type: ignore
no_stem=self.attrs.no_stem, # type: ignore
phonetic_matcher=self.attrs.phonetic_matcher, # type: ignore
sortable=self.attrs.sortable,
)
# Build arguments for RedisTextField
kwargs: Dict[str, Any] = {
"weight": self.attrs.weight, # type: ignore
"no_stem": self.attrs.no_stem, # type: ignore
"sortable": self.attrs.sortable,
}

# Only add as_name if it's not None
if as_name is not None:
kwargs["as_name"] = as_name

# Only add phonetic_matcher if it's not None
if self.attrs.phonetic_matcher is not None: # type: ignore
kwargs["phonetic_matcher"] = self.attrs.phonetic_matcher # type: ignore

# Add INDEXMISSING if enabled
if self.attrs.index_missing: # type: ignore
kwargs["index_missing"] = True

# Add INDEXEMPTY if enabled
if self.attrs.index_empty: # type: ignore
kwargs["index_empty"] = True

return RedisTextField(name, **kwargs)


class TagField(BaseField):
@@ -208,13 +234,26 @@ class TagField(BaseField):

def as_redis_field(self) -> RedisField:
name, as_name = self._handle_names()
return RedisTagField(
name,
as_name=as_name,
separator=self.attrs.separator, # type: ignore
case_sensitive=self.attrs.case_sensitive, # type: ignore
sortable=self.attrs.sortable,
)
# Build arguments for RedisTagField
kwargs: Dict[str, Any] = {
"separator": self.attrs.separator, # type: ignore
"case_sensitive": self.attrs.case_sensitive, # type: ignore
"sortable": self.attrs.sortable,
}

# Only add as_name if it's not None
if as_name is not None:
kwargs["as_name"] = as_name

# Add INDEXMISSING if enabled
if self.attrs.index_missing: # type: ignore
kwargs["index_missing"] = True

# Add INDEXEMPTY if enabled
if self.attrs.index_empty: # type: ignore
kwargs["index_empty"] = True

return RedisTagField(name, **kwargs)


class NumericField(BaseField):
@@ -225,11 +264,20 @@ class NumericField(BaseField):

def as_redis_field(self) -> RedisField:
name, as_name = self._handle_names()
return RedisNumericField(
name,
as_name=as_name,
sortable=self.attrs.sortable,
)
# Build arguments for RedisNumericField
kwargs: Dict[str, Any] = {
"sortable": self.attrs.sortable,
}

# Only add as_name if it's not None
if as_name is not None:
kwargs["as_name"] = as_name

# Add INDEXMISSING if enabled
if self.attrs.index_missing: # type: ignore
kwargs["index_missing"] = True

return RedisNumericField(name, **kwargs)


class GeoField(BaseField):
@@ -240,11 +288,20 @@ class GeoField(BaseField):

def as_redis_field(self) -> RedisField:
name, as_name = self._handle_names()
return RedisGeoField(
name,
as_name=as_name,
sortable=self.attrs.sortable,
)
# Build arguments for RedisGeoField
kwargs: Dict[str, Any] = {
"sortable": self.attrs.sortable,
}

# Only add as_name if it's not None
if as_name is not None:
kwargs["as_name"] = as_name

# Add INDEXMISSING if enabled
if self.attrs.index_missing: # type: ignore
kwargs["index_missing"] = True

return RedisGeoField(name, **kwargs)


class FlatVectorField(BaseField):
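
A hedged sketch of how these attributes surface when defining fields directly. The field names are made up, and as_redis_field() relies on a redis-py version that accepts the index_missing / index_empty keyword arguments.

# Sketch only: constructor style mirrors the pydantic models in this diff.
from redisvl.schema.fields import NumericField, TagField

brand = TagField(name="brand", attrs={"index_missing": True, "index_empty": True})
price = NumericField(name="price", attrs={"index_missing": True, "sortable": True})

# INDEXMISSING / INDEXEMPTY are passed through only when explicitly enabled
redis_brand = brand.as_redis_field()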
13 changes: 8 additions & 5 deletions redisvl/schema/schema.py
@@ -432,11 +432,14 @@ def to_dict(self) -> Dict[str, Any]:
Returns:
Dict[str, Any]: The index schema as a dictionary.
"""
dict_schema = model_to_dict(self)
# cast fields back to a pure list
dict_schema["fields"] = [
field for field_name, field in dict_schema["fields"].items()
]
# Manually serialize to ensure all field attributes are preserved
dict_schema = {
"index": model_to_dict(self.index),
"fields": [
model_to_dict(field) for field_name, field in self.fields.items()
],
"version": self.version,
}
return dict_schema

def to_yaml(self, file_path: str, overwrite: bool = True) -> None:
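
A hedged sketch of the behavior this serialization change targets: non-default attributes such as index_missing should now survive a to_dict()/from_dict() round trip. The index and field names are illustrative.

# Sketch only: uses the dictionary schema format redisvl already accepts.
from redisvl.schema import IndexSchema

schema = IndexSchema.from_dict({
    "index": {"name": "docs", "prefix": "doc"},
    "fields": [
        {"name": "title", "type": "text", "attrs": {"index_missing": True}},
    ],
})

round_tripped = IndexSchema.from_dict(schema.to_dict())
assert round_tripped.to_dict()["fields"][0]["attrs"]["index_missing"] is True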
47 changes: 37 additions & 10 deletions redisvl/utils/utils.py
@@ -38,14 +38,19 @@ def model_to_dict(model: BaseModel) -> Dict[str, Any]:
def serialize_item(item):
if isinstance(item, Enum):
return item.value.lower()
elif isinstance(item, BaseModel):
# Recursively serialize nested BaseModel instances with exclude_defaults=False
nested_data = item.model_dump(exclude_none=True, exclude_defaults=False)
return {key: serialize_item(value) for key, value in nested_data.items()}
elif isinstance(item, dict):
return {key: serialize_item(value) for key, value in item.items()}
elif isinstance(item, list):
return [serialize_item(element) for element in item]
else:
return item

serialized_data = model.model_dump(exclude_none=True)
# Use exclude_defaults=False to preserve all field attributes including new ones
serialized_data = model.model_dump(exclude_none=True, exclude_defaults=False)
for key, value in serialized_data.items():
serialized_data[key] = serialize_item(value)
return serialized_data
@@ -170,29 +175,51 @@ def wrapper(*args, **kwargs):

def sync_wrapper(fn: Callable[[], Coroutine[Any, Any, Any]]) -> Callable[[], None]:
def wrapper():
# Check if the interpreter is shutting down
if sys is None or getattr(sys, "_getframe", None) is None:
# Interpreter is shutting down, skip cleanup
return

try:
loop = asyncio.get_running_loop()
except RuntimeError:
loop = None
except Exception:
# Any other exception during loop detection means we should skip cleanup
return

try:
if loop is None or not loop.is_running():
# Check if asyncio module is still available
if asyncio is None:
return

loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
task = loop.create_task(fn())
loop.run_until_complete(task)
except RuntimeError:
except (RuntimeError, AttributeError, TypeError) as e:
# This could happen if an object stored an event loop and now
# that event loop is closed. There's nothing we can do other than
# advise the user to use explicit cleanup methods.
# that event loop is closed, or if asyncio modules are being
# torn down during interpreter shutdown.
#
# Uses logging module instead of get_logger() to avoid I/O errors
# if the wrapped function is called as a finalizer.
logging.info(
f"Could not run the async function {fn.__name__} because the event loop is closed. "
"This usually means the object was not properly cleaned up. Please use explicit "
"cleanup methods (e.g., disconnect(), close()) or use the object as an async "
"context manager.",
)
if logging is not None:
try:
logging.info(
f"Could not run the async function {fn.__name__} because the event loop is closed "
"or the interpreter is shutting down. "
"This usually means the object was not properly cleaned up. Please use explicit "
"cleanup methods (e.g., disconnect(), close()) or use the object as an async "
"context manager.",
)
except Exception:
# Even logging failed, interpreter is really shutting down
pass
return
except Exception:
# Any other unexpected exception should be silently ignored during shutdown
return

return wrapper
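
A hedged sketch of the pattern these guards protect (the class below is hypothetical, not a redisvl type): wrapping an async cleanup coroutine so a synchronous finalizer can run it, even if that finalizer fires during interpreter shutdown.

import asyncio

from redisvl.utils.utils import sync_wrapper

class ExampleClient:
    # hypothetical resource holder, used only to illustrate the wrapper
    async def aclose(self) -> None:
        await asyncio.sleep(0)  # stand-in for real async cleanup

    def __del__(self) -> None:
        # safe even at interpreter shutdown thanks to the guards above
        sync_wrapper(self.aclose)()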