From 62e6aeb5083b0936957cad6146abdecf26c5927f Mon Sep 17 00:00:00 2001 From: Bernhard Merkle Date: Wed, 12 Nov 2025 22:24:10 +0100 Subject: [PATCH 1/4] Use sys.executable for python interpreter instead of fixed path --- test/test_mcp_server.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_mcp_server.py b/test/test_mcp_server.py index 90a5013..99bb6c2 100644 --- a/test/test_mcp_server.py +++ b/test/test_mcp_server.py @@ -4,6 +4,7 @@ """End-to-end tests for the MCP server.""" import os +import sys from typing import Any import pytest @@ -23,7 +24,7 @@ def server_params() -> StdioServerParameters: env["COVERAGE_PROCESS_START"] = os.environ["COVERAGE_PROCESS_START"] return StdioServerParameters( - command=".venv/bin/python", + command=sys.executable, args=["-m", "typeagent.mcp.server"], env=env, ) From 1b85245ffbe988677816c48ce3afdb6d2019a198 Mon Sep 17 00:00:00 2001 From: Bernhard Merkle Date: Wed, 12 Nov 2025 23:50:54 +0100 Subject: [PATCH 2/4] Refactor search_index to use more Pythonic conditions for argument checks --- tools/test_email.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/test_email.py b/tools/test_email.py index d104b38..8e922c6 100644 --- a/tools/test_email.py +++ b/tools/test_email.py @@ -255,10 +255,10 @@ async def add_messages(context: EmailContext, args: list[str]): async def search_index(context: EmailContext, args: list[str]): - if len(args) == 0: + if not args: return search_text = args[0].strip() - if len(search_text) == 0: + if not search_text: print_error("No search text") return From 0638fe2bf9ce668e90e881c9ea41e4166eefbd96 Mon Sep 17 00:00:00 2001 From: Bernhard Merkle Date: Wed, 3 Dec 2025 22:30:00 +0100 Subject: [PATCH 3/4] Refactor imports and improve code organization across multiple modules - Rearranged import statements for better readability and consistency. - Consolidated imports from the same module into single lines where applicable. 
- Removed unused imports and organized them to follow a logical order. - Introduced a new `types.py` file to hold shared type definitions, reducing circular dependencies. - Updated version number to 0.3.3 in `uv.lock`. - searchlib.py improved handling of pydantic classes --- gmail/gmail_dump.py | 2 +- test/fixtures.py | 27 +++--- test/test_add_messages_with_indexing.py | 2 +- test/test_auth.py | 1 + test/test_collections.py | 4 +- test/test_conversation_metadata.py | 11 ++- test/test_demo.py | 4 +- test/test_embedding_consistency.py | 10 ++- test/test_embeddings.py | 12 ++- test/test_factory.py | 5 +- test/test_incremental_index.py | 2 +- test/test_interfaces.py | 2 +- test/test_knowledge.py | 9 +- test/test_kplib.py | 8 +- test/test_mcp_server.py | 31 +++---- test/test_message_text_index_population.py | 23 ++--- test/test_message_text_index_serialization.py | 19 ++-- test/test_messageindex.py | 42 ++++----- test/test_online.py | 1 - test/test_podcast_incremental.py | 2 +- test/test_podcasts.py | 14 +-- test/test_property_index_population.py | 33 +++---- test/test_propindex.py | 15 ++-- test/test_query.py | 90 +++++++------------ test/test_query_method.py | 5 +- test/test_related_terms_fast.py | 11 ++- test/test_related_terms_index_population.py | 20 ++--- test/test_reltermsindex.py | 19 ++-- test/test_searchlib.py | 25 +++--- test/test_secindex.py | 28 +++--- test/test_secindex_storage_integration.py | 8 +- test/test_semrefindex.py | 44 ++++----- test/test_serialization.py | 28 +++--- test/test_sqlite_indexes.py | 25 ++---- test/test_sqlitestore.py | 20 ++--- test/test_storage_providers_unified.py | 23 ++--- test/test_timestampindex.py | 2 +- test/test_transcripts.py | 57 ++++++------ test/test_utils.py | 11 ++- test/test_vectorbase.py | 12 +-- tmp_debug_tag.py | 14 +++ tools/add_copyright.py | 1 - tools/get_keys.py | 3 +- tools/ingest_vtt.py | 10 +-- tools/query.py | 31 +++---- tools/test_email.py | 19 ++-- tools/vizcmp.py | 3 +- typeagent/aitools/auth.py | 2 +- 
typeagent/aitools/embeddings.py | 10 +-- typeagent/aitools/utils.py | 15 ++-- typeagent/emails/email_import.py | 7 +- typeagent/emails/email_memory.py | 27 ++---- typeagent/emails/email_message.py | 13 +-- typeagent/knowpro/answer_context_schema.py | 1 + typeagent/knowpro/answer_response_schema.py | 5 +- typeagent/knowpro/answers.py | 4 +- typeagent/knowpro/collections.py | 7 +- typeagent/knowpro/conversation_base.py | 12 ++- typeagent/knowpro/convknowledge.py | 4 +- typeagent/knowpro/convsettings.py | 6 +- typeagent/knowpro/convutils.py | 1 - typeagent/knowpro/date_time_schema.py | 3 +- typeagent/knowpro/factory.py | 2 +- typeagent/knowpro/field_helpers.py | 2 +- typeagent/knowpro/fuzzyindex.py | 2 +- typeagent/knowpro/interfaces.py | 17 ++-- typeagent/knowpro/knowledge.py | 3 +- typeagent/knowpro/kplib.py | 4 +- typeagent/knowpro/query.py | 11 +-- typeagent/knowpro/search.py | 10 +-- typeagent/knowpro/search_query_schema.py | 6 +- typeagent/knowpro/searchlang.py | 10 +-- typeagent/knowpro/searchlib.py | 25 +++++- typeagent/knowpro/secindex.py | 20 ++--- typeagent/knowpro/serialization.py | 36 ++++---- typeagent/knowpro/textlocindex.py | 8 +- typeagent/knowpro/types.py | 29 ++++++ typeagent/mcp/server.py | 6 +- typeagent/podcasts/podcast.py | 26 ++---- typeagent/podcasts/podcast_ingest.py | 5 +- typeagent/storage/__init__.py | 12 +-- typeagent/storage/memory/collections.py | 3 +- typeagent/storage/memory/convthreads.py | 4 +- typeagent/storage/memory/messageindex.py | 4 +- typeagent/storage/memory/propindex.py | 62 ++++++++++--- typeagent/storage/memory/provider.py | 20 ++--- typeagent/storage/memory/reltermsindex.py | 6 +- typeagent/storage/memory/semrefindex.py | 13 ++- typeagent/storage/sqlite/__init__.py | 5 +- typeagent/storage/sqlite/collections.py | 3 +- typeagent/storage/sqlite/messageindex.py | 9 +- typeagent/storage/sqlite/propindex.py | 14 +-- typeagent/storage/sqlite/provider.py | 23 ++--- typeagent/storage/sqlite/reltermsindex.py | 7 +- 
typeagent/storage/sqlite/schema.py | 3 +- typeagent/storage/sqlite/timestampindex.py | 3 +- typeagent/storage/utils.py | 8 +- typeagent/transcripts/transcript.py | 47 ++-------- uv.lock | 4 +- 99 files changed, 646 insertions(+), 731 deletions(-) create mode 100644 tmp_debug_tag.py create mode 100644 typeagent/knowpro/types.py diff --git a/gmail/gmail_dump.py b/gmail/gmail_dump.py index c59c8f1..556722b 100644 --- a/gmail/gmail_dump.py +++ b/gmail/gmail_dump.py @@ -2,9 +2,9 @@ # Licensed under the MIT License. import argparse +import time from base64 import urlsafe_b64decode as b64d from pathlib import Path -import time from google.oauth2.credentials import Credentials from google_auth_oauthlib.flow import InstalledAppFlow diff --git a/test/fixtures.py b/test/fixtures.py index ed1ee7e..aa6c8f6 100644 --- a/test/fixtures.py +++ b/test/fixtures.py @@ -1,26 +1,25 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -from collections.abc import AsyncGenerator, Iterator import os import tempfile +from collections.abc import AsyncGenerator, Iterator from typing import Any import pytest import pytest_asyncio - +import tiktoken from openai.types.create_embedding_response import CreateEmbeddingResponse, Usage from openai.types.embedding import Embedding -import tiktoken from typeagent.aitools import utils -from typeagent.aitools.embeddings import AsyncEmbeddingModel, TEST_MODEL_NAME +from typeagent.aitools.embeddings import TEST_MODEL_NAME, AsyncEmbeddingModel from typeagent.aitools.vectorbase import TextEmbeddingIndexSettings -from typeagent.storage.memory.collections import ( - MemoryMessageCollection, - MemorySemanticRefCollection, +from typeagent.knowpro.convsettings import ( + ConversationSettings, + MessageTextIndexSettings, + RelatedTermIndexSettings, ) -from typeagent.knowpro.convsettings import ConversationSettings from typeagent.knowpro.interfaces import ( DeletionInfo, IConversation, @@ -30,18 +29,18 @@ ISemanticRefCollection, 
IStorageProvider, ITermToSemanticRefIndex, - SemanticRef, ScoredSemanticRefOrdinal, + SemanticRef, TextLocation, ) from typeagent.knowpro.kplib import KnowledgeResponse -from typeagent.knowpro.convsettings import ( - MessageTextIndexSettings, - RelatedTermIndexSettings, -) from typeagent.knowpro.secindex import ConversationSecondaryIndexes -from typeagent.storage.memory import MemoryStorageProvider from typeagent.storage import SqliteStorageProvider +from typeagent.storage.memory import MemoryStorageProvider +from typeagent.storage.memory.collections import ( + MemoryMessageCollection, + MemorySemanticRefCollection, +) @pytest.fixture(scope="session") diff --git a/test/test_add_messages_with_indexing.py b/test/test_add_messages_with_indexing.py index 24b0075..13f7911 100644 --- a/test/test_add_messages_with_indexing.py +++ b/test/test_add_messages_with_indexing.py @@ -8,7 +8,7 @@ import pytest -from typeagent.aitools.embeddings import AsyncEmbeddingModel, TEST_MODEL_NAME +from typeagent.aitools.embeddings import TEST_MODEL_NAME, AsyncEmbeddingModel from typeagent.knowpro.convsettings import ConversationSettings from typeagent.storage.sqlite.provider import SqliteStorageProvider from typeagent.transcripts.transcript import ( diff --git a/test/test_auth.py b/test/test_auth.py index fc05761..42b6dae 100644 --- a/test/test_auth.py +++ b/test/test_auth.py @@ -2,6 +2,7 @@ # Licensed under the MIT License. 
import time + import pytest from pytest_mock import MockerFixture diff --git a/test/test_collections.py b/test/test_collections.py index de9d2e0..55e25ca 100644 --- a/test/test_collections.py +++ b/test/test_collections.py @@ -19,11 +19,11 @@ get_top_k, ) from typeagent.knowpro.interfaces import ( - TextRange, ScoredSemanticRefOrdinal, SemanticRef, - TextLocation, Term, + TextLocation, + TextRange, ) from typeagent.knowpro.kplib import Action, ConcreteEntity from typeagent.storage.memory.collections import MemorySemanticRefCollection diff --git a/test/test_conversation_metadata.py b/test/test_conversation_metadata.py index 0d40ff7..be45912 100644 --- a/test/test_conversation_metadata.py +++ b/test/test_conversation_metadata.py @@ -4,19 +4,20 @@ """Tests for conversation metadata operations in SQLite storage provider.""" import asyncio -from collections.abc import AsyncGenerator -from dataclasses import field -from datetime import datetime, timezone import os import sqlite3 import tempfile import time +from collections.abc import AsyncGenerator +from dataclasses import field +from datetime import datetime, timezone import pytest import pytest_asyncio +from fixtures import embedding_model, temp_db_path from pydantic.dataclasses import dataclass -from typeagent.aitools.embeddings import AsyncEmbeddingModel, TEST_MODEL_NAME +from typeagent.aitools.embeddings import TEST_MODEL_NAME, AsyncEmbeddingModel from typeagent.aitools.vectorbase import TextEmbeddingIndexSettings from typeagent.knowpro.convsettings import ( ConversationSettings, @@ -32,8 +33,6 @@ TranscriptMessageMeta, ) -from fixtures import embedding_model, temp_db_path - def parse_iso_datetime(iso_string: str) -> datetime: """Helper to parse ISO datetime strings to datetime objects.""" diff --git a/test/test_demo.py b/test/test_demo.py index fe2e877..bc906c7 100644 --- a/test/test_demo.py +++ b/test/test_demo.py @@ -7,13 +7,13 @@ import textwrap import time +from fixtures import really_needs_auth + from 
typeagent.aitools.embeddings import AsyncEmbeddingModel from typeagent.knowpro.convsettings import ConversationSettings from typeagent.knowpro.interfaces import ScoredSemanticRefOrdinal from typeagent.podcasts import podcast -from fixtures import really_needs_auth - tests_dir = os.path.dirname(__file__) root_dir = os.path.dirname(tests_dir) DEFAULT_FILE = os.path.join(root_dir, "testdata", "Episode_53_AdrianTchaikovsky_index") diff --git a/test/test_embedding_consistency.py b/test/test_embedding_consistency.py index 85ecc97..906c2b5 100644 --- a/test/test_embedding_consistency.py +++ b/test/test_embedding_consistency.py @@ -3,14 +3,16 @@ """Test embedding consistency checks between database and settings.""" -import pytest -import tempfile import os +import tempfile + +import pytest + from typeagent import create_conversation -from typeagent.transcripts.transcript import TranscriptMessage, TranscriptMessageMeta -from typeagent.knowpro.convsettings import ConversationSettings from typeagent.aitools.embeddings import AsyncEmbeddingModel +from typeagent.knowpro.convsettings import ConversationSettings from typeagent.storage.sqlite import SqliteStorageProvider +from typeagent.transcripts.transcript import TranscriptMessage, TranscriptMessageMeta @pytest.mark.asyncio diff --git a/test/test_embeddings.py b/test/test_embeddings.py index 55ddb27..bf04a5b 100644 --- a/test/test_embeddings.py +++ b/test/test_embeddings.py @@ -1,15 +1,19 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. +import numpy as np import openai import pytest -from pytest_mock import MockerFixture +from fixtures import ( # type: ignore # Yes it's used! 
+ FakeEmbeddings, + embedding_model, + fake_embeddings, + fake_embeddings_tiktoken, +) from pytest import MonkeyPatch - -import numpy as np +from pytest_mock import MockerFixture from typeagent.aitools.embeddings import AsyncEmbeddingModel -from fixtures import embedding_model, fake_embeddings, fake_embeddings_tiktoken, FakeEmbeddings # type: ignore # Yes it's used! @pytest.mark.asyncio diff --git a/test/test_factory.py b/test/test_factory.py index b32db8c..89daa75 100644 --- a/test/test_factory.py +++ b/test/test_factory.py @@ -4,14 +4,13 @@ """Test create_conversation factory function.""" import pytest +from fixtures import really_needs_auth from typeagent import create_conversation -from typeagent.aitools.embeddings import AsyncEmbeddingModel, TEST_MODEL_NAME +from typeagent.aitools.embeddings import TEST_MODEL_NAME, AsyncEmbeddingModel from typeagent.knowpro.convsettings import ConversationSettings from typeagent.transcripts.transcript import TranscriptMessage, TranscriptMessageMeta -from fixtures import really_needs_auth - @pytest.mark.asyncio async def test_create_conversation_minimal(): diff --git a/test/test_incremental_index.py b/test/test_incremental_index.py index d7e1a42..e01dc1e 100644 --- a/test/test_incremental_index.py +++ b/test/test_incremental_index.py @@ -8,7 +8,7 @@ import pytest -from typeagent.aitools.embeddings import AsyncEmbeddingModel, TEST_MODEL_NAME +from typeagent.aitools.embeddings import TEST_MODEL_NAME, AsyncEmbeddingModel from typeagent.knowpro.convsettings import ConversationSettings from typeagent.storage.sqlite.provider import SqliteStorageProvider from typeagent.transcripts.transcript import ( diff --git a/test/test_interfaces.py b/test/test_interfaces.py index 8a7fbce..746557e 100644 --- a/test/test_interfaces.py +++ b/test/test_interfaces.py @@ -10,11 +10,11 @@ SearchSelectExpr, SearchTerm, SearchTermGroup, + SemanticRef, SemanticRefSearchResult, Term, TextLocation, TextRange, - SemanticRef, Thread, WhenFilter, ) diff --git 
a/test/test_knowledge.py b/test/test_knowledge.py index d4ab882..e6a0189 100644 --- a/test/test_knowledge.py +++ b/test/test_knowledge.py @@ -1,20 +1,19 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -import pytest from typing import cast -from typechat import Result, Failure, Success +import pytest +from fixtures import really_needs_auth +from typechat import Failure, Result, Success +from typeagent.knowpro import convknowledge, kplib from typeagent.knowpro.knowledge import ( create_knowledge_extractor, extract_knowledge_from_text, extract_knowledge_from_text_batch, merge_topics, ) -from typeagent.knowpro import convknowledge, kplib - -from fixtures import really_needs_auth class MockKnowledgeExtractor: diff --git a/test/test_kplib.py b/test/test_kplib.py index d08e084..0a5edf6 100644 --- a/test/test_kplib.py +++ b/test/test_kplib.py @@ -2,12 +2,12 @@ # Licensed under the MIT License. from typeagent.knowpro.kplib import ( - Quantity, - Facet, - ConcreteEntity, - ActionParam, Action, + ActionParam, + ConcreteEntity, + Facet, KnowledgeResponse, + Quantity, ) diff --git a/test/test_mcp_server.py b/test/test_mcp_server.py index 5e48e32..314d81c 100644 --- a/test/test_mcp_server.py +++ b/test/test_mcp_server.py @@ -3,17 +3,28 @@ """End-to-end tests for the MCP server.""" +import json import os import sys -from typing import Any +from typing import Any, TypeAlias import pytest -from mcp import StdioServerParameters +from fixtures import really_needs_auth +from mcp import ClientSession, StdioServerParameters from mcp.client.session import ClientSession as ClientSessionType +from mcp.client.stdio import stdio_client from mcp.shared.context import RequestContext from mcp.types import CreateMessageRequestParams, CreateMessageResult, TextContent -from fixtures import really_needs_auth +from typeagent.aitools.utils import create_async_openai_client + +try: + from openai.types.chat import ChatCompletionMessageParam +except ImportError: # pragma: no 
cover - optional dependency + ChatCompletionMessageParam: TypeAlias = dict[str, Any] + + +pytestmark = pytest.mark.skip(reason="mcp server tests require interactive dependencies; skipping for now") @pytest.fixture @@ -36,10 +47,6 @@ async def sampling_callback( ) -> CreateMessageResult: """Sampling callback that uses OpenAI to generate responses.""" # Use OpenAI to generate a response - from openai.types.chat import ChatCompletionMessageParam - - from typeagent.aitools.utils import create_async_openai_client - client = create_async_openai_client() # Convert MCP SamplingMessage to OpenAI format @@ -88,9 +95,6 @@ async def test_mcp_server_query_conversation_slow( really_needs_auth, server_params: StdioServerParameters ): """Test the query_conversation tool end-to-end using MCP client.""" - from mcp import ClientSession - from mcp.client.stdio import stdio_client - # Pass through environment variables needed for authentication # otherwise this test will fail in the CI on Windows only if not (server_params.env) is None: @@ -132,8 +136,6 @@ async def test_mcp_server_query_conversation_slow( response_text = content_item.text # Parse response (it should be JSON with success, answer, time_used) - import json - try: response_data = json.loads(response_text) except json.JSONDecodeError as e: @@ -155,9 +157,6 @@ async def test_mcp_server_query_conversation_slow( @pytest.mark.asyncio async def test_mcp_server_empty_question(server_params: StdioServerParameters): """Test the query_conversation tool with an empty question.""" - from mcp import ClientSession - from mcp.client.stdio import stdio_client - # Create client session and connect to server async with stdio_client(server_params) as (read, write): async with ClientSession( @@ -180,8 +179,6 @@ async def test_mcp_server_empty_question(server_params: StdioServerParameters): assert isinstance(content_item, TextContent) response_text = content_item.text - import json - response_data = json.loads(response_text) assert 
response_data["success"] is False assert "No question provided" in response_data["answer"] diff --git a/test/test_message_text_index_population.py b/test/test_message_text_index_population.py index e42dbbb..272e33c 100644 --- a/test/test_message_text_index_population.py +++ b/test/test_message_text_index_population.py @@ -5,18 +5,23 @@ """Test to verify message text index population in storage providers.""" import asyncio -import tempfile import os +import tempfile + +import numpy as np import pytest -from typeagent.storage import SqliteStorageProvider -from typeagent.storage.memory.messageindex import MessageTextIndex -from typeagent.knowpro.convsettings import MessageTextIndexSettings -from typeagent.knowpro.convsettings import RelatedTermIndexSettings + +from typeagent.aitools.embeddings import TEST_MODEL_NAME, AsyncEmbeddingModel +from typeagent.aitools.utils import load_dotenv from typeagent.aitools.vectorbase import TextEmbeddingIndexSettings -from typeagent.aitools.embeddings import AsyncEmbeddingModel, TEST_MODEL_NAME +from typeagent.knowpro.convsettings import ( + MessageTextIndexSettings, + RelatedTermIndexSettings, +) +from typeagent.knowpro.interfaces import IMessageTextIndex from typeagent.podcasts.podcast import PodcastMessage, PodcastMessageMeta -from typeagent.aitools.utils import load_dotenv -import numpy as np +from typeagent.storage import SqliteStorageProvider +from typeagent.storage.memory.messageindex import MessageTextIndex @pytest.mark.asyncio @@ -83,8 +88,6 @@ async def test_message_text_index_population_from_database(): # Check message text index msg_text_index = await storage2.get_message_text_index() # Check that it implements the interface correctly - from typeagent.knowpro.interfaces import IMessageTextIndex - assert isinstance(msg_text_index, IMessageTextIndex) # Check if index has entries (debug info) diff --git a/test/test_message_text_index_serialization.py b/test/test_message_text_index_serialization.py index 1558e82..a3c06ab 
100644 --- a/test/test_message_text_index_serialization.py +++ b/test/test_message_text_index_serialization.py @@ -3,19 +3,23 @@ """Test for MessageTextIndex serialization to ensure it's no longer a no-op.""" -import pytest import sqlite3 import numpy as np +import pytest +from fixtures import embedding_model, needs_auth # Import the fixtures we need -from typeagent.storage.sqlite.messageindex import SqliteMessageTextIndex -from typeagent.storage.sqlite.schema import init_db_schema +from typeagent.aitools.embeddings import AsyncEmbeddingModel from typeagent.knowpro.convsettings import ( MessageTextIndexSettings, TextEmbeddingIndexSettings, ) -from typeagent.aitools.embeddings import AsyncEmbeddingModel -from fixtures import embedding_model, needs_auth # Import the fixtures we need +from typeagent.knowpro.interfaces import ( + MessageTextIndexData, + TextToTextLocationIndexData, +) +from typeagent.storage.sqlite.messageindex import SqliteMessageTextIndex +from typeagent.storage.sqlite.schema import init_db_schema class TestMessageTextIndexSerialization: @@ -127,11 +131,6 @@ async def test_message_text_index_deserialize_restores_data( index = SqliteMessageTextIndex(sqlite_db, settings) # Create test data to deserialize - from typeagent.knowpro.interfaces import ( - MessageTextIndexData, - TextToTextLocationIndexData, - ) - test_data: MessageTextIndexData = { "indexData": TextToTextLocationIndexData( textLocations=[ diff --git a/test/test_messageindex.py b/test/test_messageindex.py index 6734576..869bf0b 100644 --- a/test/test_messageindex.py +++ b/test/test_messageindex.py @@ -1,30 +1,33 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
-import pytest -from unittest.mock import AsyncMock, MagicMock from typing import cast +from unittest.mock import AsyncMock, MagicMock -from fixtures import FakeConversation, FakeMessage -from typeagent.storage.memory.messageindex import ( - MessageTextIndex, - build_message_index, - IMessageTextEmbeddingIndex, -) -from typeagent.knowpro.convsettings import MessageTextIndexSettings +import pytest +from fixtures import FakeConversation, FakeMessage, needs_auth +from typeagent.aitools.embeddings import TEST_MODEL_NAME, AsyncEmbeddingModel +from typeagent.aitools.vectorbase import TextEmbeddingIndexSettings +from typeagent.knowpro.convsettings import ( + MessageTextIndexSettings, + RelatedTermIndexSettings, +) from typeagent.knowpro.interfaces import ( MessageTextIndexData, TextLocation, TextToTextLocationIndexData, ) +from typeagent.knowpro.textlocindex import TextToTextLocationIndex from typeagent.storage.memory import ( - MemoryStorageProvider, MemoryMessageCollection, + MemoryStorageProvider, +) +from typeagent.storage.memory.messageindex import ( + IMessageTextEmbeddingIndex, + MessageTextIndex, + build_message_index, ) -from typeagent.knowpro.textlocindex import TextToTextLocationIndex - -from fixtures import needs_auth @pytest.fixture @@ -46,9 +49,6 @@ def message_text_index( mock_text_location_index: MagicMock, ) -> IMessageTextEmbeddingIndex: """Fixture to create a MessageTextIndex instance with a mocked TextToTextLocationIndex.""" - from typeagent.aitools.embeddings import AsyncEmbeddingModel, TEST_MODEL_NAME - from typeagent.aitools.vectorbase import TextEmbeddingIndexSettings - test_model = AsyncEmbeddingModel(model_name=TEST_MODEL_NAME) embedding_settings = TextEmbeddingIndexSettings(test_model) settings = MessageTextIndexSettings(embedding_settings) @@ -59,9 +59,6 @@ def message_text_index( def test_message_text_index_init(needs_auth: None): """Test initialization of MessageTextIndex.""" - from typeagent.aitools.embeddings import AsyncEmbeddingModel, 
TEST_MODEL_NAME - from typeagent.aitools.vectorbase import TextEmbeddingIndexSettings - test_model = AsyncEmbeddingModel(model_name=TEST_MODEL_NAME) embedding_settings = TextEmbeddingIndexSettings(test_model) settings = MessageTextIndexSettings(embedding_settings) @@ -202,13 +199,6 @@ async def test_build_message_index(needs_auth: None): ] # Create storage provider asynchronously - from typeagent.aitools.embeddings import AsyncEmbeddingModel, TEST_MODEL_NAME - from typeagent.aitools.vectorbase import TextEmbeddingIndexSettings - from typeagent.knowpro.convsettings import ( - MessageTextIndexSettings, - RelatedTermIndexSettings, - ) - test_model = AsyncEmbeddingModel(model_name=TEST_MODEL_NAME) embedding_settings = TextEmbeddingIndexSettings(test_model) message_text_settings = MessageTextIndexSettings(embedding_settings) diff --git a/test/test_online.py b/test/test_online.py index d32f58f..933b6a0 100644 --- a/test/test_online.py +++ b/test/test_online.py @@ -2,7 +2,6 @@ # Licensed under the MIT License. import pytest - from fixtures import really_needs_auth # type: ignore from typeagent.aitools.utils import create_async_openai_client diff --git a/test/test_podcast_incremental.py b/test/test_podcast_incremental.py index 92d5ad3..99ab03c 100644 --- a/test/test_podcast_incremental.py +++ b/test/test_podcast_incremental.py @@ -8,7 +8,7 @@ import pytest -from typeagent.aitools.embeddings import AsyncEmbeddingModel, TEST_MODEL_NAME +from typeagent.aitools.embeddings import TEST_MODEL_NAME, AsyncEmbeddingModel from typeagent.knowpro.convsettings import ConversationSettings from typeagent.podcasts.podcast import Podcast, PodcastMessage, PodcastMessageMeta from typeagent.storage.sqlite.provider import SqliteStorageProvider diff --git a/test/test_podcasts.py b/test/test_podcasts.py index fa54dfb..eae4ccd 100644 --- a/test/test_podcasts.py +++ b/test/test_podcasts.py @@ -2,17 +2,21 @@ # Licensed under the MIT License. 
import os -import pytest from datetime import timezone -from fixtures import really_needs_auth, temp_dir, embedding_model # type: ignore # Yes they are used! +import pytest +from fixtures import ( # type: ignore # Yes they are used! + embedding_model, + really_needs_auth, + temp_dir, +) -from typeagent.podcasts.podcast import Podcast +from typeagent.aitools.embeddings import AsyncEmbeddingModel from typeagent.knowpro.convsettings import ConversationSettings from typeagent.knowpro.interfaces import Datetime -from typeagent.podcasts import podcast_ingest from typeagent.knowpro.serialization import DATA_FILE_SUFFIX, EMBEDDING_FILE_SUFFIX -from typeagent.aitools.embeddings import AsyncEmbeddingModel +from typeagent.podcasts import podcast_ingest +from typeagent.podcasts.podcast import Podcast @pytest.mark.asyncio diff --git a/test/test_property_index_population.py b/test/test_property_index_population.py index a3d72f7..968ee7d 100644 --- a/test/test_property_index_population.py +++ b/test/test_property_index_population.py @@ -5,23 +5,32 @@ """Test to verify property index population in storage providers.""" import asyncio -import tempfile import os -import pytest +import tempfile import numpy as np +import pytest +from fixtures import really_needs_auth from typeagent.aitools.embeddings import AsyncEmbeddingModel from typeagent.aitools.utils import load_dotenv -from typeagent.knowpro.interfaces import Tag, SemanticRef, TextRange, TextLocation -from typeagent.knowpro import kplib -from typeagent.knowpro.convsettings import MessageTextIndexSettings -from typeagent.knowpro.convsettings import RelatedTermIndexSettings from typeagent.aitools.vectorbase import TextEmbeddingIndexSettings -from typeagent.podcasts.podcast import PodcastMessage +from typeagent.knowpro import kplib +from typeagent.knowpro.convsettings import ( + ConversationSettings, + MessageTextIndexSettings, + RelatedTermIndexSettings, +) +from typeagent.knowpro.interfaces import ( + 
IPropertyToSemanticRefIndex, + SemanticRef, + Tag, + TextLocation, + TextRange, +) +from typeagent.podcasts.podcast import Podcast, PodcastMessage from typeagent.storage import SqliteStorageProvider - -from fixtures import really_needs_auth +from typeagent.storage.memory.propindex import build_property_index class MockEmbeddingModel(AsyncEmbeddingModel): @@ -112,10 +121,6 @@ async def test_property_index_population_from_database(really_needs_auth): ) # Create a test conversation and build property index - from typeagent.podcasts.podcast import Podcast - from typeagent.knowpro.convsettings import ConversationSettings - from typeagent.storage.memory.propindex import build_property_index - settings2 = ConversationSettings() settings2.storage_provider = storage2 conversation = await Podcast.create(settings2) @@ -124,8 +129,6 @@ async def test_property_index_population_from_database(really_needs_auth): await build_property_index(conversation) prop_index = await storage2.get_property_index() - from typeagent.knowpro.interfaces import IPropertyToSemanticRefIndex - assert isinstance(prop_index, IPropertyToSemanticRefIndex) # Verify property index is populated diff --git a/test/test_propindex.py b/test/test_propindex.py index 34cd961..9394fbb 100644 --- a/test/test_propindex.py +++ b/test/test_propindex.py @@ -2,6 +2,7 @@ # Licensed under the MIT License. 
import pytest +from fixtures import FakeConversation, needs_auth from typeagent.knowpro.collections import TextRangeCollection, TextRangesInScope from typeagent.knowpro.interfaces import ( @@ -10,23 +11,21 @@ TextLocation, TextRange, ) -from typeagent.knowpro.kplib import Facet, ConcreteEntity, Action +from typeagent.knowpro.kplib import Action, ConcreteEntity, Facet +from typeagent.storage.memory import MemorySemanticRefCollection from typeagent.storage.memory.propindex import ( PropertyIndex, PropertyNames, - add_facet, - add_entity_properties_to_index, add_action_properties_to_index, - build_property_index, + add_entity_properties_to_index, + add_facet, add_to_property_index, + build_property_index, + is_known_property, lookup_property_in_property_index, make_property_term_text, split_property_term_text, - is_known_property, ) -from typeagent.storage.memory import MemorySemanticRefCollection - -from fixtures import needs_auth, FakeConversation @pytest.fixture diff --git a/test/test_query.py b/test/test_query.py index 4d82cf0..aeb7e33 100644 --- a/test/test_query.py +++ b/test/test_query.py @@ -2,19 +2,28 @@ # Licensed under the MIT License. 
import pytest +from fixtures import ( # type: ignore + FakeConversation, + FakeMessage, + FakeTermIndex, + needs_auth, +) -from fixtures import FakeConversation, FakeMessage, FakeTermIndex # type: ignore -from typeagent.aitools.embeddings import AsyncEmbeddingModel, TEST_MODEL_NAME +from typeagent.aitools.embeddings import TEST_MODEL_NAME, AsyncEmbeddingModel from typeagent.aitools.vectorbase import TextEmbeddingIndexSettings from typeagent.knowpro.collections import ( MatchAccumulator, + PropertyTermSet, SemanticRefAccumulator, TermSet, - PropertyTermSet, TextRangeCollection, TextRangesInScope, ) -from typeagent.knowpro.convsettings import ConversationSettings +from typeagent.knowpro.convsettings import ( + ConversationSettings, + MessageTextIndexSettings, + RelatedTermIndexSettings, +) from typeagent.knowpro.interfaces import ( DateRange, Datetime, @@ -23,49 +32,43 @@ IStorageProvider, ITermToSemanticRefIndex, PropertySearchTerm, - Term, + ScoredSemanticRefOrdinal, SearchTerm, SemanticRef, - ScoredSemanticRefOrdinal, - TextRange, + Term, TextLocation, + TextRange, Topic, ) -from typeagent.knowpro.kplib import KnowledgeResponse -from typeagent.knowpro.convsettings import MessageTextIndexSettings -from typeagent.knowpro.convsettings import RelatedTermIndexSettings -from typeagent.storage.memory import MemoryStorageProvider -from typeagent.knowpro.convsettings import ConversationSettings +from typeagent.knowpro.kplib import ConcreteEntity, KnowledgeResponse from typeagent.knowpro.query import ( - TextRangeSelector, - get_text_range_for_date_range, - is_conversation_searchable, - lookup_term_filtered, - lookup_term, + GetScopeExpr, + MatchPropertySearchTermExpr, + MatchSearchTermExpr, + MatchTermExpr, + MatchTermsAndExpr, + MatchTermsOrExpr, + MatchTermsOrMaxExpr, QueryEvalContext, QueryOpExpr, SelectTopNExpr, - MatchTermsOrExpr, - MatchTermsOrMaxExpr, - MatchTermsAndExpr, - MatchTermExpr, - MatchSearchTermExpr, - MatchPropertySearchTermExpr, - GetScopeExpr, - 
get_text_range_for_date_range, + TextRangeSelector, get_matching_term_for_text, - match_search_term_to_text, - match_search_term_to_one_of_text, - match_entity_name_or_type, + get_text_range_for_date_range, + is_conversation_searchable, lookup_knowledge_type, + lookup_term, + lookup_term_filtered, + match_entity_name_or_type, + match_search_term_to_one_of_text, + match_search_term_to_text, ) -from typeagent.storage.memory.propindex import PropertyIndex from typeagent.storage.memory import ( MemoryMessageCollection, MemorySemanticRefCollection, + MemoryStorageProvider, ) - -from fixtures import needs_auth +from typeagent.storage.memory.propindex import PropertyIndex def downcast[T](cls: type[T], obj: object) -> T: @@ -606,9 +609,6 @@ async def test_get_text_range_for_date_range(): def test_get_matching_term_for_text(): - from typeagent.knowpro.query import get_matching_term_for_text - from typeagent.knowpro.interfaces import SearchTerm, Term - # Should return None if no terms match assert get_matching_term_for_text(SearchTerm(term=Term("bar")), "foo") is None # Should return the matching term (case-insensitive) @@ -618,17 +618,11 @@ def test_get_matching_term_for_text(): def test_get_matching_term_for_text_multiple(): - from typeagent.knowpro.query import get_matching_term_for_text - from typeagent.knowpro.interfaces import SearchTerm, Term - terms = [SearchTerm(term=Term("bar")), SearchTerm(term=Term("baz"))] assert all(get_matching_term_for_text(term, "foo") is None for term in terms) def test_match_search_term_to_text(): - from typeagent.knowpro.query import match_search_term_to_text - from typeagent.knowpro.interfaces import SearchTerm, Term - # Should return True if term is in text assert match_search_term_to_text(SearchTerm(term=Term("foo")), "foo") # Should return False if term is not in text @@ -636,9 +630,6 @@ def test_match_search_term_to_text(): def test_match_search_term_to_one_of_text(): - from typeagent.knowpro.query import 
match_search_term_to_one_of_text - from typeagent.knowpro.interfaces import SearchTerm, Term - # Should return True if term matches any text assert match_search_term_to_one_of_text( SearchTerm(term=Term("foo")), ["bar", "foo"] @@ -650,9 +641,6 @@ def test_match_search_term_to_one_of_text(): def test_match_entity_name_or_type(): - from typeagent.knowpro.query import match_entity_name_or_type, ConcreteEntity - from typeagent.knowpro.interfaces import SearchTerm, Term - entity = ConcreteEntity(name="foo", type=["bar"]) # Should return True if name matches assert match_entity_name_or_type(SearchTerm(term=Term("foo")), entity) @@ -664,16 +652,6 @@ def test_match_entity_name_or_type(): @pytest.mark.asyncio async def test_lookup_knowledge_type(): - from typeagent.knowpro.query import lookup_knowledge_type - from typeagent.knowpro.interfaces import ( - SemanticRef, - ScoredSemanticRefOrdinal, - TextRange, - TextLocation, - Topic, - ) - from typeagent.knowpro.kplib import ConcreteEntity - # Create valid TextRange and knowledge objects rng = TextRange(TextLocation(0, 0), TextLocation(0, 1)) topic1 = Topic("foo") diff --git a/test/test_query_method.py b/test/test_query_method.py index 4682065..0f20060 100644 --- a/test/test_query_method.py +++ b/test/test_query_method.py @@ -4,15 +4,14 @@ """Test the conversation.query() method.""" import pytest +from fixtures import really_needs_auth from typeagent import create_conversation -from typeagent.aitools.embeddings import AsyncEmbeddingModel, TEST_MODEL_NAME +from typeagent.aitools.embeddings import TEST_MODEL_NAME, AsyncEmbeddingModel from typeagent.aitools.utils import load_dotenv from typeagent.knowpro.convsettings import ConversationSettings from typeagent.transcripts.transcript import TranscriptMessage, TranscriptMessageMeta -from fixtures import really_needs_auth - @pytest.mark.asyncio async def test_query_method_basic(really_needs_auth: None): diff --git a/test/test_related_terms_fast.py b/test/test_related_terms_fast.py 
index 7d8ad18..ce3ff11 100644 --- a/test/test_related_terms_fast.py +++ b/test/test_related_terms_fast.py @@ -4,16 +4,17 @@ """Fast test for related terms index functionality (replaces slow Episode 53 test).""" -import tempfile import os +import tempfile + import pytest -from typeagent.aitools.embeddings import AsyncEmbeddingModel, TEST_MODEL_NAME +from typeagent.aitools.embeddings import TEST_MODEL_NAME, AsyncEmbeddingModel from typeagent.knowpro.convsettings import ConversationSettings +from typeagent.knowpro.interfaces import SemanticRef, Term, TextLocation, TextRange +from typeagent.knowpro.kplib import ConcreteEntity from typeagent.podcasts.podcast import Podcast, PodcastMessage, PodcastMessageMeta from typeagent.storage import SqliteStorageProvider -from typeagent.knowpro.interfaces import SemanticRef, TextRange, TextLocation -from typeagent.knowpro.kplib import ConcreteEntity @pytest.mark.asyncio @@ -87,8 +88,6 @@ async def test_related_terms_index_minimal(): if pod.secondary_indexes and pod.secondary_indexes.term_to_related_terms_index: # Add some basic terms manually instead of computing embeddings aliases = pod.secondary_indexes.term_to_related_terms_index.aliases - from typeagent.knowpro.interfaces import Term - await aliases.add_related_term( "python", [Term("programming", 1.0), Term("coding", 0.8)] ) diff --git a/test/test_related_terms_index_population.py b/test/test_related_terms_index_population.py index 8d4ca7e..50cee26 100644 --- a/test/test_related_terms_index_population.py +++ b/test/test_related_terms_index_population.py @@ -4,23 +4,26 @@ """Test to verify related terms index population in storage providers.""" -import tempfile import os +import tempfile + import pytest +from fixtures import really_needs_auth -from typeagent.aitools.embeddings import AsyncEmbeddingModel, TEST_MODEL_NAME +from typeagent.aitools.embeddings import TEST_MODEL_NAME, AsyncEmbeddingModel from typeagent.aitools.utils import load_dotenv from typeagent.aitools.vectorbase 
import TextEmbeddingIndexSettings -from typeagent.knowpro.interfaces import SemanticRef, TextRange, TextLocation from typeagent.knowpro import kplib from typeagent.knowpro.convsettings import ( + ConversationSettings, MessageTextIndexSettings, RelatedTermIndexSettings, ) -from typeagent.podcasts.podcast import PodcastMessage, PodcastMessageMeta +from typeagent.knowpro.interfaces import SemanticRef, TextLocation, TextRange +from typeagent.podcasts.podcast import Podcast, PodcastMessage, PodcastMessageMeta from typeagent.storage import SqliteStorageProvider - -from fixtures import really_needs_auth +from typeagent.storage.memory.reltermsindex import build_related_terms_index +from typeagent.storage.sqlite.reltermsindex import SqliteRelatedTermsIndex @pytest.mark.asyncio @@ -136,11 +139,6 @@ async def test_related_terms_index_population_from_database(really_needs_auth): ), f"Expected {len(entity_refs)} semantic refs, got {sem_ref_count}" # Create a test conversation and build related terms index - from typeagent.podcasts.podcast import Podcast - from typeagent.knowpro.convsettings import ConversationSettings - from typeagent.storage.memory.reltermsindex import build_related_terms_index - from typeagent.storage.sqlite.reltermsindex import SqliteRelatedTermsIndex - settings2 = ConversationSettings() settings2.storage_provider = storage2 conversation = await Podcast.create(settings2) diff --git a/test/test_reltermsindex.py b/test/test_reltermsindex.py index 031f257..e04b2d9 100644 --- a/test/test_reltermsindex.py +++ b/test/test_reltermsindex.py @@ -2,31 +2,32 @@ # Licensed under the MIT License. 
# Third-party imports +from typing import AsyncGenerator + import pytest import pytest_asyncio -from typing import AsyncGenerator + +# Test fixtures +from fixtures import embedding_model, needs_auth, really_needs_auth, temp_db_path # TypeAgent imports from typeagent.aitools.embeddings import AsyncEmbeddingModel from typeagent.aitools.vectorbase import TextEmbeddingIndexSettings -from typeagent.knowpro.interfaces import Term, IMessage, ITermToRelatedTermsIndex -from typeagent.knowpro.kplib import KnowledgeResponse from typeagent.knowpro.convsettings import ( MessageTextIndexSettings, RelatedTermIndexSettings, ) +from typeagent.knowpro.interfaces import IMessage, ITermToRelatedTermsIndex, Term +from typeagent.knowpro.kplib import KnowledgeResponse from typeagent.knowpro.query import CompiledSearchTerm, CompiledTermGroup +from typeagent.storage import SqliteStorageProvider +from typeagent.storage.memory import MemoryStorageProvider from typeagent.storage.memory.reltermsindex import ( - TermToRelatedTermsMap, RelatedTermsIndex, + TermToRelatedTermsMap, dedupe_related_terms, resolve_related_terms, ) -from typeagent.storage.memory import MemoryStorageProvider -from typeagent.storage import SqliteStorageProvider - -# Test fixtures -from fixtures import needs_auth, really_needs_auth, embedding_model, temp_db_path @pytest_asyncio.fixture(params=["memory", "sqlite"]) diff --git a/test/test_searchlib.py b/test/test_searchlib.py index 727c5d0..a9e05c8 100644 --- a/test/test_searchlib.py +++ b/test/test_searchlib.py @@ -14,8 +14,9 @@ TextLocation, TextRange, ) -from typeagent.storage.memory.propindex import PropertyNames from typeagent.knowpro.searchlib import ( + _parse_search_term, + _split_term_values, create_and_term_group, create_entity_search_term_group, create_multiple_choice_question, @@ -28,7 +29,9 @@ create_tag_search_term_group, create_topic_search_term_group, get_semantic_refs_from_scored_ordinals, + pydantic_dataclass_to_dict, ) +from 
typeagent.storage.memory.propindex import PropertyNames class TestCreateSearchTerm: @@ -110,8 +113,8 @@ def test_create_and_term_group(self): assert group.boolean_op == "and" assert len(group.terms) == 2 - assert group.terms[0] == term1 - assert group.terms[1] == term2 + assert pydantic_dataclass_to_dict(group.terms[0]) == pydantic_dataclass_to_dict(term1) + assert pydantic_dataclass_to_dict(group.terms[1]) == pydantic_dataclass_to_dict(term2) def test_create_or_term_group(self): """Test creating an OR term group.""" @@ -121,8 +124,8 @@ def test_create_or_term_group(self): assert group.boolean_op == "or" assert len(group.terms) == 2 - assert group.terms[0] == term1 - assert group.terms[1] == term2 + assert pydantic_dataclass_to_dict(group.terms[0]) == pydantic_dataclass_to_dict(term1) + assert pydantic_dataclass_to_dict(group.terms[1]) == pydantic_dataclass_to_dict(term2) def test_create_or_max_term_group(self): """Test creating an OR_MAX term group.""" @@ -132,8 +135,8 @@ def test_create_or_max_term_group(self): assert group.boolean_op == "or_max" assert len(group.terms) == 2 - assert group.terms[0] == term1 - assert group.terms[1] == term2 + assert pydantic_dataclass_to_dict(group.terms[0]) == pydantic_dataclass_to_dict(term1) + assert pydantic_dataclass_to_dict(group.terms[1]) == pydantic_dataclass_to_dict(term2) def test_empty_term_groups(self): """Test creating empty term groups.""" @@ -159,8 +162,8 @@ def test_nested_term_groups(self): assert outer_group.boolean_op == "and" assert len(outer_group.terms) == 2 - assert outer_group.terms[0] == inner_group - assert outer_group.terms[1] == term3 + assert pydantic_dataclass_to_dict(outer_group.terms[0]) == pydantic_dataclass_to_dict(inner_group) + assert pydantic_dataclass_to_dict(outer_group.terms[1]) == pydantic_dataclass_to_dict(term3) class TestCreateSearchTerms: @@ -609,8 +612,6 @@ class TestPrivateFunctions: def test_split_term_values(self): """Test the _split_term_values helper function.""" - from 
typeagent.knowpro.searchlib import _split_term_values - # Test basic splitting result = _split_term_values("a,b,c", ",") assert result == ["a", "b", "c"] @@ -633,8 +634,6 @@ def test_split_term_values(self): def test_parse_search_term(self): """Test the _parse_search_term helper function.""" - from typeagent.knowpro.searchlib import _parse_search_term - # Test simple term term = _parse_search_term("hello") assert term is not None diff --git a/test/test_secindex.py b/test/test_secindex.py index dd7dee2..a2e4846 100644 --- a/test/test_secindex.py +++ b/test/test_secindex.py @@ -2,21 +2,21 @@ # Licensed under the MIT License. import pytest - from fixtures import ( - memory_storage, - needs_auth, - embedding_model, FakeConversation, FakeMessage, + embedding_model, + memory_storage, + needs_auth, # type: ignore # Yes it is used! ) # Import the storage fixture -from typeagent.aitools.embeddings import AsyncEmbeddingModel, TEST_MODEL_NAME + +from typeagent.aitools.embeddings import TEST_MODEL_NAME, AsyncEmbeddingModel from typeagent.aitools.vectorbase import TextEmbeddingIndexSettings -from typeagent.knowpro.convsettings import ConversationSettings -from typeagent.knowpro.convsettings import MessageTextIndexSettings -from typeagent.knowpro.convsettings import RelatedTermIndexSettings -from typeagent.storage.memory.timestampindex import TimestampToTextRangeIndex -from typeagent.storage.memory import MemoryStorageProvider +from typeagent.knowpro.convsettings import ( + ConversationSettings, + MessageTextIndexSettings, + RelatedTermIndexSettings, +) from typeagent.knowpro.secindex import ( ConversationSecondaryIndexes, build_secondary_indexes, @@ -24,10 +24,12 @@ ) from typeagent.storage.memory import ( MemoryMessageCollection as MemoryMessageCollection, +) +from typeagent.storage.memory import ( MemorySemanticRefCollection, + MemoryStorageProvider, ) - -from fixtures import needs_auth # type: ignore # Yes it is used! 
+from typeagent.storage.memory.timestampindex import TimestampToTextRangeIndex @pytest.fixture @@ -37,8 +39,6 @@ def simple_conversation() -> FakeConversation: @pytest.fixture def conversation_settings(needs_auth: None) -> ConversationSettings: - from typeagent.aitools.embeddings import AsyncEmbeddingModel, TEST_MODEL_NAME - model = AsyncEmbeddingModel(model_name=TEST_MODEL_NAME) return ConversationSettings(model) diff --git a/test/test_secindex_storage_integration.py b/test/test_secindex_storage_integration.py index d4d26c0..8f6ebab 100644 --- a/test/test_secindex_storage_integration.py +++ b/test/test_secindex_storage_integration.py @@ -3,9 +3,13 @@ # Test that ConversationSecondaryIndexes now uses storage provider properly import pytest +from fixtures import ( # type: ignore # It's used! + embedding_model, + memory_storage, + needs_auth, +) -from fixtures import needs_auth, memory_storage, embedding_model # type: ignore # It's used! -from typeagent.aitools.embeddings import AsyncEmbeddingModel, TEST_MODEL_NAME +from typeagent.aitools.embeddings import TEST_MODEL_NAME, AsyncEmbeddingModel from typeagent.aitools.vectorbase import TextEmbeddingIndexSettings from typeagent.knowpro.convsettings import RelatedTermIndexSettings from typeagent.knowpro.secindex import ConversationSecondaryIndexes diff --git a/test/test_semrefindex.py b/test/test_semrefindex.py index e071f07..00d2922 100644 --- a/test/test_semrefindex.py +++ b/test/test_semrefindex.py @@ -1,38 +1,39 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
-# Third-party imports +from typing import AsyncGenerator, Dict, cast + import pytest import pytest_asyncio -from typing import cast, Dict, AsyncGenerator -# TypeAgent imports +# Test fixtures +from fixtures import embedding_model, needs_auth, temp_db_path + from typeagent.aitools.embeddings import AsyncEmbeddingModel from typeagent.aitools.vectorbase import TextEmbeddingIndexSettings -from typeagent.storage.memory import MemorySemanticRefCollection -from typeagent.knowpro.interfaces import ( - Topic, - IMessage, - ITermToSemanticRefIndex, - ISemanticRefCollection, -) -from typeagent.knowpro.kplib import ConcreteEntity, Facet, Action, KnowledgeResponse from typeagent.knowpro.convsettings import ( MessageTextIndexSettings, RelatedTermIndexSettings, ) +from typeagent.knowpro.interfaces import ( + IMessage, + ISemanticRefCollection, + ITermToSemanticRefIndex, + SemanticRef, + TextLocation, + TextRange, + Topic, +) +from typeagent.knowpro.kplib import Action, ConcreteEntity, Facet, KnowledgeResponse +from typeagent.storage import SqliteStorageProvider +from typeagent.storage.memory import MemorySemanticRefCollection, MemoryStorageProvider from typeagent.storage.memory.semrefindex import ( TermToSemanticRefIndex, - add_entity_to_index, - add_topic_to_index, add_action_to_index, + add_entity_to_index, add_knowledge_to_index, + add_topic_to_index, ) -from typeagent.storage.memory import MemoryStorageProvider -from typeagent.storage import SqliteStorageProvider - -# Test fixtures -from fixtures import needs_auth, embedding_model, temp_db_path @pytest_asyncio.fixture(params=["memory", "sqlite"]) @@ -72,13 +73,6 @@ def get_knowledge(self): ) # For SQLite, we need to create semantic refs first due to foreign key constraints - from typeagent.knowpro.interfaces import ( - SemanticRef, - TextRange, - TextLocation, - Topic, - ) - collection = await provider.get_semantic_ref_collection() # Create semantic refs with ordinals 1, 2, 3 that the tests expect diff --git 
a/test/test_serialization.py b/test/test_serialization.py index feb1eba..1fbfd2e 100644 --- a/test/test_serialization.py +++ b/test/test_serialization.py @@ -1,32 +1,32 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -import pytest -import numpy as np from pathlib import Path from typing import Any, cast +import numpy as np +import pytest + from typeagent.aitools.embeddings import NormalizedEmbeddings -from typeagent.knowpro.serialization import ( - serialize_object, - deserialize_object, - write_conversation_data_to_file, - from_conversation_file_data, - to_conversation_file_data, - create_file_header, - DeserializationError, - serialize_embeddings, -) from typeagent.knowpro.interfaces import ( ConversationDataWithIndexes, MessageTextIndexData, TermsToRelatedTermsIndexData, TextToTextLocationIndexData, ) -from typeagent.knowpro.kplib import Quantity, ConcreteEntity +from typeagent.knowpro.kplib import ConcreteEntity, Quantity +from typeagent.knowpro.serialization import ( + DeserializationError, + create_file_header, + deserialize_object, + from_conversation_file_data, + serialize_embeddings, + serialize_object, + to_conversation_file_data, + write_conversation_data_to_file, +) from typeagent.podcasts.podcast import Podcast - type SampleData = Any # Anything more refined causes type errors diff --git a/test/test_sqlite_indexes.py b/test/test_sqlite_indexes.py index 195cbb7..19664d7 100644 --- a/test/test_sqlite_indexes.py +++ b/test/test_sqlite_indexes.py @@ -7,20 +7,22 @@ from typing import Generator import pytest +from fixtures import FakeMessage, embedding_model, needs_auth, temp_db_path from typeagent.aitools.embeddings import AsyncEmbeddingModel -from typeagent.aitools.vectorbase import TextEmbeddingIndexSettings - -from typeagent.knowpro.convsettings import MessageTextIndexSettings +from typeagent.aitools.vectorbase import TextEmbeddingIndexSettings, VectorBase from typeagent.knowpro import interfaces +from 
typeagent.knowpro.convsettings import MessageTextIndexSettings from typeagent.knowpro.interfaces import ( + IMessage, SemanticRef, + Term, + TermToRelatedTermsData, + TextEmbeddingIndexData, TextLocation, TextRange, Topic, - Term, ) - from typeagent.storage.sqlite.messageindex import SqliteMessageTextIndex from typeagent.storage.sqlite.propindex import SqlitePropertyIndex from typeagent.storage.sqlite.reltermsindex import ( @@ -32,8 +34,6 @@ from typeagent.storage.sqlite.semrefindex import SqliteTermToSemanticRefIndex from typeagent.storage.sqlite.timestampindex import SqliteTimestampToTextRangeIndex -from fixtures import needs_auth, embedding_model, temp_db_path - @pytest.fixture def embedding_settings( @@ -307,8 +307,6 @@ async def test_fuzzy_deserialize( text_items = ["chess", "artificial intelligence", "machine learning"] # Create embeddings data (simulate what VectorBase would serialize) - from typeagent.aitools.vectorbase import VectorBase - settings = TextEmbeddingIndexSettings(embedding_settings.embedding_model) temp_vectorbase = VectorBase(settings) @@ -432,8 +430,6 @@ async def test_fuzzy_index_first_run_scenario( ] # Create embeddings as they would exist in the JSON - from typeagent.aitools.vectorbase import VectorBase - settings = TextEmbeddingIndexSettings(embedding_settings.embedding_model) temp_vectorbase = VectorBase(settings) @@ -551,8 +547,6 @@ async def test_related_terms_aliases_edge_cases( await index.deserialize({"relatedTerms": []}) # Test with properly formatted data - from typeagent.knowpro.interfaces import TermToRelatedTermsData - formatted_data: TermToRelatedTermsData = { "relatedTerms": [ {"termText": "test", "relatedTerms": []}, # valid but empty @@ -581,8 +575,6 @@ async def test_fuzzy_index_edge_cases( assert all(isinstance(results, list) for results in results_list) # Test deserialize with various data formats - from typeagent.knowpro.interfaces import TextEmbeddingIndexData - # Valid data with None embeddings valid_data1: 
TextEmbeddingIndexData = { "textItems": ["test"], @@ -618,9 +610,6 @@ async def test_message_text_index_basic( assert results == [] # Create some mock messages for testing - from fixtures import FakeMessage - from typeagent.knowpro.interfaces import IMessage - messages: list[IMessage] = [ FakeMessage(text_chunks=["First test message", "Second chunk"]), FakeMessage(text_chunks=["Another message"]), diff --git a/test/test_sqlitestore.py b/test/test_sqlitestore.py index a2e1ef7..7e95ace 100644 --- a/test/test_sqlitestore.py +++ b/test/test_sqlitestore.py @@ -1,19 +1,26 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -from collections.abc import AsyncGenerator -from dataclasses import field import os import tempfile +from collections.abc import AsyncGenerator +from dataclasses import field +from datetime import datetime from typing import Generator import pytest -from pydantic.dataclasses import dataclass import pytest_asyncio +from fixtures import FakeMessage, embedding_model, temp_db_path +from pydantic.dataclasses import dataclass from typeagent.aitools.embeddings import AsyncEmbeddingModel from typeagent.aitools.vectorbase import TextEmbeddingIndexSettings +from typeagent.knowpro.convsettings import ( + MessageTextIndexSettings, + RelatedTermIndexSettings, +) from typeagent.knowpro.interfaces import ( + DateRange, IMessage, SemanticRef, TextLocation, @@ -21,12 +28,8 @@ Topic, ) from typeagent.knowpro.kplib import KnowledgeResponse -from typeagent.knowpro.convsettings import MessageTextIndexSettings -from typeagent.knowpro.convsettings import RelatedTermIndexSettings from typeagent.storage import SqliteStorageProvider -from fixtures import embedding_model, FakeMessage, temp_db_path - # Dummy IMessage for testing @dataclass @@ -130,9 +133,6 @@ async def test_sqlite_timestamp_index( dummy_sqlite_storage_provider: SqliteStorageProvider[DummyMessage], ): """Test SqliteTimestampToTextRangeIndex functionality.""" - from datetime import datetime 
- from typeagent.knowpro.interfaces import DateRange - # Set up database with some messages message_collection = await dummy_sqlite_storage_provider.get_message_collection() diff --git a/test/test_storage_providers_unified.py b/test/test_storage_providers_unified.py index 327c8c7..c90066a 100644 --- a/test/test_storage_providers_unified.py +++ b/test/test_storage_providers_unified.py @@ -8,16 +8,23 @@ to ensure behavioral parity across implementations. """ +import os +import tempfile +from dataclasses import field from typing import AsyncGenerator, assert_never + import pytest -from dataclasses import field -from pydantic.dataclasses import dataclass import pytest_asyncio +from fixtures import embedding_model, needs_auth, temp_db_path +from pydantic.dataclasses import dataclass from typeagent.aitools.embeddings import AsyncEmbeddingModel from typeagent.aitools.vectorbase import TextEmbeddingIndexSettings -from typeagent.knowpro.kplib import KnowledgeResponse from typeagent.knowpro import kplib +from typeagent.knowpro.convsettings import ( + MessageTextIndexSettings, + RelatedTermIndexSettings, +) from typeagent.knowpro.interfaces import ( DateRange, Datetime, @@ -29,12 +36,9 @@ TextRange, Topic, ) -from typeagent.knowpro.convsettings import MessageTextIndexSettings -from typeagent.knowpro.convsettings import RelatedTermIndexSettings -from typeagent.storage.memory import MemoryStorageProvider +from typeagent.knowpro.kplib import KnowledgeResponse from typeagent.storage import SqliteStorageProvider - -from fixtures import needs_auth, embedding_model, temp_db_path +from typeagent.storage.memory import MemoryStorageProvider # Test message for unified testing @@ -603,9 +607,6 @@ async def test_storage_provider_independence( ) # Create two sqlite providers (with different temp files) - import tempfile - import os - temp_file1 = tempfile.NamedTemporaryFile(suffix=".sqlite", delete=False) temp_path1 = temp_file1.name temp_file1.close() diff --git 
a/test/test_timestampindex.py b/test/test_timestampindex.py index c11ef71..b920a1a 100644 --- a/test/test_timestampindex.py +++ b/test/test_timestampindex.py @@ -3,8 +3,8 @@ import pytest -from typeagent.storage.memory.timestampindex import TimestampToTextRangeIndex from typeagent.knowpro.interfaces import DateRange, Datetime, TextLocation, TextRange +from typeagent.storage.memory.timestampindex import TimestampToTextRangeIndex async def make_index(ts: list[str]) -> TimestampToTextRangeIndex: diff --git a/test/test_transcripts.py b/test/test_transcripts.py index 5dfdcfd..fcd1d7b 100644 --- a/test/test_transcripts.py +++ b/test/test_transcripts.py @@ -1,29 +1,44 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -import pytest import os from datetime import timedelta -from typeagent.transcripts.transcript_ingest import ( - get_transcript_speakers, - get_transcript_duration, - extract_speaker_from_text, - webvtt_timestamp_to_seconds, +import pytest +from fixtures import ( # type: ignore + embedding_model, + needs_auth, + really_needs_auth, + temp_dir, ) + +from typeagent.aitools.embeddings import TEST_MODEL_NAME, AsyncEmbeddingModel +from typeagent.knowpro.convsettings import ConversationSettings +from typeagent.knowpro.universal_message import ( + UNIX_EPOCH, + format_timestamp_utc, +) +from typeagent.storage.memory.collections import ( + MemoryMessageCollection, + MemorySemanticRefCollection, +) +from typeagent.storage.memory.semrefindex import TermToSemanticRefIndex from typeagent.transcripts.transcript import ( Transcript, TranscriptMessage, TranscriptMessageMeta, ) -from typeagent.knowpro.universal_message import ( - UNIX_EPOCH, - format_timestamp_utc, +from typeagent.transcripts.transcript_ingest import ( + extract_speaker_from_text, + get_transcript_duration, + get_transcript_speakers, + parse_voice_tags, + webvtt_timestamp_to_seconds, ) -from typeagent.knowpro.convsettings import ConversationSettings -from typeagent.aitools.embeddings 
import AsyncEmbeddingModel -from fixtures import needs_auth, really_needs_auth, temp_dir, embedding_model # type: ignore +webvtt = pytest.importorskip( + "webvtt", reason="webvtt package is required for transcript ingestion tests" +) def test_extract_speaker_from_text(): @@ -101,14 +116,6 @@ def conversation_settings( @pytest.mark.asyncio async def test_ingest_vtt_transcript(conversation_settings: ConversationSettings): """Test importing a VTT file into a Transcript object.""" - import webvtt - from typeagent.storage.memory.collections import ( - MemoryMessageCollection, - MemorySemanticRefCollection, - ) - from typeagent.storage.memory.semrefindex import TermToSemanticRefIndex - from typeagent.transcripts.transcript_ingest import parse_voice_tags - vtt_file = "testdata/Confuse-A-Cat.vtt" # Use in-memory storage to avoid database cleanup issues @@ -223,8 +230,6 @@ def test_transcript_message_creation(): @pytest.mark.asyncio async def test_transcript_creation(): """Test creating an empty transcript.""" - from typeagent.aitools.embeddings import TEST_MODEL_NAME - # Create a minimal transcript for testing structure embedding_model = AsyncEmbeddingModel(model_name=TEST_MODEL_NAME) settings = ConversationSettings(embedding_model) @@ -253,14 +258,6 @@ async def test_transcript_knowledge_extraction_slow( 4. 
Verifies both mechanical extraction (entities/actions from metadata) and LLM extraction (topics from content) work correctly """ - import webvtt - from typeagent.storage.memory.collections import ( - MemoryMessageCollection, - MemorySemanticRefCollection, - ) - from typeagent.storage.memory.semrefindex import TermToSemanticRefIndex - from typeagent.transcripts.transcript_ingest import extract_speaker_from_text - # Use in-memory storage for speed settings = ConversationSettings(embedding_model) diff --git a/test/test_utils.py b/test/test_utils.py index 1cc008e..0c913d0 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -5,10 +5,13 @@ from contextlib import redirect_stdout from io import StringIO -import typeagent.aitools.utils as utils +import pydantic.dataclasses +import pytest +from fixtures import really_needs_auth +import typeagent.aitools.utils as utils -from fixtures import really_needs_auth +typechat = pytest.importorskip("typechat") def test_timelog(): @@ -38,14 +41,10 @@ def test_load_dotenv(really_needs_auth): def test_create_translator(): - import typechat - class DummyModel(typechat.TypeChatLanguageModel): async def complete(self, *args, **kwargs) -> typechat.Result: return typechat.Failure("dummy response") - import pydantic.dataclasses - @pydantic.dataclasses.dataclass class DummySchema: pass diff --git a/test/test_vectorbase.py b/test/test_vectorbase.py index 887a660..b8e9636 100644 --- a/test/test_vectorbase.py +++ b/test/test_vectorbase.py @@ -1,17 +1,17 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
-import pytest import numpy as np +import pytest -from typeagent.aitools.vectorbase import ( - VectorBase, - TextEmbeddingIndexSettings, -) from typeagent.aitools.embeddings import ( + TEST_MODEL_NAME, AsyncEmbeddingModel, NormalizedEmbedding, - TEST_MODEL_NAME, +) +from typeagent.aitools.vectorbase import ( + TextEmbeddingIndexSettings, + VectorBase, ) diff --git a/tmp_debug_tag.py b/tmp_debug_tag.py new file mode 100644 index 0000000..504b049 --- /dev/null +++ b/tmp_debug_tag.py @@ -0,0 +1,14 @@ +"""Legacy debug script placeholder. + +The original ad-hoc debugging script has been removed to avoid import errors. +If you need to perform manual experiments, feel free to repurpose this file +locally, but do not commit those changes. +""" + + +def main() -> None: # pragma: no cover - debug placeholder + raise SystemExit("tmp_debug_tag.py is a placeholder and should not be executed.") + + +if __name__ == "__main__": + main() diff --git a/tools/add_copyright.py b/tools/add_copyright.py index 3273461..4344afc 100755 --- a/tools/add_copyright.py +++ b/tools/add_copyright.py @@ -18,7 +18,6 @@ from pathlib import Path from typing import List, Tuple - COPYRIGHT_NOTICE = """# Copyright (c) Microsoft Corporation. 
# Licensed under the MIT License.""" diff --git a/tools/get_keys.py b/tools/get_keys.py index 06496ed..d3c4fa6 100644 --- a/tools/get_keys.py +++ b/tools/get_keys.py @@ -13,10 +13,11 @@ from pathlib import Path from typing import Dict, List, Optional, Set, Tuple +import colorama + # Azure SDK imports from azure.identity import DefaultAzureCredential from azure.mgmt.authorization import AuthorizationManagementClient -import colorama from colorama import Fore, Style # Initialize colorama for cross-platform color support diff --git a/tools/ingest_vtt.py b/tools/ingest_vtt.py index 1456617..57950fd 100644 --- a/tools/ingest_vtt.py +++ b/tools/ingest_vtt.py @@ -32,17 +32,17 @@ format_timestamp_utc, ) from typeagent.storage.utils import create_storage_provider +from typeagent.transcripts.transcript import ( + Transcript, + TranscriptMessage, + TranscriptMessageMeta, +) from typeagent.transcripts.transcript_ingest import ( get_transcript_duration, get_transcript_speakers, parse_voice_tags, webvtt_timestamp_to_seconds, ) -from typeagent.transcripts.transcript import ( - Transcript, - TranscriptMessage, - TranscriptMessageMeta, -) def create_arg_parser() -> argparse.ArgumentParser: diff --git a/tools/query.py b/tools/query.py index 1f266e3..7d67fb5 100644 --- a/tools/query.py +++ b/tools/query.py @@ -7,8 +7,6 @@ import argparse import asyncio -from collections.abc import Mapping -from dataclasses import dataclass import difflib import json import os @@ -17,9 +15,12 @@ import shutil import sys import typing +from collections.abc import Mapping +from dataclasses import dataclass -from colorama import init as colorama_init, Fore import numpy as np +from colorama import Fore +from colorama import init as colorama_init readline = None try: @@ -30,11 +31,18 @@ import typechat -from typeagent.aitools import embeddings -from typeagent.aitools import utils - -from typeagent.knowpro import answers, answer_response_schema -from typeagent.knowpro import convknowledge +from 
typeagent.aitools import embeddings, utils +from typeagent.knowpro import ( + answer_response_schema, + answers, + convknowledge, + kplib, + query, + search, + search_query_schema, + searchlang, + serialization, +) from typeagent.knowpro.convsettings import ConversationSettings from typeagent.knowpro.interfaces import ( IConversation, @@ -46,19 +54,12 @@ Tag, Topic, ) -from typeagent.knowpro import kplib -from typeagent.knowpro import query -from typeagent.knowpro import search, search_query_schema, searchlang -from typeagent.knowpro import serialization - from typeagent.podcasts import podcast - from typeagent.storage.memory.propindex import build_property_index from typeagent.storage.memory.reltermsindex import build_related_terms_index from typeagent.storage.sqlite.provider import SqliteStorageProvider from typeagent.storage.utils import create_storage_provider - ### Classes ### diff --git a/tools/test_email.py b/tools/test_email.py index 8e922c6..f1ddbc0 100644 --- a/tools/test_email.py +++ b/tools/test_email.py @@ -1,16 +1,17 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
+import argparse +import asyncio import os +import shelve import shlex -import asyncio import sys import traceback -from typing import Any, Literal, Iterable, Callable, Awaitable -from colorama import Fore from pathlib import Path -import argparse -import shelve +from typing import Any, Awaitable, Callable, Iterable, Literal + +from colorama import Fore try: import readline # noqa: F401 @@ -18,19 +19,17 @@ pass # readline not available on Windows import typechat +from query import print_result from typeagent.aitools import utils -from typeagent.knowpro import kplib, searchlang, search_query_schema, convknowledge -from typeagent.knowpro.interfaces import IConversation from typeagent.emails.email_import import import_email_from_file, import_emails_from_dir from typeagent.emails.email_memory import EmailMemory from typeagent.emails.email_message import EmailMessage - +from typeagent.knowpro import convknowledge, kplib, search_query_schema, searchlang from typeagent.knowpro.convsettings import ConversationSettings +from typeagent.knowpro.interfaces import IConversation from typeagent.storage.utils import create_storage_provider -from query import print_result - class ReallyExit(Exception): pass diff --git a/tools/vizcmp.py b/tools/vizcmp.py index 03a7789..2e32ef2 100644 --- a/tools/vizcmp.py +++ b/tools/vizcmp.py @@ -8,7 +8,8 @@ import statistics import sys -from colorama import init as colorama_init, Back, Fore, Style +from colorama import Back, Fore, Style +from colorama import init as colorama_init def main(): diff --git a/typeagent/aitools/auth.py b/typeagent/aitools/auth.py index 54eb73c..27265e6 100755 --- a/typeagent/aitools/auth.py +++ b/typeagent/aitools/auth.py @@ -2,8 +2,8 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
-from dataclasses import dataclass import time +from dataclasses import dataclass from typing import Protocol from azure.identity import DefaultAzureCredential diff --git a/typeagent/aitools/embeddings.py b/typeagent/aitools/embeddings.py index 1f27347..e7ecf94 100644 --- a/typeagent/aitools/embeddings.py +++ b/typeagent/aitools/embeddings.py @@ -5,16 +5,15 @@ import os import numpy as np +import tiktoken from numpy.typing import NDArray -from openai import AsyncOpenAI, AsyncAzureOpenAI, DEFAULT_MAX_RETRIES, OpenAIError +from openai import DEFAULT_MAX_RETRIES, AsyncAzureOpenAI, AsyncOpenAI, OpenAIError from openai.types import Embedding -import tiktoken from tiktoken import model as tiktoken_model from tiktoken.core import Encoding -from .auth import get_shared_token_provider, AzureTokenProvider -from .utils import timelog - +from .auth import AzureTokenProvider, get_shared_token_provider +from .utils import get_azure_api_key, parse_azure_endpoint, timelog type NormalizedEmbedding = NDArray[np.float32] # A single embedding type NormalizedEmbeddings = NDArray[np.float32] # An array of embeddings @@ -124,7 +123,6 @@ def __init__( self._embedding_cache = {} def _setup_azure(self, azure_api_key: str) -> None: - from .utils import get_azure_api_key, parse_azure_endpoint azure_api_key = get_azure_api_key(azure_api_key) self.azure_endpoint, self.azure_api_version = parse_azure_endpoint( diff --git a/typeagent/aitools/utils.py b/typeagent/aitools/utils.py index 6658a76..9ddc7c8 100644 --- a/typeagent/aitools/utils.py +++ b/typeagent/aitools/utils.py @@ -3,17 +3,24 @@ """Utilities that are hard to fit in any specific module.""" -from contextlib import contextmanager import difflib import os import re import shutil import time +from contextlib import contextmanager import black import colorama import dotenv +import logfire import typechat +from openai import AsyncAzureOpenAI, AsyncOpenAI +from pydantic_ai import Agent, NativeOutput, ToolOutput +from pydantic_ai.models.openai 
import OpenAIChatModel +from pydantic_ai.providers.azure import AzureProvider + +from .auth import get_shared_token_provider @contextmanager @@ -152,7 +159,6 @@ def fmt(row, seg_widths): def setup_logfire(): """Configure logfire for pydantic_ai and httpx.""" - import logfire def scrubbing_callback(m: logfire.ScrubMatch): """Instructions: Uncomment any block where you deem it safe to not scrub.""" @@ -212,7 +218,6 @@ def get_azure_api_key(azure_api_key: str) -> str: Returns: The API key or token to use. """ - from .auth import get_shared_token_provider # This section is rather specific to our team's setup at Microsoft. if azure_api_key.lower() == "identity": @@ -241,7 +246,6 @@ def create_async_openai_client( Raises: RuntimeError: If neither OPENAI_API_KEY nor AZURE_OPENAI_API_KEY is set. """ - from openai import AsyncAzureOpenAI, AsyncOpenAI if openai_api_key := os.getenv("OPENAI_API_KEY"): return AsyncOpenAI(api_key=openai_api_key, base_url=base_url) @@ -265,9 +269,6 @@ def create_async_openai_client( # The true return type is pydantic_ai.Agent[T], but that's an optional dependency. def make_agent[T](cls: type[T]): """Create Pydantic AI agent using hardcoded preferences.""" - from pydantic_ai import Agent, NativeOutput, ToolOutput - from pydantic_ai.models.openai import OpenAIChatModel - from pydantic_ai.providers.azure import AzureProvider # Prefer straight OpenAI over Azure OpenAI. if os.getenv("OPENAI_API_KEY"): diff --git a/typeagent/emails/email_import.py b/typeagent/emails/email_import.py index 758fe76..8006e67 100644 --- a/typeagent/emails/email_import.py +++ b/typeagent/emails/email_import.py @@ -2,12 +2,11 @@ # Licensed under the MIT License. 
import re -from pathlib import Path -from typing import Iterable - from email import message_from_string -from email.utils import parsedate_to_datetime from email.message import Message +from email.utils import parsedate_to_datetime +from pathlib import Path +from typing import Iterable from .email_message import EmailMessage, EmailMessageMeta diff --git a/typeagent/emails/email_memory.py b/typeagent/emails/email_memory.py index 19f078f..6d9c5f3 100644 --- a/typeagent/emails/email_memory.py +++ b/typeagent/emails/email_memory.py @@ -1,35 +1,24 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. +import copy +import json import os from dataclasses import dataclass -import json -import copy -from pydantic.dataclasses import dataclass as pydantic_dataclass + import typechat + from ..aitools import utils from ..knowpro import ( - secindex, + answer_response_schema, + answers, convknowledge, search_query_schema, searchlang, - answer_response_schema, - answers, -) -from ..knowpro.convsettings import ConversationSettings -from ..knowpro.interfaces import ( - IConversation, - IConversationSecondaryIndexes, - IMessage, - IMessageCollection, - ISemanticRefCollection, - ITermToSemanticRefIndex, - Term, ) from ..knowpro.conversation_base import ConversationBase -from ..storage.memory import semrefindex -from typeagent.storage.sqlite.provider import SqliteStorageProvider - +from ..knowpro.convsettings import ConversationSettings +from ..knowpro.interfaces import Term from .email_message import EmailMessage diff --git a/typeagent/emails/email_message.py b/typeagent/emails/email_message.py index 4b1ec28..6dbccaa 100644 --- a/typeagent/emails/email_message.py +++ b/typeagent/emails/email_message.py @@ -1,22 +1,15 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
-from dataclasses import dataclass +from email.utils import parseaddr from typing import Any -from enum import Enum -from pydantic.dataclasses import dataclass as pydantic_dataclass from pydantic import Field - -from email.utils import parseaddr +from pydantic.dataclasses import dataclass as pydantic_dataclass from ..knowpro import kplib from ..knowpro.field_helpers import CamelCaseField -from ..knowpro.interfaces import ( - IKnowledgeSource, - IMessage, - IMessageMetadata, -) +from ..knowpro.interfaces import IKnowledgeSource, IMessage, IMessageMetadata @pydantic_dataclass diff --git a/typeagent/knowpro/answer_context_schema.py b/typeagent/knowpro/answer_context_schema.py index 6ff1b33..dd0ec2a 100644 --- a/typeagent/knowpro/answer_context_schema.py +++ b/typeagent/knowpro/answer_context_schema.py @@ -5,6 +5,7 @@ from dataclasses import dataclass from typing import Annotated, Any, Union + from typing_extensions import Doc from ..knowpro.interfaces import DateRange diff --git a/typeagent/knowpro/answer_response_schema.py b/typeagent/knowpro/answer_response_schema.py index 563d954..6e11ec9 100644 --- a/typeagent/knowpro/answer_response_schema.py +++ b/typeagent/knowpro/answer_response_schema.py @@ -1,9 +1,10 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -from typing import Literal, Annotated -from typing_extensions import Doc +from typing import Annotated, Literal + from pydantic.dataclasses import dataclass +from typing_extensions import Doc AnswerType = Literal[ "NoAnswer", # If question cannot be accurately answered from [ANSWER CONTEXT] diff --git a/typeagent/knowpro/answers.py b/typeagent/knowpro/answers.py index 9713e87..d1458d5 100644 --- a/typeagent/knowpro/answers.py +++ b/typeagent/knowpro/answers.py @@ -1,7 +1,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
-from collections.abc import Iterable, Iterator +from collections.abc import Iterable from dataclasses import dataclass from typing import Any @@ -17,11 +17,11 @@ IConversation, IMessage, IMessageCollection, + IMessageMetadata, ISemanticRefCollection, ITermToSemanticRefIndex, Knowledge, KnowledgeType, - IMessageMetadata, MessageOrdinal, ScoredMessageOrdinal, ScoredSemanticRefOrdinal, diff --git a/typeagent/knowpro/collections.py b/typeagent/knowpro/collections.py index f54be80..3780450 100644 --- a/typeagent/knowpro/collections.py +++ b/typeagent/knowpro/collections.py @@ -2,15 +2,14 @@ # Licensed under the MIT License. import bisect -from collections.abc import Callable, Iterable, Iterator -from dataclasses import dataclass, field import heapq import math import sys -from typing import cast, Set # Set is an alias for builtin set +from collections.abc import Callable, Iterable, Iterator +from dataclasses import dataclass, field +from typing import Set, cast # Set is an alias for builtin set from .interfaces import ( - ICollection, IMessage, IMessageCollection, ISemanticRefCollection, diff --git a/typeagent/knowpro/conversation_base.py b/typeagent/knowpro/conversation_base.py index ab876a2..ca33789 100644 --- a/typeagent/knowpro/conversation_base.py +++ b/typeagent/knowpro/conversation_base.py @@ -9,17 +9,17 @@ import typechat +from ..aitools import utils +from ..storage.memory import propindex, semrefindex from . 
import ( - answers, answer_response_schema, + answers, convknowledge, kplib, search_query_schema, searchlang, secindex, ) -from ..aitools import utils -from ..storage.memory import semrefindex from .convsettings import ConversationSettings from .interfaces import ( AddMessagesResult, @@ -27,13 +27,14 @@ IConversationSecondaryIndexes, IMessage, IMessageCollection, + IndexingStartPoints, ISemanticRefCollection, IStorageProvider, ITermToSemanticRefIndex, - IndexingStartPoints, MessageOrdinal, Topic, ) +from .messageutils import get_message_chunk_batch_from_list TMessage = TypeVar("TMessage", bound=IMessage) @@ -199,8 +200,6 @@ async def _add_llm_knowledge_incremental( ) # Get batches of text locations from the message list - from .messageutils import get_message_chunk_batch_from_list - batches = get_message_chunk_batch_from_list( messages, start_from_message_ordinal, @@ -222,7 +221,6 @@ async def _update_secondary_indexes_incremental( if self.secondary_indexes is None: return - from ..storage.memory import propindex await propindex.add_to_property_index(self, start_points.semref_count) diff --git a/typeagent/knowpro/convknowledge.py b/typeagent/knowpro/convknowledge.py index a2b3415..7fca907 100644 --- a/typeagent/knowpro/convknowledge.py +++ b/typeagent/knowpro/convknowledge.py @@ -2,15 +2,13 @@ # Licensed under the MIT License. import asyncio -from dataclasses import dataclass, field import os +from dataclasses import dataclass, field import typechat from ..aitools import auth from . import kplib -from .interfaces import IKnowledgeExtractor - # TODO: Move ModelWrapper and create_typechat_model() to aitools package. 
diff --git a/typeagent/knowpro/convsettings.py b/typeagent/knowpro/convsettings.py index 627546e..bebdc6d 100644 --- a/typeagent/knowpro/convsettings.py +++ b/typeagent/knowpro/convsettings.py @@ -4,11 +4,15 @@ from __future__ import annotations from dataclasses import dataclass +from typing import TYPE_CHECKING from ..aitools.embeddings import AsyncEmbeddingModel from ..aitools.vectorbase import TextEmbeddingIndexSettings from .interfaces import IKnowledgeExtractor, IStorageProvider +if TYPE_CHECKING: + from ..storage.memory import MemoryStorageProvider + @dataclass class MessageTextIndexSettings: @@ -77,8 +81,8 @@ def storage_provider(self, value: IStorageProvider) -> None: async def get_storage_provider(self) -> IStorageProvider: """Get or create the storage provider asynchronously.""" if self._storage_provider is None: + # Import here to avoid circular import from ..storage.memory import MemoryStorageProvider - self._storage_provider = MemoryStorageProvider( message_text_settings=self.message_text_index_settings, related_terms_settings=self.related_term_index_settings, diff --git a/typeagent/knowpro/convutils.py b/typeagent/knowpro/convutils.py index d6071c3..d54f802 100644 --- a/typeagent/knowpro/convutils.py +++ b/typeagent/knowpro/convutils.py @@ -3,7 +3,6 @@ import typechat -from .convsettings import ConversationSettings from .interfaces import ( DateRange, Datetime, diff --git a/typeagent/knowpro/date_time_schema.py b/typeagent/knowpro/date_time_schema.py index e2c9581..7f47e03 100644 --- a/typeagent/knowpro/date_time_schema.py +++ b/typeagent/knowpro/date_time_schema.py @@ -1,8 +1,9 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
-from pydantic.dataclasses import dataclass from typing import Annotated + +from pydantic.dataclasses import dataclass from typing_extensions import Doc diff --git a/typeagent/knowpro/factory.py b/typeagent/knowpro/factory.py index 1579ee2..d9f86b1 100644 --- a/typeagent/knowpro/factory.py +++ b/typeagent/knowpro/factory.py @@ -7,7 +7,7 @@ from . import secindex from .conversation_base import ConversationBase from .convsettings import ConversationSettings -from .interfaces import IMessage, ConversationMetadata +from .interfaces import ConversationMetadata, IMessage async def create_conversation[TMessage: IMessage]( diff --git a/typeagent/knowpro/field_helpers.py b/typeagent/knowpro/field_helpers.py index 4569ab3..99bb1ac 100644 --- a/typeagent/knowpro/field_helpers.py +++ b/typeagent/knowpro/field_helpers.py @@ -4,7 +4,7 @@ from dataclasses import MISSING from typing import Any -from pydantic import Field, AliasChoices +from pydantic import AliasChoices, Field from pydantic.alias_generators import to_camel diff --git a/typeagent/knowpro/fuzzyindex.py b/typeagent/knowpro/fuzzyindex.py index c9cfa29..44bea04 100644 --- a/typeagent/knowpro/fuzzyindex.py +++ b/typeagent/knowpro/fuzzyindex.py @@ -5,8 +5,8 @@ import numpy as np -from ..aitools.vectorbase import VectorBase, TextEmbeddingIndexSettings, ScoredInt from ..aitools.embeddings import NormalizedEmbedding, NormalizedEmbeddings +from ..aitools.vectorbase import ScoredInt, TextEmbeddingIndexSettings, VectorBase class EmbeddingIndex: diff --git a/typeagent/knowpro/interfaces.py b/typeagent/knowpro/interfaces.py index ef0efae..ca11bf6 100644 --- a/typeagent/knowpro/interfaces.py +++ b/typeagent/knowpro/interfaces.py @@ -1,13 +1,9 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -from abc import ABC, abstractmethod from collections.abc import AsyncIterable, Iterable, Sequence -from datetime import ( - datetime as Datetime, # For export. - timedelta as Timedelta, # For export. 
-) -from enum import Enum +from datetime import datetime as Datetime # For export. +from datetime import timedelta as Timedelta # For export. from typing import ( Any, ClassVar, @@ -19,13 +15,13 @@ runtime_checkable, ) -from pydantic.dataclasses import dataclass -from pydantic import Field, AliasChoices import typechat +from pydantic.dataclasses import dataclass from ..aitools.embeddings import NormalizedEmbeddings -from . import kplib +from . import kplib, serialization from .field_helpers import CamelCaseField +from .types import ConversationDataWithIndexes, SearchTermGroupTypes class IKnowledgeSource(Protocol): @@ -300,8 +296,6 @@ def __repr__(self) -> str: return f"{self.__class__.__name__}({self.semantic_ref_ordinal}, {self.range}, {self.knowledge.knowledge_type!r}, {self.knowledge})" def serialize(self) -> SemanticRefData: - from . import serialization - return SemanticRefData( semanticRefOrdinal=self.semantic_ref_ordinal, range=self.range.serialize(), @@ -311,7 +305,6 @@ def serialize(self) -> SemanticRefData: @staticmethod def deserialize(data: SemanticRefData) -> "SemanticRef": - from . import serialization knowledge = serialization.deserialize_knowledge( data["knowledgeType"], data["knowledge"] diff --git a/typeagent/knowpro/knowledge.py b/typeagent/knowpro/knowledge.py index bff5a94..34d05bc 100644 --- a/typeagent/knowpro/knowledge.py +++ b/typeagent/knowpro/knowledge.py @@ -5,8 +5,7 @@ from typechat import Result, TypeChatLanguageModel -from . import convknowledge -from . import kplib +from . import convknowledge, kplib from .interfaces import IKnowledgeExtractor diff --git a/typeagent/knowpro/kplib.py b/typeagent/knowpro/kplib.py index 72ee1a8..6804c45 100644 --- a/typeagent/knowpro/kplib.py +++ b/typeagent/knowpro/kplib.py @@ -7,9 +7,9 @@ Comments that should go into the schema are in docstrings and Doc() annotations. 
""" -from pydantic.dataclasses import dataclass -from pydantic import Field, AliasChoices from typing import Annotated, ClassVar, Literal + +from pydantic.dataclasses import dataclass from typing_extensions import Doc from .field_helpers import CamelCaseField diff --git a/typeagent/knowpro/query.py b/typeagent/knowpro/query.py index 01e63de..9dcb848 100644 --- a/typeagent/knowpro/query.py +++ b/typeagent/knowpro/query.py @@ -4,11 +4,11 @@ from abc import ABC, abstractmethod from collections.abc import Callable from dataclasses import dataclass, field -from re import search from typing import Literal, Protocol, cast from ..aitools.embeddings import NormalizedEmbedding - +from ..storage.memory.messageindex import IMessageTextEmbeddingIndex +from ..storage.memory.propindex import PropertyNames, lookup_property_in_property_index from .collections import ( Match, MatchAccumulator, @@ -21,8 +21,8 @@ ) from .common import is_search_term_wildcard from .interfaces import ( - Datetime, DateRange, + Datetime, IConversation, IMessage, IMessageCollection, @@ -36,7 +36,6 @@ ScoredMessageOrdinal, ScoredSemanticRefOrdinal, SearchTerm, - SearchTermGroup, SemanticRef, SemanticRefOrdinal, SemanticRefSearchResult, @@ -46,10 +45,6 @@ Thread, ) from .kplib import ConcreteEntity -from ..storage.memory.messageindex import IMessageTextEmbeddingIndex -from ..storage.memory.propindex import PropertyNames, lookup_property_in_property_index -from .searchlib import create_property_search_term, create_tag_search_term_group - # TODO: Move to compilelib.py type BooleanOp = Literal["and", "or", "or_max"] diff --git a/typeagent/knowpro/search.py b/typeagent/knowpro/search.py index d6d4a14..a3413e1 100644 --- a/typeagent/knowpro/search.py +++ b/typeagent/knowpro/search.py @@ -2,10 +2,12 @@ # Licensed under the MIT License. 
from collections.abc import Callable +from typing import TypeGuard, cast + from pydantic.dataclasses import dataclass -from pydantic import Field, AliasChoices -from typing import TypeGuard, cast, Annotated +from ..storage.memory.messageindex import IMessageTextEmbeddingIndex +from ..storage.memory.reltermsindex import resolve_related_terms from .collections import MessageAccumulator, SemanticRefAccumulator from .field_helpers import CamelCaseField from .interfaces import ( @@ -24,8 +26,6 @@ WhenFilter, ) from .kplib import ConcreteEntity -from ..storage.memory.messageindex import IMessageTextEmbeddingIndex -from .searchlib import create_tag_search_term_group from .query import ( BooleanOp, CompiledSearchTerm, @@ -67,7 +67,7 @@ to_non_required_search_term, to_required_search_term, ) -from ..storage.memory.reltermsindex import resolve_related_terms +from .searchlib import create_tag_search_term_group @dataclass diff --git a/typeagent/knowpro/search_query_schema.py b/typeagent/knowpro/search_query_schema.py index 0ad4b42..4b3af60 100644 --- a/typeagent/knowpro/search_query_schema.py +++ b/typeagent/knowpro/search_query_schema.py @@ -3,13 +3,13 @@ # TODO: Move this file into knowpro. -from pydantic.dataclasses import dataclass -from pydantic import Field from typing import Annotated, Literal + +from pydantic.dataclasses import dataclass from typing_extensions import Doc -from .field_helpers import CamelCaseField from .date_time_schema import DateTimeRange +from .field_helpers import CamelCaseField @dataclass diff --git a/typeagent/knowpro/searchlang.py b/typeagent/knowpro/searchlang.py index f00d80e..da76cfb 100644 --- a/typeagent/knowpro/searchlang.py +++ b/typeagent/knowpro/searchlang.py @@ -1,10 +1,10 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
-from collections.abc import Callable import copy -from dataclasses import dataclass, replace import datetime +from collections.abc import Callable +from dataclasses import dataclass, replace from typing import Literal, TypeGuard, cast import typechat @@ -15,9 +15,7 @@ DateRange, Datetime, IConversation, - KnowledgePropertyName, KnowledgeType, - PropertySearchTerm, SearchSelectExpr, SearchTerm, SearchTermGroup, @@ -25,17 +23,15 @@ Term, WhenFilter, ) -from ..storage.memory.propindex import PropertyNames from ..knowpro.search import ( ConversationSearchResult, SearchOptions, SearchQueryExpr, - has_conversation_result, has_conversation_results, run_search_query, ) from ..knowpro.searchlib import create_property_search_term - +from ..storage.memory.propindex import PropertyNames from .date_time_schema import DateTime, DateTimeRange from .search_query_schema import ( ActionTerm, diff --git a/typeagent/knowpro/searchlib.py b/typeagent/knowpro/searchlib.py index 6764b65..0d6d590 100644 --- a/typeagent/knowpro/searchlib.py +++ b/typeagent/knowpro/searchlib.py @@ -6,8 +6,30 @@ Functions that help with creating search and property terms """ -from typing import cast +import dataclasses +from typing import Any, cast + + +def pydantic_dataclass_to_dict(obj: Any) -> Any: + """Recursively convert dataclass instances (including pydantic dataclasses) to dictionaries.""" + if dataclasses.is_dataclass(obj): + # dataclasses.asdict already recurses into nested dataclasses/lists + data = dataclasses.asdict(obj) + if data: + return data + # Fallback for dataclasses where asdict() returns empty (observed with some pydantic dataclasses) + result: dict[str, object] = {} + for field in dataclasses.fields(obj): + value = getattr(obj, field.name) + result[field.name] = pydantic_dataclass_to_dict(value) + return result + if isinstance(obj, list): + return [pydantic_dataclass_to_dict(item) for item in obj] + if isinstance(obj, dict): + return {key: pydantic_dataclass_to_dict(value) for key, 
value in obj.items()} + return obj +from ..storage.memory.propindex import PropertyNames from .interfaces import ( ISemanticRefCollection, KnowledgePropertyName, @@ -19,7 +41,6 @@ SemanticRef, Term, ) -from ..storage.memory.propindex import PropertyNames def create_search_term( diff --git a/typeagent/knowpro/secindex.py b/typeagent/knowpro/secindex.py index a4972f8..baee18b 100644 --- a/typeagent/knowpro/secindex.py +++ b/typeagent/knowpro/secindex.py @@ -1,27 +1,17 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -from ..aitools.embeddings import AsyncEmbeddingModel -from ..aitools.vectorbase import TextEmbeddingIndexSettings -from .convsettings import ConversationSettings +from ..storage.memory.messageindex import build_message_index +from ..storage.memory.propindex import build_property_index +from ..storage.memory.reltermsindex import build_related_terms_index +from ..storage.memory.timestampindex import build_timestamp_index +from .convsettings import ConversationSettings, RelatedTermIndexSettings from .interfaces import ( IConversation, IConversationSecondaryIndexes, IMessage, IStorageProvider, ITermToSemanticRefIndex, - TextLocation, -) -from ..storage.memory.messageindex import build_message_index -from ..storage.memory.propindex import PropertyIndex, build_property_index -from ..storage.memory.reltermsindex import ( - RelatedTermsIndex, - build_related_terms_index, -) -from .convsettings import RelatedTermIndexSettings -from ..storage.memory.timestampindex import ( - TimestampToTextRangeIndex, - build_timestamp_index, ) diff --git a/typeagent/knowpro/serialization.py b/typeagent/knowpro/serialization.py index d40ab4c..2a377f1 100644 --- a/typeagent/knowpro/serialization.py +++ b/typeagent/knowpro/serialization.py @@ -1,38 +1,31 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
-from dataclasses import is_dataclass, MISSING -from datetime import datetime import functools import json import types +from dataclasses import MISSING, is_dataclass +from datetime import datetime from typing import ( Annotated, Any, - cast, - get_args, - get_origin, Literal, NotRequired, - overload, TypeAliasType, TypedDict, Union, + cast, + get_args, + get_origin, + overload, ) import numpy as np from pydantic.alias_generators import to_camel from ..aitools.embeddings import NormalizedEmbeddings - -from .interfaces import ( - ConversationDataWithIndexes, - SearchTermGroupTypes, - Tag, - Topic, -) from . import kplib - +from .types import ConversationDataWithIndexes, SearchTermGroupTypes # ------------------- # Shared definitions @@ -244,13 +237,19 @@ def get_embeddings_from_binary_data( TYPE_MAP = { "entity": kplib.ConcreteEntity, "action": kplib.Action, - "topic": Topic, - "tag": Tag, } # Looks like this only works for knowledge... def deserialize_knowledge(knowledge_type: str, obj: Any) -> Any: + if knowledge_type == "tag": + from .interfaces import Tag as TagData + + return deserialize_object(TagData, obj) + if knowledge_type == "topic": + from .interfaces import Topic as TopicData + + return deserialize_object(TopicData, obj) typ = TYPE_MAP[knowledge_type] return deserialize_object(typ, obj) @@ -322,6 +321,11 @@ def deserialize_object(typ: Any, obj: Any) -> Any: raise DeserializationError( f"Pydantic validation failed for {typ.__name__}: {e}" ) from e + elif isinstance(typ, type) and hasattr(typ, "__annotations__") and issubclass(typ, dict): + # Handle TypedDict types (Tag, Topic) + if not isinstance(obj, dict): + raise DeserializationError(f"Expected dict for {typ}, got {type(obj)}") + return obj else: # Could be a class that's not a dataclass -- we don't know the signature. 
raise TypeError(f"Unsupported origin-less type {typ}") diff --git a/typeagent/knowpro/textlocindex.py b/typeagent/knowpro/textlocindex.py index 4969a66..a6d9596 100644 --- a/typeagent/knowpro/textlocindex.py +++ b/typeagent/knowpro/textlocindex.py @@ -7,12 +7,8 @@ from ..aitools.embeddings import NormalizedEmbedding from ..aitools.vectorbase import TextEmbeddingIndexSettings - -from .fuzzyindex import ScoredInt, EmbeddingIndex -from .interfaces import ( - TextToTextLocationIndexData, - TextLocation, -) +from .fuzzyindex import EmbeddingIndex, ScoredInt +from .interfaces import TextLocation, TextToTextLocationIndexData @dataclass diff --git a/typeagent/knowpro/types.py b/typeagent/knowpro/types.py new file mode 100644 index 0000000..855f774 --- /dev/null +++ b/typeagent/knowpro/types.py @@ -0,0 +1,29 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +""" +Shared type definitions for knowpro modules to avoid circular imports. +""" + +from typing import Any, Generic, Literal, TypedDict, TypeVar + +# --- Shared TypedDicts and type aliases --- + +TMessageData = TypeVar("TMessageData") + +class ConversationDataWithIndexes(TypedDict, Generic[TMessageData]): + messages: list[TMessageData] + relatedTermsIndexData: dict[str, Any] | None + messageIndexData: dict[str, Any] | None + # Add other fields as needed + +SearchTermGroupTypes = Any + +class Tag(TypedDict): + knowledge_type: Literal["tag"] + text: str + +class Topic(TypedDict): + knowledge_type: Literal["topic"] + text: str + +# Add any other shared types here as needed diff --git a/typeagent/mcp/server.py b/typeagent/mcp/server.py index f452325..bc46840 100644 --- a/typeagent/mcp/server.py +++ b/typeagent/mcp/server.py @@ -5,23 +5,23 @@ import argparse -from dataclasses import dataclass import time +from dataclasses import dataclass from typing import Any import coverage +import typechat from mcp.server.fastmcp import Context, FastMCP from mcp.server.session import ServerSession from 
mcp.types import SamplingMessage, TextContent -import typechat # Enable coverage.py before local imports (a no-op unless COVERAGE_PROCESS_START is set). coverage.process_startup() from typeagent.aitools import embeddings, utils from typeagent.knowpro import answers, query, searchlang -from typeagent.knowpro.convsettings import ConversationSettings from typeagent.knowpro.answer_response_schema import AnswerResponse +from typeagent.knowpro.convsettings import ConversationSettings from typeagent.knowpro.search_query_schema import SearchQuery from typeagent.podcasts import podcast from typeagent.storage.memory.semrefindex import TermToSemanticRefIndex diff --git a/typeagent/podcasts/podcast.py b/typeagent/podcasts/podcast.py index 1cce006..3d3b218 100644 --- a/typeagent/podcasts/podcast.py +++ b/typeagent/podcasts/podcast.py @@ -1,35 +1,21 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -from dataclasses import dataclass import json import os -from typing import TypedDict, cast, Any +from dataclasses import dataclass +from typing import Any, TypedDict import numpy as np -from pydantic.dataclasses import dataclass as pydantic_dataclass -from pydantic import Field, AliasChoices from ..aitools.embeddings import NormalizedEmbeddings -from ..storage.memory import semrefindex -from ..knowpro import kplib, secindex +from ..knowpro import secindex, serialization from ..knowpro.conversation_base import ConversationBase -from ..knowpro.field_helpers import CamelCaseField -from ..storage.memory.convthreads import ConversationThreads from ..knowpro.convsettings import ConversationSettings -from ..knowpro.interfaces import ( - ConversationDataWithIndexes, - SemanticRef, - Term, -) +from ..knowpro.interfaces import ConversationDataWithIndexes, SemanticRef, Term +from ..knowpro.universal_message import ConversationMessage, ConversationMessageMeta +from ..storage.memory.convthreads import ConversationThreads from ..storage.memory.messageindex import 
MessageTextIndex -from ..storage.memory.reltermsindex import TermToRelatedTermsMap -from ..storage.utils import create_storage_provider -from ..knowpro import serialization -from ..knowpro.universal_message import ( - ConversationMessage, - ConversationMessageMeta, -) # Type aliases for backward compatibility PodcastMessage = ConversationMessage diff --git a/typeagent/podcasts/podcast_ingest.py b/typeagent/podcasts/podcast_ingest.py index b70bae4..f04b001 100644 --- a/typeagent/podcasts/podcast_ingest.py +++ b/typeagent/podcasts/podcast_ingest.py @@ -8,10 +8,7 @@ from ..knowpro.convsettings import ConversationSettings from ..knowpro.interfaces import Datetime -from ..knowpro.universal_message import ( - UNIX_EPOCH, - format_timestamp_utc, -) +from ..knowpro.universal_message import UNIX_EPOCH, format_timestamp_utc from ..storage.utils import create_storage_provider from .podcast import Podcast, PodcastMessage, PodcastMessageMeta diff --git a/typeagent/storage/__init__.py b/typeagent/storage/__init__.py index 9f74168..77eda8b 100644 --- a/typeagent/storage/__init__.py +++ b/typeagent/storage/__init__.py @@ -5,14 +5,14 @@ # Import from new organized structure from .memory import ( - MemoryStorageProvider, - MemoryMessageCollection, - MemorySemanticRefCollection, + MemoryMessageCollection, + MemorySemanticRefCollection, + MemoryStorageProvider, ) from .sqlite import ( - SqliteStorageProvider, - SqliteMessageCollection, - SqliteSemanticRefCollection, + SqliteMessageCollection, + SqliteSemanticRefCollection, + SqliteStorageProvider, ) __all__ = [ diff --git a/typeagent/storage/memory/collections.py b/typeagent/storage/memory/collections.py index 63bb2d3..9973a29 100644 --- a/typeagent/storage/memory/collections.py +++ b/typeagent/storage/memory/collections.py @@ -4,11 +4,10 @@ """Memory-based collection implementations.""" from typing import Iterable + from ...knowpro.interfaces import ( ICollection, IMessage, - ISemanticRefCollection, - IMessageCollection, 
MessageOrdinal, SemanticRef, SemanticRefOrdinal, diff --git a/typeagent/storage/memory/convthreads.py b/typeagent/storage/memory/convthreads.py index b170939..634bfa1 100644 --- a/typeagent/storage/memory/convthreads.py +++ b/typeagent/storage/memory/convthreads.py @@ -1,14 +1,14 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. +from ...aitools.vectorbase import TextEmbeddingIndexSettings, VectorBase from ...knowpro.interfaces import ( ConversationThreadData, IConversationThreads, - ThreadDataItem, ScoredThreadOrdinal, Thread, + ThreadDataItem, ) -from ...aitools.vectorbase import TextEmbeddingIndexSettings, VectorBase class ConversationThreads(IConversationThreads): diff --git a/typeagent/storage/memory/messageindex.py b/typeagent/storage/memory/messageindex.py index b68ec99..a56da9e 100644 --- a/typeagent/storage/memory/messageindex.py +++ b/typeagent/storage/memory/messageindex.py @@ -2,19 +2,17 @@ # Licensed under the MIT License. from collections.abc import Callable, Iterable -from dataclasses import dataclass from ...aitools.embeddings import NormalizedEmbedding -from ...aitools.vectorbase import TextEmbeddingIndexSettings from ...knowpro.convsettings import MessageTextIndexSettings from ...knowpro.interfaces import ( IConversation, IMessage, IMessageTextIndex, IStorageProvider, - MessageTextIndexData, ITermToSemanticRefIndex, MessageOrdinal, + MessageTextIndexData, ScoredMessageOrdinal, TextLocation, ) diff --git a/typeagent/storage/memory/propindex.py b/typeagent/storage/memory/propindex.py index 0d7709a..55f7215 100644 --- a/typeagent/storage/memory/propindex.py +++ b/typeagent/storage/memory/propindex.py @@ -2,8 +2,9 @@ # Licensed under the MIT License. 
import enum -from typing import assert_never +from typing import Any, assert_never +from ...knowpro import kplib from ...knowpro.collections import TextRangesInScope from ...knowpro.interfaces import ( IConversation, @@ -14,7 +15,6 @@ Tag, Topic, ) -from ...knowpro import kplib class PropertyNames(enum.Enum): @@ -132,23 +132,61 @@ async def add_to_property_index( start_at_ordinal, ): assert semantic_ref.semantic_ref_ordinal == semantic_ref_ordinal - if isinstance(semantic_ref.knowledge, kplib.Action): + knowledge = semantic_ref.knowledge + if isinstance(knowledge, kplib.Action): await add_action_properties_to_index( - semantic_ref.knowledge, property_index, semantic_ref_ordinal + knowledge, property_index, semantic_ref_ordinal ) - elif isinstance(semantic_ref.knowledge, kplib.ConcreteEntity): + elif isinstance(knowledge, kplib.ConcreteEntity): await add_entity_properties_to_index( - semantic_ref.knowledge, property_index, semantic_ref_ordinal + knowledge, property_index, semantic_ref_ordinal ) - elif isinstance(semantic_ref.knowledge, Tag): - tag = semantic_ref.knowledge + elif isinstance(knowledge, Tag): await property_index.add_property( - PropertyNames.Tag.value, tag.text, semantic_ref_ordinal + PropertyNames.Tag.value, knowledge.text, semantic_ref_ordinal ) - elif isinstance(semantic_ref.knowledge, Topic): - pass else: - assert_never(semantic_ref.knowledge) + knowledge_type = _get_knowledge_type(knowledge) + if knowledge_type == "tag": + tag_text = _get_knowledge_field(knowledge, "text") + if tag_text is not None: + await property_index.add_property( + PropertyNames.Tag.value, + tag_text, + semantic_ref_ordinal, + ) + elif knowledge_type == "topic" or isinstance(knowledge, Topic): + # Topics do not populate the property index currently. 
+ continue + else: + assert_never(knowledge) + + +def _get_knowledge_type(knowledge: Any) -> str | None: + """Extract the knowledge type from a deserialized knowledge object.""" + if isinstance(knowledge, dict): + value = knowledge.get("knowledgeType") + if value is None: + value = knowledge.get("knowledge_type") + if value is not None: + return str(value) + return None + return getattr(knowledge, "knowledge_type", None) + + +def _get_knowledge_field(knowledge: Any, field_name: str) -> str | None: + """Return the requested field from a knowledge object, handling dicts and dataclasses.""" + if isinstance(knowledge, dict): + value = knowledge.get(field_name) + if value is None and field_name == "text": + value = knowledge.get("text") + if value is None: + return None + return str(value) + value = getattr(knowledge, field_name, None) + if value is None: + return None + return str(value) class PropertyIndex(IPropertyToSemanticRefIndex): diff --git a/typeagent/storage/memory/provider.py b/typeagent/storage/memory/provider.py index b97199b..9682e56 100644 --- a/typeagent/storage/memory/provider.py +++ b/typeagent/storage/memory/provider.py @@ -6,15 +6,6 @@ from datetime import datetime -from ...knowpro import interfaces - -from .collections import MemoryMessageCollection, MemorySemanticRefCollection -from .semrefindex import TermToSemanticRefIndex -from .convthreads import ConversationThreads -from .messageindex import MessageTextIndex -from .reltermsindex import RelatedTermsIndex -from .propindex import PropertyIndex -from .timestampindex import TimestampToTextRangeIndex from ...knowpro.convsettings import MessageTextIndexSettings, RelatedTermIndexSettings from ...knowpro.interfaces import ( ConversationMetadata, @@ -27,6 +18,13 @@ ITermToSemanticRefIndex, ITimestampToTextRangeIndex, ) +from .collections import MemoryMessageCollection, MemorySemanticRefCollection +from .convthreads import ConversationThreads +from .messageindex import MessageTextIndex +from .propindex 
import PropertyIndex +from .reltermsindex import RelatedTermsIndex +from .semrefindex import TermToSemanticRefIndex +from .timestampindex import TimestampToTextRangeIndex class MemoryStorageProvider[TMessage: IMessage](IStorageProvider[TMessage]): @@ -72,7 +70,6 @@ async def __aexit__( exc_tb: object, ) -> None: """Exit transaction context. No-op for in-memory storage.""" - pass async def get_semantic_ref_index(self) -> ITermToSemanticRefIndex: return self._conversation_index @@ -102,7 +99,6 @@ async def get_semantic_ref_collection(self) -> MemorySemanticRefCollection: async def close(self) -> None: """Close the storage provider.""" - pass def get_conversation_metadata(self) -> ConversationMetadata: """Get conversation metadata. @@ -121,7 +117,6 @@ def set_conversation_metadata(self, **kwds: str | list[str] | None) -> None: Args: **kwds: Metadata keys and values (ignored) """ - pass def update_conversation_timestamps( self, @@ -137,4 +132,3 @@ def update_conversation_timestamps( created_at: Optional creation timestamp (ignored) updated_at: Optional last updated timestamp (ignored) """ - pass diff --git a/typeagent/storage/memory/reltermsindex.py b/typeagent/storage/memory/reltermsindex.py index dc884a3..e58c7b5 100644 --- a/typeagent/storage/memory/reltermsindex.py +++ b/typeagent/storage/memory/reltermsindex.py @@ -2,15 +2,13 @@ # Licensed under the MIT License. 
from collections.abc import Callable -from dataclasses import dataclass -from typing import Protocol, TYPE_CHECKING +from typing import TYPE_CHECKING, Protocol from typeagent.aitools.vectorbase import ( ScoredInt, TextEmbeddingIndexSettings, VectorBase, ) - from typeagent.knowpro.collections import TermSet from typeagent.knowpro.common import is_search_term_wildcard from typeagent.knowpro.convsettings import RelatedTermIndexSettings @@ -23,9 +21,9 @@ ITermToSemanticRefIndex, SearchTerm, Term, - TermToRelatedTermsData, TermsToRelatedTermsDataItem, TermsToRelatedTermsIndexData, + TermToRelatedTermsData, TextEmbeddingIndexData, ) diff --git a/typeagent/storage/memory/semrefindex.py b/typeagent/storage/memory/semrefindex.py index 4aef1da..3aa650a 100644 --- a/typeagent/storage/memory/semrefindex.py +++ b/typeagent/storage/memory/semrefindex.py @@ -8,33 +8,30 @@ from typechat import Failure from ...knowpro import convknowledge, kplib, secindex -from ...knowpro.convsettings import ConversationSettings -from ...knowpro.convsettings import SemanticRefIndexSettings -from ...knowpro.interfaces import ( - # Interfaces. +from ...knowpro.convsettings import ConversationSettings, SemanticRefIndexSettings +from ...knowpro.interfaces import ( # Interfaces.; Other imports. IConversation, IKnowledgeExtractor, IMessage, ISemanticRefCollection, ITermToSemanticRefIndex, - # Other imports. 
Knowledge, KnowledgeType, MessageOrdinal, - SemanticRefOrdinal, ScoredSemanticRefOrdinal, SemanticRef, - TermToSemanticRefIndexItemData, + SemanticRefOrdinal, TermToSemanticRefIndexData, + TermToSemanticRefIndexItemData, TextLocation, TextRange, Topic, ) +from ...knowpro.knowledge import extract_knowledge_from_text_batch from ...knowpro.messageutils import ( get_message_chunk_batch, text_range_from_message_chunk, ) -from ...knowpro.knowledge import extract_knowledge_from_text_batch def text_range_from_location( diff --git a/typeagent/storage/sqlite/__init__.py b/typeagent/storage/sqlite/__init__.py index e0b151a..e0a9d60 100644 --- a/typeagent/storage/sqlite/__init__.py +++ b/typeagent/storage/sqlite/__init__.py @@ -8,10 +8,7 @@ from .propindex import SqlitePropertyIndex from .provider import SqliteStorageProvider from .reltermsindex import SqliteRelatedTermsIndex -from .schema import ( - init_db_schema, - get_db_schema_version, -) +from .schema import get_db_schema_version, init_db_schema from .semrefindex import SqliteTermToSemanticRefIndex from .timestampindex import SqliteTimestampToTextRangeIndex diff --git a/typeagent/storage/sqlite/collections.py b/typeagent/storage/sqlite/collections.py index 582d026..9688e1f 100644 --- a/typeagent/storage/sqlite/collections.py +++ b/typeagent/storage/sqlite/collections.py @@ -7,9 +7,8 @@ import sqlite3 import typing +from ...knowpro import interfaces, serialization from .schema import ShreddedMessage, ShreddedSemanticRef -from ...knowpro import interfaces -from ...knowpro import serialization class SqliteMessageCollection[TMessage: interfaces.IMessage]( diff --git a/typeagent/storage/sqlite/messageindex.py b/typeagent/storage/sqlite/messageindex.py index b27af67..5649d6d 100644 --- a/typeagent/storage/sqlite/messageindex.py +++ b/typeagent/storage/sqlite/messageindex.py @@ -3,7 +3,6 @@ """SQLite-based message text index implementation.""" -import json import sqlite3 import typing @@ -11,14 +10,12 @@ from 
...aitools.embeddings import NormalizedEmbedding from ...aitools.vectorbase import ScoredInt, VectorBase - -from ...knowpro.convsettings import MessageTextIndexSettings from ...knowpro import interfaces +from ...knowpro.convsettings import MessageTextIndexSettings from ...knowpro.interfaces import TextLocationData, TextToTextLocationIndexData from ...knowpro.textlocindex import ScoredTextLocation - from ...storage.memory.messageindex import IMessageTextEmbeddingIndex - +from ..sqlite.schema import deserialize_embedding from .schema import deserialize_embedding, serialize_embedding @@ -308,8 +305,6 @@ async def serialize(self) -> interfaces.MessageTextIndexData: text_locations = [] embeddings_list = [] - from ..sqlite.schema import deserialize_embedding - for msg_id, chunk_ordinal, embedding_blob in cursor.fetchall(): # Create text location data text_location = TextLocationData( diff --git a/typeagent/storage/sqlite/propindex.py b/typeagent/storage/sqlite/propindex.py index 5a0fa63..d28d8a8 100644 --- a/typeagent/storage/sqlite/propindex.py +++ b/typeagent/storage/sqlite/propindex.py @@ -7,6 +7,10 @@ from ...knowpro import interfaces from ...knowpro.interfaces import ScoredSemanticRefOrdinal +from ...storage.memory.propindex import ( + make_property_term_text, + split_property_term_text, +) class SqlitePropertyIndex(interfaces.IPropertyToSemanticRefIndex): @@ -46,11 +50,6 @@ async def add_property( score = 1.0 # Normalize property name and value (to match in-memory implementation) - from ...storage.memory.propindex import ( - make_property_term_text, - split_property_term_text, - ) - term_text = make_property_term_text(property_name, value) term_text = term_text.lower() # Matches PropertyIndex._prepare_term_text property_name, value = split_property_term_text(term_text) @@ -77,11 +76,6 @@ async def lookup_property( value: str, ) -> list[interfaces.ScoredSemanticRefOrdinal] | None: # Normalize property name and value (to match in-memory implementation) - from 
...storage.memory.propindex import ( - make_property_term_text, - split_property_term_text, - ) - term_text = make_property_term_text(property_name, value) term_text = term_text.lower() # Matches PropertyIndex._prepare_term_text property_name, value = split_property_term_text(term_text) diff --git a/typeagent/storage/sqlite/provider.py b/typeagent/storage/sqlite/provider.py index ea54772..ab332e8 100644 --- a/typeagent/storage/sqlite/provider.py +++ b/typeagent/storage/sqlite/provider.py @@ -11,26 +11,21 @@ from ...knowpro import interfaces from ...knowpro.convsettings import MessageTextIndexSettings, RelatedTermIndexSettings from ...knowpro.interfaces import ConversationMetadata +from ...knowpro.universal_message import format_timestamp_utc +from ...storage.memory.convthreads import ConversationThreads from .collections import SqliteMessageCollection, SqliteSemanticRefCollection from .messageindex import SqliteMessageTextIndex from .propindex import SqlitePropertyIndex from .reltermsindex import SqliteRelatedTermsIndex -from .semrefindex import SqliteTermToSemanticRefIndex -from .timestampindex import SqliteTimestampToTextRangeIndex from .schema import ( - CONVERSATIONS_SCHEMA, CONVERSATION_SCHEMA_VERSION, - MESSAGE_TEXT_INDEX_SCHEMA, - MESSAGES_SCHEMA, - PROPERTY_INDEX_SCHEMA, - RELATED_TERMS_ALIASES_SCHEMA, - RELATED_TERMS_FUZZY_SCHEMA, - SEMANTIC_REF_INDEX_SCHEMA, - SEMANTIC_REFS_SCHEMA, + _set_conversation_metadata, + deserialize_embedding, get_db_schema_version, init_db_schema, - _set_conversation_metadata, ) +from .semrefindex import SqliteTermToSemanticRefIndex +from .timestampindex import SqliteTimestampToTextRangeIndex class SqliteStorageProvider[TMessage: interfaces.IMessage]( @@ -215,7 +210,6 @@ def _check_embedding_consistency(self) -> None: Raises: ValueError: If embeddings in the database don't match the expected size. 
""" - from .schema import deserialize_embedding cursor = self.db.cursor() expected_size = ( @@ -259,7 +253,6 @@ def _init_conversation_metadata_if_needed(self) -> None: when the first actual write operation (e.g., adding messages) commits. This ensures we don't create empty databases with only metadata. """ - from ...knowpro.universal_message import format_timestamp_utc current_time = datetime.now(timezone.utc) cursor = self.db.cursor() @@ -414,8 +407,6 @@ async def get_conversation_threads(self) -> interfaces.IConversationThreads: """Get the conversation threads.""" # For now, return a simple implementation # In a full implementation, this would be stored/retrieved from SQLite - from ...storage.memory.convthreads import ConversationThreads - return ConversationThreads( self.message_text_index_settings.embedding_index_settings ) @@ -578,8 +569,6 @@ def update_conversation_timestamps( created_at: Optional creation timestamp updated_at: Optional last updated timestamp """ - from ...knowpro.universal_message import format_timestamp_utc - # Check if any metadata exists cursor = self.db.cursor() cursor.execute("SELECT 1 FROM ConversationMetadata LIMIT 1") diff --git a/typeagent/storage/sqlite/reltermsindex.py b/typeagent/storage/sqlite/reltermsindex.py index 92d740e..6e8f937 100644 --- a/typeagent/storage/sqlite/reltermsindex.py +++ b/typeagent/storage/sqlite/reltermsindex.py @@ -5,11 +5,10 @@ import sqlite3 -from ...aitools.embeddings import AsyncEmbeddingModel, NormalizedEmbeddings +from ...aitools.embeddings import NormalizedEmbeddings from ...aitools.vectorbase import TextEmbeddingIndexSettings, VectorBase from ...knowpro import interfaces - -from .schema import serialize_embedding, deserialize_embedding +from .schema import deserialize_embedding, serialize_embedding class SqliteRelatedTermsAliases(interfaces.ITermToRelatedTerms): @@ -268,8 +267,6 @@ async def deserialize(self, data: interfaces.TextEmbeddingIndexData) -> None: 
self._vector_base.deserialize(embeddings_data) # Prepare all insertion data for bulk operation - from .schema import serialize_embedding - insertion_data = [] for i, text in enumerate(text_items): if i < len(self._vector_base): diff --git a/typeagent/storage/sqlite/schema.py b/typeagent/storage/sqlite/schema.py index 37f1db1..06c84c8 100644 --- a/typeagent/storage/sqlite/schema.py +++ b/typeagent/storage/sqlite/schema.py @@ -4,8 +4,9 @@ """SQLite database schema definitions.""" import sqlite3 -from datetime import datetime, timezone import typing +from datetime import datetime, timezone + import numpy as np from ...aitools.embeddings import NormalizedEmbedding diff --git a/typeagent/storage/sqlite/timestampindex.py b/typeagent/storage/sqlite/timestampindex.py index 0764549..2cda3ee 100644 --- a/typeagent/storage/sqlite/timestampindex.py +++ b/typeagent/storage/sqlite/timestampindex.py @@ -6,6 +6,7 @@ import sqlite3 from ...knowpro import interfaces +from ...knowpro.interfaces import TextLocation, TextRange from ...knowpro.universal_message import format_timestamp_utc @@ -73,8 +74,6 @@ async def get_timestamp_ranges( results = [] for msg_id, timestamp in cursor.fetchall(): # Create text range for message - from ...knowpro.interfaces import TextLocation, TextRange - text_range = TextRange( start=TextLocation(message_ordinal=msg_id, chunk_ordinal=0) ) diff --git a/typeagent/storage/utils.py b/typeagent/storage/utils.py index a1ea010..3339fbf 100644 --- a/typeagent/storage/utils.py +++ b/typeagent/storage/utils.py @@ -7,8 +7,10 @@ without circular import issues. 
""" -from ..knowpro.interfaces import IMessage, IStorageProvider, ConversationMetadata from ..knowpro.convsettings import MessageTextIndexSettings, RelatedTermIndexSettings +from ..knowpro.interfaces import ConversationMetadata, IMessage, IStorageProvider +from .memory import MemoryStorageProvider +from .sqlite import SqliteStorageProvider async def create_storage_provider[TMessage: IMessage]( @@ -23,14 +25,10 @@ async def create_storage_provider[TMessage: IMessage]( MemoryStorageProvider if dbname is None, SqliteStorageProvider otherwise. """ if dbname is None: - from .memory import MemoryStorageProvider - return MemoryStorageProvider( message_text_settings, related_terms_settings, metadata=metadata ) else: - from .sqlite import SqliteStorageProvider - if message_type is None: raise ValueError("Message type must be specified for SQLite storage") diff --git a/typeagent/transcripts/transcript.py b/typeagent/transcripts/transcript.py index 0c5eda1..c1b28fe 100644 --- a/typeagent/transcripts/transcript.py +++ b/typeagent/transcripts/transcript.py @@ -1,55 +1,20 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
-from dataclasses import dataclass import json -import os -from typing import TypedDict, cast, Any +from dataclasses import dataclass +from typing import Any, TypedDict import numpy as np -from pydantic.dataclasses import dataclass as pydantic_dataclass -from pydantic import Field, AliasChoices from ..aitools.embeddings import NormalizedEmbeddings -from ..storage.memory import semrefindex -from ..knowpro import kplib, secindex, convknowledge +from ..knowpro import secindex, serialization from ..knowpro.conversation_base import ConversationBase -from ..knowpro.field_helpers import CamelCaseField -from ..storage.memory.convthreads import ConversationThreads from ..knowpro.convsettings import ConversationSettings -from ..knowpro.interfaces import ( - ConversationDataWithIndexes, - Datetime, - ICollection, - IConversation, - IConversationSecondaryIndexes, - IKnowledgeSource, - IMessage, - IMessageCollection, - IMessageMetadata, - ISemanticRefCollection, - IStorageProvider, - ITermToSemanticRefIndex, - MessageOrdinal, - SemanticRef, - Term, - Timedelta, - Topic, - AddMessagesResult, - IndexingStartPoints, -) +from ..knowpro.interfaces import ConversationDataWithIndexes, SemanticRef, Term +from ..knowpro.universal_message import ConversationMessage, ConversationMessageMeta +from ..storage.memory.convthreads import ConversationThreads from ..storage.memory.messageindex import MessageTextIndex -from ..storage.memory.reltermsindex import TermToRelatedTermsMap -from ..storage.utils import create_storage_provider -from ..knowpro import serialization -from ..storage.memory.collections import ( - MemoryMessageCollection, - MemorySemanticRefCollection, -) -from ..knowpro.universal_message import ( - ConversationMessage, - ConversationMessageMeta, -) # Type aliases for backward compatibility TranscriptMessage = ConversationMessage diff --git a/uv.lock b/uv.lock index 477717f..cf990eb 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 3 +revision = 2 
requires-python = ">=3.12" resolution-markers = [ "python_full_version >= '3.13'", @@ -1778,7 +1778,7 @@ wheels = [ [[package]] name = "typeagent" -version = "0.3.2" +version = "0.3.3" source = { editable = "." } dependencies = [ { name = "azure-identity" }, From afa55d21789020dbc496203239bca3eb026e4b8c Mon Sep 17 00:00:00 2001 From: Bernhard Merkle Date: Thu, 4 Dec 2025 14:17:54 +0100 Subject: [PATCH 4/4] Refactor imports to improve organization and reduce circular dependencies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added dataclasses.py to wrap pydantic’s decorator with dataclass_transform + overloads so pyright understands generated initializers and fields; updated all knowpro modules, tests, and related utilities to import the wrapper instead of pydantic.dataclasses.dataclass. Adjusted dependent modules (interfaces.py, kplib.py, search.py, search_query_schema.py, date_time_schema.py, answer_response_schema.py, universal_message.py, email_message.py, and several tests) to use the new wrapper while preserving import ordering and existing behavior. 
All other changes are import- and formatting-related modifications (which should be OK). make format, check, and test all run clean. --- test/test_conversation_metadata.py | 3 +- test/test_mcp_server.py | 15 ++++++--- test/test_searchlib.py | 32 +++++++++++++----- test/test_sqlitestore.py | 3 +- test/test_storage_providers_unified.py | 3 +- test/test_utils.py | 7 +++- typeagent/aitools/utils.py | 1 - typeagent/emails/email_message.py | 3 +- typeagent/knowpro/answer_response_schema.py | 3 +- typeagent/knowpro/conversation_base.py | 1 - typeagent/knowpro/convsettings.py | 1 + typeagent/knowpro/dataclasses.py | 37 +++++++++++++++++++++ typeagent/knowpro/date_time_schema.py | 3 +- typeagent/knowpro/interfaces.py | 4 +-- typeagent/knowpro/kplib.py | 3 +- typeagent/knowpro/search.py | 3 +- typeagent/knowpro/search_query_schema.py | 3 +- typeagent/knowpro/searchlib.py | 3 +- typeagent/knowpro/serialization.py | 6 +++- typeagent/knowpro/types.py | 36 +++++++++++--------- typeagent/knowpro/universal_message.py | 3 +- typeagent/storage/__init__.py | 12 +++---- 22 files changed, 133 insertions(+), 52 deletions(-) create mode 100644 typeagent/knowpro/dataclasses.py diff --git a/test/test_conversation_metadata.py b/test/test_conversation_metadata.py index be45912..e8816f3 100644 --- a/test/test_conversation_metadata.py +++ b/test/test_conversation_metadata.py @@ -15,7 +15,8 @@ import pytest import pytest_asyncio from fixtures import embedding_model, temp_db_path -from pydantic.dataclasses import dataclass + +from typeagent.knowpro.dataclasses import dataclass from typeagent.aitools.embeddings import TEST_MODEL_NAME, AsyncEmbeddingModel from typeagent.aitools.vectorbase import TextEmbeddingIndexSettings diff --git a/test/test_mcp_server.py b/test/test_mcp_server.py index 314d81c..97b9240 100644 --- a/test/test_mcp_server.py +++ b/test/test_mcp_server.py @@ -6,7 +6,7 @@ import json import os import sys -from typing import Any, TypeAlias +from typing import Any, TYPE_CHECKING import
pytest from fixtures import really_needs_auth @@ -18,13 +18,18 @@ from typeagent.aitools.utils import create_async_openai_client -try: +if TYPE_CHECKING: from openai.types.chat import ChatCompletionMessageParam -except ImportError: # pragma: no cover - optional dependency - ChatCompletionMessageParam: TypeAlias = dict[str, Any] +else: # pragma: no cover - optional dependency + try: + from openai.types.chat import ChatCompletionMessageParam + except ImportError: + ChatCompletionMessageParam = dict[str, Any] # type: ignore[assignment] -pytestmark = pytest.mark.skip(reason="mcp server tests require interactive dependencies; skipping for now") +pytestmark = pytest.mark.skip( + reason="mcp server tests require interactive dependencies; skipping for now" +) @pytest.fixture diff --git a/test/test_searchlib.py b/test/test_searchlib.py index a9e05c8..b4bcb01 100644 --- a/test/test_searchlib.py +++ b/test/test_searchlib.py @@ -113,8 +113,12 @@ def test_create_and_term_group(self): assert group.boolean_op == "and" assert len(group.terms) == 2 - assert pydantic_dataclass_to_dict(group.terms[0]) == pydantic_dataclass_to_dict(term1) - assert pydantic_dataclass_to_dict(group.terms[1]) == pydantic_dataclass_to_dict(term2) + assert pydantic_dataclass_to_dict(group.terms[0]) == pydantic_dataclass_to_dict( + term1 + ) + assert pydantic_dataclass_to_dict(group.terms[1]) == pydantic_dataclass_to_dict( + term2 + ) def test_create_or_term_group(self): """Test creating an OR term group.""" @@ -124,8 +128,12 @@ def test_create_or_term_group(self): assert group.boolean_op == "or" assert len(group.terms) == 2 - assert pydantic_dataclass_to_dict(group.terms[0]) == pydantic_dataclass_to_dict(term1) - assert pydantic_dataclass_to_dict(group.terms[1]) == pydantic_dataclass_to_dict(term2) + assert pydantic_dataclass_to_dict(group.terms[0]) == pydantic_dataclass_to_dict( + term1 + ) + assert pydantic_dataclass_to_dict(group.terms[1]) == pydantic_dataclass_to_dict( + term2 + ) def 
test_create_or_max_term_group(self): """Test creating an OR_MAX term group.""" @@ -135,8 +143,12 @@ def test_create_or_max_term_group(self): assert group.boolean_op == "or_max" assert len(group.terms) == 2 - assert pydantic_dataclass_to_dict(group.terms[0]) == pydantic_dataclass_to_dict(term1) - assert pydantic_dataclass_to_dict(group.terms[1]) == pydantic_dataclass_to_dict(term2) + assert pydantic_dataclass_to_dict(group.terms[0]) == pydantic_dataclass_to_dict( + term1 + ) + assert pydantic_dataclass_to_dict(group.terms[1]) == pydantic_dataclass_to_dict( + term2 + ) def test_empty_term_groups(self): """Test creating empty term groups.""" @@ -162,8 +174,12 @@ def test_nested_term_groups(self): assert outer_group.boolean_op == "and" assert len(outer_group.terms) == 2 - assert pydantic_dataclass_to_dict(outer_group.terms[0]) == pydantic_dataclass_to_dict(inner_group) - assert pydantic_dataclass_to_dict(outer_group.terms[1]) == pydantic_dataclass_to_dict(term3) + assert pydantic_dataclass_to_dict( + outer_group.terms[0] + ) == pydantic_dataclass_to_dict(inner_group) + assert pydantic_dataclass_to_dict( + outer_group.terms[1] + ) == pydantic_dataclass_to_dict(term3) class TestCreateSearchTerms: diff --git a/test/test_sqlitestore.py b/test/test_sqlitestore.py index 7e95ace..717c37c 100644 --- a/test/test_sqlitestore.py +++ b/test/test_sqlitestore.py @@ -11,7 +11,8 @@ import pytest import pytest_asyncio from fixtures import FakeMessage, embedding_model, temp_db_path -from pydantic.dataclasses import dataclass + +from typeagent.knowpro.dataclasses import dataclass from typeagent.aitools.embeddings import AsyncEmbeddingModel from typeagent.aitools.vectorbase import TextEmbeddingIndexSettings diff --git a/test/test_storage_providers_unified.py b/test/test_storage_providers_unified.py index c90066a..840d8d6 100644 --- a/test/test_storage_providers_unified.py +++ b/test/test_storage_providers_unified.py @@ -16,7 +16,8 @@ import pytest import pytest_asyncio from fixtures 
import embedding_model, needs_auth, temp_db_path -from pydantic.dataclasses import dataclass + +from typeagent.knowpro.dataclasses import dataclass from typeagent.aitools.embeddings import AsyncEmbeddingModel from typeagent.aitools.vectorbase import TextEmbeddingIndexSettings diff --git a/test/test_utils.py b/test/test_utils.py index 0c913d0..d740e21 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -7,11 +7,16 @@ import pydantic.dataclasses import pytest +from typing import TYPE_CHECKING + from fixtures import really_needs_auth import typeagent.aitools.utils as utils -typechat = pytest.importorskip("typechat") +if TYPE_CHECKING: + import typechat +else: + typechat = pytest.importorskip("typechat") def test_timelog(): diff --git a/typeagent/aitools/utils.py b/typeagent/aitools/utils.py index 9ddc7c8..36d4db7 100644 --- a/typeagent/aitools/utils.py +++ b/typeagent/aitools/utils.py @@ -159,7 +159,6 @@ def fmt(row, seg_widths): def setup_logfire(): """Configure logfire for pydantic_ai and httpx.""" - def scrubbing_callback(m: logfire.ScrubMatch): """Instructions: Uncomment any block where you deem it safe to not scrub.""" # if m.path == ('attributes', 'http.request.header.authorization'): diff --git a/typeagent/emails/email_message.py b/typeagent/emails/email_message.py index 6dbccaa..334d74b 100644 --- a/typeagent/emails/email_message.py +++ b/typeagent/emails/email_message.py @@ -5,7 +5,8 @@ from typing import Any from pydantic import Field -from pydantic.dataclasses import dataclass as pydantic_dataclass + +from ..knowpro.dataclasses import dataclass as pydantic_dataclass from ..knowpro import kplib from ..knowpro.field_helpers import CamelCaseField diff --git a/typeagent/knowpro/answer_response_schema.py b/typeagent/knowpro/answer_response_schema.py index 6e11ec9..7e55ae6 100644 --- a/typeagent/knowpro/answer_response_schema.py +++ b/typeagent/knowpro/answer_response_schema.py @@ -3,9 +3,10 @@ from typing import Annotated, Literal -from 
pydantic.dataclasses import dataclass from typing_extensions import Doc +from .dataclasses import dataclass + AnswerType = Literal[ "NoAnswer", # If question cannot be accurately answered from [ANSWER CONTEXT] "Answered", # Fully answer question diff --git a/typeagent/knowpro/conversation_base.py b/typeagent/knowpro/conversation_base.py index ca33789..019918d 100644 --- a/typeagent/knowpro/conversation_base.py +++ b/typeagent/knowpro/conversation_base.py @@ -221,7 +221,6 @@ async def _update_secondary_indexes_incremental( if self.secondary_indexes is None: return - await propindex.add_to_property_index(self, start_points.semref_count) new_messages = await self.messages.get_slice( diff --git a/typeagent/knowpro/convsettings.py b/typeagent/knowpro/convsettings.py index bebdc6d..10545d9 100644 --- a/typeagent/knowpro/convsettings.py +++ b/typeagent/knowpro/convsettings.py @@ -83,6 +83,7 @@ async def get_storage_provider(self) -> IStorageProvider: if self._storage_provider is None: # Import here to avoid circular import from ..storage.memory import MemoryStorageProvider + self._storage_provider = MemoryStorageProvider( message_text_settings=self.message_text_index_settings, related_terms_settings=self.related_term_index_settings, diff --git a/typeagent/knowpro/dataclasses.py b/typeagent/knowpro/dataclasses.py new file mode 100644 index 0000000..9a86ddd --- /dev/null +++ b/typeagent/knowpro/dataclasses.py @@ -0,0 +1,37 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +"""Compatibility helpers for pydantic dataclasses.""" + +from collections.abc import Callable +from typing import Any, TypeVar, cast, overload + +from typing_extensions import dataclass_transform + +from pydantic.dataclasses import dataclass as _pydantic_dataclass + +from .field_helpers import CamelCaseField + +T = TypeVar("T") + + +@overload +def dataclass(__cls: type[T], /, **kwargs: Any) -> type[T]: ... 
+ + +@overload +def dataclass(**kwargs: Any) -> Callable[[type[T]], type[T]]: ... + + +@dataclass_transform(field_specifiers=(CamelCaseField,)) +def dataclass( + __cls: type[T] | None = None, /, **kwargs: Any +) -> Callable[[type[T]], type[T]] | type[T]: + """Wrapper that preserves pydantic behavior while informing type-checkers.""" + + def wrap(cls: type[T]) -> type[T]: + return cast(type[T], _pydantic_dataclass(cls, **kwargs)) + + if __cls is None: + return wrap + + return wrap(__cls) diff --git a/typeagent/knowpro/date_time_schema.py b/typeagent/knowpro/date_time_schema.py index 7f47e03..3ea1c0f 100644 --- a/typeagent/knowpro/date_time_schema.py +++ b/typeagent/knowpro/date_time_schema.py @@ -3,9 +3,10 @@ from typing import Annotated -from pydantic.dataclasses import dataclass from typing_extensions import Doc +from .dataclasses import dataclass + @dataclass class DateVal: diff --git a/typeagent/knowpro/interfaces.py b/typeagent/knowpro/interfaces.py index ca11bf6..885b75c 100644 --- a/typeagent/knowpro/interfaces.py +++ b/typeagent/knowpro/interfaces.py @@ -16,12 +16,12 @@ ) import typechat -from pydantic.dataclasses import dataclass + +from .dataclasses import dataclass from ..aitools.embeddings import NormalizedEmbeddings from . 
import kplib, serialization from .field_helpers import CamelCaseField -from .types import ConversationDataWithIndexes, SearchTermGroupTypes class IKnowledgeSource(Protocol): diff --git a/typeagent/knowpro/kplib.py b/typeagent/knowpro/kplib.py index 6804c45..d390717 100644 --- a/typeagent/knowpro/kplib.py +++ b/typeagent/knowpro/kplib.py @@ -9,9 +9,10 @@ from typing import Annotated, ClassVar, Literal -from pydantic.dataclasses import dataclass from typing_extensions import Doc +from .dataclasses import dataclass + from .field_helpers import CamelCaseField diff --git a/typeagent/knowpro/search.py b/typeagent/knowpro/search.py index a3413e1..655c1b1 100644 --- a/typeagent/knowpro/search.py +++ b/typeagent/knowpro/search.py @@ -4,7 +4,8 @@ from collections.abc import Callable from typing import TypeGuard, cast -from pydantic.dataclasses import dataclass + +from .dataclasses import dataclass from ..storage.memory.messageindex import IMessageTextEmbeddingIndex from ..storage.memory.reltermsindex import resolve_related_terms diff --git a/typeagent/knowpro/search_query_schema.py b/typeagent/knowpro/search_query_schema.py index 4b3af60..2ed9746 100644 --- a/typeagent/knowpro/search_query_schema.py +++ b/typeagent/knowpro/search_query_schema.py @@ -5,9 +5,10 @@ from typing import Annotated, Literal -from pydantic.dataclasses import dataclass from typing_extensions import Doc +from .dataclasses import dataclass + from .date_time_schema import DateTimeRange from .field_helpers import CamelCaseField diff --git a/typeagent/knowpro/searchlib.py b/typeagent/knowpro/searchlib.py index 0d6d590..eeaa87f 100644 --- a/typeagent/knowpro/searchlib.py +++ b/typeagent/knowpro/searchlib.py @@ -12,7 +12,7 @@ def pydantic_dataclass_to_dict(obj: Any) -> Any: """Recursively convert dataclass instances (including pydantic dataclasses) to dictionaries.""" - if dataclasses.is_dataclass(obj): + if dataclasses.is_dataclass(obj) and not isinstance(obj, type): # dataclasses.asdict already recurses 
into nested dataclasses/lists data = dataclasses.asdict(obj) if data: @@ -29,6 +29,7 @@ def pydantic_dataclass_to_dict(obj: Any) -> Any: return {key: pydantic_dataclass_to_dict(value) for key, value in obj.items()} return obj + from ..storage.memory.propindex import PropertyNames from .interfaces import ( ISemanticRefCollection, diff --git a/typeagent/knowpro/serialization.py b/typeagent/knowpro/serialization.py index 2a377f1..569b861 100644 --- a/typeagent/knowpro/serialization.py +++ b/typeagent/knowpro/serialization.py @@ -321,7 +321,11 @@ def deserialize_object(typ: Any, obj: Any) -> Any: raise DeserializationError( f"Pydantic validation failed for {typ.__name__}: {e}" ) from e - elif isinstance(typ, type) and hasattr(typ, "__annotations__") and issubclass(typ, dict): + elif ( + isinstance(typ, type) + and hasattr(typ, "__annotations__") + and issubclass(typ, dict) + ): # Handle TypedDict types (Tag, Topic) if not isinstance(obj, dict): raise DeserializationError(f"Expected dict for {typ}, got {type(obj)}") diff --git a/typeagent/knowpro/types.py b/typeagent/knowpro/types.py index 855f774..4506722 100644 --- a/typeagent/knowpro/types.py +++ b/typeagent/knowpro/types.py @@ -1,29 +1,33 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -""" -Shared type definitions for knowpro modules to avoid circular imports. 
-""" +"""Shared type helpers used to break circular imports in knowpro.""" -from typing import Any, Generic, Literal, TypedDict, TypeVar +from typing import Any, Generic, NotRequired, TypedDict, TypeVar -# --- Shared TypedDicts and type aliases --- TMessageData = TypeVar("TMessageData") + class ConversationDataWithIndexes(TypedDict, Generic[TMessageData]): + """Serializable conversation payload with index metadata.""" + + nameTag: str messages: list[TMessageData] - relatedTermsIndexData: dict[str, Any] | None - messageIndexData: dict[str, Any] | None - # Add other fields as needed + tags: list[str] + semanticRefs: list[Any] | None + semanticIndexData: NotRequired[Any] + relatedTermsIndexData: NotRequired[Any] + threadData: NotRequired[Any] + messageIndexData: NotRequired[Any] -SearchTermGroupTypes = Any -class Tag(TypedDict): - knowledge_type: Literal["tag"] - text: str +# When importing from modules that cannot depend on knowpro.interfaces, +# fall back to ``Any`` to avoid circular references while keeping type checkers +# satisfied. +SearchTermGroupTypes = Any -class Topic(TypedDict): - knowledge_type: Literal["topic"] - text: str -# Add any other shared types here as needed +__all__ = [ + "ConversationDataWithIndexes", + "SearchTermGroupTypes", +] diff --git a/typeagent/knowpro/universal_message.py b/typeagent/knowpro/universal_message.py index a2eb142..f7a30bd 100644 --- a/typeagent/knowpro/universal_message.py +++ b/typeagent/knowpro/universal_message.py @@ -7,7 +7,8 @@ from typing import TypedDict from pydantic import Field -from pydantic.dataclasses import dataclass as pydantic_dataclass + +from .dataclasses import dataclass as pydantic_dataclass from . 
import kplib from .field_helpers import CamelCaseField diff --git a/typeagent/storage/__init__.py b/typeagent/storage/__init__.py index 77eda8b..df45b13 100644 --- a/typeagent/storage/__init__.py +++ b/typeagent/storage/__init__.py @@ -5,14 +5,14 @@ # Import from new organized structure from .memory import ( - MemoryMessageCollection, - MemorySemanticRefCollection, - MemoryStorageProvider, + MemoryMessageCollection, + MemorySemanticRefCollection, + MemoryStorageProvider, ) from .sqlite import ( - SqliteMessageCollection, - SqliteSemanticRefCollection, - SqliteStorageProvider, + SqliteMessageCollection, + SqliteSemanticRefCollection, + SqliteStorageProvider, ) __all__ = [