Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions cli/src/mcp_tef_cli/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from mcp_tef_cli.models import (
DifferentiationRecommendationResponse,
HealthResponse,
MCPServerConfig,
ModelSettingsCreate,
OverlapMatrixResponse,
PaginatedTestCaseResponse,
Expand Down Expand Up @@ -174,7 +175,7 @@ async def create_test_case(
self,
name: str,
query: str,
available_mcp_servers: list[str],
available_mcp_servers: list[MCPServerConfig],
expected_mcp_server_url: str | None = None,
expected_tool_name: str | None = None,
expected_parameters: dict | None = None,
Expand All @@ -184,7 +185,7 @@ async def create_test_case(
Args:
name: Descriptive name for the test case
query: User query to evaluate
available_mcp_servers: List of MCP server URLs available for selection
available_mcp_servers: List of MCPServerConfig objects
expected_mcp_server_url: Expected MCP server URL (null for negative tests)
expected_tool_name: Expected tool name (null for negative tests)
expected_parameters: Expected parameters as dict
Expand Down
87 changes: 77 additions & 10 deletions cli/src/mcp_tef_cli/commands/test_case.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,12 @@
EXIT_INVALID_ARGUMENTS,
EXIT_SUCCESS,
)
from mcp_tef_cli.models import PaginatedTestCaseResponse, TestCaseCreate, TestCaseResponse
from mcp_tef_cli.models import (
MCPServerConfig,
PaginatedTestCaseResponse,
TestCaseCreate,
TestCaseResponse,
)
from mcp_tef_cli.output import print_error, print_success
from mcp_tef_cli.utils import handle_api_errors, resolve_tef_url

Expand Down Expand Up @@ -168,7 +173,7 @@ def format_test_case_table(tc: TestCaseResponse, title: str = "Test Case") -> No
console.print(f"Expected Params: {json.dumps(tc.expected_parameters)}")
console.print("Available Servers:")
for server in tc.available_mcp_servers:
console.print(f" - {server}")
console.print(f" - {server.url} ({server.transport})")
console.print(f"Created: {tc.created_at}")
console.print(f"Updated: {tc.updated_at}")

Expand Down Expand Up @@ -275,6 +280,52 @@ def parse_set_option(values: tuple[str, ...]) -> dict[str, str]:
return result


def parse_server_spec(server_spec: str) -> MCPServerConfig:
    """Parse a ``--servers`` entry into an MCPServerConfig.

    Two spellings are accepted:
    - URL only: "http://localhost:3000/sse" (transport left to the model default)
    - URL with transport suffix: "http://localhost:3000/sse:sse"

    Only the text after the *last* colon is considered as a transport, and only
    when it is exactly 'sse' or 'streamable-http'. Any other suffix (a port, a
    path containing ':', or a misspelled transport) is kept as part of the URL.

    Args:
        server_spec: Server specification string

    Returns:
        MCPServerConfig object

    Raises:
        click.BadParameter: If the spec does not start with http:// or https://
    """
    known_transports = ("sse", "streamable-http")

    # Every valid spec starts with a scheme, so a single up-front check covers
    # both the bare-URL and the URL:transport spellings.
    if not server_spec.startswith(("http://", "https://")):
        if ":" in server_spec:
            raise click.BadParameter(
                f"Invalid server format: '{server_spec}'. "
                "Expected 'URL' or 'URL:transport' (e.g., 'http://localhost:3000:sse')"
            )
        raise click.BadParameter(
            f"Invalid server URL: '{server_spec}'. Must start with http:// or https://"
        )

    # The scheme guarantees at least one ':' is present. Splitting on the last
    # one can never hand back the scheme's own colon as a transport, because
    # that suffix would begin with '//' and fail the membership test below.
    url, _, suffix = server_spec.rpartition(":")
    if suffix in known_transports:
        return MCPServerConfig(url=url, transport=suffix)

    # No recognised transport suffix: the whole string is the URL.
    return MCPServerConfig(url=server_spec)


@test_case.command(name="create")
@click.option("--name", default=None, help="Descriptive name for the test case")
@click.option("--query", default=None, help="User query to evaluate")
Expand All @@ -296,7 +347,12 @@ def parse_set_option(values: tuple[str, ...]) -> dict[str, str]:
@click.option(
"--servers",
default=None,
help="Comma-separated MCP server URLs available for selection",
help=(
"Comma-separated MCP server specifications. "
"Format: 'URL' or 'URL:transport'. "
"Transport defaults to 'streamable-http' if not specified. "
"Examples: 'http://localhost:3000/sse:sse' or 'http://localhost:3001'"
),
)
@click.option(
"--from-file",
Expand Down Expand Up @@ -367,7 +423,10 @@ def create(
"name": "Test case name", // required
"query": "User query to evaluate", // required
"available_mcp_servers": [ // required, non-empty
"${MCP_SERVER_URL}" // supports variable substitution
{
"url": "${MCP_SERVER_URL}", // required, supports variable substitution
"transport": "streamable-http" // optional, defaults to "streamable-http"
}
],
"expected_mcp_server_url": "...", // optional (null for negative tests)
"expected_tool_name": "tool_name", // optional (must pair with server)
Expand All @@ -383,20 +442,27 @@ def create(
Examples:

\b
# Create test case with expected tool
# Create test case with expected tool (SSE transport)
mtef test-case create \\
--name "Weather test" \\
--query "What is the weather in San Francisco?" \\
--expected-server "http://localhost:3000/sse" \\
--expected-tool "get_weather" \\
--servers "http://localhost:3000/sse"
--servers "http://localhost:3000/sse:sse"

\b
# Create test case with multiple servers (mixed transports)
mtef test-case create \\
--name "Multi-server test" \\
--query "Get my calendar events" \\
--servers "http://localhost:3000:sse,http://localhost:3001"

\b
# Create negative test case (no tool should be selected)
mtef test-case create \\
--name "No tool needed" \\
--query "What is 2 + 2?" \\
--servers "http://localhost:3000/sse"
--servers "http://localhost:3000/sse:sse"

\b
# Create from JSON file (single or multiple test cases)
Expand Down Expand Up @@ -447,8 +513,9 @@ def create(
print_error("--servers is required (or use --from-file)")
raise SystemExit(EXIT_INVALID_ARGUMENTS)

# Parse servers from comma-separated string
server_urls = [url.strip() for url in servers.split(",") if url.strip()]
# Parse servers from comma-separated string and convert to MCPServerConfig
server_specs = [spec.strip() for spec in servers.split(",") if spec.strip()]
server_configs = [parse_server_spec(spec) for spec in server_specs]

# Parse expected parameters JSON
try:
Expand All @@ -463,7 +530,7 @@ def create(
TestCaseCreate(
name=name,
query=query,
available_mcp_servers=server_urls,
available_mcp_servers=server_configs,
expected_mcp_server_url=expected_server,
expected_tool_name=expected_tool,
expected_parameters=expected_parameters,
Expand Down
40 changes: 29 additions & 11 deletions cli/src/mcp_tef_cli/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
"ToolQualityResult",
"ToolQualityResponse",
# Test case models
"MCPServerConfig",
"TestCaseCreate",
"TestCaseResponse",
"PaginatedTestCaseResponse",
Expand Down Expand Up @@ -102,6 +103,22 @@ class ToolQualityResponse(BaseModel):
# =============================================================================


class MCPServerConfig(BaseModel):
    """Pair an MCP server URL with the transport used to reach it."""

    # Required. The pattern only checks for an http:// or https:// prefix.
    url: str = Field(
        default=...,
        description="Server URL (must be http or https)",
        pattern=r"^https?://",
        min_length=1,
    )
    # Optional. Falls back to "streamable-http" when not supplied.
    transport: str = Field(
        "streamable-http",
        description="Transport type: 'sse' or 'streamable-http'",
        pattern=r"^(sse|streamable-http)$",
    )


class ToolDefinition(BaseModel):
"""Definition of a tool available from an MCP server."""

Expand All @@ -123,8 +140,8 @@ class TestCaseCreate(BaseModel):
expected_parameters: dict | None = Field(
default=None, description="Expected parameters as JSON object"
)
available_mcp_servers: list[str] = Field(
..., description="MCP server URLs available for selection", min_length=1
available_mcp_servers: list[MCPServerConfig] = Field(
..., description="MCP server configurations available for selection", min_length=1
)

@model_validator(mode="after")
Expand All @@ -138,14 +155,13 @@ def validate_expected_tool_fields(self) -> "TestCaseCreate":
)

# expected_server must be in available_mcp_servers
if (
self.expected_mcp_server_url
and self.expected_mcp_server_url not in self.available_mcp_servers
):
raise ValueError(
f"expected_mcp_server_url '{self.expected_mcp_server_url}' "
"must be in available_mcp_servers"
)
if self.expected_mcp_server_url:
available_urls = [server.url for server in self.available_mcp_servers]
if self.expected_mcp_server_url not in available_urls:
raise ValueError(
f"expected_mcp_server_url '{self.expected_mcp_server_url}' "
"must be in available_mcp_servers"
)

# expected_parameters requires expected_tool_name
if self.expected_parameters and not self.expected_tool_name:
Expand All @@ -163,7 +179,9 @@ class TestCaseResponse(BaseModel):
expected_mcp_server_url: str | None = Field(default=None, description="Expected MCP server URL")
expected_tool_name: str | None = Field(default=None, description="Expected tool name")
expected_parameters: dict | None = Field(default=None, description="Expected parameters")
available_mcp_servers: list[str] = Field(..., description="Available MCP servers")
available_mcp_servers: list[MCPServerConfig] = Field(
..., description="Available MCP server configurations"
)
available_tools: dict[str, list[ToolDefinition]] | None = Field(
default=None, description="Available tools by server URL"
)
Expand Down
17 changes: 12 additions & 5 deletions src/mcp_tef/api/mcp_servers.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,10 @@ def get_mcp_loader_service(request: Request) -> MCPLoaderService:
description="List all tools from a specific MCP server",
)
async def get_mcp_server_tools(
server_url: str | None = Query(default=None, description="MCP server url to get tools from"),
server_url: str = Query(description="MCP server url to get tools from"),
transport: str = Query(
default="streamable-http", description="Transport protocol: 'sse' or 'streamable-http'"
),
offset: int = 0,
limit: int = 100,
mcp_loader_service: MCPLoaderService = Depends(get_mcp_loader_service),
Expand All @@ -53,6 +56,7 @@ async def get_mcp_server_tools(

Args:
server_url: MCP server url
transport: Transport protocol ('sse' or 'streamable-http')
offset: Number of tools to skip (default: 0)
limit: Maximum tools to return (default: 100)

Expand All @@ -62,9 +66,7 @@ async def get_mcp_server_tools(
Raises:
ResourceNotFoundError: If server not found (404 status)
"""
if not server_url:
raise BadRequestError("server_url required")
tools = await mcp_loader_service.load_tools_from_url_typed(server_url)
tools = await mcp_loader_service.load_tools_from_server(server_url, transport)
if offset >= len(tools):
return MCPServerToolsResponse(tools=[], count=0)
tools = tools[offset : min(offset + limit, len(tools))]
Expand All @@ -82,6 +84,9 @@ async def get_mcp_server_tools(
)
async def get_mcp_server_tool_quality_by_url(
server_urls: str = Query(description="MCP server url to get tools from"),
transport: str = Query(
default="streamable-http", description="Transport protocol: 'sse' or 'streamable-http'"
),
model_provider: str = Query(description="Provider for quality evaluation model"),
model_name: str = Query(description="Quality evaluation model"),
mcp_loader_service: MCPLoaderService = Depends(get_mcp_loader_service),
Expand All @@ -95,6 +100,7 @@ async def get_mcp_server_tool_quality_by_url(
tasks = [
_get_mcp_server_tool_quality_inner(
server_url=url,
transport=transport,
model_provider=model_provider,
model_name=model_name,
mcp_loader_service=mcp_loader_service,
Expand All @@ -117,6 +123,7 @@ async def get_mcp_server_tool_quality_by_url(

async def _get_mcp_server_tool_quality_inner(
server_url: str,
transport: str,
model_provider: str,
model_name: str,
mcp_loader_service: MCPLoaderService,
Expand All @@ -134,5 +141,5 @@ async def _get_mcp_server_tool_quality_inner(
mcp_loader_service=mcp_loader_service,
llm_service=llm_service,
)
quality_results = await tool_quality_service.evaluate_server(server_url)
quality_results = await tool_quality_service.evaluate_server(server_url, transport)
return ToolQualityResponse(results=quality_results)
8 changes: 4 additions & 4 deletions src/mcp_tef/api/similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ async def analyze_similarity(

# Extract and normalize tools
tools = await similarity_service.extract_and_normalize_tools(
url_list=request.mcp_server_urls,
server_configs=request.mcp_servers,
tool_names=request.tool_names,
)

Expand Down Expand Up @@ -314,7 +314,7 @@ async def generate_similarity_matrix(

# Extract and normalize tools
tools = await similarity_service.extract_and_normalize_tools(
url_list=request.mcp_server_urls,
server_configs=request.mcp_servers,
tool_names=request.tool_names,
)

Expand Down Expand Up @@ -366,7 +366,7 @@ async def generate_overlap_matrix(
# Extract and normalize tools

tools = await similarity_service.extract_and_normalize_tools(
url_list=request.mcp_server_urls,
server_configs=request.mcp_servers,
)

logger.info(f"Generating overlap matrix for {len(tools)} tools")
Expand Down Expand Up @@ -419,7 +419,7 @@ async def get_recommendations(

# Extract and normalize tools
tools = await similarity_service.extract_and_normalize_tools(
url_list=request.mcp_server_urls,
server_configs=request.mcp_servers,
tool_names=request.tool_names,
)

Expand Down
7 changes: 4 additions & 3 deletions src/mcp_tef/api/test_cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from mcp_tef.config.settings import Settings, get_settings
from mcp_tef.models.schemas import (
MCPServerConfig,
PaginatedTestCaseResponse,
TestCaseCreate,
TestCaseResponse,
Expand Down Expand Up @@ -199,7 +200,7 @@ async def get_test_case(


async def _gather_tools_for_servers(
server_urls: list[str], mcp_loader: MCPLoaderService
mcp_servers: list[MCPServerConfig], mcp_loader: MCPLoaderService
) -> dict[str, list[ToolDefinition]]:
"""
Gather tools from MCP servers.
Expand All @@ -208,10 +209,10 @@ async def _gather_tools_for_servers(
dict of server_url -> tools
"""
gather_tools_tasks = [
mcp_loader.load_tools_from_url_typed(server_url) for server_url in server_urls
mcp_loader.load_tools_from_server(server.url, server.transport) for server in mcp_servers
]
gather_tools_results = await asyncio.gather(*gather_tools_tasks)
return dict(zip(server_urls, gather_tools_results, strict=False))
return dict(zip([server.url for server in mcp_servers], gather_tools_results, strict=False))


@router.delete(
Expand Down
Loading
Loading