Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 111 additions & 0 deletions marimo/_ai/_tools/tools/datasource.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# Copyright 2025 Marimo. All rights reserved.

from __future__ import annotations

from dataclasses import dataclass, field
from typing import Optional

from marimo import _loggers
from marimo._ai._tools.base import ToolBase
from marimo._ai._tools.types import SuccessResult
from marimo._ai._tools.utils.exceptions import ToolExecutionError
from marimo._data.models import DataTable
from marimo._server.sessions import Session
from marimo._types.ids import SessionId
from marimo._utils.fuzzy_match import compile_regex, is_fuzzy_match

LOGGER = _loggers.marimo_logger()


@dataclass
class GetDatabaseTablesArgs:
    # Session whose data connections should be inspected.
    session_id: SessionId
    # Optional filter text (plain substring or regex); None requests no
    # filtering at all.
    query: Optional[str] = None


@dataclass
class TableDetails:
    # Name of the connection the table was found under.
    connection: str
    # Name of the database that contains the schema.
    database: str
    # Name of the schema that contains the table.
    schema: str
    # The table entry itself, as listed by the schema.
    table: DataTable


@dataclass
class GetDatabaseTablesOutput(SuccessResult):
    # Tables selected by the lookup; every instance starts with its own
    # fresh empty list.
    tables: list[TableDetails] = field(default_factory=list)
Copy link
Contributor

@mscolnick mscolnick Sep 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if it would help to include an example SQL query in the response, e.g. _df = mo.sql("SELECT * FROM database.schema.table LIMIT 100")



class GetDatabaseTables(
    ToolBase[GetDatabaseTablesArgs, GetDatabaseTablesOutput]
):
    """
    Get information about tables in a database.

    Args:
        session_id: The session id.
        query (optional): The query to match the schema and table names. Regex is supported.

    If a query is provided, it will fuzzy match the query to the schema and table names available. A matching schema returns all of its tables; otherwise only the matching tables are returned. If no query is provided, all tables are returned. Don't provide a query if you need to see the entire schema view.

    The tables returned contain information about the database, schema and connection name to use in forming SQL queries.
    """

    def handle(self, args: GetDatabaseTablesArgs) -> GetDatabaseTablesOutput:
        """Resolve the session named in ``args`` and list its tables."""
        session = self.context.get_session(args.session_id)
        return self._get_tables(session, args.query)

    def _get_tables(
        self, session: Session, query: Optional[str]
    ) -> GetDatabaseTablesOutput:
        """Collect the tables of every connection in ``session``.

        Args:
            session: Session whose ``session_view.data_connectors`` is read.
            query: Optional filter. ``None`` or an empty string selects
                every table.

        Returns:
            A ``GetDatabaseTablesOutput`` holding one ``TableDetails`` per
            selected table, in connection/database/schema/table order.

        Raises:
            ToolExecutionError: If the session has no connections.
        """
        connections = session.session_view.data_connectors.connections

        if not connections:
            raise ToolExecutionError(
                message="No databases found. Please create a connection first.",
                code="NO_DATABASES_FOUND",
                is_retryable=False,
            )

        # Compile once up front; an empty or missing query needs no pattern
        # because it selects everything.
        compiled_pattern = None
        is_regex = False
        if query:
            compiled_pattern, is_regex = compile_regex(query)

        def matches(name: str) -> bool:
            # No query (None or "") means "match everything".
            return not query or is_fuzzy_match(
                query, name, compiled_pattern, is_regex
            )

        tables: list[TableDetails] = []
        for connection in connections:
            for database in connection.databases:
                for schema in database.schemas:
                    # A matching schema contributes all of its tables;
                    # otherwise each table is tested on its own name. Either
                    # way a table is emitted at most once per schema.
                    schema_matches = matches(schema.name)
                    tables.extend(
                        TableDetails(
                            connection=connection.name,
                            database=database.name,
                            schema=schema.name,
                            table=table,
                        )
                        for table in schema.tables
                        if schema_matches or matches(table.name)
                    )

        return GetDatabaseTablesOutput(tables=tables)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we need to prevent duplicates?

2 changes: 2 additions & 0 deletions marimo/_ai/_tools/tools_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
GetCellRuntimeData,
GetLightweightCellMap,
)
from marimo._ai._tools.tools.datasource import GetDatabaseTables
from marimo._ai._tools.tools.notebooks import GetActiveNotebooks
from marimo._ai._tools.tools.tables_and_variables import GetTablesAndVariables

Expand All @@ -14,4 +15,5 @@
GetCellRuntimeData,
GetLightweightCellMap,
GetTablesAndVariables,
GetDatabaseTables,
]
36 changes: 36 additions & 0 deletions marimo/_utils/fuzzy_match.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Copyright 2025 Marimo. All rights reserved.

from __future__ import annotations

import re


def compile_regex(query: str) -> tuple[re.Pattern[str] | None, bool]:
    """
    Try to compile ``query`` as a case-insensitive regular expression.

    Returns:
        ``(pattern, True)`` when ``query`` is a valid regex, otherwise
        ``(None, False)``.
    """
    try:
        pattern = re.compile(query, re.IGNORECASE)
    except re.error:
        # Not a valid regex; signal that to the caller instead of raising.
        return None, False
    return pattern, True


def is_fuzzy_match(
    query: str,
    name: str,
    compiled_pattern: re.Pattern[str] | None,
    is_regex: bool,
) -> bool:
    """
    Check whether ``name`` matches ``query``.

    A pre-compiled regex is searched anywhere in ``name`` when one is
    supplied and ``is_regex`` is set; otherwise the check falls back to a
    case-insensitive substring test.

    Args:
        query: The query to match.
        name: The name to match against.
        compiled_pattern: Pre-compiled regex pattern (None if not regex).
        is_regex: Whether the query is a valid regex.
    """
    use_regex = is_regex and compiled_pattern
    if not use_regex:
        # Plain-text fallback: lower-case both sides before comparing.
        needle = query.lower()
        haystack = name.lower()
        return needle in haystack
    found = compiled_pattern.search(name)
    return bool(found)
Loading
Loading