Skip to content

Commit e985313

Browse files
authored
add datasource tool (#6422)
## 📝 Summary <!-- Provide a concise summary of what this pull request is addressing. If this PR fixes any issues, list them here by number (e.g., Fixes #123). --> Fetches tables (with schema, database and connection fields) in a database. Supports regex queries. <img width="550" height="628" alt="CleanShot 2025-09-19 at 01 22 12" src="https://github.com/user-attachments/assets/8b3a678a-2ded-48e0-a36e-3a63bd38aa9e" /> <img width="549" height="492" alt="CleanShot 2025-09-19 at 01 24 20" src="https://github.com/user-attachments/assets/eeb456bd-383c-4dc5-8ae4-4f2e86fd6686" /> ## 🔍 Description of Changes <!-- Detail the specific changes made in this pull request. Explain the problem addressed and how it was resolved. If applicable, provide before and after comparisons, screenshots, or any relevant details to help reviewers understand the changes easily. --> ## 📋 Checklist - [x] I have read the [contributor guidelines](https://github.com/marimo-team/marimo/blob/main/CONTRIBUTING.md). - [ ] For large changes, or changes that affect the public API: this change was discussed or approved through an issue, on [Discord](https://marimo.io/discord?ref=pr), or the community [discussions](https://github.com/marimo-team/marimo/discussions) (Please provide a link if applicable). - [x] I have added tests for the changes made. - [x] I have run the code and verified that it works as expected.
1 parent 299fc9a commit e985313

File tree

5 files changed

+763
-0
lines changed

5 files changed

+763
-0
lines changed
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
# Copyright 2025 Marimo. All rights reserved.
2+
3+
from __future__ import annotations
4+
5+
from dataclasses import dataclass, field
6+
from typing import Optional
7+
8+
from marimo import _loggers
9+
from marimo._ai._tools.base import ToolBase
10+
from marimo._ai._tools.types import SuccessResult
11+
from marimo._ai._tools.utils.exceptions import ToolExecutionError
12+
from marimo._data.models import DataTable
13+
from marimo._server.sessions import Session
14+
from marimo._types.ids import SessionId
15+
from marimo._utils.fuzzy_match import compile_regex, is_fuzzy_match
16+
17+
# Module-level logger obtained from marimo's logging helper.
LOGGER = _loggers.marimo_logger()
18+
19+
20+
# Arguments accepted by the GetDatabaseTables tool.
@dataclass
class GetDatabaseTablesArgs:
    # Identifier of the session whose data connections are inspected.
    session_id: SessionId
    # Optional filter text; when left as None every table is returned.
    query: Optional[str] = None
24+
25+
26+
# One table together with the names needed to locate it:
# connection -> database -> schema -> table.
@dataclass
class TableDetails:
    # Name of the connection the table was found under.
    connection: str
    # Name of the database that contains the schema.
    database: str
    # Name of the schema that contains the table.
    schema: str
    # The table record itself, passed through unchanged.
    table: DataTable
32+
33+
34+
# Result payload of the GetDatabaseTables tool; extends SuccessResult with
# the list of selected tables.
@dataclass
class GetDatabaseTablesOutput(SuccessResult):
    # Selected tables; a fresh empty list is created per instance by default.
    tables: list[TableDetails] = field(default_factory=list)
37+
38+
39+
class GetDatabaseTables(
    ToolBase[GetDatabaseTablesArgs, GetDatabaseTablesOutput]
):
    """
    Get information about tables in a database.

    Args:
        session_id: The session id.
        query (optional): The query to match the database, schemas, and tables. Regex is supported.

    If a query is provided, it will fuzzy match the query to the database, schemas, and tables available. If no query is provided, all tables are returned. Don't provide a query if you need to see the entire schema view.

    The tables returned contain information about the database, schema and connection name to use in forming SQL queries.
    """

    # NOTE(review): the docstring above is reproduced verbatim in case the
    # tool framework reads it at runtime -- confirm before rewording. It
    # mentions matching database names, but _get_tables only tests schema
    # and table names against the query.

    def handle(self, args: GetDatabaseTablesArgs) -> GetDatabaseTablesOutput:
        """Resolve the session named in ``args`` and list its tables."""
        target_session = self.context.get_session(args.session_id)
        return self._get_tables(target_session, args.query)

    def _get_tables(
        self, session: Session, query: Optional[str]
    ) -> GetDatabaseTablesOutput:
        """
        Collect tables from the data connections of ``session``.

        Args:
            session: Session whose ``session_view.data_connectors`` is read.
            query: Optional filter text. ``None`` selects every table.

        Returns:
            Output listing each selected table with its connection,
            database and schema names, plus an example SQL hint.

        Raises:
            ToolExecutionError: If the session has no connections.
        """
        connections = session.session_view.data_connectors.connections
        if len(connections) == 0:
            raise ToolExecutionError(
                message="No databases found. Please create a connection first.",
                code="NO_DATABASES_FOUND",
                is_retryable=False,
            )

        # Compile once up front so every comparison below reuses the result.
        pattern, is_regex = compile_regex(query) if query else (None, False)

        found: list[TableDetails] = []
        for connection in connections:
            for database in connection.databases:
                for schema in database.schemas:
                    if query is None or is_fuzzy_match(
                        query, schema.name, pattern, is_regex
                    ):
                        # No query, or the schema name matches: take the
                        # whole schema.
                        selected = schema.tables
                    else:
                        # Otherwise keep only tables whose own name matches.
                        selected = [
                            candidate
                            for candidate in schema.tables
                            if is_fuzzy_match(
                                query, candidate.name, pattern, is_regex
                            )
                        ]
                    found.extend(
                        TableDetails(
                            connection=connection.name,
                            database=database.name,
                            schema=schema.name,
                            table=table,
                        )
                        for table in selected
                    )

        return GetDatabaseTablesOutput(
            tables=found,
            next_steps=[
                'Example of an SQL query: _df = mo.sql(f"""SELECT * FROM database.schema.name LIMIT 100""")',
            ],
        )

marimo/_ai/_tools/tools_registry.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
GetCellRuntimeData,
77
GetLightweightCellMap,
88
)
9+
from marimo._ai._tools.tools.datasource import GetDatabaseTables
910
from marimo._ai._tools.tools.notebooks import GetActiveNotebooks
1011
from marimo._ai._tools.tools.tables_and_variables import GetTablesAndVariables
1112

@@ -14,4 +15,5 @@
1415
GetCellRuntimeData,
1516
GetLightweightCellMap,
1617
GetTablesAndVariables,
18+
GetDatabaseTables,
1719
]

marimo/_utils/fuzzy_match.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Copyright 2025 Marimo. All rights reserved.
2+
3+
from __future__ import annotations
4+
5+
import re
6+
7+
8+
def compile_regex(query: str) -> tuple[re.Pattern[str] | None, bool]:
    """
    Try to compile ``query`` as a case-insensitive regular expression.

    Args:
        query: The text to compile.

    Returns:
        ``(pattern, True)`` when ``query`` compiles, otherwise
        ``(None, False)``.
    """
    try:
        pattern = re.compile(query, re.IGNORECASE)
    except re.error:
        # Not a valid regex; report that so callers can match another way.
        return None, False
    return pattern, True
16+
17+
18+
def is_fuzzy_match(
    query: str,
    name: str,
    compiled_pattern: re.Pattern[str] | None,
    is_regex: bool,
) -> bool:
    """
    Report whether ``name`` matches ``query``.

    Uses the pre-compiled regex when one is available; otherwise falls back
    to a case-insensitive substring test.

    Args:
        query: The query to match.
        name: The name to match against.
        compiled_pattern: Pre-compiled regex pattern (None if not regex).
        is_regex: Whether the query is a valid regex.

    Returns:
        True if the pattern is found anywhere in ``name`` (regex path), or
        if the lowercased ``query`` occurs in the lowercased ``name``
        (substring path).
    """
    if not (is_regex and compiled_pattern):
        # No usable pattern: plain case-insensitive containment.
        return query.lower() in name.lower()
    return bool(compiled_pattern.search(name))

0 commit comments

Comments
 (0)