Skip to content

Commit 574ac18

Browse files
feat: VectorDB migrations script (#5172)
## Summary Describe key changes, mention related issues or motivation for the changes. (If applicable, issue number: #\_\_\_\_) ## Type of change - [ ] Bug fix - [ ] New feature - [ ] Breaking change - [ ] Improvement - [ ] Model update - [ ] Other: --- ## Checklist - [ ] Code complies with style guidelines - [ ] Ran format/validation scripts (`./scripts/format.sh` and `./scripts/validate.sh`) - [ ] Self-review completed - [ ] Documentation updated (comments, docstrings) - [ ] Examples and guides: Relevant cookbook examples have been included or updated (if applicable) - [ ] Tested in clean environment - [ ] Tests added/updated (if applicable) --- ## Additional Notes Add any important context (deployment instructions, screenshots, security considerations, etc.) --------- Co-authored-by: manu <[email protected]>
1 parent 7ac7097 commit 574ac18

File tree

1 file changed

+203
-0
lines changed

1 file changed

+203
-0
lines changed
Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
# mypy: disable-error-code=var-annotated
2+
"""Use this script to migrate your Agno VectorDBs from v1 to v2
3+
4+
This script works with PgVector and SingleStore.
5+
6+
This script will update the provided tables to add the two new columns introduced in v2:
7+
- content_hash: String column for content hash tracking
8+
- content_id: String column for content ID tracking
9+
10+
To use the script simply:
11+
- For PGVector, set the `pg_vector_db_url` and `pg_vector_config` variables
12+
- For SingleStore, set the `singlestore_db_url` and `singlestore_config` variables
13+
- Run the script
14+
"""
15+
16+
from agno.utils.log import log_error, log_info, log_warning
17+
18+
# ------------ Setup for PGVector ------------
19+
20+
## Your database connection string
21+
pg_vector_db_url = "" # Example: "postgresql+psycopg://ai:ai@localhost:5532/ai"
22+
23+
## Configuration of the schema and tables to migrate
24+
pg_vector_config = {
25+
# "schema": "ai", # Schema where your tables are located
26+
# "table_names": ["documents"], # Tables to migrate
27+
}
28+
# -----------------------------------------
29+
30+
# ------------ Setup for SingleStore ------------
31+
32+
# Your database connection string
33+
singlestore_db_url = "" # Example: "mysql+pymysql://user:password@host:port/database"
34+
35+
# Exact configuration of the tables to migrate
36+
singlestore_config = {
37+
# "schema": "ai", # Schema where your tables are located
38+
# "table_names": ["documents"], # Tables to migrate
39+
}
40+
# -----------------------------------------
41+
42+
# Migration batch size (adjust based on available memory and table size)
43+
migration_batch_size = 5000
44+
45+
# Exit if no configurations are provided
46+
if not (pg_vector_db_url and pg_vector_config) and not (singlestore_db_url and singlestore_config):
47+
log_error(
48+
"To run the migration, you need to set the `pg_vector_db_url` and `pg_vector_config` variables for PGVector, or `singlestore_db_url` and `singlestore_config` for SingleStore."
49+
)
50+
exit()
51+
52+
53+
def migrate_pgvector_table(table_name: str, schema: str = "ai") -> None:
54+
"""
55+
Migrate a single PgVector table to v2 by adding content_hash and content_id columns.
56+
57+
Args:
58+
table_name: Name of the table to migrate
59+
schema: Database schema name
60+
"""
61+
try:
62+
log_info(f"Starting migration for PgVector table: {schema}.{table_name}")
63+
64+
# Create PgVector instance to get database connection
65+
from agno.vectordb.pgvector.pgvector import PgVector
66+
67+
pgvector = PgVector(
68+
table_name=table_name,
69+
schema=schema,
70+
db_url=pg_vector_db_url,
71+
schema_version=1, # Use v1 schema for compatibility
72+
)
73+
74+
# Check if table exists
75+
if not pgvector.table_exists():
76+
log_warning(f"Table {schema}.{table_name} not found. Skipping migration.")
77+
return
78+
79+
# Check if the new columns already exist
80+
from sqlalchemy import inspect, text
81+
from sqlalchemy.exc import SQLAlchemyError
82+
83+
inspector = inspect(pgvector.db_engine)
84+
columns = inspector.get_columns(table_name, schema=schema)
85+
column_names = [col["name"] for col in columns]
86+
87+
content_hash_exists = "content_hash" in column_names
88+
content_id_exists = "content_id" in column_names
89+
90+
if content_hash_exists and content_id_exists:
91+
log_info(f"Table {schema}.{table_name} already has the v2 columns. No migration needed.")
92+
return
93+
94+
# Add missing columns
95+
with pgvector.Session() as sess, sess.begin():
96+
if not content_hash_exists:
97+
log_info(f"Adding content_hash column to {schema}.{table_name}")
98+
sess.execute(text(f'ALTER TABLE "{schema}"."{table_name}" ADD COLUMN content_hash VARCHAR;'))
99+
100+
if not content_id_exists:
101+
log_info(f"Adding content_id column to {schema}.{table_name}")
102+
sess.execute(text(f'ALTER TABLE "{schema}"."{table_name}" ADD COLUMN content_id VARCHAR;'))
103+
104+
# Add indexes for the new columns
105+
with pgvector.Session() as sess, sess.begin():
106+
if not content_hash_exists:
107+
index_name = f"idx_{table_name}_content_hash"
108+
log_info(f"Creating index {index_name} on content_hash column")
109+
try:
110+
sess.execute(
111+
text(f'CREATE INDEX IF NOT EXISTS "{index_name}" ON "{schema}"."{table_name}" (content_hash);')
112+
)
113+
except SQLAlchemyError as e:
114+
log_warning(f"Could not create index {index_name}: {e}")
115+
116+
if not content_id_exists:
117+
index_name = f"idx_{table_name}_content_id"
118+
log_info(f"Creating index {index_name} on content_id column")
119+
try:
120+
sess.execute(
121+
text(f'CREATE INDEX IF NOT EXISTS "{index_name}" ON "{schema}"."{table_name}" (content_id);')
122+
)
123+
except SQLAlchemyError as e:
124+
log_warning(f"Could not create index {index_name}: {e}")
125+
126+
log_info(f"Successfully migrated PgVector table {schema}.{table_name} to v2")
127+
128+
except Exception as e:
129+
log_error(f"Error migrating PgVector table {schema}.{table_name}: {e}")
130+
raise
131+
132+
133+
def migrate_singlestore_table(table_name: str, schema: str = "ai") -> None:
134+
"""
135+
Migrate a single SingleStore table to v2 by adding content_hash and content_id columns.
136+
137+
Args:
138+
table_name: Name of the table to migrate
139+
schema: Database schema name
140+
"""
141+
try:
142+
log_info(f"Starting migration for SingleStore table: {schema}.{table_name}")
143+
144+
from agno.vectordb.singlestore.singlestore import SingleStore
145+
146+
singlestore = SingleStore(
147+
collection=table_name,
148+
schema=schema,
149+
db_url=singlestore_db_url,
150+
)
151+
152+
# Check if table exists
153+
if not singlestore.table_exists():
154+
log_warning(f"Table {schema}.{table_name} not found. Skipping migration.")
155+
return
156+
157+
# Check if the new columns already exist
158+
from sqlalchemy import inspect, text
159+
160+
inspector = inspect(singlestore.db_engine)
161+
columns = inspector.get_columns(table_name, schema=schema)
162+
column_names = [col["name"] for col in columns]
163+
164+
content_hash_exists = "content_hash" in column_names
165+
content_id_exists = "content_id" in column_names
166+
167+
if content_hash_exists and content_id_exists:
168+
log_info(f"Table {schema}.{table_name} already has the v2 columns. No migration needed.")
169+
return
170+
171+
# Add missing columns
172+
with singlestore.Session() as sess, sess.begin():
173+
if not content_hash_exists:
174+
log_info(f"Adding content_hash column to {schema}.{table_name}")
175+
sess.execute(text(f"ALTER TABLE `{schema}`.`{table_name}` ADD COLUMN content_hash TEXT;"))
176+
177+
if not content_id_exists:
178+
log_info(f"Adding content_id column to {schema}.{table_name}")
179+
sess.execute(text(f"ALTER TABLE `{schema}`.`{table_name}` ADD COLUMN content_id TEXT;"))
180+
181+
log_info(f"Successfully migrated SingleStore table {schema}.{table_name} to v2")
182+
183+
except Exception as e:
184+
log_error(f"Error migrating SingleStore table {schema}.{table_name}: {e}")
185+
raise
186+
187+
188+
# Run the migrations
189+
try:
190+
# PGVector migration
191+
if pg_vector_config:
192+
for table_name in pg_vector_config["table_names"]:
193+
migrate_pgvector_table(table_name, pg_vector_config["schema"]) # type: ignore
194+
195+
# SingleStore migration
196+
if singlestore_config:
197+
for table_name in singlestore_config["table_names"]:
198+
migrate_singlestore_table(table_name, singlestore_config["schema"]) # type: ignore
199+
200+
except Exception as e:
201+
log_error(f"Error during migration: {e}")
202+
203+
log_info("VectorDB migration completed.")

0 commit comments

Comments
 (0)