|
| 1 | +# mypy: disable-error-code=var-annotated |
| 2 | +"""Use this script to migrate your Agno VectorDBs from v1 to v2 |
| 3 | +
|
| 4 | +This script works with PgVector and SingleStore. |
| 5 | +
|
| 6 | +This script will update the provided tables to add the two new columns introduced in v2: |
| 7 | +- content_hash: String column for content hash tracking |
| 8 | +- content_id: String column for content ID tracking |
| 9 | +
|
| 10 | +To use the script simply: |
| 11 | +- For PGVector, set the `pg_vector_db_url` and `pg_vector_config` variables |
| 12 | +- For SingleStore, set the `singlestore_db_url` and `singlestore_config` variables |
| 13 | +- Run the script |
| 14 | +""" |
| 15 | + |
| 16 | +from agno.utils.log import log_error, log_info, log_warning |
| 17 | + |
| 18 | +# ------------ Setup for PGVector ------------ |
| 19 | + |
| 20 | +## Your database connection string |
| 21 | +pg_vector_db_url = "" # Example: "postgresql+psycopg://ai:ai@localhost:5532/ai" |
| 22 | + |
| 23 | +## Configuration of the schema and tables to migrate |
| 24 | +pg_vector_config = { |
| 25 | + # "schema": "ai", # Schema where your tables are located |
| 26 | + # "table_names": ["documents"], # Tables to migrate |
| 27 | +} |
| 28 | +# ----------------------------------------- |
| 29 | + |
| 30 | +# ------------ Setup for SingleStore ------------ |
| 31 | + |
| 32 | +# Your database connection string |
| 33 | +singlestore_db_url = "" # Example: "mysql+pymysql://user:password@host:port/database" |
| 34 | + |
| 35 | +# Exact configuration of the tables to migrate |
| 36 | +singlestore_config = { |
| 37 | + # "schema": "ai", # Schema where your tables are located |
| 38 | + # "table_names": ["documents"], # Tables to migrate |
| 39 | +} |
| 40 | +# ----------------------------------------- |
| 41 | + |
| 42 | +# Migration batch size (adjust based on available memory and table size) |
| 43 | +migration_batch_size = 5000 |
| 44 | + |
| 45 | +# Exit if no configurations are provided |
| 46 | +if not (pg_vector_db_url and pg_vector_config) and not (singlestore_db_url and singlestore_config): |
| 47 | + log_error( |
| 48 | + "To run the migration, you need to set the `pg_vector_db_url` and `pg_vector_config` variables for PGVector, or `singlestore_db_url` and `singlestore_config` for SingleStore." |
| 49 | + ) |
| 50 | + exit() |
| 51 | + |
| 52 | + |
| 53 | +def migrate_pgvector_table(table_name: str, schema: str = "ai") -> None: |
| 54 | + """ |
| 55 | + Migrate a single PgVector table to v2 by adding content_hash and content_id columns. |
| 56 | +
|
| 57 | + Args: |
| 58 | + table_name: Name of the table to migrate |
| 59 | + schema: Database schema name |
| 60 | + """ |
| 61 | + try: |
| 62 | + log_info(f"Starting migration for PgVector table: {schema}.{table_name}") |
| 63 | + |
| 64 | + # Create PgVector instance to get database connection |
| 65 | + from agno.vectordb.pgvector.pgvector import PgVector |
| 66 | + |
| 67 | + pgvector = PgVector( |
| 68 | + table_name=table_name, |
| 69 | + schema=schema, |
| 70 | + db_url=pg_vector_db_url, |
| 71 | + schema_version=1, # Use v1 schema for compatibility |
| 72 | + ) |
| 73 | + |
| 74 | + # Check if table exists |
| 75 | + if not pgvector.table_exists(): |
| 76 | + log_warning(f"Table {schema}.{table_name} not found. Skipping migration.") |
| 77 | + return |
| 78 | + |
| 79 | + # Check if the new columns already exist |
| 80 | + from sqlalchemy import inspect, text |
| 81 | + from sqlalchemy.exc import SQLAlchemyError |
| 82 | + |
| 83 | + inspector = inspect(pgvector.db_engine) |
| 84 | + columns = inspector.get_columns(table_name, schema=schema) |
| 85 | + column_names = [col["name"] for col in columns] |
| 86 | + |
| 87 | + content_hash_exists = "content_hash" in column_names |
| 88 | + content_id_exists = "content_id" in column_names |
| 89 | + |
| 90 | + if content_hash_exists and content_id_exists: |
| 91 | + log_info(f"Table {schema}.{table_name} already has the v2 columns. No migration needed.") |
| 92 | + return |
| 93 | + |
| 94 | + # Add missing columns |
| 95 | + with pgvector.Session() as sess, sess.begin(): |
| 96 | + if not content_hash_exists: |
| 97 | + log_info(f"Adding content_hash column to {schema}.{table_name}") |
| 98 | + sess.execute(text(f'ALTER TABLE "{schema}"."{table_name}" ADD COLUMN content_hash VARCHAR;')) |
| 99 | + |
| 100 | + if not content_id_exists: |
| 101 | + log_info(f"Adding content_id column to {schema}.{table_name}") |
| 102 | + sess.execute(text(f'ALTER TABLE "{schema}"."{table_name}" ADD COLUMN content_id VARCHAR;')) |
| 103 | + |
| 104 | + # Add indexes for the new columns |
| 105 | + with pgvector.Session() as sess, sess.begin(): |
| 106 | + if not content_hash_exists: |
| 107 | + index_name = f"idx_{table_name}_content_hash" |
| 108 | + log_info(f"Creating index {index_name} on content_hash column") |
| 109 | + try: |
| 110 | + sess.execute( |
| 111 | + text(f'CREATE INDEX IF NOT EXISTS "{index_name}" ON "{schema}"."{table_name}" (content_hash);') |
| 112 | + ) |
| 113 | + except SQLAlchemyError as e: |
| 114 | + log_warning(f"Could not create index {index_name}: {e}") |
| 115 | + |
| 116 | + if not content_id_exists: |
| 117 | + index_name = f"idx_{table_name}_content_id" |
| 118 | + log_info(f"Creating index {index_name} on content_id column") |
| 119 | + try: |
| 120 | + sess.execute( |
| 121 | + text(f'CREATE INDEX IF NOT EXISTS "{index_name}" ON "{schema}"."{table_name}" (content_id);') |
| 122 | + ) |
| 123 | + except SQLAlchemyError as e: |
| 124 | + log_warning(f"Could not create index {index_name}: {e}") |
| 125 | + |
| 126 | + log_info(f"Successfully migrated PgVector table {schema}.{table_name} to v2") |
| 127 | + |
| 128 | + except Exception as e: |
| 129 | + log_error(f"Error migrating PgVector table {schema}.{table_name}: {e}") |
| 130 | + raise |
| 131 | + |
| 132 | + |
| 133 | +def migrate_singlestore_table(table_name: str, schema: str = "ai") -> None: |
| 134 | + """ |
| 135 | + Migrate a single SingleStore table to v2 by adding content_hash and content_id columns. |
| 136 | +
|
| 137 | + Args: |
| 138 | + table_name: Name of the table to migrate |
| 139 | + schema: Database schema name |
| 140 | + """ |
| 141 | + try: |
| 142 | + log_info(f"Starting migration for SingleStore table: {schema}.{table_name}") |
| 143 | + |
| 144 | + from agno.vectordb.singlestore.singlestore import SingleStore |
| 145 | + |
| 146 | + singlestore = SingleStore( |
| 147 | + collection=table_name, |
| 148 | + schema=schema, |
| 149 | + db_url=singlestore_db_url, |
| 150 | + ) |
| 151 | + |
| 152 | + # Check if table exists |
| 153 | + if not singlestore.table_exists(): |
| 154 | + log_warning(f"Table {schema}.{table_name} not found. Skipping migration.") |
| 155 | + return |
| 156 | + |
| 157 | + # Check if the new columns already exist |
| 158 | + from sqlalchemy import inspect, text |
| 159 | + |
| 160 | + inspector = inspect(singlestore.db_engine) |
| 161 | + columns = inspector.get_columns(table_name, schema=schema) |
| 162 | + column_names = [col["name"] for col in columns] |
| 163 | + |
| 164 | + content_hash_exists = "content_hash" in column_names |
| 165 | + content_id_exists = "content_id" in column_names |
| 166 | + |
| 167 | + if content_hash_exists and content_id_exists: |
| 168 | + log_info(f"Table {schema}.{table_name} already has the v2 columns. No migration needed.") |
| 169 | + return |
| 170 | + |
| 171 | + # Add missing columns |
| 172 | + with singlestore.Session() as sess, sess.begin(): |
| 173 | + if not content_hash_exists: |
| 174 | + log_info(f"Adding content_hash column to {schema}.{table_name}") |
| 175 | + sess.execute(text(f"ALTER TABLE `{schema}`.`{table_name}` ADD COLUMN content_hash TEXT;")) |
| 176 | + |
| 177 | + if not content_id_exists: |
| 178 | + log_info(f"Adding content_id column to {schema}.{table_name}") |
| 179 | + sess.execute(text(f"ALTER TABLE `{schema}`.`{table_name}` ADD COLUMN content_id TEXT;")) |
| 180 | + |
| 181 | + log_info(f"Successfully migrated SingleStore table {schema}.{table_name} to v2") |
| 182 | + |
| 183 | + except Exception as e: |
| 184 | + log_error(f"Error migrating SingleStore table {schema}.{table_name}: {e}") |
| 185 | + raise |
| 186 | + |
| 187 | + |
| 188 | +# Run the migrations |
| 189 | +try: |
| 190 | + # PGVector migration |
| 191 | + if pg_vector_config: |
| 192 | + for table_name in pg_vector_config["table_names"]: |
| 193 | + migrate_pgvector_table(table_name, pg_vector_config["schema"]) # type: ignore |
| 194 | + |
| 195 | + # SingleStore migration |
| 196 | + if singlestore_config: |
| 197 | + for table_name in singlestore_config["table_names"]: |
| 198 | + migrate_singlestore_table(table_name, singlestore_config["schema"]) # type: ignore |
| 199 | + |
| 200 | +except Exception as e: |
| 201 | + log_error(f"Error during migration: {e}") |
| 202 | + |
| 203 | +log_info("VectorDB migration completed.") |
0 commit comments