diff --git a/.gitignore b/.gitignore index da838999..da7e0b5e 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ __pycache__/ .pytest_cache/ *.py[cod] *$py.class +*.swp # C extensions *.so diff --git a/.travis.yml b/.travis.yml index 198d7110..7877a788 100644 --- a/.travis.yml +++ b/.travis.yml @@ -38,9 +38,9 @@ before_script: - ./wait_for_mysql.sh - sudo docker ps # set up a database - - mysql --protocol=tcp -u root -e "CREATE DATABASE index_digest; CREATE USER 'index_digest'@'%' IDENTIFIED BY 'qwerty'; GRANT ALL ON index_digest.* TO 'index_digest'@'%';" + - mysql --protocol=tcp -u root -v < setup.sql - "./sql/populate.sh" # import the test schema files - - mysql --protocol=tcp -uindex_digest -pqwerty index_digest -v -e '\s; SHOW TABLES;' + - mysql --protocol=tcp -uindex_digest -pqwerty index_digest -v -e '\s; SHOW TABLES; SHOW DATABASES;' install: make install script: make coverage && make lint && make demo diff --git a/Makefile b/Makefile index 218daa05..341f9364 100644 --- a/Makefile +++ b/Makefile @@ -20,7 +20,7 @@ lint: pylint $(project_name)/ --ignore=test demo: - index_digest mysql://index_digest:qwerty@127.0.0.1/index_digest --sql-log sql/0002-not-used-indices-log --analyze-data --skip-checks=non_utf_columns --skip-tables=0028_no_time + index_digest mysql://index_digest:qwerty@127.0.0.1/index_digest --sql-log sql/0002-not-used-indices-log --analyze-data --check-empty-databases --skip-checks=non_utf_columns --skip-tables=0028_no_time sql-console: mysql --prompt='mysql@\h[\d]>' --protocol=tcp -uindex_digest -pqwerty index_digest diff --git a/README.md b/README.md index 5a48186c..535f68ac 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,8 @@ Analyses your database queries and schema and suggests indices improvements. 
You * if run with `--analyze-data` switch it: * reports tables with old data (by querying for `MIN()` value of time column) where data retency can be reviewed * reports tables with not up-to-date data (by querying for `MAX()` value of time column) +* if run with `--check-empty-databases` switch it: + * reports empty databases on the current MySQL server This tool **supports MySQL 5.5, 5.6, 5.7, 8.0 and MariaDB 10.0, 10.2** and runs under **Python 2.7, 3.4, 3.5 and 3.6**. @@ -116,7 +118,7 @@ Outputs YML file with results and metadata. You can select which checks should be reported by the tool by using `--checks` command line option. Certain checks can also be skipped via `--skip-checks` option. Refer to `index_digest --help` for examples. -> **Number of checks**: 22 +> **Number of checks**: 23 * `redundant_indices`: reports indices that are redundant and covered by other * `non_utf_columns`: reports text columns that have characters encoding set to `latin1` (utf is the way to go) @@ -151,6 +153,12 @@ You can select which checks should be reported by the tool by using `--checks` c * `data_too_old`: reports tables that have really old data, maybe it's worth checking if such long data retention is actually needed (**defaults to three months threshold**, can be customized via `INDEX_DIGEST_DATA_TOO_OLD_THRESHOLD_DAYS` env variable) * `data_not_updated_recently`: reports tables that were not updated recently, check if it should be up-to-date (**defaults a month threshold**, can be customized via `INDEX_DIGEST_DATA_NOT_UPDATED_RECENTLY_THRESHOLD_DAYS` env variable) +### Additional checks performed across databases on the current MySQL server + +> You need to use `--check-empty-databases` command line switch. 
+ +* `empty_database`: reports databases that have no `BASE TABLE` tables (as provided by `information_schema.TABLES`) + ## An example report ```sql @@ -422,6 +430,11 @@ high_offset_selects → table affected: page - limit: 200 - offset: 927600 +------------------------------------------------------------ +empty_database → table affected: index_digest_empty + +✗ "index_digest_empty" database has no tables + ------------------------------------------------------------ Queries performed: 100 ``` diff --git a/indexdigest/cli/script.py b/indexdigest/cli/script.py index 4ef155d6..0e485f95 100644 --- a/indexdigest/cli/script.py +++ b/indexdigest/cli/script.py @@ -4,7 +4,7 @@ Analyses your database queries and schema and suggests indices improvements. Usage: - index_digest DSN [--sql-log=] [--format=] [--analyze-data] [--checks= | --skip-checks=] [--tables= | --skip-tables=] + index_digest DSN [--sql-log=] [--format=] [--analyze-data] [--check-empty-databases] [--checks= | --skip-checks=] [--tables= | --skip-tables=] index_digest (-h | --help) index_digest --version @@ -13,6 +13,7 @@ --sql-log= Text file with SQL queries to check against the database --format= Use a given results formatter (plain, syslog, yaml) --analyze-data Run additional checks that will query table data (can be slow!) 
+ --check-empty-databases Detect empty databases on this MySQL server --checks= Comma-separated lists of checks to report --skip-checks= Comma-separated lists of checks to skip from report --tables= Comma-separated lists of tables to report @@ -63,14 +64,16 @@ check_data_not_updated_recently, \ check_generic_primary_key, \ check_high_offset_selects, \ - check_use_innodb + check_use_innodb, \ + check_empty_database -def get_reports(database, sql_log=None, analyze_data=False): +def get_reports(database, sql_log=None, analyze_data=False, check_empty_databases=False): """ :type database Database :type sql_log str :type analyze_data bool + :type check_empty_databases bool :rtype: list[indexdigest.utils.LinterEntry] """ logger = logging.getLogger(__name__) @@ -126,6 +129,15 @@ def get_reports(database, sql_log=None, analyze_data=False): check_data_not_updated_recently(database, env=environ), ) + # --check-empty-databases switch to be on to run "empty_database" (see #146) + if check_empty_databases is True: + logger.info("Will analyze databases on this MySQL server, can take a while...") + + reports = chain( + reports, + check_empty_database(database), + ) + return reports @@ -190,7 +202,8 @@ def main(): reports = get_reports( database, sql_log=arguments.get('--sql-log'), - analyze_data=arguments.get('--analyze-data') + analyze_data=arguments.get('--analyze-data'), + check_empty_databases=arguments.get('--check-empty-databases') ) # handle --checks / --skip-checks diff --git a/indexdigest/linters/__init__.py b/indexdigest/linters/__init__.py index 9e4f8722..c0630e32 100644 --- a/indexdigest/linters/__init__.py +++ b/indexdigest/linters/__init__.py @@ -23,3 +23,4 @@ from .linter_0093_having_clause import check_having_clause from .linter_0094_generic_primary_key import check_generic_primary_key from .linter_0118_high_offset_selects import check_high_offset_selects +from .linter_0164_empty_database import check_empty_database diff --git 
def get_empty_databases(database):
    """
    Yield the names of databases on the current server that have no tables.

    Every name returned by ``SHOW DATABASES`` (except ``information_schema``)
    is looked up in ``information_schema.TABLES``. Only ``BASE TABLE`` rows
    are counted, so a database that holds nothing but views is reported too.

    :type database indexdigest.database.Database
    :rtype: generator of str
    """
    for db_name in database.query_list('SHOW DATABASES'):
        # skip "core" MySQL databases
        if db_name == 'information_schema':
            continue

        # Use single-quoted string literals: when sql_mode includes ANSI_QUOTES
        # a double-quoted token is parsed as an identifier and the query fails.
        # A quote embedded in the database name is escaped by doubling it.
        tables_count = database.query_field(
            "SELECT COUNT(*) FROM information_schema.TABLES "
            "WHERE TABLE_SCHEMA = '{}' AND "
            "TABLE_TYPE = 'BASE TABLE'".format(db_name.replace("'", "''")))

        if tables_count == 0:
            yield db_name


def check_empty_database(database):
    """
    Report each database found by get_empty_databases() as a linter entry.

    The database name is passed as the entry's ``table_name``.

    :type database indexdigest.database.Database
    :rtype: generator of LinterEntry
    """
    for db_name in get_empty_databases(database):
        yield LinterEntry(linter_type='empty_database', table_name=db_name,
                          message='"{}" database has no tables'.format(db_name))
class TestLinter(TestCase, DatabaseTestMixin):
    """
    Integration test for the "empty_database" linter.

    Expects exactly one empty database, "index_digest_empty", to be visible
    to the connection under test.
    NOTE(review): self.connection is presumably provided by DatabaseTestMixin.
    """

    def test_empty_database(self):
        reports = list(check_empty_database(self.connection))

        # Assert the length first: with no reports, the reports[0] lookups below
        # would raise IndexError and hide what the linter actually returned.
        assert len(reports) == 1, reports

        print(reports, reports[0].context)

        assert str(reports[0]) == 'index_digest_empty: "index_digest_empty" database has no tables'
        assert reports[0].table_name == 'index_digest_empty'