From 61d76b7bdfee78f79283eddb0f2d3759c2ee5666 Mon Sep 17 00:00:00 2001 From: macbre Date: Sun, 25 Mar 2018 15:38:19 +0200 Subject: [PATCH 1/5] setup.sql | databases and user setup script --- .gitignore | 1 + .travis.yml | 4 ++-- setup.sql | 9 +++++++++ 3 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 setup.sql diff --git a/.gitignore b/.gitignore index da838999..da7e0b5e 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ __pycache__/ .pytest_cache/ *.py[cod] *$py.class +*.swp # C extensions *.so diff --git a/.travis.yml b/.travis.yml index 198d7110..7877a788 100644 --- a/.travis.yml +++ b/.travis.yml @@ -38,9 +38,9 @@ before_script: - ./wait_for_mysql.sh - sudo docker ps # set up a database - - mysql --protocol=tcp -u root -e "CREATE DATABASE index_digest; CREATE USER 'index_digest'@'%' IDENTIFIED BY 'qwerty'; GRANT ALL ON index_digest.* TO 'index_digest'@'%';" + - mysql --protocol=tcp -u root -v < setup.sql - "./sql/populate.sh" # import the test schema files - - mysql --protocol=tcp -uindex_digest -pqwerty index_digest -v -e '\s; SHOW TABLES;' + - mysql --protocol=tcp -uindex_digest -pqwerty index_digest -v -e '\s; SHOW TABLES; SHOW DATABASES;' install: make install script: make coverage && make lint && make demo diff --git a/setup.sql b/setup.sql new file mode 100644 index 00000000..ec26c880 --- /dev/null +++ b/setup.sql @@ -0,0 +1,9 @@ +-- create databases +CREATE DATABASE IF NOT EXISTS index_digest; +CREATE DATABASE IF NOT EXISTS index_digest_empty; -- #146 + +-- create a user and grant access to our databases +CREATE USER 'index_digest'@'%' IDENTIFIED BY 'qwerty'; + +GRANT ALL ON index_digest.* TO 'index_digest'@'%'; +GRANT ALL ON index_digest_empty.* TO 'index_digest'@'%'; From 7d0f1260dc537984d4f207d4a914f83f279bb962 Mon Sep 17 00:00:00 2001 From: macbre Date: Sun, 25 Mar 2018 15:59:45 +0200 Subject: [PATCH 2/5] data_not_updated_recently | fix the test --- sql/0028-data-not-updated-recently.sql | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/sql/0028-data-not-updated-recently.sql b/sql/0028-data-not-updated-recently.sql index 994918e8..93f1225b 100644 --- a/sql/0028-data-not-updated-recently.sql +++ b/sql/0028-data-not-updated-recently.sql @@ -14,4 +14,4 @@ CREATE TABLE `0028_data_not_updated_recently` ( INSERT INTO 0028_data_not_updated_recently(cnt, `timestamp`) VALUES (20, NOW() - INTERVAL 50 DAY), (20, NOW() - INTERVAL 45 DAY), - (20, NOW() - INTERVAL 40 DAY); + (20, NOW() - INTERVAL 41 DAY); From e96b95166252c4b8fdb93c6c238ff9821245075e Mon Sep 17 00:00:00 2001 From: macbre Date: Sun, 25 Mar 2018 16:00:36 +0200 Subject: [PATCH 3/5] empty_database | add a linter (resolves #164) --- .../linters/linter_0164_empty_database.py | 32 +++++++++++++++++++ .../test/linters/test_0164_empty_database.py | 19 +++++++++++ 2 files changed, 51 insertions(+) create mode 100644 indexdigest/linters/linter_0164_empty_database.py create mode 100644 indexdigest/test/linters/test_0164_empty_database.py diff --git a/indexdigest/linters/linter_0164_empty_database.py b/indexdigest/linters/linter_0164_empty_database.py new file mode 100644 index 00000000..7b73df1e --- /dev/null +++ b/indexdigest/linters/linter_0164_empty_database.py @@ -0,0 +1,32 @@ +""" +This linter checks for databases with no tables +""" +from indexdigest.utils import LinterEntry + + +def get_empty_databases(database): + """ + :type database indexdigest.database.Database + :rtype: list[str] + """ + for db_name in database.query_list('SHOW DATABASES'): + # skip "core" MySQL databases + if db_name in ['information_schema']: + continue + + tables_count = database.query_field('SELECT COUNT(*) FROM information_schema.TABLES ' + 'WHERE TABLE_SCHEMA = "{}" AND ' + 'TABLE_TYPE = "BASE TABLE"'.format(db_name)) + # print(db_name, tables_count) + if tables_count == 0: + yield db_name + + +def check_empty_database(database): + """ + :type database indexdigest.database.Database + :rtype: list[LinterEntry] + """ + for db_name in 
get_empty_databases(database): + yield LinterEntry(linter_type='empty_database', table_name=db_name, + message='"{}" database has no tables'.format(db_name)) diff --git a/indexdigest/test/linters/test_0164_empty_database.py b/indexdigest/test/linters/test_0164_empty_database.py new file mode 100644 index 00000000..f1023dd3 --- /dev/null +++ b/indexdigest/test/linters/test_0164_empty_database.py @@ -0,0 +1,19 @@ +from __future__ import print_function + +from unittest import TestCase + +from indexdigest.linters.linter_0164_empty_database import check_empty_database +from indexdigest.test import DatabaseTestMixin + + +class TestLinter(TestCase, DatabaseTestMixin): + + def test_empty_database(self): + reports = list(check_empty_database(self.connection)) + + print(reports, reports[0].context) + + assert len(reports) == 1 + + assert str(reports[0]) == 'index_digest_empty: "index_digest_empty" database has no tables' + assert reports[0].table_name == 'index_digest_empty' From 485f23b6b3c2975eb030d5f5987524223d77b970 Mon Sep 17 00:00:00 2001 From: macbre Date: Sun, 25 Mar 2018 16:10:55 +0200 Subject: [PATCH 4/5] empty_database | add to a CLI tool --- Makefile | 2 +- README.md | 15 ++++++++++++++- indexdigest/cli/script.py | 21 +++++++++++++++++---- indexdigest/linters/__init__.py | 1 + 4 files changed, 33 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 218daa05..341f9364 100644 --- a/Makefile +++ b/Makefile @@ -20,7 +20,7 @@ lint: pylint $(project_name)/ --ignore=test demo: - index_digest mysql://index_digest:qwerty@127.0.0.1/index_digest --sql-log sql/0002-not-used-indices-log --analyze-data --skip-checks=non_utf_columns --skip-tables=0028_no_time + index_digest mysql://index_digest:qwerty@127.0.0.1/index_digest --sql-log sql/0002-not-used-indices-log --analyze-data --check-empty-databases --skip-checks=non_utf_columns --skip-tables=0028_no_time sql-console: mysql --prompt='mysql@\h[\d]>' --protocol=tcp -uindex_digest -pqwerty index_digest diff --git 
a/README.md b/README.md index 5a48186c..535f68ac 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,8 @@ Analyses your database queries and schema and suggests indices improvements. You * if run with `--analyze-data` switch it: * reports tables with old data (by querying for `MIN()` value of time column) where data retency can be reviewed * reports tables with not up-to-date data (by querying for `MAX()` value of time column) +* if run with `--check-empty-databases` switch it: + * reports empty databases on the current MySQL server This tool **supports MySQL 5.5, 5.6, 5.7, 8.0 and MariaDB 10.0, 10.2** and runs under **Python 2.7, 3.4, 3.5 and 3.6**. @@ -116,7 +118,7 @@ Outputs YML file with results and metadata. You can select which checks should be reported by the tool by using `--checks` command line option. Certain checks can also be skipped via `--skip-checks` option. Refer to `index_digest --help` for examples. -> **Number of checks**: 22 +> **Number of checks**: 23 * `redundant_indices`: reports indices that are redundant and covered by other * `non_utf_columns`: reports text columns that have characters encoding set to `latin1` (utf is the way to go) @@ -151,6 +153,12 @@ You can select which checks should be reported by the tool by using `--checks` c * `data_too_old`: reports tables that have really old data, maybe it's worth checking if such long data retention is actually needed (**defaults to three months threshold**, can be customized via `INDEX_DIGEST_DATA_TOO_OLD_THRESHOLD_DAYS` env variable) * `data_not_updated_recently`: reports tables that were not updated recently, check if it should be up-to-date (**defaults a month threshold**, can be customized via `INDEX_DIGEST_DATA_NOT_UPDATED_RECENTLY_THRESHOLD_DAYS` env variable) +### Additional checks performed across databases on the current MySQL server + +> You need to use the `--check-empty-databases` command line switch.
+ +* `empty_database`: reports databases that have no `BASE TABLE` tables (as provided by `information_schema.TABLES`) + ## An example report ```sql @@ -422,6 +430,11 @@ high_offset_selects → table affected: page - limit: 200 - offset: 927600 +------------------------------------------------------------ +empty_database → table affected: index_digest_empty + +✗ "index_digest_empty" database has no tables + ------------------------------------------------------------ Queries performed: 100 ``` diff --git a/indexdigest/cli/script.py b/indexdigest/cli/script.py index 4ef155d6..0e485f95 100644 --- a/indexdigest/cli/script.py +++ b/indexdigest/cli/script.py @@ -4,7 +4,7 @@ Analyses your database queries and schema and suggests indices improvements. Usage: - index_digest DSN [--sql-log=<file>] [--format=<formatter>] [--analyze-data] [--checks=<checks> | --skip-checks=<skip-checks>] [--tables=<tables> | --skip-tables=<skip-tables>] + index_digest DSN [--sql-log=<file>] [--format=<formatter>] [--analyze-data] [--check-empty-databases] [--checks=<checks> | --skip-checks=<skip-checks>] [--tables=<tables> | --skip-tables=<skip-tables>] index_digest (-h | --help) index_digest --version @@ -13,6 +13,7 @@ --sql-log=<file> Text file with SQL queries to check against the database --format=<formatter> Use a given results formatter (plain, syslog, yaml) --analyze-data Run additional checks that will query table data (can be slow!) 
+ --check-empty-databases Detect empty databases on this MySQL server --checks=<checks> Comma-separated lists of checks to report --skip-checks=<skip-checks> Comma-separated lists of checks to skip from report --tables=<tables> Comma-separated lists of tables to report @@ -63,14 +64,16 @@ check_data_not_updated_recently, \ check_generic_primary_key, \ check_high_offset_selects, \ - check_use_innodb + check_use_innodb, \ + check_empty_database -def get_reports(database, sql_log=None, analyze_data=False): +def get_reports(database, sql_log=None, analyze_data=False, check_empty_databases=False): """ :type database Database :type sql_log str :type analyze_data bool + :type check_empty_databases bool :rtype: list[indexdigest.utils.LinterEntry] """ logger = logging.getLogger(__name__) @@ -126,6 +129,15 @@ def get_reports(database, sql_log=None, analyze_data=False): check_data_not_updated_recently(database, env=environ), ) + # --check-empty-databases switch to be on to run "empty_database" (see #146) + if check_empty_databases is True: + logger.info("Will analyze databases on this MySQL server, can take a while...") + + reports = chain( + reports, + check_empty_database(database), + ) + return reports @@ -190,7 +202,8 @@ def main(): reports = get_reports( database, sql_log=arguments.get('--sql-log'), - analyze_data=arguments.get('--analyze-data') + analyze_data=arguments.get('--analyze-data'), + check_empty_databases=arguments.get('--check-empty-databases') ) # handle --checks / --skip-checks diff --git a/indexdigest/linters/__init__.py b/indexdigest/linters/__init__.py index 9e4f8722..c0630e32 100644 --- a/indexdigest/linters/__init__.py +++ b/indexdigest/linters/__init__.py @@ -23,3 +23,4 @@ from .linter_0093_having_clause import check_having_clause from .linter_0094_generic_primary_key import check_generic_primary_key from .linter_0118_high_offset_selects import check_high_offset_selects +from .linter_0164_empty_database import check_empty_database From bc1319d9b5419fbf1d7a15dcbaf272938795da83 Mon 
Sep 17 00:00:00 2001 From: macbre Date: Sun, 25 Mar 2018 16:16:50 +0200 Subject: [PATCH 5/5] test_0028_data_not_updated_recently.py: time saving fixes... --- .../test/linters/test_0028_data_not_updated_recently.py | 4 ++-- sql/0028-data-not-updated-recently.sql | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/indexdigest/test/linters/test_0028_data_not_updated_recently.py b/indexdigest/test/linters/test_0028_data_not_updated_recently.py index f1014d75..0628fa66 100644 --- a/indexdigest/test/linters/test_0028_data_not_updated_recently.py +++ b/indexdigest/test/linters/test_0028_data_not_updated_recently.py @@ -36,9 +36,9 @@ def test_data_not_updated_recently(self): assert len(reports) == 1 assert str(reports[0]).startswith('0028_data_not_updated_recently: "0028_data_not_updated_recently" ' - 'has the latest row added 4') # 40 days ago + 'has the latest row added ') assert str(reports[0]).endswith('consider checking if it should be up-to-date') - self.assertAlmostEquals(reports[0].context['diff_days'], 40) + assert abs(reports[0].context['diff_days'] - 40) < 2, 'diff_days is around 40 days' assert reports[0].table_name == '0028_data_not_updated_recently' assert 'data_since' in reports[0].context diff --git a/sql/0028-data-not-updated-recently.sql b/sql/0028-data-not-updated-recently.sql index 93f1225b..994918e8 100644 --- a/sql/0028-data-not-updated-recently.sql +++ b/sql/0028-data-not-updated-recently.sql @@ -14,4 +14,4 @@ CREATE TABLE `0028_data_not_updated_recently` ( INSERT INTO 0028_data_not_updated_recently(cnt, `timestamp`) VALUES (20, NOW() - INTERVAL 50 DAY), (20, NOW() - INTERVAL 45 DAY), - (20, NOW() - INTERVAL 41 DAY); + (20, NOW() - INTERVAL 40 DAY);