diff --git a/indexdigest/database.py b/indexdigest/database.py index 3ebea59d..98233b5e 100644 --- a/indexdigest/database.py +++ b/indexdigest/database.py @@ -266,11 +266,13 @@ def get_table_metadata(self, table_name): :type table_name str :rtype: dict """ - # @see https://dev.mysql.com/doc/refman/5.7/en/tables-table.html + # https://dev.mysql.com/doc/refman/5.7/en/tables-table.html + # https://mariadb.com/kb/en/information-schema-tables-table/ stats = self.query_dict_row( "SELECT ENGINE, TABLE_ROWS, DATA_LENGTH, INDEX_LENGTH " "FROM information_schema.TABLES " + self._get_information_schema_where(table_name)) + # TODO: introduce dataclass return { 'engine': stats['ENGINE'], 'rows': stats['TABLE_ROWS'], # For InnoDB the row count is only a rough estimate diff --git a/indexdigest/linters/linter_0031_low_cardinality_index.py b/indexdigest/linters/linter_0031_low_cardinality_index.py index 1d65e4d3..399610c5 100644 --- a/indexdigest/linters/linter_0031_low_cardinality_index.py +++ b/indexdigest/linters/linter_0031_low_cardinality_index.py @@ -6,10 +6,10 @@ from indexdigest.utils import LinterEntry # skip small tables -ROWS_COUNT_THRESHOLD = 1000 +ROWS_COUNT_THRESHOLD = 100000 # cardinality threshold -INDEX_CARDINALITY_THRESHOLD = 5 +INDEX_CARDINALITY_THRESHOLD = 6 # the least frequent value should be used at most by x% rows INDEX_VALUE_PERCENTAGE_THRESHOLD = 20 @@ -40,7 +40,7 @@ def get_low_cardinality_indices(database): if index['CARDINALITY'] > INDEX_CARDINALITY_THRESHOLD: continue - yield (table_name, rows_count, index) + yield table_name, rows_count, index def check_low_cardinality_index(database): diff --git a/indexdigest/test/core/test_database.py b/indexdigest/test/core/test_database.py index b646fd8c..131299f0 100644 --- a/indexdigest/test/core/test_database.py +++ b/indexdigest/test/core/test_database.py @@ -150,7 +150,7 @@ def test_get_table_metadata(self): # stats self.assertEqual(meta['engine'], 'InnoDB') - self.assertEqual(meta['rows'], 3) + self.assertAlmostEqual(meta['rows'], 3, delta=1) self.assertTrue(meta['index_size'] > 0) self.assertTrue(meta['data_size'] > 0) @@ -180,7 +180,7 @@ def test_get_table_columns(self): # assert False def test_get_table_rows_estimate(self): - self.assertEqual(self.connection.get_table_rows_estimate(self.TABLE_NAME), 3) + self.assertAlmostEqual(self.connection.get_table_rows_estimate(self.TABLE_NAME), 3, delta=1) class TestsWithDatabaseMocked(TestCase): diff --git a/indexdigest/test/linters/test_0031_low_cardinality_index.py b/indexdigest/test/linters/test_0031_low_cardinality_index.py index 4a554ac1..f0970b7d 100644 --- a/indexdigest/test/linters/test_0031_low_cardinality_index.py +++ b/indexdigest/test/linters/test_0031_low_cardinality_index.py @@ -3,7 +3,7 @@ from unittest import TestCase from indexdigest.linters.linter_0031_low_cardinality_index import \ - check_low_cardinality_index, get_low_cardinality_indices + check_low_cardinality_index, get_low_cardinality_indices, INDEX_CARDINALITY_THRESHOLD from indexdigest.test import DatabaseTestMixin @@ -15,11 +15,13 @@ def test_get_low_cardinality_indices(self): print(indices) assert len(indices) == 1 - assert indices[0][0] == '0020_big_table' - assert indices[0][2]['INDEX_NAME'] == 'num_idx' - assert indices[0][2]['COLUMN_NAME'] == 'num' - assert indices[0][2]['CARDINALITY'] > 1 - assert indices[0][2]['CARDINALITY'] < 5 + + index = indices[0] + assert index[0] == '0020_big_table' + assert index[2]['INDEX_NAME'] == 'num_idx' + assert index[2]['COLUMN_NAME'] == 'num' + assert index[2]['CARDINALITY'] > 1 + assert index[2]['CARDINALITY'] <= INDEX_CARDINALITY_THRESHOLD def test_low_cardinality_index(self): reports = list(check_low_cardinality_index(self.connection))