Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 7 additions & 24 deletions indexdigest/linters/linter_0006_not_used_columns_and_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,39 +6,22 @@
from collections import defaultdict, OrderedDict
from sql_metadata import get_query_columns, get_query_tables

from indexdigest.database import IndexDigestQueryError
from indexdigest.utils import LinterEntry, is_select_query


def get_used_tables_from_queries(database, queries):
def get_used_tables_from_queries(queries):
"""
:type database indexdigest.database.Database
:type queries list[str]
:rtype: list[str]
"""
logger = logging.getLogger(__name__)

used_tables = []
queries = filter(is_select_query, queries)

for query in queries:
# run EXPLAIN for each query from the log
try:
for row in database.explain_query(query):
if row.get('table') is not None:
if row['table'] not in used_tables:
used_tables.append(row['table'])
else:
# EXPLAIN may return "no matching row in const table"
logger.warning('EXPLAIN %s returned no table, falling back to SQL parsing',
query)

# fall back to SQL query parsing
tables = get_query_tables(query)
if tables and tables[0] not in used_tables:
used_tables.append(tables[0])
except IndexDigestQueryError:
logger.error('Cannot explain the query: %s', query)
# parse each query from the log
tables = get_query_tables(query)
if tables and tables[0] not in used_tables:
used_tables.append(tables[0])

return used_tables

Expand All @@ -55,7 +38,7 @@ def check_not_used_tables(database, queries):
tables = database.get_tables()

# analyze only SELECT queries from the log
used_tables = get_used_tables_from_queries(database, queries)
used_tables = get_used_tables_from_queries(queries)
logger.info("These tables were used by provided queries: %s", used_tables)

# now check which tables were not used
Expand Down Expand Up @@ -88,7 +71,7 @@ def check_not_used_columns(database, queries):
# analyze only SELECT queries from the log
queries = list(filter(is_select_query, queries))

used_tables = get_used_tables_from_queries(database, queries)
used_tables = get_used_tables_from_queries(queries)
used_columns = defaultdict(list)

logger.info("Will check these tables: %s", used_tables)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,11 @@ def test_not_used_tables(self):

def test_get_used_tables_from_queries(self):
queries = [
'SELECT /* a comment */ foo FROM `0006_not_used_columns` WHERE id = 1;',
'SELECT /* a comment */ foo FROM `0006_not_used_columns` AS r WHERE id = 1;', # table alias
'SELECT 1 FROM `0006_not_used_tables` WHERE id = 3;',
]

tables = get_used_tables_from_queries(
database=self.connection, queries=queries)
tables = get_used_tables_from_queries(queries)

print(tables)

Expand Down
4 changes: 1 addition & 3 deletions indexdigest/test/test_0089_handle_sql_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,7 @@ def queries(self):
return read_queries_from_log('0098-handle-sql-errors-log')

def test_get_used_tables_from_queries(self):
tables = get_used_tables_from_queries(
database=self.connection,
queries=self.queries)
tables = get_used_tables_from_queries(self.queries)

print(tables)

Expand Down
6 changes: 3 additions & 3 deletions sql/0098-handle-sql-errors-log
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
-- ERROR 1140 (42000): In aggregated query without GROUP BY, expression #1 of SELECT list contains nonaggregated column 'index_digest.0020_big_table.val'; this is incompatible with sql_mode=only_full_group_by
SELECT val, count(*) FROM 0020_big_table WHERE id BETWEEN 10 AND 20;
SELECT val, count(*) FROM `0020_big_table` WHERE id BETWEEN 10 AND 20;

-- query with aliases
SELECT t.val as value, count(*) FROM 0020_big_table as t WHERE id BETWEEN 10 AND 20 GROUP BY val;
SELECT val as value, count(*) FROM 0020_big_table WHERE id BETWEEN 10 AND 20 GROUP BY val;
SELECT t.val as value, count(*) FROM `0020_big_table` as t WHERE id BETWEEN 10 AND 20 GROUP BY val;
SELECT val as value, count(*) FROM `0020_big_table` WHERE id BETWEEN 10 AND 20 GROUP BY val;

-- invalid syntax
SELEKT foo FROM bar;