diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 37703d0c1..51786832f 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -4,6 +4,11 @@ repos:
     hooks:
       - id: black
         language_version: python3
+  - repo: https://gitlab.com/pycqa/flake8
+    rev: 3.9.2
+    hooks:
+      - id: flake8
+        language_version: python3
   - repo: https://github.com/pycqa/isort
     rev: 5.7.0
     hooks:
diff --git a/dask_sql/context.py b/dask_sql/context.py
index 5456148d8..2f1c254d0 100644
--- a/dask_sql/context.py
+++ b/dask_sql/context.py
@@ -2,8 +2,7 @@
 import inspect
 import logging
 import warnings
-from collections import namedtuple
-from typing import Any, Callable, Dict, List, Tuple, Union
+from typing import TYPE_CHECKING, Any, Callable, Dict, List, Tuple, Union
 
 import dask.dataframe as dd
 import pandas as pd
@@ -30,6 +29,9 @@
 from dask_sql.physical.rex import RexConverter, core
 from dask_sql.utils import ParsingException
 
+if TYPE_CHECKING:
+    from dask_sql.java import org
+
 logger = logging.getLogger(__name__)
@@ -600,7 +602,7 @@ def stop_server(self):  # pragma: no cover
         """
         Stop a SQL server started by ``run_server`.
         """
-        if not self.sql_server is None:
+        if self.sql_server is not None:
             loop = asyncio.get_event_loop()
             assert loop
             loop.create_task(self.sql_server.shutdown())
@@ -767,7 +769,8 @@ def _to_sql_string(self, s: "org.apache.calcite.sql.SqlNode", default_dialect=No
 
         try:
             return str(s.toSqlString(default_dialect))
-        except:  # pragma: no cover. Have not seen any instance so far, but better be safe than sorry
+        # Have not seen any instance so far, but better be safe than sorry
+        except Exception:  # pragma: no cover
             return str(s)
 
     def _get_tables_from_stack(self):
diff --git a/dask_sql/input_utils/convert.py b/dask_sql/input_utils/convert.py
index 29cb0fdd8..e43df2334 100644
--- a/dask_sql/input_utils/convert.py
+++ b/dask_sql/input_utils/convert.py
@@ -1,5 +1,5 @@
 import logging
-from typing import Union
+from typing import TYPE_CHECKING, Union
 
 import dask.dataframe as dd
 import pandas as pd
@@ -8,6 +8,11 @@
 from dask_sql.input_utils.base import BaseInputPlugin
 from dask_sql.utils import Pluggable
 
+if TYPE_CHECKING:
+    import cudf
+    import hive
+    import sqlalchemy
+
 logger = logging.Logger(__name__)
 
 InputType = Union[
diff --git a/dask_sql/input_utils/hive.py b/dask_sql/input_utils/hive.py
index 30255f492..4e1bdde62 100644
--- a/dask_sql/input_utils/hive.py
+++ b/dask_sql/input_utils/hive.py
@@ -126,7 +126,7 @@ def wrapped_read_function(location, column_information, **kwargs):
         else:  # pragma: no cover
             # prevent python to optimize it away and make coverage not respect the
             # pragma
-            dummy = 0
+            dummy = 0  # noqa: F841
 
         df = read_function(location, **kwargs)
 
         logger.debug(f"Applying column information: {column_information}")
@@ -251,7 +251,7 @@ def _parse_hive_table_description(
                 else:  # pragma: no cover
                     # prevent python to optimize it away and make coverage not respect the
                     # pragma
-                    dummy = 0
+                    dummy = 0  # noqa: F841
             elif value and last_field is not None:
                 last_field[value] = value2
diff --git a/dask_sql/integrations/ipython.py b/dask_sql/integrations/ipython.py
index 8f4c9931d..ff9c9b4ce 100644
--- a/dask_sql/integrations/ipython.py
+++ b/dask_sql/integrations/ipython.py
@@ -1,9 +1,12 @@
 import json
-from typing import Dict, List
+from typing import TYPE_CHECKING, Dict, List
 
 from dask_sql.mappings import _SQL_TO_PYTHON_FRAMES
 from dask_sql.physical.rex.core import RexCallPlugin
 
+if TYPE_CHECKING:
+    import dask_sql
+
 # JS snippet to use the created mime type highlighthing
_JS_ENABLE_DASK_SQL = r""" require(['notebook/js/codecell'], function(codecell) { @@ -128,6 +131,6 @@ def _register_syntax_highlighting(): # pragma: no cover display.display_javascript(js + _JS_ENABLE_DASK_SQL, raw=True) -def _create_set(l: List[str]) -> Dict[str, bool]: # pragma: no cover +def _create_set(keys: List[str]) -> Dict[str, bool]: # pragma: no cover """Small helper function to turn a list into the correct format for codemirror""" - return {key: True for key in l} + return {key: True for key in keys} diff --git a/dask_sql/java.py b/dask_sql/java.py index bcd4401a9..5101315e2 100644 --- a/dask_sql/java.py +++ b/dask_sql/java.py @@ -55,7 +55,7 @@ def _set_or_check_java_home(): from pyarrow.hdfs import _maybe_set_hadoop_classpath _maybe_set_hadoop_classpath() -except: # pragma: no cover +except Exception: # pragma: no cover pass # Define how to run the java virtual machine. diff --git a/dask_sql/physical/rel/base.py b/dask_sql/physical/rel/base.py index 969a4132c..20b2ee69b 100644 --- a/dask_sql/physical/rel/base.py +++ b/dask_sql/physical/rel/base.py @@ -1,11 +1,15 @@ import logging -from typing import List +from typing import TYPE_CHECKING, List import dask.dataframe as dd from dask_sql.datacontainer import ColumnContainer, DataContainer from dask_sql.mappings import cast_column_type, sql_to_python_type +if TYPE_CHECKING: + import dask_sql + from dask_sql.java import org + logger = logging.getLogger(__name__) diff --git a/dask_sql/physical/rel/convert.py b/dask_sql/physical/rel/convert.py index 92978ddc0..480580eee 100644 --- a/dask_sql/physical/rel/convert.py +++ b/dask_sql/physical/rel/convert.py @@ -1,4 +1,5 @@ import logging +from typing import TYPE_CHECKING import dask.dataframe as dd @@ -6,6 +7,10 @@ from dask_sql.physical.rel.base import BaseRelPlugin from dask_sql.utils import LoggableDataFrame, Pluggable +if TYPE_CHECKING: + import dask_sql + from dask_sql.java import org + logger = logging.getLogger(__name__) diff --git a/dask_sql/physical/rel/custom/analyze.py b/dask_sql/physical/rel/custom/analyze.py index d59f95d95..6bd7a1bfa 100644 --- a/dask_sql/physical/rel/custom/analyze.py +++ b/dask_sql/physical/rel/custom/analyze.py @@ -1,3 +1,5 @@ +from typing import TYPE_CHECKING + import dask.dataframe as dd import pandas as pd @@ -5,6 +7,10 @@ from dask_sql.mappings import python_to_sql_type from dask_sql.physical.rel.base import BaseRelPlugin +if TYPE_CHECKING: + import dask_sql + from dask_sql.java import org + class AnalyzeTablePlugin(BaseRelPlugin): """ diff --git a/dask_sql/physical/rel/custom/columns.py b/dask_sql/physical/rel/custom/columns.py index 216d78949..978a307bf 100644 --- a/dask_sql/physical/rel/custom/columns.py +++ b/dask_sql/physical/rel/custom/columns.py @@ -1,3 +1,5 @@ +from typing import TYPE_CHECKING + import dask.dataframe as dd import pandas as pd @@ -5,6 +7,10 @@ from dask_sql.mappings import python_to_sql_type from dask_sql.physical.rel.base import BaseRelPlugin +if TYPE_CHECKING: + import dask_sql + from dask_sql.java import org + class ShowColumnsPlugin(BaseRelPlugin): """ diff --git a/dask_sql/physical/rel/custom/create_experiment.py b/dask_sql/physical/rel/custom/create_experiment.py index f797d15f3..29537b4ff 100644 --- a/dask_sql/physical/rel/custom/create_experiment.py +++ b/dask_sql/physical/rel/custom/create_experiment.py @@ -1,4 +1,5 @@ import logging +from typing import TYPE_CHECKING import dask.dataframe as dd import pandas as pd @@ -8,6 +9,9 @@ from dask_sql.physical.rel.base import BaseRelPlugin from dask_sql.utils import 
convert_sql_kwargs, import_class +if TYPE_CHECKING: + import dask_sql + logger = logging.getLogger(__name__) diff --git a/dask_sql/physical/rel/custom/create_model.py b/dask_sql/physical/rel/custom/create_model.py index 79fd203fc..746a52428 100644 --- a/dask_sql/physical/rel/custom/create_model.py +++ b/dask_sql/physical/rel/custom/create_model.py @@ -1,10 +1,14 @@ import logging +from typing import TYPE_CHECKING from dask_sql.datacontainer import DataContainer from dask_sql.java import org from dask_sql.physical.rel.base import BaseRelPlugin from dask_sql.utils import convert_sql_kwargs, import_class +if TYPE_CHECKING: + import dask_sql + logger = logging.getLogger(__name__) diff --git a/dask_sql/physical/rel/custom/create_schema.py b/dask_sql/physical/rel/custom/create_schema.py index c9de21a00..6a8ae4e86 100644 --- a/dask_sql/physical/rel/custom/create_schema.py +++ b/dask_sql/physical/rel/custom/create_schema.py @@ -1,8 +1,12 @@ import logging +from typing import TYPE_CHECKING -from dask_sql.datacontainer import DataContainer from dask_sql.physical.rel.base import BaseRelPlugin +if TYPE_CHECKING: + import dask_sql + from dask_sql.java import org + logger = logging.getLogger(__name__) diff --git a/dask_sql/physical/rel/custom/create_table.py b/dask_sql/physical/rel/custom/create_table.py index d459b4849..5baefcc36 100644 --- a/dask_sql/physical/rel/custom/create_table.py +++ b/dask_sql/physical/rel/custom/create_table.py @@ -1,9 +1,14 @@ import logging +from typing import TYPE_CHECKING from dask_sql.datacontainer import DataContainer from dask_sql.physical.rel.base import BaseRelPlugin from dask_sql.utils import convert_sql_kwargs +if TYPE_CHECKING: + import dask_sql + from dask_sql.java import org + logger = logging.getLogger(__name__) diff --git a/dask_sql/physical/rel/custom/create_table_as.py b/dask_sql/physical/rel/custom/create_table_as.py index 999891c18..7a0c04044 100644 --- a/dask_sql/physical/rel/custom/create_table_as.py +++ b/dask_sql/physical/rel/custom/create_table_as.py @@ -1,8 +1,13 @@ import logging +from typing import TYPE_CHECKING from dask_sql.datacontainer import DataContainer from dask_sql.physical.rel.base import BaseRelPlugin +if TYPE_CHECKING: + import dask_sql + from dask_sql.java import org + logger = logging.getLogger(__name__) diff --git a/dask_sql/physical/rel/custom/describe_model.py b/dask_sql/physical/rel/custom/describe_model.py index 551d07df9..3f22dc78d 100644 --- a/dask_sql/physical/rel/custom/describe_model.py +++ b/dask_sql/physical/rel/custom/describe_model.py @@ -1,9 +1,15 @@ +from typing import TYPE_CHECKING + import dask.dataframe as dd import pandas as pd from dask_sql.datacontainer import ColumnContainer, DataContainer from dask_sql.physical.rel.base import BaseRelPlugin +if TYPE_CHECKING: + import dask_sql + from dask_sql.java import org + class ShowModelParamsPlugin(BaseRelPlugin): """ diff --git a/dask_sql/physical/rel/custom/drop_model.py b/dask_sql/physical/rel/custom/drop_model.py index faec9a2d7..f9d175976 100644 --- a/dask_sql/physical/rel/custom/drop_model.py +++ b/dask_sql/physical/rel/custom/drop_model.py @@ -1,8 +1,13 @@ import logging +from typing import TYPE_CHECKING from dask_sql.datacontainer import DataContainer from dask_sql.physical.rel.base import BaseRelPlugin +if TYPE_CHECKING: + import dask_sql + from dask_sql.java import org + logger = logging.getLogger(__name__) diff --git a/dask_sql/physical/rel/custom/drop_schema.py b/dask_sql/physical/rel/custom/drop_schema.py index e7e1fcf34..a47e5fb3b 100644 --- 
a/dask_sql/physical/rel/custom/drop_schema.py +++ b/dask_sql/physical/rel/custom/drop_schema.py @@ -1,8 +1,12 @@ import logging +from typing import TYPE_CHECKING -from dask_sql.datacontainer import DataContainer from dask_sql.physical.rel.base import BaseRelPlugin +if TYPE_CHECKING: + import dask_sql + from dask_sql.java import org + logger = logging.getLogger(__name__) diff --git a/dask_sql/physical/rel/custom/drop_table.py b/dask_sql/physical/rel/custom/drop_table.py index 4c4116a4a..cd3096e83 100644 --- a/dask_sql/physical/rel/custom/drop_table.py +++ b/dask_sql/physical/rel/custom/drop_table.py @@ -1,8 +1,13 @@ import logging +from typing import TYPE_CHECKING from dask_sql.datacontainer import DataContainer from dask_sql.physical.rel.base import BaseRelPlugin +if TYPE_CHECKING: + import dask_sql + from dask_sql.java import org + logger = logging.getLogger(__name__) diff --git a/dask_sql/physical/rel/custom/export_model.py b/dask_sql/physical/rel/custom/export_model.py index 08ff3d51b..5b5a8acf5 100644 --- a/dask_sql/physical/rel/custom/export_model.py +++ b/dask_sql/physical/rel/custom/export_model.py @@ -1,9 +1,14 @@ import logging import pickle +from typing import TYPE_CHECKING from dask_sql.physical.rel.base import BaseRelPlugin from dask_sql.utils import convert_sql_kwargs +if TYPE_CHECKING: + import dask_sql + from dask_sql.java import org + logger = logging.getLogger(__name__) @@ -67,7 +72,7 @@ def convert( import mlflow except ImportError: # pragma: no cover raise ImportError( - f"For export in the mlflow format, you need to have mlflow installed" + "For export in the mlflow format, you need to have mlflow installed" ) try: import sklearn @@ -77,7 +82,7 @@ def convert( mlflow.sklearn.save_model(model, location, **kwargs) else: raise NotImplementedError( - f"dask-sql supports only sklearn compatible model i.e fit-predict style model" + "dask-sql supports only sklearn compatible model i.e fit-predict style model" ) elif format == "onnx": """ diff --git a/dask_sql/physical/rel/custom/predict.py b/dask_sql/physical/rel/custom/predict.py index 40b4ac881..3a1650d19 100644 --- a/dask_sql/physical/rel/custom/predict.py +++ b/dask_sql/physical/rel/custom/predict.py @@ -1,11 +1,15 @@ import copy import logging import uuid +from typing import TYPE_CHECKING from dask_sql.datacontainer import ColumnContainer, DataContainer from dask_sql.java import com, java, org from dask_sql.physical.rel.base import BaseRelPlugin +if TYPE_CHECKING: + import dask_sql + logger = logging.getLogger(__name__) diff --git a/dask_sql/physical/rel/custom/schemas.py b/dask_sql/physical/rel/custom/schemas.py index b588c9a58..0d0771582 100644 --- a/dask_sql/physical/rel/custom/schemas.py +++ b/dask_sql/physical/rel/custom/schemas.py @@ -1,9 +1,15 @@ +from typing import TYPE_CHECKING + import dask.dataframe as dd import pandas as pd from dask_sql.datacontainer import ColumnContainer, DataContainer from dask_sql.physical.rel.base import BaseRelPlugin +if TYPE_CHECKING: + import dask_sql + from dask_sql.java import org + class ShowSchemasPlugin(BaseRelPlugin): """ diff --git a/dask_sql/physical/rel/custom/show_models.py b/dask_sql/physical/rel/custom/show_models.py index dee9971f0..c4a72a246 100644 --- a/dask_sql/physical/rel/custom/show_models.py +++ b/dask_sql/physical/rel/custom/show_models.py @@ -1,9 +1,15 @@ +from typing import TYPE_CHECKING + import dask.dataframe as dd import pandas as pd from dask_sql.datacontainer import ColumnContainer, DataContainer from dask_sql.physical.rel.base import BaseRelPlugin +if 
TYPE_CHECKING: + import dask_sql + from dask_sql.java import org + class ShowModelsPlugin(BaseRelPlugin): """ diff --git a/dask_sql/physical/rel/custom/switch_schema.py b/dask_sql/physical/rel/custom/switch_schema.py index 80ab1ad4d..7a695eb03 100644 --- a/dask_sql/physical/rel/custom/switch_schema.py +++ b/dask_sql/physical/rel/custom/switch_schema.py @@ -1,6 +1,12 @@ -from dask_sql.datacontainer import ColumnContainer, DataContainer +from typing import TYPE_CHECKING + +from dask_sql.datacontainer import DataContainer from dask_sql.physical.rel.base import BaseRelPlugin +if TYPE_CHECKING: + import dask_sql + from dask_sql.java import org + class SwitchSchemaPlugin(BaseRelPlugin): """ diff --git a/dask_sql/physical/rel/custom/tables.py b/dask_sql/physical/rel/custom/tables.py index 5ba1a9229..8ec1a2009 100644 --- a/dask_sql/physical/rel/custom/tables.py +++ b/dask_sql/physical/rel/custom/tables.py @@ -1,9 +1,15 @@ +from typing import TYPE_CHECKING + import dask.dataframe as dd import pandas as pd from dask_sql.datacontainer import ColumnContainer, DataContainer from dask_sql.physical.rel.base import BaseRelPlugin +if TYPE_CHECKING: + import dask_sql + from dask_sql.java import org + class ShowTablesPlugin(BaseRelPlugin): """ diff --git a/dask_sql/physical/rel/logical/aggregate.py b/dask_sql/physical/rel/logical/aggregate.py index 171e20c79..896aa42ee 100644 --- a/dask_sql/physical/rel/logical/aggregate.py +++ b/dask_sql/physical/rel/logical/aggregate.py @@ -2,7 +2,7 @@ import operator from collections import defaultdict from functools import reduce -from typing import Any, Callable, Dict, List, Tuple, Union +from typing import TYPE_CHECKING, Any, Callable, Dict, List, Tuple import dask.dataframe as dd import pandas as pd @@ -13,6 +13,10 @@ from dask_sql.physical.utils.groupby import get_groupby_with_nulls_cols from dask_sql.utils import new_temporary_column +if TYPE_CHECKING: + import dask_sql + from dask_sql.java import org + logger = logging.getLogger(__name__) diff --git a/dask_sql/physical/rel/logical/filter.py b/dask_sql/physical/rel/logical/filter.py index 41cf16951..053f50ffa 100644 --- a/dask_sql/physical/rel/logical/filter.py +++ b/dask_sql/physical/rel/logical/filter.py @@ -1,5 +1,5 @@ import logging -from typing import Union +from typing import TYPE_CHECKING, Union import dask.dataframe as dd import numpy as np @@ -8,6 +8,10 @@ from dask_sql.physical.rel.base import BaseRelPlugin from dask_sql.physical.rex import RexConverter +if TYPE_CHECKING: + import dask_sql + from dask_sql.java import org + logger = logging.getLogger(__name__) diff --git a/dask_sql/physical/rel/logical/join.py b/dask_sql/physical/rel/logical/join.py index b0c079f3c..16630d3ad 100644 --- a/dask_sql/physical/rel/logical/join.py +++ b/dask_sql/physical/rel/logical/join.py @@ -2,7 +2,7 @@ import operator import warnings from functools import reduce -from typing import List, Tuple +from typing import TYPE_CHECKING, List, Tuple import dask.dataframe as dd from dask.base import tokenize @@ -14,6 +14,9 @@ from dask_sql.physical.rel.logical.filter import filter_or_scalar from dask_sql.physical.rex import RexConverter +if TYPE_CHECKING: + import dask_sql + logger = logging.getLogger(__name__) diff --git a/dask_sql/physical/rel/logical/project.py b/dask_sql/physical/rel/logical/project.py index d456c8d80..737e795f2 100644 --- a/dask_sql/physical/rel/logical/project.py +++ b/dask_sql/physical/rel/logical/project.py @@ -1,13 +1,15 @@ import logging -from uuid import uuid4 +from typing import TYPE_CHECKING from 
dask_sql.datacontainer import DataContainer from dask_sql.java import org from dask_sql.physical.rel.base import BaseRelPlugin from dask_sql.physical.rex import RexConverter -from dask_sql.physical.rex.core.input_ref import RexInputRefPlugin from dask_sql.utils import new_temporary_column +if TYPE_CHECKING: + import dask_sql + logger = logging.getLogger(__name__) diff --git a/dask_sql/physical/rel/logical/sample.py b/dask_sql/physical/rel/logical/sample.py index 49441d0c1..f78d46f25 100644 --- a/dask_sql/physical/rel/logical/sample.py +++ b/dask_sql/physical/rel/logical/sample.py @@ -1,10 +1,15 @@ import logging +from typing import TYPE_CHECKING import numpy as np from dask_sql.datacontainer import DataContainer from dask_sql.physical.rel.base import BaseRelPlugin +if TYPE_CHECKING: + import dask_sql + from dask_sql.java import org + logger = logging.getLogger(__name__) diff --git a/dask_sql/physical/rel/logical/sort.py b/dask_sql/physical/rel/logical/sort.py index bbf74dde3..f10fc2531 100644 --- a/dask_sql/physical/rel/logical/sort.py +++ b/dask_sql/physical/rel/logical/sort.py @@ -1,6 +1,5 @@ -from typing import List +from typing import TYPE_CHECKING -import dask import dask.dataframe as dd from dask_sql.datacontainer import DataContainer @@ -9,7 +8,9 @@ from dask_sql.physical.rex import RexConverter from dask_sql.physical.utils.map import map_on_partition_index from dask_sql.physical.utils.sort import apply_sort -from dask_sql.utils import new_temporary_column + +if TYPE_CHECKING: + import dask_sql class LogicalSortPlugin(BaseRelPlugin): diff --git a/dask_sql/physical/rel/logical/table_scan.py b/dask_sql/physical/rel/logical/table_scan.py index 8113676f7..378858a05 100644 --- a/dask_sql/physical/rel/logical/table_scan.py +++ b/dask_sql/physical/rel/logical/table_scan.py @@ -1,8 +1,12 @@ -from typing import Dict +from typing import TYPE_CHECKING from dask_sql.datacontainer import DataContainer from dask_sql.physical.rel.base import BaseRelPlugin +if TYPE_CHECKING: + import dask_sql + from dask_sql.java import org + class LogicalTableScanPlugin(BaseRelPlugin): """ diff --git a/dask_sql/physical/rel/logical/union.py b/dask_sql/physical/rel/logical/union.py index ede4921cb..b8153fc28 100644 --- a/dask_sql/physical/rel/logical/union.py +++ b/dask_sql/physical/rel/logical/union.py @@ -1,8 +1,13 @@ +from typing import TYPE_CHECKING + import dask.dataframe as dd from dask_sql.datacontainer import ColumnContainer, DataContainer from dask_sql.physical.rel.base import BaseRelPlugin -from dask_sql.physical.rex import RexConverter + +if TYPE_CHECKING: + import dask_sql + from dask_sql.java import org class LogicalUnionPlugin(BaseRelPlugin): diff --git a/dask_sql/physical/rel/logical/values.py b/dask_sql/physical/rel/logical/values.py index 37b0c5fe2..1d8363201 100644 --- a/dask_sql/physical/rel/logical/values.py +++ b/dask_sql/physical/rel/logical/values.py @@ -1,3 +1,5 @@ +from typing import TYPE_CHECKING + import dask.dataframe as dd import pandas as pd @@ -5,6 +7,10 @@ from dask_sql.physical.rel.base import BaseRelPlugin from dask_sql.physical.rex import RexConverter +if TYPE_CHECKING: + import dask_sql + from dask_sql.java import org + class LogicalValuesPlugin(BaseRelPlugin): """ diff --git a/dask_sql/physical/rel/logical/window.py b/dask_sql/physical/rel/logical/window.py index ca23acfb1..e398fc77d 100644 --- a/dask_sql/physical/rel/logical/window.py +++ b/dask_sql/physical/rel/logical/window.py @@ -1,7 +1,7 @@ import logging from collections import namedtuple from functools import partial 
-from typing import Any, Callable, List, Optional, Tuple
+from typing import TYPE_CHECKING, Callable, List, Optional, Tuple
 
 import dask.dataframe as dd
 import numpy as np
@@ -14,7 +14,6 @@
 from dask_sql.physical.rex.convert import RexConverter
 from dask_sql.physical.rex.core.literal import RexLiteralPlugin
 from dask_sql.physical.utils.groupby import get_groupby_with_nulls_cols
-from dask_sql.physical.utils.map import map_on_partition_index
 from dask_sql.physical.utils.sort import sort_partition_func
 from dask_sql.utils import (
     LoggableDataFrame,
@@ -22,6 +21,9 @@
     new_temporary_column,
 )
 
+if TYPE_CHECKING:
+    import dask_sql
+
 logger = logging.getLogger(__name__)
@@ -96,7 +98,7 @@ def to_bound_description(
         else:  # pragma: no cover
             # prevent python to optimize it away and make coverage not respect the
             # pragma
-            dummy = 0
+            dummy = 0  # noqa: F841
 
         offset = int(RexLiteralPlugin().convert(offset, None, None))
     else:
         offset = None
diff --git a/dask_sql/physical/rex/convert.py b/dask_sql/physical/rex/convert.py
index 5232bd04c..16f4b652f 100644
--- a/dask_sql/physical/rex/convert.py
+++ b/dask_sql/physical/rex/convert.py
@@ -1,5 +1,5 @@
 import logging
-from typing import Any, Union
+from typing import TYPE_CHECKING, Any, Union
 
 import dask.dataframe as dd
 
@@ -8,6 +8,10 @@
 from dask_sql.physical.rex.base import BaseRexPlugin
 from dask_sql.utils import LoggableDataFrame, Pluggable
 
+if TYPE_CHECKING:
+    import dask_sql
+    from dask_sql.java import org
+
 logger = logging.getLogger(__name__)
diff --git a/dask_sql/physical/rex/core/call.py b/dask_sql/physical/rex/core/call.py
index 606603c8c..0a4aa5759 100644
--- a/dask_sql/physical/rex/core/call.py
+++ b/dask_sql/physical/rex/core/call.py
@@ -2,7 +2,7 @@
 import operator
 import re
 from functools import reduce
-from typing import Any, Callable, Union
+from typing import TYPE_CHECKING, Any, Callable, Union
 
 import dask.array as da
 import dask.dataframe as dd
@@ -26,6 +26,10 @@
     make_pickable_without_dask_sql,
 )
 
+if TYPE_CHECKING:
+    import dask_sql
+    from dask_sql.java import org
+
 logger = logging.getLogger(__name__)
 
 SeriesOrScalar = Union[dd.Series, Any]
@@ -476,7 +480,6 @@ def __init__(self):
         super().__init__(self.extract)
 
     def extract(self, what, df: SeriesOrScalar):
-        input_df = df
         df = convert_to_datetime(df)
 
         if what == "CENTURY":
@@ -522,7 +525,7 @@ def __init__(self, round_method: str):
         assert round_method in {
             "ceil",
             "floor",
-        }, f"Round method can only be either ceil or floor"
+        }, "Round method can only be either ceil or floor"
 
         super().__init__(
             is_datetime,  # if the series is dt type
diff --git a/dask_sql/physical/rex/core/input_ref.py b/dask_sql/physical/rex/core/input_ref.py
index 0dcd784dd..142417626 100644
--- a/dask_sql/physical/rex/core/input_ref.py
+++ b/dask_sql/physical/rex/core/input_ref.py
@@ -1,8 +1,14 @@
+from typing import TYPE_CHECKING
+
 import dask.dataframe as dd
 
 from dask_sql.datacontainer import DataContainer
 from dask_sql.physical.rex.base import BaseRexPlugin
 
+if TYPE_CHECKING:
+    import dask_sql
+    from dask_sql.java import org
+
 
 class RexInputRefPlugin(BaseRexPlugin):
     """
diff --git a/dask_sql/physical/rex/core/literal.py b/dask_sql/physical/rex/core/literal.py
index 7baac7d5c..bca4eef46 100644
--- a/dask_sql/physical/rex/core/literal.py
+++ b/dask_sql/physical/rex/core/literal.py
@@ -1,13 +1,15 @@
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 import dask.dataframe as dd
-import numpy as np
 
 from dask_sql.datacontainer import DataContainer
 from dask_sql.java import com, org
 from dask_sql.mappings import sql_to_python_value
 from dask_sql.physical.rex.base import BaseRexPlugin
 
+if TYPE_CHECKING:
+    import dask_sql
+
 
 class SargPythonImplementation:
     """
diff --git a/dask_sql/server/responses.py b/dask_sql/server/responses.py
index d3ec97cf6..7a008b165 100644
--- a/dask_sql/server/responses.py
+++ b/dask_sql/server/responses.py
@@ -1,9 +1,8 @@
 import uuid
-from typing import List
 
 import dask.dataframe as dd
 import numpy as np
-from fastapi import FastAPI, Request
+from fastapi import Request
 
 from dask_sql.mappings import python_to_sql_type
diff --git a/dask_sql/utils.py b/dask_sql/utils.py
index 098676d76..a67b4deda 100644
--- a/dask_sql/utils.py
+++ b/dask_sql/utils.py
@@ -1,11 +1,9 @@
 import importlib
 import logging
 import re
-import sys
 from collections import defaultdict
-from contextlib import contextmanager
 from datetime import datetime
-from typing import Any, Dict, List, Tuple
+from typing import Any, Dict
 from unittest.mock import patch
 from uuid import uuid4
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 000000000..85d6e693d
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,21 @@
+[flake8]
+# References:
+# https://flake8.readthedocs.io/en/latest/user/configuration.html
+# https://flake8.readthedocs.io/en/latest/user/error-codes.html
+# https://pycodestyle.pycqa.org/en/latest/intro.html#error-codes
+exclude = __init__.py,versioneer.py
+ignore =
+    E203,  # whitespace before ':'
+    E231,E241,  # Multiple spaces around ","
+    E731,  # Assigning lambda expression
+    #E741,  # Ambiguous variable names
+    W503,  # line break before binary operator
+    W504,  # line break after binary operator
+    ; F821,  # undefined name
+per-file-ignores =
+    tests/*:
+        # local variable is assigned to but never used
+        F841,
+        # Ambiguous variable name
+        E741,
+max-line-length = 150
diff --git a/tests/integration/test_create.py b/tests/integration/test_create.py
index 990c3beab..0e118e6e6 100644
--- a/tests/integration/test_create.py
+++ b/tests/integration/test_create.py
@@ -111,7 +111,7 @@ def test_create_from_csv_persist(c, df, temporary_data_file, gpu):
 def test_wrong_create(c):
     with pytest.raises(AttributeError):
         c.sql(
-            f"""
+            """
             CREATE TABLE
                 new_table
             WITH (
@@ -122,7 +122,7 @@ def test_wrong_create(c):
 
     with pytest.raises(AttributeError):
         c.sql(
-            f"""
+            """
             CREATE TABLE
                 new_table
             WITH (
@@ -135,7 +135,7 @@
 
 def test_create_from_query(c, df):
     c.sql(
-        f"""
+        """
         CREATE OR REPLACE TABLE
             new_table
         AS (
@@ -153,7 +153,7 @@ def test_create_from_query(c, df):
     assert_frame_equal(df, return_df)
 
     c.sql(
-        f"""
+        """
         CREATE OR REPLACE VIEW
             new_table
         AS (
@@ -202,7 +202,7 @@ def test_view_table_persist(c, temporary_data_file, df, gpu):
     # Views should change, when the original data changes
     # Tables should not change, when the original data changes
     c.sql(
-        f"""
+        """
         CREATE VIEW
             count_view
         AS (
@@ -211,7 +211,7 @@ def test_view_table_persist(c, temporary_data_file, df, gpu):
         """
     )
     c.sql(
-        f"""
+        """
         CREATE TABLE
             count_table
         AS (
@@ -245,7 +245,7 @@
 
 def test_replace_and_error(c, temporary_data_file, df):
     c.sql(
-        f"""
+        """
         CREATE TABLE
             new_table
         AS (
@@ -262,7 +262,7 @@ def test_replace_and_error(c, temporary_data_file, df):
 
     with pytest.raises(RuntimeError):
         c.sql(
-            f"""
+            """
             CREATE TABLE
                 new_table
             AS (
@@ -272,7 +272,7 @@ def test_replace_and_error(c, temporary_data_file, df):
         )
 
     c.sql(
-        f"""
+        """
         CREATE TABLE IF NOT EXISTS
             new_table
         AS (
@@ -288,7 +288,7 @@ def test_replace_and_error(c, temporary_data_file, df):
     )
 
     c.sql(
-        f"""
+ """ CREATE OR REPLACE TABLE new_table AS ( @@ -309,7 +309,7 @@ def test_replace_and_error(c, temporary_data_file, df): c.sql("SELECT a FROM new_table") c.sql( - f""" + """ CREATE TABLE IF NOT EXISTS new_table AS ( @@ -381,7 +381,7 @@ def test_drop(c): c.sql("DROP TABLE IF EXISTS new_table") c.sql( - f""" + """ CREATE TABLE new_table AS ( diff --git a/tests/integration/test_fugue.py b/tests/integration/test_fugue.py index 5125b2824..83a97f909 100644 --- a/tests/integration/test_fugue.py +++ b/tests/integration/test_fugue.py @@ -1,14 +1,15 @@ +import dask.dataframe as dd import pandas as pd import pytest from pandas.testing import assert_frame_equal -fugue_sql = pytest.importorskip("fugue_sql") -import dask.dataframe as dd - from dask_sql import Context +fugue_sql = pytest.importorskip("fugue_sql") + # needs to be imported after the check for fugue -from dask_sql.integrations.fugue import DaskSQLExecutionEngine, fsql_dask +if fugue_sql: + from dask_sql.integrations.fugue import DaskSQLExecutionEngine, fsql_dask def test_simple_statement(): diff --git a/tests/integration/test_hive.py b/tests/integration/test_hive.py index 1db7c9a92..330888618 100644 --- a/tests/integration/test_hive.py +++ b/tests/integration/test_hive.py @@ -165,7 +165,7 @@ def hive_cursor(): try: container.kill() - except: + except Exception: pass container.remove() diff --git a/tests/integration/test_join.py b/tests/integration/test_join.py index 6437cde0f..e8984f355 100644 --- a/tests/integration/test_join.py +++ b/tests/integration/test_join.py @@ -1,6 +1,5 @@ import numpy as np import pandas as pd -import pytest from pandas.testing import assert_frame_equal diff --git a/tests/integration/test_model.py b/tests/integration/test_model.py index 68d926df3..6a3a22dbf 100644 --- a/tests/integration/test_model.py +++ b/tests/integration/test_model.py @@ -255,7 +255,7 @@ def test_replace_and_error(c, training_df): assert c.schema[c.schema_name].models["my_model"][0] == first_mock c.sql( - f""" + """ CREATE OR REPLACE MODEL my_model WITH ( model_class = 'mock.MagicMock', target_column = 'target' @@ -273,7 +273,7 @@ def test_replace_and_error(c, training_df): c.sql("DROP MODEL my_model") c.sql( - f""" + """ CREATE MODEL IF NOT EXISTS my_model WITH ( model_class = 'mock.MagicMock', target_column = 'target' @@ -295,7 +295,7 @@ def test_drop_model(c, training_df): c.sql("DROP MODEL IF EXISTS my_model") c.sql( - f""" + """ CREATE MODEL IF NOT EXISTS my_model WITH ( model_class = 'mock.MagicMock', target_column = 'target' @@ -360,7 +360,7 @@ def test_export_model(c, training_df, tmpdir): ) c.sql( - f""" + """ CREATE MODEL IF NOT EXISTS my_model WITH ( model_class = 'sklearn.ensemble.GradientBoostingClassifier', target_column = 'target' @@ -418,7 +418,7 @@ def test_mlflow_export(c, training_df, tmpdir): mlflow = pytest.importorskip("mlflow", reason="mlflow not installed") c.sql( - f""" + """ CREATE MODEL IF NOT EXISTS my_model WITH ( model_class = 'sklearn.ensemble.GradientBoostingClassifier', target_column = 'target' @@ -446,7 +446,7 @@ def test_mlflow_export(c, training_df, tmpdir): # test for non sklearn compatible model c.sql( - f""" + """ CREATE MODEL IF NOT EXISTS non_sklearn_model WITH ( model_class = 'mock.MagicMock', target_column = 'target' @@ -478,7 +478,7 @@ def test_mlflow_export_xgboost(c, client, training_df, tmpdir): mlflow = pytest.importorskip("mlflow", reason="mlflow not installed") xgboost = pytest.importorskip("xgboost", reason="xgboost not installed") c.sql( - f""" + """ CREATE MODEL IF NOT EXISTS 
my_model_xgboost WITH ( model_class = 'xgboost.dask.DaskXGBClassifier', target_column = 'target' @@ -509,7 +509,7 @@ def test_mlflow_export_lightgbm(c, training_df, tmpdir): mlflow = pytest.importorskip("mlflow", reason="mlflow not installed") lightgbm = pytest.importorskip("lightgbm", reason="lightgbm not installed") c.sql( - f""" + """ CREATE MODEL IF NOT EXISTS my_model_lightgbm WITH ( model_class = 'lightgbm.LGBMClassifier', target_column = 'target' diff --git a/tests/integration/test_over.py b/tests/integration/test_over.py index 9852be6a3..20fa5b72c 100644 --- a/tests/integration/test_over.py +++ b/tests/integration/test_over.py @@ -1,4 +1,3 @@ -import numpy as np import pandas as pd from pandas.testing import assert_frame_equal diff --git a/tests/integration/test_postgres.py b/tests/integration/test_postgres.py index 0cb62799e..639908ef0 100644 --- a/tests/integration/test_postgres.py +++ b/tests/integration/test_postgres.py @@ -42,7 +42,7 @@ def engine(): f"postgresql+psycopg2://postgres@{address}:{port}/postgres" ) yield engine - except: + except Exception: postgres.kill() network.remove() diff --git a/tests/integration/test_sample.py b/tests/integration/test_sample.py index 239d81feb..889aa420b 100644 --- a/tests/integration/test_sample.py +++ b/tests/integration/test_sample.py @@ -1,6 +1,3 @@ -from pandas.testing import assert_frame_equal - - def test_sample(c, df): # Fixed sample, check absolute numbers return_df = c.sql("SELECT * FROM df TABLESAMPLE SYSTEM (20) REPEATABLE (10)") diff --git a/tests/integration/test_select.py b/tests/integration/test_select.py index 0d457ef6a..4ca56a31e 100644 --- a/tests/integration/test_select.py +++ b/tests/integration/test_select.py @@ -1,4 +1,3 @@ -import dask.dataframe as dd import numpy as np import pandas as pd import pytest diff --git a/tests/integration/test_sort.py b/tests/integration/test_sort.py index fb8af592c..07cf33609 100644 --- a/tests/integration/test_sort.py +++ b/tests/integration/test_sort.py @@ -1,7 +1,7 @@ import dask.dataframe as dd import pandas as pd import pytest -from pandas.testing import assert_frame_equal, assert_series_equal +from pandas.testing import assert_frame_equal from dask_sql.context import Context diff --git a/tests/unit/test_call.py b/tests/unit/test_call.py index b7a88da39..255c465bb 100644 --- a/tests/unit/test_call.py +++ b/tests/unit/test_call.py @@ -68,11 +68,11 @@ def test_is_true(): check_names=False, ) - assert op(1) == True - assert op(0) == False - assert op(None) == False - assert op(np.NaN) == False - assert op(pd.NA) == False + assert op(1) + assert not op(0) + assert not op(None) + assert not op(np.NaN) + assert not op(pd.NA) def test_is_false(): @@ -87,40 +87,40 @@ def test_is_false(): check_names=False, ) - assert op(1) == False - assert op(0) == True - assert op(None) == False - assert op(np.NaN) == False - assert op(pd.NA) == False + assert not op(1) + assert op(0) + assert not op(None) + assert not op(np.NaN) + assert not op(pd.NA) def test_like(): op = call.LikeOperation() - assert op("a string", r"%a%") == True - assert op("another string", r"a%") == True - assert op("another string", r"s%") == False + assert op("a string", r"%a%") + assert op("another string", r"a%") + assert not op("another string", r"s%") op = call.SimilarOperation() - assert op("normal", r"n[a-z]rm_l") == True - assert op("not normal", r"n[a-z]rm_l") == False + assert op("normal", r"n[a-z]rm_l") + assert not op("not normal", r"n[a-z]rm_l") def test_not(): op = call.NotOperation() - assert op(False) == True - 
assert op(True) == False + assert op(False) + assert not op(True) - assert op(3) == False + assert not op(3) def test_nan(): op = call.IsNullOperation() - assert op(None) == True - assert op(np.NaN) == True - assert op(pd.NA) == True + assert op(None) + assert op(np.NaN) + assert op(pd.NA) assert_series_equal( op(pd.Series(["a", None, "c"])), pd.Series([False, True, False]) ) diff --git a/tests/unit/test_context.py b/tests/unit/test_context.py index e2bd0f471..a90f86424 100644 --- a/tests/unit/test_context.py +++ b/tests/unit/test_context.py @@ -4,7 +4,6 @@ import dask.dataframe as dd import pandas as pd import pytest -from pandas.testing import assert_frame_equal from dask_sql import Context