diff --git a/.github/cluster-upstream.yml b/.github/cluster-upstream.yml index e10582aa1..3bebbde0c 100644 --- a/.github/cluster-upstream.yml +++ b/.github/cluster-upstream.yml @@ -3,7 +3,7 @@ version: '3' services: dask-scheduler: container_name: dask-scheduler - image: daskdev/dask:dev + image: daskdev/dask:dev-py3.9 command: dask-scheduler environment: USE_MAMBA: "true" @@ -12,10 +12,10 @@ services: - "8786:8786" dask-worker: container_name: dask-worker - image: daskdev/dask:dev + image: daskdev/dask:dev-py3.9 command: dask-worker dask-scheduler:8786 environment: USE_MAMBA: "true" - EXTRA_CONDA_PACKAGES: "dask/label/dev::dask cloudpickle>=2.1.0 pyarrow>=1.0.0 libstdcxx-ng>=12.1.0" + EXTRA_CONDA_PACKAGES: "dask/label/dev::dask cloudpickle>=2.1.0 pyarrow>=3.0.0 libstdcxx-ng>=12.1.0" volumes: - /tmp:/tmp diff --git a/.github/cluster.yml b/.github/cluster.yml index d59624c7b..da9530ee7 100644 --- a/.github/cluster.yml +++ b/.github/cluster.yml @@ -3,16 +3,16 @@ version: '3' services: dask-scheduler: container_name: dask-scheduler - image: daskdev/dask:dev + image: daskdev/dask:dev-py3.9 command: dask-scheduler ports: - "8786:8786" dask-worker: container_name: dask-worker - image: daskdev/dask:dev + image: daskdev/dask:dev-py3.9 command: dask-worker dask-scheduler:8786 environment: USE_MAMBA: "true" - EXTRA_CONDA_PACKAGES: "cloudpickle>=2.1.0 pyarrow>=1.0.0 libstdcxx-ng>=12.1.0" + EXTRA_CONDA_PACKAGES: "cloudpickle>=2.1.0 pyarrow>=3.0.0 libstdcxx-ng>=12.1.0" volumes: - /tmp:/tmp diff --git a/.github/workflows/test-upstream.yml b/.github/workflows/test-upstream.yml index e2d0577bf..7f41a4d6c 100644 --- a/.github/workflows/test-upstream.yml +++ b/.github/workflows/test-upstream.yml @@ -111,11 +111,11 @@ jobs: with: miniforge-variant: Mambaforge use-mamba: true - python-version: "3.8" + python-version: "3.9" channel-priority: strict channels: dask/label/dev,conda-forge,nodefaults activate-environment: dask-sql - environment-file: 
continuous_integration/environment-3.8-jdk11-dev.yaml + environment-file: continuous_integration/environment-3.9-jdk11-dev.yaml - name: Download the pre-build jar uses: actions/download-artifact@v1 with: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 693471e15..a6f862aed 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -147,11 +147,11 @@ jobs: with: miniforge-variant: Mambaforge use-mamba: true - python-version: "3.8" + python-version: "3.9" channel-priority: strict channels: ${{ needs.detect-ci-trigger.outputs.triggered == 'true' && 'dask/label/dev,conda-forge,nodefaults' || 'conda-forge,nodefaults' }} activate-environment: dask-sql - environment-file: continuous_integration/environment-3.8-jdk11-dev.yaml + environment-file: continuous_integration/environment-3.9-jdk11-dev.yaml - name: Download the pre-build jar uses: actions/download-artifact@v1 with: diff --git a/continuous_integration/environment-3.10-jdk11-dev.yaml b/continuous_integration/environment-3.10-jdk11-dev.yaml index d04d81130..8af02bb32 100644 --- a/continuous_integration/environment-3.10-jdk11-dev.yaml +++ b/continuous_integration/environment-3.10-jdk11-dev.yaml @@ -3,41 +3,41 @@ channels: - conda-forge - nodefaults dependencies: -- adagio>=0.2.3 -- antlr4-python3-runtime>=4.9.2, <4.10.0 # Remove max pin after qpd(fugue dependency) updates their conda recipe -- black=22.3.0 -- ciso8601>=2.2.0 - dask-ml>=2022.1.22 - dask>=2022.3.0 -- fastapi>=0.61.1 -- fs>=2.4.11 +- fastapi>=0.69.0 - intake>=0.6.0 -- isort=5.7.0 - jpype1>=1.0.2 - jsonschema -- lightgbm>=3.2.1 -- maven>=3.6.0 -- mlflow>=1.19.0 -- mock>=4.0.3 -- nest-asyncio>=1.4.3 +- lightgbm +- maven +- mlflow +- mock +- nest-asyncio - openjdk=11 -- pandas>=1.0.0 # below 1.0, there were no nullable ext. 
types -- pip=20.2.4 -- pre-commit>=2.11.1 -- prompt_toolkit>=3.0.8 -- psycopg2>=2.9.1 -- pyarrow>=1.0.0 -- pygments>=2.7.1 -- pyhive>=0.6.4 -- pytest-cov>=2.10.1 +- pandas>=1.1.2 +- pre-commit +- prompt_toolkit +- psycopg2 +- pyarrow>=3.0.0 +- pygments +- pyhive +- pytest-cov - pytest-xdist -- pytest>=6.0.1 +- pytest - python=3.10 -- scikit-learn>=0.24.2 -- sphinx>=3.2.1 -- tpot>=0.11.7 -- triad>=0.5.4 +- scikit-learn>=1.0.0 +- sphinx +- tpot - tzlocal>=2.1 - uvicorn>=0.11.3 +# fugue dependencies; remove when we conda install fugue +- adagio +- antlr4-python3-runtime<4.10 +- ciso8601 +- fs +- pip +- qpd +- triad - pip: - fugue[sql]>=0.5.3 diff --git a/continuous_integration/environment-3.10-jdk8-dev.yaml b/continuous_integration/environment-3.10-jdk8-dev.yaml index 4a56a9ccf..ed83ed51a 100644 --- a/continuous_integration/environment-3.10-jdk8-dev.yaml +++ b/continuous_integration/environment-3.10-jdk8-dev.yaml @@ -3,41 +3,41 @@ channels: - conda-forge - nodefaults dependencies: -- adagio>=0.2.3 -- antlr4-python3-runtime>=4.9.2, <4.10.0 # Remove max pin after qpd(fugue dependency) updates their conda recipe -- black=22.3.0 -- ciso8601>=2.2.0 - dask-ml>=2022.1.22 - dask>=2022.3.0 -- fastapi>=0.61.1 -- fs>=2.4.11 +- fastapi>=0.69.0 - intake>=0.6.0 -- isort=5.7.0 - jpype1>=1.0.2 - jsonschema -- lightgbm>=3.2.1 -- maven>=3.6.0 -- mlflow>=1.19.0 -- mock>=4.0.3 -- nest-asyncio>=1.4.3 +- lightgbm +- maven +- mlflow +- mock +- nest-asyncio - openjdk=8 -- pandas>=1.0.0 # below 1.0, there were no nullable ext. 
types -- pip=20.2.4 -- pre-commit>=2.11.1 -- prompt_toolkit>=3.0.8 -- psycopg2>=2.9.1 -- pyarrow>=1.0.0 -- pygments>=2.7.1 -- pyhive>=0.6.4 -- pytest-cov>=2.10.1 +- pandas>=1.1.2 +- pre-commit +- prompt_toolkit +- psycopg2 +- pyarrow>=3.0.0 +- pygments +- pyhive +- pytest-cov - pytest-xdist -- pytest>=6.0.1 +- pytest - python=3.10 -- scikit-learn>=0.24.2 -- sphinx>=3.2.1 -- tpot>=0.11.7 -- triad>=0.5.4 +- scikit-learn>=1.0.0 +- sphinx +- tpot - tzlocal>=2.1 - uvicorn>=0.11.3 +# fugue dependencies; remove when we conda install fugue +- adagio +- antlr4-python3-runtime<4.10 +- ciso8601 +- fs +- pip +- qpd +- triad - pip: - fugue[sql]>=0.5.3 diff --git a/continuous_integration/environment-3.8-jdk11-dev.yaml b/continuous_integration/environment-3.8-jdk11-dev.yaml index 0e0ef3d7d..b865bb82f 100644 --- a/continuous_integration/environment-3.8-jdk11-dev.yaml +++ b/continuous_integration/environment-3.8-jdk11-dev.yaml @@ -3,41 +3,41 @@ channels: - conda-forge - nodefaults dependencies: -- adagio>=0.2.3 -- antlr4-python3-runtime>=4.9.2, <4.10.0 # Remove max pin after qpd(fugue dependency) updates their conda recipe -- black=22.3.0 -- ciso8601>=2.2.0 -- dask-ml>=2022.1.22 -- dask>=2022.3.0 -- fastapi>=0.61.1 -- fs>=2.4.11 -- intake>=0.6.0 -- isort=5.7.0 -- jpype1>=1.0.2 +- dask-ml=2022.1.22 +- dask=2022.3.0 +- fastapi=0.69.0 +- intake=0.6.0 +- jpype1=1.0.2 - jsonschema -- lightgbm>=3.2.1 -- maven>=3.6.0 -- mlflow>=1.19.0 -- mock>=4.0.3 -- nest-asyncio>=1.4.3 +- lightgbm +- maven +- mlflow +- mock +- nest-asyncio - openjdk=11 -- pandas>=1.0.0 # below 1.0, there were no nullable ext. 
types -- pip=20.2.4 -- pre-commit>=2.11.1 -- prompt_toolkit>=3.0.8 -- psycopg2>=2.9.1 -- pyarrow>=1.0.0 -- pygments>=2.7.1 -- pyhive>=0.6.4 -- pytest-cov>=2.10.1 +- pandas=1.1.2 +- pre-commit +- prompt_toolkit +- psycopg2 +- pyarrow=3.0.0 +- pygments +- pyhive +- pytest-cov - pytest-xdist -- pytest>=6.0.1 +- pytest - python=3.8 -- scikit-learn>=0.24.2 -- sphinx>=3.2.1 -- tpot>=0.11.7 -- triad>=0.5.4 -- tzlocal>=2.1 -- uvicorn>=0.11.3 +- scikit-learn=1.0.0 +- sphinx +- tpot +- tzlocal=2.1 +- uvicorn=0.11.3 +# fugue dependencies; remove when we conda install fugue +- adagio +- antlr4-python3-runtime<4.10 +- ciso8601 +- fs +- pip +- qpd +- triad - pip: - - fugue[sql]>=0.5.3 + - fugue[sql]==0.5.3 diff --git a/continuous_integration/environment-3.8-jdk8-dev.yaml b/continuous_integration/environment-3.8-jdk8-dev.yaml index 9f225bed5..b143fcc6c 100644 --- a/continuous_integration/environment-3.8-jdk8-dev.yaml +++ b/continuous_integration/environment-3.8-jdk8-dev.yaml @@ -3,41 +3,41 @@ channels: - conda-forge - nodefaults dependencies: -- adagio>=0.2.3 -- antlr4-python3-runtime>=4.9.2, <4.10.0 # Remove max pin after qpd(fugue dependency) updates their conda recipe -- black=22.3.0 -- ciso8601>=2.2.0 -- dask-ml>=2022.1.22 -- dask>=2022.3.0 -- fastapi>=0.61.1 -- fs>=2.4.11 -- intake>=0.6.0 -- isort=5.7.0 -- jpype1>=1.0.2 +- dask-ml=2022.1.22 +- dask=2022.3.0 +- fastapi=0.69.0 +- intake=0.6.0 +- jpype1=1.0.2 - jsonschema -- lightgbm>=3.2.1 -- maven>=3.6.0 -- mlflow>=1.19.0 -- mock>=4.0.3 -- nest-asyncio>=1.4.3 +- lightgbm +- maven +- mlflow +- mock +- nest-asyncio - openjdk=8 -- pandas>=1.0.0 # below 1.0, there were no nullable ext. 
types -- pip=20.2.4 -- pre-commit>=2.11.1 -- prompt_toolkit>=3.0.8 -- psycopg2>=2.9.1 -- pyarrow>=1.0.0 -- pygments>=2.7.1 -- pyhive>=0.6.4 -- pytest-cov>=2.10.1 +- pandas=1.1.2 +- pre-commit +- prompt_toolkit +- psycopg2 +- pyarrow=3.0.0 +- pygments +- pyhive +- pytest-cov - pytest-xdist -- pytest>=6.0.1 +- pytest - python=3.8 -- scikit-learn>=0.24.2 -- sphinx>=3.2.1 -- tpot>=0.11.7 -- triad>=0.5.4 -- tzlocal>=2.1 -- uvicorn>=0.11.3 +- scikit-learn=1.0.0 +- sphinx +- tpot +- tzlocal=2.1 +- uvicorn=0.11.3 +# fugue dependencies; remove when we conda install fugue +- adagio +- antlr4-python3-runtime<4.10 +- ciso8601 +- fs +- pip +- qpd +- triad - pip: - - fugue[sql]>=0.5.3 + - fugue[sql]==0.5.3 diff --git a/continuous_integration/environment-3.9-jdk11-dev.yaml b/continuous_integration/environment-3.9-jdk11-dev.yaml index 1c368fbbd..1e4430cae 100644 --- a/continuous_integration/environment-3.9-jdk11-dev.yaml +++ b/continuous_integration/environment-3.9-jdk11-dev.yaml @@ -3,42 +3,41 @@ channels: - conda-forge - nodefaults dependencies: -- adagio>=0.2.3 -- antlr4-python3-runtime>=4.9.2, <4.10.0 # Remove max pin after qpd(fugue dependency) updates their conda recipe -- black=22.3.0 -- ciso8601>=2.2.0 - dask-ml>=2022.1.22 - dask>=2022.3.0 -- fastapi>=0.61.1 -- fs>=2.4.11 +- fastapi>=0.69.0 - intake>=0.6.0 -- isort=5.7.0 - jpype1>=1.0.2 - jsonschema -- lightgbm>=3.2.1 -- maven>=3.6.0 -- mlflow>=1.19.0 -- mock>=4.0.3 -- nest-asyncio>=1.4.3 +- lightgbm +- maven +- mlflow +- mock +- nest-asyncio - openjdk=11 -- pandas>=1.0.0 # below 1.0, there were no nullable ext. 
types -- pip=20.2.4 -- pre-commit>=2.11.1 -- prompt_toolkit>=3.0.8 -- psycopg2>=2.9.1 -- pyarrow>=1.0.0 -- pygments>=2.7.1 -- pyhive>=0.6.4 -- pytest-cov>=2.10.1 +- pandas>=1.1.2 +- pre-commit +- prompt_toolkit +- psycopg2 +- pyarrow>=3.0.0 +- pygments +- pyhive +- pytest-cov - pytest-xdist -- pytest>=6.0.1 +- pytest - python=3.9 -- scikit-learn>=0.24.2 -- sphinx>=3.2.1 -- tpot>=0.11.7 -- triad>=0.5.4 +- scikit-learn>=1.0.0 +- sphinx +- tpot - tzlocal>=2.1 - uvicorn>=0.11.3 -- cfn-lint>=0.4.0 +# fugue dependencies; remove when we conda install fugue +- adagio +- antlr4-python3-runtime<4.10 +- ciso8601 +- fs +- pip +- qpd +- triad - pip: - fugue[sql]>=0.5.3 diff --git a/continuous_integration/environment-3.9-jdk8-dev.yaml b/continuous_integration/environment-3.9-jdk8-dev.yaml index c0dbe1cae..314aaadd0 100644 --- a/continuous_integration/environment-3.9-jdk8-dev.yaml +++ b/continuous_integration/environment-3.9-jdk8-dev.yaml @@ -3,41 +3,41 @@ channels: - conda-forge - nodefaults dependencies: -- adagio>=0.2.3 -- antlr4-python3-runtime>=4.9.2, <4.10.0 # Remove max pin after qpd(fugue dependency) updates their conda recipe -- black=22.3.0 -- ciso8601>=2.2.0 - dask-ml>=2022.1.22 - dask>=2022.3.0 -- fastapi>=0.61.1 -- fs>=2.4.11 +- fastapi>=0.69.0 - intake>=0.6.0 -- isort=5.7.0 - jpype1>=1.0.2 - jsonschema -- lightgbm>=3.2.1 -- maven>=3.6.0 -- mlflow>=1.19.0 -- mock>=4.0.3 -- nest-asyncio>=1.4.3 +- lightgbm +- maven +- mlflow +- mock +- nest-asyncio - openjdk=8 -- pandas>=1.0.0 # below 1.0, there were no nullable ext. 
types -- pip=20.2.4 -- pre-commit>=2.11.1 -- prompt_toolkit>=3.0.8 -- psycopg2>=2.9.1 -- pyarrow>=1.0.0 -- pygments>=2.7.1 -- pyhive>=0.6.4 -- pytest-cov>=2.10.1 +- pandas>=1.1.2 +- pre-commit +- prompt_toolkit +- psycopg2 +- pyarrow>=3.0.0 +- pygments +- pyhive +- pytest-cov - pytest-xdist -- pytest>=6.0.1 +- pytest - python=3.9 -- scikit-learn>=0.24.2 -- sphinx>=3.2.1 -- tpot>=0.11.7 -- triad>=0.5.4 +- scikit-learn>=1.0.0 +- sphinx +- tpot - tzlocal>=2.1 - uvicorn>=0.11.3 +# fugue dependencies; remove when we conda install fugue +- adagio +- antlr4-python3-runtime<4.10 +- ciso8601 +- fs +- pip +- qpd +- triad - pip: - fugue[sql]>=0.5.3 diff --git a/continuous_integration/recipe/meta.yaml b/continuous_integration/recipe/meta.yaml index b2a40190a..cd5abd580 100644 --- a/continuous_integration/recipe/meta.yaml +++ b/continuous_integration/recipe/meta.yaml @@ -29,10 +29,10 @@ requirements: run: - python - dask >=2022.3.0 - - pandas >=1.0.0 + - pandas >=1.1.2 - jpype1 >=1.0.2 - openjdk >=8 - - fastapi >=0.61.1 + - fastapi >=0.69.0 - uvicorn >=0.11.3 - tzlocal >=2.1 - prompt-toolkit diff --git a/dask_sql/physical/rel/logical/aggregate.py b/dask_sql/physical/rel/logical/aggregate.py index 0dc3bc926..5cad4476c 100644 --- a/dask_sql/physical/rel/logical/aggregate.py +++ b/dask_sql/physical/rel/logical/aggregate.py @@ -81,11 +81,8 @@ def get_supported_aggregation(self, series): if "cudf" in str(series._partition_type): return built_in_aggregation - # With pandas StringDtype built-in aggregations work - # while with pandas ObjectDtype and Nulls built-in aggregations fail - if isinstance(series, dd.Series) and isinstance( - series.dtype, pd.StringDtype - ): + # with pandas StringDtype built-in aggregations work + if isinstance(series.dtype, pd.StringDtype): return built_in_aggregation return self.custom_aggregation @@ -375,6 +372,7 @@ def _perform_aggregation( # format aggregations for Dask; also check if we can use fast path for # groupby, which is only supported if we are 
not using any custom aggregations + # and our pandas version supports dropna for groupbys aggregations_dict = defaultdict(dict) fast_groupby = True for aggregation in aggregations: diff --git a/docker/conda.txt b/docker/conda.txt index e40d1069d..79477e40d 100644 --- a/docker/conda.txt +++ b/docker/conda.txt @@ -1,6 +1,6 @@ python>=3.8 dask>=2022.3.0 -pandas>=1.0.0 # below 1.0, there were no nullable ext. types +pandas>=1.1.2 jpype1>=1.0.2 openjdk>=8 maven>=3.6.0 @@ -10,14 +10,14 @@ pytest-xdist mock>=4.0.3 sphinx>=3.2.1 tzlocal>=2.1 -fastapi>=0.61.1 +fastapi>=0.69.0 nest-asyncio>=1.4.3 uvicorn>=0.11.3 -pyarrow>=1.0.0 +pyarrow>=3.0.0 prompt_toolkit>=3.0.8 pygments>=2.7.1 dask-ml>=2022.1.22 -scikit-learn>=0.24.2 +scikit-learn>=1.0.0 intake>=0.6.0 pre-commit>=2.11.1 black=22.3.0 diff --git a/docker/main.dockerfile b/docker/main.dockerfile index 848948dd5..cae7fb96e 100644 --- a/docker/main.dockerfile +++ b/docker/main.dockerfile @@ -11,13 +11,13 @@ RUN conda config --add channels conda-forge \ "openjdk>=11" \ "maven>=3.6.0" \ "tzlocal>=2.1" \ - "fastapi>=0.61.1" \ + "fastapi>=0.69.0" \ "uvicorn>=0.11.3" \ - "pyarrow>=1.0.0" \ + "pyarrow>=3.0.0" \ "prompt_toolkit>=3.0.8" \ "pygments>=2.7.1" \ "dask-ml>=2022.1.22" \ - "scikit-learn>=0.24.2" \ + "scikit-learn>=1.0.0" \ "intake>=0.6.0" \ && conda clean -ay diff --git a/docs/environment.yml b/docs/environment.yml index ffd29270b..f0b4eafac 100644 --- a/docs/environment.yml +++ b/docs/environment.yml @@ -9,10 +9,10 @@ dependencies: - dask-sphinx-theme>=2.0.3 - maven>=3.6.0 - dask>=2022.3.0 - - pandas>=1.0.0 + - pandas>=1.1.2 - fugue>=0.5.3 - jpype1>=1.0.2 - - fastapi>=0.61.1 + - fastapi>=0.69.0 - uvicorn>=0.11.3 - tzlocal>=2.1 - prompt_toolkit diff --git a/docs/requirements-docs.txt b/docs/requirements-docs.txt index 3402cca82..3c6c7051c 100644 --- a/docs/requirements-docs.txt +++ b/docs/requirements-docs.txt @@ -2,10 +2,10 @@ sphinx>=4.0.0 sphinx-tabs dask-sphinx-theme>=3.0.0 dask>=2022.3.0 -pandas>=1.0.0 +pandas>=1.1.2 
fugue>=0.5.3 jpype1>=1.0.2 -fastapi>=0.61.1 +fastapi>=0.69.0 uvicorn>=0.11.3 tzlocal>=2.1 prompt_toolkit diff --git a/setup.py b/setup.py index be50794e1..e2bc1d540 100755 --- a/setup.py +++ b/setup.py @@ -88,9 +88,9 @@ def build(self): setup_requires=sphinx_requirements, install_requires=[ "dask[dataframe,distributed]>=2022.3.0", - "pandas>=1.0.0", # below 1.0, there were no nullable ext. types + "pandas>=1.1.2", "jpype1>=1.0.2", - "fastapi>=0.61.1", + "fastapi>=0.69.0", "uvicorn>=0.11.3", "tzlocal>=2.1", "prompt_toolkit", @@ -104,9 +104,9 @@ def build(self): "pytest-cov>=2.10.1", "mock>=4.0.3", "sphinx>=3.2.1", - "pyarrow>=1.0.0", + "pyarrow>=3.0.0", "dask-ml>=2022.1.22", - "scikit-learn>=0.24.2", + "scikit-learn>=1.0.0", "intake>=0.6.0", "pre-commit", "black==22.3.0", diff --git a/tests/integration/test_analyze.py b/tests/integration/test_analyze.py index cd51ce1b8..d3b4bd08d 100644 --- a/tests/integration/test_analyze.py +++ b/tests/integration/test_analyze.py @@ -12,11 +12,12 @@ def test_analyze(c, df): 700.0, df.a.mean(), df.a.std(), - 1.0, + df.a.min(), + # Dask's approx quantiles do not match up with pandas and must be specified explicitly + 2.0, 2.0, - 2.0, # incorrect, but what Dask gives for approx quantile - 3.0, 3.0, + df.a.max(), "double", "a", ], @@ -25,9 +26,10 @@ def test_analyze(c, df): df.b.mean(), df.b.std(), df.b.min(), - df.b.quantile(0.25), - df.b.quantile(0.5), - df.b.quantile(0.75), + # Dask's approx quantiles do not match up with pandas and must be specified explicitly + 2.73108, + 5.20286, + 7.60595, df.b.max(), "double", "b", @@ -47,9 +49,8 @@ def test_analyze(c, df): ], ) - # The percentiles are calculated only approximately, therefore we do not use exact matching - assert_eq(result_df, expected_df, rtol=0.135) + assert_eq(result_df, expected_df) result_df = c.sql("ANALYZE TABLE df COMPUTE STATISTICS FOR COLUMNS a") - assert_eq(result_df, expected_df[["a"]], rtol=0.135) + assert_eq(result_df, expected_df[["a"]]) diff --git 
a/tests/integration/test_compatibility.py b/tests/integration/test_compatibility.py index 63c1668b2..ec2949229 100644 --- a/tests/integration/test_compatibility.py +++ b/tests/integration/test_compatibility.py @@ -22,8 +22,11 @@ def cast_datetime_to_string(df): cols = df.select_dtypes(include=["datetime64[ns]"]).columns.tolist() - # Casting to object first as - # directly converting to string looses second precision + + if not cols: + return df + + # Casting directly to string loses second precision df[cols] = df[cols].astype("object").astype("string") return df diff --git a/tests/integration/test_groupby.py b/tests/integration/test_groupby.py index 658ad4fa2..309d25b60 100644 --- a/tests/integration/test_groupby.py +++ b/tests/integration/test_groupby.py @@ -152,13 +152,8 @@ def test_group_by_nan(c): ) expected_df = pd.DataFrame({"c": [3, float("nan"), 1]}) - # The dtype in pandas 1.0.5 and pandas 1.1.0 are different, so - # we cannot check here - assert_eq( - return_df.sort_values("c").reset_index(drop=True), - expected_df.sort_values("c").reset_index(drop=True), - check_dtype=False, - ) + # we return nullable int dtype instead of float + assert_eq(return_df, expected_df, check_dtype=False) return_df = c.sql( """ diff --git a/tests/integration/test_jdbc.py b/tests/integration/test_jdbc.py index ce216da84..62ce7d772 100644 --- a/tests/integration/test_jdbc.py +++ b/tests/integration/test_jdbc.py @@ -100,89 +100,17 @@ def test_jdbc_has_columns(app_client, c): data=f"SELECT * from system.jdbc.columns where TABLE_NAME = '{table}'", ) assert response.status_code == 200 - result = get_result_or_error(app_client, response) + client_result = get_result_or_error(app_client, response) - assert_result(result, 24, 3) - assert result["data"] == [ - [ - "", - "a_schema", - "a_table", - "A_STR", - "VARCHAR", - "VARCHAR", - "", - "", - "", - "", - "", - "", - "", - "VARCHAR", - "", - "", - "1", - "", - "", - "", - "", - "", - "", - "", - ], - [ - "", - "a_schema", - "a_table", - 
"AN_INT", - "INTEGER", - "INTEGER", - "", - "", - "", - "", - "", - "", - "", - "INTEGER", - "", - "", - "2", - "", - "", - "", - "", - "", - "", - "", - ], - [ - "", - "a_schema", - "a_table", - "A_FLOAT", - "FLOAT", - "FLOAT", - "", - "", - "", - "", - "", - "", - "", - "FLOAT", - "", - "", - "3", - "", - "", - "", - "", - "", - "", - "", - ], - ] + # ordering of rows isn't consistent between fastapi versions + context_result = ( + c.sql("SELECT * FROM system_jdbc.columns WHERE TABLE_NAME = 'a_table'") + .compute() + .values.tolist() + ) + + assert_result(client_result, 24, 3) + assert client_result["data"] == context_result def assert_result(result, col_len, data_len): diff --git a/tests/integration/test_select.py b/tests/integration/test_select.py index 6f93692f9..f5c4b7911 100644 --- a/tests/integration/test_select.py +++ b/tests/integration/test_select.py @@ -204,6 +204,7 @@ def test_multi_case_when(c): FROM df """ ) - expected_df = pd.DataFrame({"C": [0, 1, 1, 1, 0]}, dtype=np.int32) + expected_df = pd.DataFrame({"C": [0, 1, 1, 1, 0]}) - assert_eq(actual_df, expected_df) + # dtype varies between int32/int64 depending on pandas version + assert_eq(actual_df, expected_df, check_dtype=False)