From 95461c34b562269ecd4ac65a8634ea6d23b97b4d Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Wed, 10 May 2023 13:48:55 -0700 Subject: [PATCH 01/30] Initial python version bump in CI --- .github/workflows/conda.yml | 2 +- .github/workflows/release.yml | 4 +- .github/workflows/test-upstream.yml | 8 ++-- .github/workflows/test.yml | 8 ++-- .../environment-3.10-dev.yaml | 38 ------------------- .../environment-3.8-dev.yaml | 37 ------------------ .../environment-3.9-dev.yaml | 38 ------------------- .../gpuci/environment-3.9.yaml | 27 +++++++------ .../recipe/conda_build_config.yaml | 2 +- dask_planner/pyproject.toml | 2 +- docker/conda.txt | 2 +- setup.py | 2 +- tests/integration/test_fugue.py | 3 +- 13 files changed, 30 insertions(+), 143 deletions(-) delete mode 100644 continuous_integration/environment-3.10-dev.yaml delete mode 100644 continuous_integration/environment-3.8-dev.yaml delete mode 100644 continuous_integration/environment-3.9-dev.yaml diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index 4836ef2ed..9f4f9cc12 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -40,7 +40,7 @@ jobs: with: miniforge-variant: Mambaforge use-mamba: true - python-version: "3.8" + python-version: "3.9" channel-priority: strict - name: Install dependencies run: | diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 631e73bd5..6762d5abc 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -54,7 +54,7 @@ jobs: with: miniforge-variant: Mambaforge use-mamba: true - python-version: "3.8" + python-version: "3.9" channel-priority: strict - name: Check dist files run: | @@ -84,7 +84,7 @@ jobs: with: miniforge-variant: Mambaforge use-mamba: true - python-version: "3.8" + python-version: "3.9" channel-priority: strict - name: Build source distribution run: | diff --git a/.github/workflows/test-upstream.yml 
b/.github/workflows/test-upstream.yml index ff0296b15..df7ac79ff 100644 --- a/.github/workflows/test-upstream.yml +++ b/.github/workflows/test-upstream.yml @@ -42,15 +42,15 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macos-latest] - python: ["3.8", "3.9", "3.10"] + python: ["3.9", "3.10", "3.11"] distributed: [false] include: # run tests on a distributed client - os: "ubuntu-latest" - python: "3.8" + python: "3.9" distributed: true - os: "ubuntu-latest" - python: "3.10" + python: "3.11" distributed: true steps: - uses: actions/checkout@v3 @@ -110,7 +110,7 @@ jobs: with: miniforge-variant: Mambaforge use-mamba: true - python-version: "3.8" + python-version: "3.9" channel-priority: strict - name: Install Protoc uses: arduino/setup-protoc@v1 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index be2d98126..e96b2da77 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -43,15 +43,15 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macos-latest] - python: ["3.8", "3.9", "3.10"] + python: ["3.9", "3.10", "3.11"] distributed: [false] include: # run tests on a distributed client - os: "ubuntu-latest" - python: "3.8" + python: "3.9" distributed: true - os: "ubuntu-latest" - python: "3.10" + python: "3.11" distributed: true steps: - uses: actions/checkout@v3 @@ -107,7 +107,7 @@ jobs: with: miniforge-variant: Mambaforge use-mamba: true - python-version: "3.8" + python-version: "3.9" channel-priority: strict - name: Install Protoc uses: arduino/setup-protoc@v1 diff --git a/continuous_integration/environment-3.10-dev.yaml b/continuous_integration/environment-3.10-dev.yaml deleted file mode 100644 index 33335cccc..000000000 --- a/continuous_integration/environment-3.10-dev.yaml +++ /dev/null @@ -1,38 +0,0 @@ -name: dask-sql -channels: -- conda-forge -- nodefaults -dependencies: -- dask>=2022.3.0 -# FIXME: handling is needed for httpx-based fastapi>=0.87.0 -- fastapi>=0.69.0,<0.87.0 -- 
fugue>=0.7.3 -- intake>=0.6.0 -- jsonschema -- lightgbm -- maturin>=0.12.8 -- mlflow -- mock -# tpot imports fail with numpy >=1.24.0 -# https://github.com/EpistasisLab/tpot/issues/1281 -- numpy<1.24.0 -- pandas>=1.4.0 -- pre-commit -- prompt_toolkit>=3.0.8 -- psycopg2 -- pyarrow>=6.0.1 -- pygments>=2.7.1 -- pyhive -- pytest-cov -- pytest-rerunfailures -- pytest-xdist -- pytest -- python=3.10 -- scikit-learn>=1.0.0 -- setuptools-rust>=1.5.2 -- sphinx -- sqlalchemy<2 -- tpot -- tzlocal>=2.1 -- uvicorn>=0.13.4 -- libprotobuf=3 diff --git a/continuous_integration/environment-3.8-dev.yaml b/continuous_integration/environment-3.8-dev.yaml deleted file mode 100644 index 758492150..000000000 --- a/continuous_integration/environment-3.8-dev.yaml +++ /dev/null @@ -1,37 +0,0 @@ -name: dask-sql -channels: -- conda-forge -- nodefaults -dependencies: -- dask=2022.3.0 -- fastapi=0.69.0 -- fugue=0.7.3 -- intake=0.6.0 -- jsonschema -- lightgbm -- maturin=0.12.8 -- mlflow -- mock -# tpot imports fail with numpy >=1.24.0 -# https://github.com/EpistasisLab/tpot/issues/1281 -- numpy<1.24.0 -- pandas=1.4.0 -- pre-commit -- prompt_toolkit=3.0.8 -- psycopg2 -- pyarrow=6.0.1 -- pygments=2.7.1 -- pyhive -- pytest-cov -- pytest-rerunfailures -- pytest-xdist -- pytest -- python=3.8 -- scikit-learn=1.0.0 -- setuptools-rust=1.5.2 -- sphinx -- sqlalchemy<2 -- tpot -- tzlocal=2.1 -- uvicorn=0.13.4 -- libprotobuf=3 diff --git a/continuous_integration/environment-3.9-dev.yaml b/continuous_integration/environment-3.9-dev.yaml deleted file mode 100644 index d8a9ccb0e..000000000 --- a/continuous_integration/environment-3.9-dev.yaml +++ /dev/null @@ -1,38 +0,0 @@ -name: dask-sql -channels: -- conda-forge -- nodefaults -dependencies: -- dask>=2022.3.0 -# FIXME: handling is needed for httpx-based fastapi>=0.87.0 -- fastapi>=0.69.0,<0.87.0 -- fugue>=0.7.3 -- intake>=0.6.0 -- jsonschema -- lightgbm -- maturin>=0.12.8 -- mlflow -- mock -# tpot imports fail with numpy 
>=1.24.0 -# https://github.com/EpistasisLab/tpot/issues/1281 -- numpy<1.24.0 -- pandas>=1.4.0 -- pre-commit -- prompt_toolkit>=3.0.8 -- psycopg2 -- pyarrow>=6.0.1 -- pygments>=2.7.1 -- pyhive -- pytest-cov -- pytest-rerunfailures -- pytest-xdist -- pytest -- python=3.9 -- scikit-learn>=1.0.0 -- setuptools-rust>=1.5.2 -- sphinx -- sqlalchemy<2 -- tpot -- tzlocal>=2.1 -- uvicorn>=0.13.4 -- libprotobuf=3 diff --git a/continuous_integration/gpuci/environment-3.9.yaml b/continuous_integration/gpuci/environment-3.9.yaml index 4f4972c36..a6f303b54 100644 --- a/continuous_integration/gpuci/environment-3.9.yaml +++ b/continuous_integration/gpuci/environment-3.9.yaml @@ -6,35 +6,34 @@ channels: - conda-forge - nodefaults dependencies: -- dask>=2022.3.0 -# FIXME: handling is needed for httpx-based fastapi>=0.87.0 -- fastapi>=0.69.0,<0.87.0 -- fugue>=0.7.3 -- intake>=0.6.0 +- dask=2022.3.0 +- fastapi=0.69.0 +- fugue=0.7.3 +- intake=0.6.0 - jsonschema - lightgbm -- maturin>=0.12.8 +- maturin=0.12.8 - mlflow - mock -- pandas>=1.4.0 +- pandas=1.4.0 - pre-commit -- prompt_toolkit>=3.0.8 +- prompt_toolkit=3.0.8 - psycopg2 -- pyarrow>=6.0.1 -- pygments>=2.7.1 +- pyarrow=6.0.1 +- pygments=2.7.1 - pyhive - pytest-cov - pytest-rerunfailures - pytest-xdist - pytest - python=3.9 -- scikit-learn>=1.0.0 -- setuptools-rust>=1.5.2 +- scikit-learn=1.0.0 +- setuptools-rust=1.5.2 - sphinx - sqlalchemy<2 - tpot -- tzlocal>=2.1 -- uvicorn>=0.13.4 +- tzlocal=2.1 +- uvicorn=0.13.4 # GPU-specific requirements - cudatoolkit=11.5 - cudf=23.06 diff --git a/continuous_integration/recipe/conda_build_config.yaml b/continuous_integration/recipe/conda_build_config.yaml index 230dafb8f..6fc26542a 100644 --- a/continuous_integration/recipe/conda_build_config.yaml +++ b/continuous_integration/recipe/conda_build_config.yaml @@ -1,4 +1,4 @@ python: - - 3.8 - 3.9 - 3.10 + - 3.11 diff --git a/dask_planner/pyproject.toml b/dask_planner/pyproject.toml index f153e3f5a..1fa5119d1 100644 --- 
a/dask_planner/pyproject.toml +++ b/dask_planner/pyproject.toml @@ -3,7 +3,7 @@ requires = ["setuptools", "wheel", "setuptools-rust"] [project] name = "datafusion_planner" -requires-python = ">=3.8" +requires-python = ">=3.9" classifiers = [ "Programming Language :: Rust", "Programming Language :: Python :: Implementation :: CPython", diff --git a/docker/conda.txt b/docker/conda.txt index d24d217aa..174991243 100644 --- a/docker/conda.txt +++ b/docker/conda.txt @@ -1,4 +1,4 @@ -python>=3.8 +python>=3.9 dask>=2022.3.0 pandas>=1.4.0 jpype1>=1.0.2 diff --git a/setup.py b/setup.py index d149ac5f0..0109a8714 100644 --- a/setup.py +++ b/setup.py @@ -39,7 +39,7 @@ debug=debug_build, ) ], - python_requires=">=3.8", + python_requires=">=3.9", setup_requires=sphinx_requirements, install_requires=[ "dask[dataframe]>=2022.3.0", diff --git a/tests/integration/test_fugue.py b/tests/integration/test_fugue.py index 7faf17ce1..a8c22aeb7 100644 --- a/tests/integration/test_fugue.py +++ b/tests/integration/test_fugue.py @@ -40,7 +40,8 @@ def test_fugue_fsql(client): assert_eq(return_df, pd.DataFrame({"a": [1], "b": ["world"]})) -@pytest.mark.flaky(reruns=4, condition="sys.version_info < (3, 9)") +# TODO: uncomment if flaky failures persist on python 3.9 +# @pytest.mark.flaky(reruns=4, condition="sys.version_info < (3, 9)") def test_dask_fsql(client): def assert_fsql(df: pd.DataFrame) -> None: assert_eq(df, pd.DataFrame({"a": [1]})) From 5060ae489b1289cc1f7ede95ba1b36f6766f692f Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Wed, 10 May 2023 13:52:56 -0700 Subject: [PATCH 02/30] Add libprotobuf to GPU CI environments --- continuous_integration/gpuci/environment-3.10.yaml | 1 + continuous_integration/gpuci/environment-3.9.yaml | 1 + 2 files changed, 2 insertions(+) diff --git a/continuous_integration/gpuci/environment-3.10.yaml b/continuous_integration/gpuci/environment-3.10.yaml index dfd50c7b0..ebb54cd7d 100644 --- 
a/continuous_integration/gpuci/environment-3.10.yaml +++ b/continuous_integration/gpuci/environment-3.10.yaml @@ -12,6 +12,7 @@ dependencies: - fugue>=0.7.3 - intake>=0.6.0 - jsonschema +- libprotobuf=3 - lightgbm - maturin>=0.12.8 - mlflow diff --git a/continuous_integration/gpuci/environment-3.9.yaml b/continuous_integration/gpuci/environment-3.9.yaml index a6f303b54..e593ac59b 100644 --- a/continuous_integration/gpuci/environment-3.9.yaml +++ b/continuous_integration/gpuci/environment-3.9.yaml @@ -11,6 +11,7 @@ dependencies: - fugue=0.7.3 - intake=0.6.0 - jsonschema +- libprotobuf=3 - lightgbm - maturin=0.12.8 - mlflow From eb0d13b9a984fc3a6624367415962edcf3034377 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Wed, 10 May 2023 13:54:12 -0700 Subject: [PATCH 03/30] Replace mentions of old env files --- .github/workflows/test-upstream.yml | 2 +- .github/workflows/test.yml | 2 +- CONTRIBUTING.md | 2 +- README.md | 2 +- continuous_integration/environment-3.10.yaml | 38 ++++++++++++++++++++ continuous_integration/environment-3.11.yaml | 38 ++++++++++++++++++++ continuous_integration/environment-3.9.yaml | 37 +++++++++++++++++++ docs/source/installation.rst | 2 +- 8 files changed, 118 insertions(+), 5 deletions(-) create mode 100644 continuous_integration/environment-3.10.yaml create mode 100644 continuous_integration/environment-3.11.yaml create mode 100644 continuous_integration/environment-3.9.yaml diff --git a/.github/workflows/test-upstream.yml b/.github/workflows/test-upstream.yml index df7ac79ff..a07761c76 100644 --- a/.github/workflows/test-upstream.yml +++ b/.github/workflows/test-upstream.yml @@ -36,7 +36,7 @@ jobs: name: "Test upstream dev (${{ matrix.os }}, python: ${{ matrix.python }}, distributed: ${{ matrix.distributed }})" runs-on: ${{ matrix.os }} env: - CONDA_FILE: continuous_integration/environment-${{ matrix.python }}-dev.yaml + CONDA_FILE: continuous_integration/environment-${{ 
matrix.python }}.yaml DASK_SQL_DISTRIBUTED_TESTS: ${{ matrix.distributed }} strategy: fail-fast: false diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e96b2da77..e3db94c6d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -37,7 +37,7 @@ jobs: needs: [detect-ci-trigger] runs-on: ${{ matrix.os }} env: - CONDA_FILE: continuous_integration/environment-${{ matrix.python }}-dev.yaml + CONDA_FILE: continuous_integration/environment-${{ matrix.python }}.yaml DASK_SQL_DISTRIBUTED_TESTS: ${{ matrix.distributed }} strategy: fail-fast: false diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9ab31230f..8ca9ae32a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -19,7 +19,7 @@ rustup update To initialize and activate the conda environment for a given Python version: ``` -conda env create -f dask-sql/continuous_integration/environment-{$PYTHON_VER}-dev.yaml +conda env create -f dask-sql/continuous_integration/environment-{$PYTHON_VER}.yaml conda activate dask-sql ``` diff --git a/README.md b/README.md index e978fadf8..3a034b9ff 100644 --- a/README.md +++ b/README.md @@ -101,7 +101,7 @@ If you want to have the newest (unreleased) `dask-sql` version or if you plan to Create a new conda environment and install the development environment: - conda env create -f continuous_integration/environment-3.9-dev.yaml + conda env create -f continuous_integration/environment-3.9.yaml It is not recommended to use `pip` instead of `conda` for the environment setup. 
diff --git a/continuous_integration/environment-3.10.yaml b/continuous_integration/environment-3.10.yaml new file mode 100644 index 000000000..4b4dd5365 --- /dev/null +++ b/continuous_integration/environment-3.10.yaml @@ -0,0 +1,38 @@ +name: dask-sql +channels: +- conda-forge +- nodefaults +dependencies: +- dask>=2022.3.0 +# FIXME: handling is needed for httpx-based fastapi>=0.87.0 +- fastapi>=0.69.0,<0.87.0 +- fugue>=0.7.3 +- intake>=0.6.0 +- jsonschema +- libprotobuf=3 +- lightgbm +- maturin>=0.12.8 +- mlflow +- mock +# tpot imports fail with numpy >=1.24.0 +# https://github.com/EpistasisLab/tpot/issues/1281 +- numpy<1.24.0 +- pandas>=1.4.0 +- pre-commit +- prompt_toolkit>=3.0.8 +- psycopg2 +- pyarrow>=6.0.1 +- pygments>=2.7.1 +- pyhive +- pytest-cov +- pytest-rerunfailures +- pytest-xdist +- pytest +- python=3.10 +- scikit-learn>=1.0.0 +- setuptools-rust>=1.5.2 +- sphinx +- sqlalchemy<2 +- tpot +- tzlocal>=2.1 +- uvicorn>=0.13.4 diff --git a/continuous_integration/environment-3.11.yaml b/continuous_integration/environment-3.11.yaml new file mode 100644 index 000000000..eac3529ba --- /dev/null +++ b/continuous_integration/environment-3.11.yaml @@ -0,0 +1,38 @@ +name: dask-sql +channels: +- conda-forge +- nodefaults +dependencies: +- dask>=2022.3.0 +# FIXME: handling is needed for httpx-based fastapi>=0.87.0 +- fastapi>=0.69.0,<0.87.0 +- fugue>=0.7.3 +- intake>=0.6.0 +- jsonschema +- libprotobuf=3 +- lightgbm +- maturin>=0.12.8 +- mlflow +- mock +# tpot imports fail with numpy >=1.24.0 +# https://github.com/EpistasisLab/tpot/issues/1281 +- numpy<1.24.0 +- pandas>=1.4.0 +- pre-commit +- prompt_toolkit>=3.0.8 +- psycopg2 +- pyarrow>=6.0.1 +- pygments>=2.7.1 +- pyhive +- pytest-cov +- pytest-rerunfailures +- pytest-xdist +- pytest +- python=3.11 +- scikit-learn>=1.0.0 +- setuptools-rust>=1.5.2 +- sphinx +- sqlalchemy<2 +- tpot +- tzlocal>=2.1 +- uvicorn>=0.13.4 diff --git a/continuous_integration/environment-3.9.yaml 
b/continuous_integration/environment-3.9.yaml new file mode 100644 index 000000000..e23505a98 --- /dev/null +++ b/continuous_integration/environment-3.9.yaml @@ -0,0 +1,37 @@ +name: dask-sql +channels: +- conda-forge +- nodefaults +dependencies: +- dask=2022.3.0 +- fastapi=0.69.0 +- fugue=0.7.3 +- intake=0.6.0 +- jsonschema +- libprotobuf=3 +- lightgbm +- maturin=0.12.8 +- mlflow +- mock +# tpot imports fail with numpy >=1.24.0 +# https://github.com/EpistasisLab/tpot/issues/1281 +- numpy<1.24.0 +- pandas=1.4.0 +- pre-commit +- prompt_toolkit=3.0.8 +- psycopg2 +- pyarrow=6.0.1 +- pygments=2.7.1 +- pyhive +- pytest-cov +- pytest-rerunfailures +- pytest-xdist +- pytest +- python=3.9 +- scikit-learn=1.0.0 +- setuptools-rust=1.5.2 +- sphinx +- sqlalchemy<2 +- tpot +- tzlocal=2.1 +- uvicorn=0.13.4 diff --git a/docs/source/installation.rst b/docs/source/installation.rst index 71ce17959..2ca0e99c5 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -74,7 +74,7 @@ Create a new conda environment and install the development environment: .. code-block:: bash - conda env create -f continuous_integration/environment-3.9-dev.yaml + conda env create -f continuous_integration/environment-3.9.yaml It is not recommended to use ``pip`` instead of ``conda``. From e4548339f7284915ed6a5eb79121ed6a2ae260d3 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Thu, 11 May 2023 11:27:52 -0400 Subject: [PATCH 04/30] Remove strict channel priority to try to unblock env solves? 
--- .github/workflows/test-upstream.yml | 4 +--- .github/workflows/test.yml | 4 +--- continuous_integration/environment-3.10.yaml | 1 - continuous_integration/environment-3.11.yaml | 3 +-- continuous_integration/environment-3.9.yaml | 1 - continuous_integration/gpuci/environment-3.10.yaml | 1 - continuous_integration/gpuci/environment-3.9.yaml | 1 - docs/environment.yml | 1 - 8 files changed, 3 insertions(+), 13 deletions(-) diff --git a/.github/workflows/test-upstream.yml b/.github/workflows/test-upstream.yml index a07761c76..25b19314b 100644 --- a/.github/workflows/test-upstream.yml +++ b/.github/workflows/test-upstream.yml @@ -62,7 +62,6 @@ jobs: miniforge-variant: Mambaforge use-mamba: true python-version: ${{ matrix.python }} - channel-priority: strict activate-environment: dask-sql environment-file: ${{ env.CONDA_FILE }} - name: Optionally update upstream cargo dependencies @@ -82,7 +81,7 @@ jobs: - name: Install upstream dev Dask if: env.which_upstream == 'Dask' run: | - mamba install --no-channel-priority dask/label/dev::dask + mamba install dask/label/dev::dask - name: Install pytest-reportlog run: | # TODO: add pytest-reportlog to testing environments if we move over to JSONL output @@ -111,7 +110,6 @@ jobs: miniforge-variant: Mambaforge use-mamba: true python-version: "3.9" - channel-priority: strict - name: Install Protoc uses: arduino/setup-protoc@v1 with: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e3db94c6d..fd448072a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -61,7 +61,6 @@ jobs: miniforge-variant: Mambaforge use-mamba: true python-version: ${{ matrix.python }} - channel-priority: strict activate-environment: dask-sql environment-file: ${{ env.CONDA_FILE }} run-post: ${{ matrix.os != 'windows-latest' && 'true' || 'false' }} @@ -82,7 +81,7 @@ jobs: - name: Optionally install upstream dev Dask if: needs.detect-ci-trigger.outputs.triggered == 'true' run: | - mamba install 
--no-channel-priority dask/label/dev::dask + mamba install dask/label/dev::dask - name: Test with pytest run: | pytest --junitxml=junit/test-results.xml --cov-report=xml -n auto tests --dist loadfile @@ -108,7 +107,6 @@ jobs: miniforge-variant: Mambaforge use-mamba: true python-version: "3.9" - channel-priority: strict - name: Install Protoc uses: arduino/setup-protoc@v1 with: diff --git a/continuous_integration/environment-3.10.yaml b/continuous_integration/environment-3.10.yaml index a076caabe..59b1f7691 100644 --- a/continuous_integration/environment-3.10.yaml +++ b/continuous_integration/environment-3.10.yaml @@ -1,7 +1,6 @@ name: dask-sql channels: - conda-forge -- nodefaults dependencies: - c-compiler - dask>=2022.3.0 diff --git a/continuous_integration/environment-3.11.yaml b/continuous_integration/environment-3.11.yaml index e1472caef..da60c012a 100644 --- a/continuous_integration/environment-3.11.yaml +++ b/continuous_integration/environment-3.11.yaml @@ -1,7 +1,6 @@ -name: dask-sql +name: dask-sql-py311 channels: - conda-forge -- nodefaults dependencies: - c-compiler - dask>=2022.3.0 diff --git a/continuous_integration/environment-3.9.yaml b/continuous_integration/environment-3.9.yaml index 1f45af484..227f7d1ef 100644 --- a/continuous_integration/environment-3.9.yaml +++ b/continuous_integration/environment-3.9.yaml @@ -1,7 +1,6 @@ name: dask-sql channels: - conda-forge -- nodefaults dependencies: - c-compiler - dask=2022.3.0 diff --git a/continuous_integration/gpuci/environment-3.10.yaml b/continuous_integration/gpuci/environment-3.10.yaml index 97ad8cfd3..7ec13cdc4 100644 --- a/continuous_integration/gpuci/environment-3.10.yaml +++ b/continuous_integration/gpuci/environment-3.10.yaml @@ -4,7 +4,6 @@ channels: - rapidsai-nightly - nvidia - conda-forge -- nodefaults dependencies: - c-compiler - dask>=2022.3.0 diff --git a/continuous_integration/gpuci/environment-3.9.yaml b/continuous_integration/gpuci/environment-3.9.yaml index 780e56dfb..6c3df0794 100644 
--- a/continuous_integration/gpuci/environment-3.9.yaml +++ b/continuous_integration/gpuci/environment-3.9.yaml @@ -4,7 +4,6 @@ channels: - rapidsai-nightly - nvidia - conda-forge -- nodefaults dependencies: - c-compiler - dask=2022.3.0 diff --git a/docs/environment.yml b/docs/environment.yml index 96a727465..bb463d028 100644 --- a/docs/environment.yml +++ b/docs/environment.yml @@ -1,7 +1,6 @@ name: dask-sql-docs channels: - conda-forge - - nodefaults dependencies: - python=3.9 - sphinx>=4.0.0 From 231bbbf5bdc341857eb324a03e669c22703c9c0c Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Thu, 11 May 2023 08:49:07 -0700 Subject: [PATCH 05/30] Establish minimum version for mlflow --- continuous_integration/environment-3.10.yaml | 2 +- continuous_integration/environment-3.11.yaml | 4 ++-- continuous_integration/environment-3.9.yaml | 2 +- continuous_integration/gpuci/environment-3.10.yaml | 2 +- continuous_integration/gpuci/environment-3.9.yaml | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/continuous_integration/environment-3.10.yaml b/continuous_integration/environment-3.10.yaml index 59b1f7691..d808241c2 100644 --- a/continuous_integration/environment-3.10.yaml +++ b/continuous_integration/environment-3.10.yaml @@ -12,7 +12,7 @@ dependencies: - libprotobuf=3 - lightgbm - maturin>=0.12.8 -- mlflow +- mlflow>=1.13.1 - mock # tpot imports fail with numpy >=1.24.0 # https://github.com/EpistasisLab/tpot/issues/1281 diff --git a/continuous_integration/environment-3.11.yaml b/continuous_integration/environment-3.11.yaml index da60c012a..bc7ef5a7d 100644 --- a/continuous_integration/environment-3.11.yaml +++ b/continuous_integration/environment-3.11.yaml @@ -1,4 +1,4 @@ -name: dask-sql-py311 +name: dask-sql channels: - conda-forge dependencies: @@ -12,7 +12,7 @@ dependencies: - libprotobuf=3 - lightgbm - maturin>=0.12.8 -- mlflow +- mlflow>=1.13.1 - mock # tpot imports fail 
with numpy >=1.24.0 # https://github.com/EpistasisLab/tpot/issues/1281 diff --git a/continuous_integration/environment-3.9.yaml b/continuous_integration/environment-3.9.yaml index 227f7d1ef..6ce44b27c 100644 --- a/continuous_integration/environment-3.9.yaml +++ b/continuous_integration/environment-3.9.yaml @@ -11,7 +11,7 @@ dependencies: - libprotobuf=3 - lightgbm - maturin=0.12.8 -- mlflow +- mlflow=1.13.1 - mock # tpot imports fail with numpy >=1.24.0 # https://github.com/EpistasisLab/tpot/issues/1281 diff --git a/continuous_integration/gpuci/environment-3.10.yaml b/continuous_integration/gpuci/environment-3.10.yaml index 7ec13cdc4..e8b46ce91 100644 --- a/continuous_integration/gpuci/environment-3.10.yaml +++ b/continuous_integration/gpuci/environment-3.10.yaml @@ -15,7 +15,7 @@ dependencies: - libprotobuf=3 - lightgbm - maturin>=0.12.8 -- mlflow +- mlflow>=1.13.1 - mock - pandas>=1.4.0 - pre-commit diff --git a/continuous_integration/gpuci/environment-3.9.yaml b/continuous_integration/gpuci/environment-3.9.yaml index 6c3df0794..91e7710d2 100644 --- a/continuous_integration/gpuci/environment-3.9.yaml +++ b/continuous_integration/gpuci/environment-3.9.yaml @@ -14,7 +14,7 @@ dependencies: - libprotobuf=3 - lightgbm - maturin=0.12.8 -- mlflow +- mlflow>=1.13.1 - mock - pandas=1.4.0 - pre-commit From 70c7302bb3ae03351afeb29c428cf30c1f945e6e Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Thu, 11 May 2023 09:06:10 -0700 Subject: [PATCH 06/30] Revert "Remove strict channel priority to try to unblock env solves?" This reverts commit e4548339f7284915ed6a5eb79121ed6a2ae260d3. 
--- .github/workflows/test-upstream.yml | 4 +++- .github/workflows/test.yml | 4 +++- continuous_integration/environment-3.10.yaml | 1 + continuous_integration/environment-3.11.yaml | 1 + continuous_integration/environment-3.9.yaml | 1 + continuous_integration/gpuci/environment-3.10.yaml | 1 + continuous_integration/gpuci/environment-3.9.yaml | 1 + docs/environment.yml | 1 + 8 files changed, 12 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-upstream.yml b/.github/workflows/test-upstream.yml index 25b19314b..a07761c76 100644 --- a/.github/workflows/test-upstream.yml +++ b/.github/workflows/test-upstream.yml @@ -62,6 +62,7 @@ jobs: miniforge-variant: Mambaforge use-mamba: true python-version: ${{ matrix.python }} + channel-priority: strict activate-environment: dask-sql environment-file: ${{ env.CONDA_FILE }} - name: Optionally update upstream cargo dependencies @@ -81,7 +82,7 @@ jobs: - name: Install upstream dev Dask if: env.which_upstream == 'Dask' run: | - mamba install dask/label/dev::dask + mamba install --no-channel-priority dask/label/dev::dask - name: Install pytest-reportlog run: | # TODO: add pytest-reportlog to testing environments if we move over to JSONL output @@ -110,6 +111,7 @@ jobs: miniforge-variant: Mambaforge use-mamba: true python-version: "3.9" + channel-priority: strict - name: Install Protoc uses: arduino/setup-protoc@v1 with: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index fd448072a..e3db94c6d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -61,6 +61,7 @@ jobs: miniforge-variant: Mambaforge use-mamba: true python-version: ${{ matrix.python }} + channel-priority: strict activate-environment: dask-sql environment-file: ${{ env.CONDA_FILE }} run-post: ${{ matrix.os != 'windows-latest' && 'true' || 'false' }} @@ -81,7 +82,7 @@ jobs: - name: Optionally install upstream dev Dask if: needs.detect-ci-trigger.outputs.triggered == 'true' run: | - mamba install 
dask/label/dev::dask + mamba install --no-channel-priority dask/label/dev::dask - name: Test with pytest run: | pytest --junitxml=junit/test-results.xml --cov-report=xml -n auto tests --dist loadfile @@ -107,6 +108,7 @@ jobs: miniforge-variant: Mambaforge use-mamba: true python-version: "3.9" + channel-priority: strict - name: Install Protoc uses: arduino/setup-protoc@v1 with: diff --git a/continuous_integration/environment-3.10.yaml b/continuous_integration/environment-3.10.yaml index d808241c2..6f5fadd13 100644 --- a/continuous_integration/environment-3.10.yaml +++ b/continuous_integration/environment-3.10.yaml @@ -1,6 +1,7 @@ name: dask-sql channels: - conda-forge +- nodefaults dependencies: - c-compiler - dask>=2022.3.0 diff --git a/continuous_integration/environment-3.11.yaml b/continuous_integration/environment-3.11.yaml index bc7ef5a7d..5437620ee 100644 --- a/continuous_integration/environment-3.11.yaml +++ b/continuous_integration/environment-3.11.yaml @@ -1,6 +1,7 @@ name: dask-sql channels: - conda-forge +- nodefaults dependencies: - c-compiler - dask>=2022.3.0 diff --git a/continuous_integration/environment-3.9.yaml b/continuous_integration/environment-3.9.yaml index 6ce44b27c..8cbad40b7 100644 --- a/continuous_integration/environment-3.9.yaml +++ b/continuous_integration/environment-3.9.yaml @@ -1,6 +1,7 @@ name: dask-sql channels: - conda-forge +- nodefaults dependencies: - c-compiler - dask=2022.3.0 diff --git a/continuous_integration/gpuci/environment-3.10.yaml b/continuous_integration/gpuci/environment-3.10.yaml index e8b46ce91..985ceb729 100644 --- a/continuous_integration/gpuci/environment-3.10.yaml +++ b/continuous_integration/gpuci/environment-3.10.yaml @@ -4,6 +4,7 @@ channels: - rapidsai-nightly - nvidia - conda-forge +- nodefaults dependencies: - c-compiler - dask>=2022.3.0 diff --git a/continuous_integration/gpuci/environment-3.9.yaml b/continuous_integration/gpuci/environment-3.9.yaml index 91e7710d2..5f8e4c00e 100644 --- 
a/continuous_integration/gpuci/environment-3.9.yaml +++ b/continuous_integration/gpuci/environment-3.9.yaml @@ -4,6 +4,7 @@ channels: - rapidsai-nightly - nvidia - conda-forge +- nodefaults dependencies: - c-compiler - dask=2022.3.0 diff --git a/docs/environment.yml b/docs/environment.yml index bb463d028..96a727465 100644 --- a/docs/environment.yml +++ b/docs/environment.yml @@ -1,6 +1,7 @@ name: dask-sql-docs channels: - conda-forge + - nodefaults dependencies: - python=3.9 - sphinx>=4.0.0 From 2783fc90885f9f8691b25e5d531e9beb3ec5e203 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Thu, 11 May 2023 09:06:57 -0700 Subject: [PATCH 07/30] Try strict channel priority without nodefaults --- continuous_integration/environment-3.10.yaml | 1 - continuous_integration/environment-3.11.yaml | 1 - continuous_integration/environment-3.9.yaml | 1 - continuous_integration/gpuci/environment-3.10.yaml | 1 - continuous_integration/gpuci/environment-3.9.yaml | 1 - docs/environment.yml | 1 - 6 files changed, 6 deletions(-) diff --git a/continuous_integration/environment-3.10.yaml b/continuous_integration/environment-3.10.yaml index 6f5fadd13..d808241c2 100644 --- a/continuous_integration/environment-3.10.yaml +++ b/continuous_integration/environment-3.10.yaml @@ -1,7 +1,6 @@ name: dask-sql channels: - conda-forge -- nodefaults dependencies: - c-compiler - dask>=2022.3.0 diff --git a/continuous_integration/environment-3.11.yaml b/continuous_integration/environment-3.11.yaml index 5437620ee..bc7ef5a7d 100644 --- a/continuous_integration/environment-3.11.yaml +++ b/continuous_integration/environment-3.11.yaml @@ -1,7 +1,6 @@ name: dask-sql channels: - conda-forge -- nodefaults dependencies: - c-compiler - dask>=2022.3.0 diff --git a/continuous_integration/environment-3.9.yaml b/continuous_integration/environment-3.9.yaml index 8cbad40b7..6ce44b27c 100644 --- a/continuous_integration/environment-3.9.yaml +++ 
b/continuous_integration/environment-3.9.yaml @@ -1,7 +1,6 @@ name: dask-sql channels: - conda-forge -- nodefaults dependencies: - c-compiler - dask=2022.3.0 diff --git a/continuous_integration/gpuci/environment-3.10.yaml b/continuous_integration/gpuci/environment-3.10.yaml index 985ceb729..e8b46ce91 100644 --- a/continuous_integration/gpuci/environment-3.10.yaml +++ b/continuous_integration/gpuci/environment-3.10.yaml @@ -4,7 +4,6 @@ channels: - rapidsai-nightly - nvidia - conda-forge -- nodefaults dependencies: - c-compiler - dask>=2022.3.0 diff --git a/continuous_integration/gpuci/environment-3.9.yaml b/continuous_integration/gpuci/environment-3.9.yaml index 5f8e4c00e..91e7710d2 100644 --- a/continuous_integration/gpuci/environment-3.9.yaml +++ b/continuous_integration/gpuci/environment-3.9.yaml @@ -4,7 +4,6 @@ channels: - rapidsai-nightly - nvidia - conda-forge -- nodefaults dependencies: - c-compiler - dask=2022.3.0 diff --git a/docs/environment.yml b/docs/environment.yml index 96a727465..bb463d028 100644 --- a/docs/environment.yml +++ b/docs/environment.yml @@ -1,7 +1,6 @@ name: dask-sql-docs channels: - conda-forge - - nodefaults dependencies: - python=3.9 - sphinx>=4.0.0 From 3b11f9cf8c76d6775f8177ea5ebc009ee60c389e Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Thu, 11 May 2023 13:25:32 -0400 Subject: [PATCH 08/30] Bump mlflow min version to fix windows failures --- continuous_integration/environment-3.10.yaml | 2 +- continuous_integration/environment-3.11.yaml | 2 +- continuous_integration/environment-3.9.yaml | 4 ++-- continuous_integration/gpuci/environment-3.10.yaml | 2 +- continuous_integration/gpuci/environment-3.9.yaml | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/continuous_integration/environment-3.10.yaml b/continuous_integration/environment-3.10.yaml index d808241c2..7add8b087 100644 --- a/continuous_integration/environment-3.10.yaml +++ 
b/continuous_integration/environment-3.10.yaml @@ -12,7 +12,7 @@ dependencies: - libprotobuf=3 - lightgbm - maturin>=0.12.8 -- mlflow>=1.13.1 +- mlflow>=1.20 - mock # tpot imports fail with numpy >=1.24.0 # https://github.com/EpistasisLab/tpot/issues/1281 diff --git a/continuous_integration/environment-3.11.yaml b/continuous_integration/environment-3.11.yaml index bc7ef5a7d..256aa20ee 100644 --- a/continuous_integration/environment-3.11.yaml +++ b/continuous_integration/environment-3.11.yaml @@ -12,7 +12,7 @@ dependencies: - libprotobuf=3 - lightgbm - maturin>=0.12.8 -- mlflow>=1.13.1 +- mlflow>=1.20 - mock # tpot imports fail with numpy >=1.24.0 # https://github.com/EpistasisLab/tpot/issues/1281 diff --git a/continuous_integration/environment-3.9.yaml b/continuous_integration/environment-3.9.yaml index 6ce44b27c..1d1de0aba 100644 --- a/continuous_integration/environment-3.9.yaml +++ b/continuous_integration/environment-3.9.yaml @@ -1,4 +1,4 @@ -name: dask-sql +name: dask-sql-py39 channels: - conda-forge dependencies: @@ -11,7 +11,7 @@ dependencies: - libprotobuf=3 - lightgbm - maturin=0.12.8 -- mlflow=1.13.1 +- mlflow=1.20 - mock # tpot imports fail with numpy >=1.24.0 # https://github.com/EpistasisLab/tpot/issues/1281 diff --git a/continuous_integration/gpuci/environment-3.10.yaml b/continuous_integration/gpuci/environment-3.10.yaml index e8b46ce91..45f56ba6d 100644 --- a/continuous_integration/gpuci/environment-3.10.yaml +++ b/continuous_integration/gpuci/environment-3.10.yaml @@ -15,7 +15,7 @@ dependencies: - libprotobuf=3 - lightgbm - maturin>=0.12.8 -- mlflow>=1.13.1 +- mlflow>=1.20 - mock - pandas>=1.4.0 - pre-commit diff --git a/continuous_integration/gpuci/environment-3.9.yaml b/continuous_integration/gpuci/environment-3.9.yaml index 91e7710d2..a6c585e6a 100644 --- a/continuous_integration/gpuci/environment-3.9.yaml +++ b/continuous_integration/gpuci/environment-3.9.yaml @@ -14,7 +14,7 @@ dependencies: - 
libprotobuf=3 - lightgbm - maturin=0.12.8 -- mlflow>=1.13.1 +- mlflow>=1.20 - mock - pandas=1.4.0 - pre-commit From c909c948719acc5cc78e0a980862a03a0da15648 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Thu, 18 May 2023 09:12:16 -0700 Subject: [PATCH 09/30] Build python 3.11 wheels --- .github/workflows/release.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 6762d5abc..393987d92 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -16,7 +16,8 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macos-latest] - python: ["8", "9", "10"] # 3.x + # corresponds to python 3.9, 3.10, 3.11 + python: ["9", "10", "11"] steps: - uses: actions/checkout@v3 with: From 1b48ab8568e98715da9eead68c63f63d76f930c9 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Thu, 18 May 2023 09:19:22 -0700 Subject: [PATCH 10/30] Run wheel builds in PR test --- .github/workflows/release.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 393987d92..5ffddc9e8 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -2,6 +2,9 @@ name: Upload Python package on: release: types: [created] + pull_request: + paths: + - .github/workflows/release.yml # Required shell entrypoint to have properly activated conda environments defaults: @@ -69,6 +72,9 @@ jobs: name: wheels for py3.${{ matrix.python }} on ${{ matrix.os }} path: dist/* - name: Publish package + if: | + github.event_name == 'release' + && github.repository == 'dask-contrib/dask-sql' env: TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} @@ -97,6 +103,9 @@ jobs: twine check dist/* ls -lh dist/ - name: Publish source distribution + if: | + 
github.event_name == 'release' + && github.repository == 'dask-contrib/dask-sql' env: TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} From 5ce1507d5b4bcddc6c2170d0265c06f0f9065c41 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Thu, 18 May 2023 11:10:07 -0700 Subject: [PATCH 11/30] Try protoc action in wheels build to unblock --- .github/workflows/release.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 5ffddc9e8..c2f83e965 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -25,6 +25,11 @@ jobs: - uses: actions/checkout@v3 with: fetch-depth: 0 + - name: Install Protoc + uses: arduino/setup-protoc@v1 + with: + version: '3.x' + repo-token: ${{ secrets.GITHUB_TOKEN }} - name: Set up QEMU for linux aarch64 if: contains(matrix.os, 'ubuntu') uses: docker/setup-qemu-action@v2 From a0ccafb913392c0b02208408bb96565f42cc035d Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Fri, 19 May 2023 09:00:31 -0700 Subject: [PATCH 12/30] Skip hive testing on 3.11 for now --- .github/workflows/test.yml | 4 +++- tests/integration/test_hive.py | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e3db94c6d..4227081fc 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -74,7 +74,9 @@ jobs: run: | python setup.py build install - name: Install hive testing dependencies - if: matrix.os == 'ubuntu-latest' + if: | + matrix.os == 'ubuntu-latest' + && matrix.os != '3.11' # FIXME: sasl is not available on python 3.11 run: | mamba install -c conda-forge "sasl>=0.3.1" docker pull bde2020/hive:2.3.2-postgresql-metastore diff --git a/tests/integration/test_hive.py b/tests/integration/test_hive.py index 1a86082c1..4c4ba5053 100644 --- 
a/tests/integration/test_hive.py +++ b/tests/integration/test_hive.py @@ -1,5 +1,4 @@ import shutil -import sys import tempfile import time @@ -10,8 +9,9 @@ from tests.utils import assert_eq pytestmark = pytest.mark.xfail( - condition=sys.platform in ("win32", "darwin"), - reason="hive testing not supported on Windows/macOS", + # FIXME: sasl is not available on python 3.11 + condition="sys.platform in ('win32', 'darwin') or sys.version_info >= (3, 11)", + reason="hive testing only supported on linux with python<3.11", ) docker = pytest.importorskip("docker") sqlalchemy = pytest.importorskip("sqlalchemy") From 6b88ea602f7f017d5df5a015543a5d2abf892624 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Fri, 19 May 2023 09:01:50 -0700 Subject: [PATCH 13/30] Fix workflow syntax errors --- .github/workflows/test.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4227081fc..83916d15f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -74,9 +74,10 @@ jobs: run: | python setup.py build install - name: Install hive testing dependencies + # FIXME: sasl is not available on python 3.11 if: | matrix.os == 'ubuntu-latest' - && matrix.os != '3.11' # FIXME: sasl is not available on python 3.11 + && matrix.python != '3.11' run: | mamba install -c conda-forge "sasl>=0.3.1" docker pull bde2020/hive:2.3.2-postgresql-metastore From 97688420fa8632e4212b9354617b41903bee7e33 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Fri, 19 May 2023 09:33:21 -0700 Subject: [PATCH 14/30] Stop running wheel CI --- .github/workflows/release.yml | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index c2f83e965..393987d92 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -2,9 
+2,6 @@ name: Upload Python package on: release: types: [created] - pull_request: - paths: - - .github/workflows/release.yml # Required shell entrypoint to have properly activated conda environments defaults: @@ -25,11 +22,6 @@ jobs: - uses: actions/checkout@v3 with: fetch-depth: 0 - - name: Install Protoc - uses: arduino/setup-protoc@v1 - with: - version: '3.x' - repo-token: ${{ secrets.GITHUB_TOKEN }} - name: Set up QEMU for linux aarch64 if: contains(matrix.os, 'ubuntu') uses: docker/setup-qemu-action@v2 @@ -77,9 +69,6 @@ jobs: name: wheels for py3.${{ matrix.python }} on ${{ matrix.os }} path: dist/* - name: Publish package - if: | - github.event_name == 'release' - && github.repository == 'dask-contrib/dask-sql' env: TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} @@ -108,9 +97,6 @@ jobs: twine check dist/* ls -lh dist/ - name: Publish source distribution - if: | - github.event_name == 'release' - && github.repository == 'dask-contrib/dask-sql' env: TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} From 30c9818ebfae108df5bf24eec8bca36f7057ed7e Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Thu, 25 May 2023 21:42:41 -0700 Subject: [PATCH 15/30] Bump pyo3 abi minor version --- dask_planner/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dask_planner/Cargo.toml b/dask_planner/Cargo.toml index 0e45b0732..550fbe2ff 100644 --- a/dask_planner/Cargo.toml +++ b/dask_planner/Cargo.toml @@ -13,7 +13,7 @@ async-trait = "0.1.68" datafusion-python = { git = "https://github.com/apache/arrow-datafusion-python.git", rev = "9493638" } env_logger = "0.10" log = "^0.4" -pyo3 = { version = "0.18.3", features = ["extension-module", "abi3", "abi3-py38"] } +pyo3 = { version = "0.18.3", features = ["extension-module", "abi3", "abi3-py39"] } pyo3-log = "0.8.1" [build-dependencies] From 
738bbd485cd4e5ffbcb8b09cb65c51c7b8dbe4f3 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Thu, 25 May 2023 21:52:30 -0700 Subject: [PATCH 16/30] Initial run of pyupgrade to py39 --- dask_sql/context.py | 18 ++++++------- dask_sql/datacontainer.py | 30 ++++++++++----------- dask_sql/integrations/fugue.py | 4 +-- dask_sql/integrations/ipython.py | 4 +-- dask_sql/physical/rel/base.py | 4 +-- dask_sql/physical/rel/custom/wrappers.py | 8 +++--- dask_sql/physical/rel/logical/aggregate.py | 18 ++++++------- dask_sql/physical/rel/logical/join.py | 8 +++--- dask_sql/physical/rel/logical/table_scan.py | 2 +- dask_sql/physical/rel/logical/window.py | 24 ++++++++--------- dask_sql/physical/utils/groupby.py | 4 +-- dask_sql/physical/utils/sort.py | 22 +++++++-------- dask_sql/physical/utils/statistics.py | 5 ++-- dask_sql/utils.py | 6 ++--- tests/integration/test_cmd.py | 3 ++- tests/integration/test_model.py | 2 +- tests/unit/test_ml_utils.py | 2 +- 17 files changed, 80 insertions(+), 84 deletions(-) diff --git a/dask_sql/context.py b/dask_sql/context.py index f277c4da0..053c4eea3 100644 --- a/dask_sql/context.py +++ b/dask_sql/context.py @@ -2,7 +2,7 @@ import inspect import logging from collections import Counter -from typing import Any, Callable, Dict, List, Tuple, Union +from typing import Any, Callable, Union import dask.dataframe as dd import pandas as pd @@ -295,7 +295,7 @@ def register_function( self, f: Callable, name: str, - parameters: List[Tuple[str, type]], + parameters: list[tuple[str, type]], return_type: type, replace: bool = False, schema_name: str = None, @@ -386,7 +386,7 @@ def register_aggregation( self, f: dd.Aggregation, name: str, - parameters: List[Tuple[str, type]], + parameters: list[tuple[str, type]], return_type: type, replace: bool = False, schema_name: str = None, @@ -453,9 +453,9 @@ def sql( self, sql: Any, return_futures: bool = True, - dataframes: Dict[str, Union[dd.DataFrame, 
pd.DataFrame]] = None, + dataframes: dict[str, Union[dd.DataFrame, pd.DataFrame]] = None, gpu: bool = False, - config_options: Dict[str, Any] = None, + config_options: dict[str, Any] = None, ) -> Union[dd.DataFrame, pd.DataFrame]: """ Query the registered tables with the given SQL. @@ -505,7 +505,7 @@ def sql( def explain( self, sql: str, - dataframes: Dict[str, Union[dd.DataFrame, pd.DataFrame]] = None, + dataframes: dict[str, Union[dd.DataFrame, pd.DataFrame]] = None, gpu: bool = False, ) -> str: """ @@ -592,7 +592,7 @@ def register_model( self, model_name: str, model: Any, - training_columns: List[str], + training_columns: list[str], schema_name: str = None, ): """ @@ -694,7 +694,7 @@ def stop_server(self): # pragma: no cover self.sql_server = None - def fqn(self, tbl: "DaskTable") -> Tuple[str, str]: + def fqn(self, tbl: "DaskTable") -> tuple[str, str]: """ Return the fully qualified name of an object, maybe including the schema name. @@ -886,7 +886,7 @@ def _register_callable( f: Any, name: str, aggregation: bool, - parameters: List[Tuple[str, type]], + parameters: list[tuple[str, type]], return_type: type, replace: bool = False, schema_name=None, diff --git a/dask_sql/datacontainer.py b/dask_sql/datacontainer.py index e4c93a8f5..db2c82abf 100644 --- a/dask_sql/datacontainer.py +++ b/dask_sql/datacontainer.py @@ -1,5 +1,5 @@ from collections import namedtuple -from typing import Any, Dict, List, Tuple, Union +from typing import Any, Union import dask.dataframe as dd import pandas as pd @@ -28,8 +28,8 @@ class ColumnContainer: def __init__( self, - frontend_columns: List[str], - frontend_backend_mapping: Union[Dict[str, ColumnType], None] = None, + frontend_columns: list[str], + frontend_backend_mapping: Union[dict[str, ColumnType], None] = None, ): assert all( isinstance(col, str) for col in frontend_columns @@ -50,7 +50,7 @@ def _copy(self) -> ColumnContainer: self._frontend_columns.copy(), self._frontend_backend_mapping.copy() ) - def limit_to(self, fields: 
List[str]) -> ColumnContainer: + def limit_to(self, fields: list[str]) -> ColumnContainer: """ Create a new ColumnContainer, which has frontend columns limited to only the ones given as parameter. @@ -64,7 +64,7 @@ def limit_to(self, fields: List[str]) -> ColumnContainer: cc._frontend_columns = [str(x) for x in fields] return cc - def rename(self, columns: Dict[str, str]) -> ColumnContainer: + def rename(self, columns: dict[str, str]) -> ColumnContainer: """ Return a new ColumnContainer where the frontend columns are renamed according to the given mapping. @@ -84,7 +84,7 @@ def rename(self, columns: Dict[str, str]) -> ColumnContainer: return cc def rename_handle_duplicates( - self, from_columns: List[str], to_columns: List[str] + self, from_columns: list[str], to_columns: list[str] ) -> ColumnContainer: """ Same as `rename` but additionally handles presence of @@ -105,14 +105,14 @@ def rename_handle_duplicates( return cc - def mapping(self) -> List[Tuple[str, ColumnType]]: + def mapping(self) -> list[tuple[str, ColumnType]]: """ The mapping from frontend columns to backend columns. 
""" return list(self._frontend_backend_mapping.items()) @property - def columns(self) -> List[str]: + def columns(self) -> list[str]: """ The stored frontend columns in the correct order """ @@ -281,10 +281,10 @@ def __hash__(self): class SchemaContainer: def __init__(self, name: str): self.__name__ = name - self.tables: Dict[str, DataContainer] = {} - self.statistics: Dict[str, Statistics] = {} - self.experiments: Dict[str, pd.DataFrame] = {} - self.models: Dict[str, Tuple[Any, List[str]]] = {} - self.functions: Dict[str, UDF] = {} - self.function_lists: List[FunctionDescription] = [] - self.filepaths: Dict[str, str] = {} + self.tables: dict[str, DataContainer] = {} + self.statistics: dict[str, Statistics] = {} + self.experiments: dict[str, pd.DataFrame] = {} + self.models: dict[str, tuple[Any, list[str]]] = {} + self.functions: dict[str, UDF] = {} + self.function_lists: list[FunctionDescription] = [] + self.filepaths: dict[str, str] = {} diff --git a/dask_sql/integrations/fugue.py b/dask_sql/integrations/fugue.py index cdc5fbdae..c1123d652 100644 --- a/dask_sql/integrations/fugue.py +++ b/dask_sql/integrations/fugue.py @@ -11,7 +11,7 @@ "Can not load the fugue module. If you want to use this integration, you need to install it." ) -from typing import Any, Dict, Optional +from typing import Any, Optional import dask.dataframe as dd @@ -94,7 +94,7 @@ def fsql_dask( ctx: Optional[Context] = None, register: bool = False, fugue_conf: Any = None, -) -> Dict[str, dd.DataFrame]: +) -> dict[str, dd.DataFrame]: """FugueSQL utility function that can consume Context directly. FugueSQL is a language extending standard SQL. It makes SQL eligible to describe end to end workflows. It also enables you to invoke python extensions in the SQL like language. 
diff --git a/dask_sql/integrations/ipython.py b/dask_sql/integrations/ipython.py index 08843c00c..b68f7acf5 100644 --- a/dask_sql/integrations/ipython.py +++ b/dask_sql/integrations/ipython.py @@ -1,5 +1,5 @@ import time -from typing import TYPE_CHECKING, Dict, List +from typing import TYPE_CHECKING from dask_sql.mappings import _SQL_TO_PYTHON_FRAMES from dask_sql.physical.rex.core import RexCallPlugin @@ -153,6 +153,6 @@ def _register_syntax_highlighting(): # pragma: no cover display.display_javascript(js + _JS_ENABLE_DASK_SQL, raw=True) -def _create_set(keys: List[str]) -> Dict[str, bool]: # pragma: no cover +def _create_set(keys: list[str]) -> dict[str, bool]: # pragma: no cover """Small helper function to turn a list into the correct format for codemirror""" return {key: True for key in keys} diff --git a/dask_sql/physical/rel/base.py b/dask_sql/physical/rel/base.py index 520f14e6d..3d42a84b6 100644 --- a/dask_sql/physical/rel/base.py +++ b/dask_sql/physical/rel/base.py @@ -1,5 +1,5 @@ import logging -from typing import TYPE_CHECKING, List +from typing import TYPE_CHECKING import dask.dataframe as dd @@ -66,7 +66,7 @@ def assert_inputs( rel: "LogicalPlan", n: int = 1, context: "dask_sql.Context" = None, - ) -> List[dd.DataFrame]: + ) -> list[dd.DataFrame]: """ LogicalPlan nodes build on top of others. Those are called the "input" of the LogicalPlan. 
diff --git a/dask_sql/physical/rel/custom/wrappers.py b/dask_sql/physical/rel/custom/wrappers.py index a1ab18534..49d4adb64 100644 --- a/dask_sql/physical/rel/custom/wrappers.py +++ b/dask_sql/physical/rel/custom/wrappers.py @@ -3,7 +3,7 @@ """Meta-estimators for parallelizing estimators using the scikit-learn API.""" import logging import warnings -from typing import Any, Callable, Tuple, Union +from typing import Any, Callable, Union import dask.array as da import dask.dataframe as dd @@ -34,9 +34,9 @@ # Scorers -accuracy_scorer: Tuple[Any, Any] = (accuracy_score, {}) +accuracy_scorer: tuple[Any, Any] = (accuracy_score, {}) neg_mean_squared_error_scorer = (mean_squared_error, dict(greater_is_better=False)) -r2_scorer: Tuple[Any, Any] = (r2_score, {}) +r2_scorer: tuple[Any, Any] = (r2_score, {}) neg_log_loss_scorer = (log_loss, dict(greater_is_better=False, needs_proba=True)) @@ -504,7 +504,7 @@ def __init__( self.shuffle_blocks = shuffle_blocks self.random_state = random_state self.assume_equal_chunks = assume_equal_chunks - super(Incremental, self).__init__( + super().__init__( estimator=estimator, scoring=scoring, predict_meta=predict_meta, diff --git a/dask_sql/physical/rel/logical/aggregate.py b/dask_sql/physical/rel/logical/aggregate.py index 84c832177..24c339381 100644 --- a/dask_sql/physical/rel/logical/aggregate.py +++ b/dask_sql/physical/rel/logical/aggregate.py @@ -2,7 +2,7 @@ import operator from collections import defaultdict from functools import reduce -from typing import TYPE_CHECKING, Any, Callable, Dict, List, Tuple +from typing import TYPE_CHECKING, Any, Callable import dask.dataframe as dd import pandas as pd @@ -259,9 +259,9 @@ def _do_aggregations( self, rel: "LogicalPlan", dc: DataContainer, - group_columns: List[str], + group_columns: list[str], context: "dask_sql.Context", - ) -> Tuple[dd.DataFrame, List[str]]: + ) -> tuple[dd.DataFrame, list[str]]: """ Main functionality: return the result dataframe and the output column order @@ -351,9 
+351,9 @@ def _collect_aggregations( cc: ColumnContainer, context: "dask_sql.Context", additional_column_name: str, - output_column_order: List[str], - ) -> Tuple[ - Dict[Tuple[str, str], List[Tuple[str, str, Any]]], List[str], dd.DataFrame + output_column_order: list[str], + ) -> tuple[ + dict[tuple[str, str], list[tuple[str, str, Any]]], list[str], dd.DataFrame ]: """ Collect all aggregations together, which have the same filter column @@ -494,10 +494,10 @@ def _perform_aggregation( dc: DataContainer, filter_column: str, distinct_column: str, - aggregations: List[Tuple[str, str, Any]], + aggregations: list[tuple[str, str, Any]], additional_column_name: str, - group_columns: List[str], - groupby_agg_options: Dict[str, Any] = {}, + group_columns: list[str], + groupby_agg_options: dict[str, Any] = {}, ): tmp_df = dc.df diff --git a/dask_sql/physical/rel/logical/join.py b/dask_sql/physical/rel/logical/join.py index ddb8d7349..e797be6da 100644 --- a/dask_sql/physical/rel/logical/join.py +++ b/dask_sql/physical/rel/logical/join.py @@ -2,7 +2,7 @@ import operator import warnings from functools import reduce -from typing import TYPE_CHECKING, List, Tuple +from typing import TYPE_CHECKING import dask.dataframe as dd from dask import config as dask_config @@ -212,8 +212,8 @@ def _join_on_columns( self, df_lhs_renamed: dd.DataFrame, df_rhs_renamed: dd.DataFrame, - lhs_on: List[str], - rhs_on: List[str], + lhs_on: list[str], + rhs_on: list[str], join_type: str, ) -> dd.DataFrame: @@ -267,7 +267,7 @@ def _join_on_columns( def _split_join_condition( self, join_condition: "Expression" - ) -> Tuple[List[str], List[str], List["Expression"]]: + ) -> tuple[list[str], list[str], list["Expression"]]: if str(join_condition.getRexType()) in ["RexType.Literal", "RexType.Reference"]: return [], [], [join_condition] elif not str(join_condition.getRexType()) == "RexType.Call": diff --git a/dask_sql/physical/rel/logical/table_scan.py b/dask_sql/physical/rel/logical/table_scan.py index 
5c1718f62..62cb38256 100644 --- a/dask_sql/physical/rel/logical/table_scan.py +++ b/dask_sql/physical/rel/logical/table_scan.py @@ -41,7 +41,7 @@ def convert( # The table(s) we need to return dask_table = rel.getTable() - schema_name, table_name = [n.lower() for n in context.fqn(dask_table)] + schema_name, table_name = (n.lower() for n in context.fqn(dask_table)) dc = context.schema[schema_name].tables[table_name] diff --git a/dask_sql/physical/rel/logical/window.py b/dask_sql/physical/rel/logical/window.py index 331876c49..2ac20ec3b 100644 --- a/dask_sql/physical/rel/logical/window.py +++ b/dask_sql/physical/rel/logical/window.py @@ -1,7 +1,7 @@ import logging from collections import namedtuple from functools import partial -from typing import TYPE_CHECKING, Callable, List, Optional, Tuple +from typing import TYPE_CHECKING, Callable, Optional import dask.dataframe as dd import numpy as np @@ -109,7 +109,7 @@ def _get_window_bounds( min_periods: Optional[int] = None, center: Optional[bool] = None, closed: Optional[str] = None, - ) -> Tuple[np.ndarray, np.ndarray]: + ) -> tuple[np.ndarray, np.ndarray]: if self.start is None: start = np.zeros(num_values, dtype=np.int64) else: @@ -141,7 +141,7 @@ def get_window_bounds( center: Optional[bool] = None, closed: Optional[str] = None, step: Optional[int] = None, - ) -> Tuple[np.ndarray, np.ndarray]: + ) -> tuple[np.ndarray, np.ndarray]: return self._get_window_bounds(num_values, min_periods, center, closed) else: @@ -152,18 +152,18 @@ def get_window_bounds( min_periods: Optional[int] = None, center: Optional[bool] = None, closed: Optional[str] = None, - ) -> Tuple[np.ndarray, np.ndarray]: + ) -> tuple[np.ndarray, np.ndarray]: return self._get_window_bounds(num_values, min_periods, center, closed) def map_on_each_group( partitioned_group: pd.DataFrame, - sort_columns: List[str], - sort_ascending: List[bool], - sort_null_first: List[bool], + sort_columns: list[str], + sort_ascending: list[bool], + sort_null_first: list[bool], 
lower_bound: BoundDescription, upper_bound: BoundDescription, - operations: List[Tuple[Callable, str, List[str]]], + operations: list[tuple[Callable, str, list[str]]], ): """Internal function mapped on each group of the dataframe after partitioning""" # Apply sorting @@ -261,7 +261,7 @@ def _apply_window( rel, window, dc: DataContainer, - field_names: List[str], + field_names: list[str], context: "dask_sql.Context", ): temporary_columns = [] @@ -366,7 +366,7 @@ def _extract_groupby( window, dc: DataContainer, context: "dask_sql.Context", - ) -> Tuple[dd.DataFrame, str]: + ) -> tuple[dd.DataFrame, str]: """Prepare grouping columns we can later use while applying the main function""" partition_keys = rel.window().getPartitionExprs(window) if partition_keys: @@ -385,7 +385,7 @@ def _extract_groupby( def _extract_ordering( self, rel, window, cc: ColumnContainer - ) -> Tuple[str, str, str]: + ) -> tuple[str, str, str]: """Prepare sorting information we can later use while applying the main function""" logger.debug( "Error is about to be encountered, FIX me when bindings are available in subsequent PR" @@ -407,7 +407,7 @@ def _extract_operations( df: dd.DataFrame, dc: DataContainer, context: "dask_sql.Context", - ) -> List[Tuple[Callable, str, List[str]]]: + ) -> list[tuple[Callable, str, list[str]]]: # Finally apply the actual function on each group separately operations = [] diff --git a/dask_sql/physical/utils/groupby.py b/dask_sql/physical/utils/groupby.py index 97070bdd0..089219181 100644 --- a/dask_sql/physical/utils/groupby.py +++ b/dask_sql/physical/utils/groupby.py @@ -1,12 +1,10 @@ -from typing import List - import dask.dataframe as dd from dask_sql.utils import new_temporary_column def get_groupby_with_nulls_cols( - df: dd.DataFrame, group_columns: List[str], additional_column_name: str = None + df: dd.DataFrame, group_columns: list[str], additional_column_name: str = None ): """ SQL and dask are treating null columns a bit different: diff --git 
a/dask_sql/physical/utils/sort.py b/dask_sql/physical/utils/sort.py index 8ac103ff1..c54feae8f 100644 --- a/dask_sql/physical/utils/sort.py +++ b/dask_sql/physical/utils/sort.py @@ -1,5 +1,3 @@ -from typing import List - import dask.dataframe as dd import pandas as pd from dask import config as dask_config @@ -10,9 +8,9 @@ def apply_sort( df: dd.DataFrame, - sort_columns: List[str], - sort_ascending: List[bool], - sort_null_first: List[bool], + sort_columns: list[str], + sort_ascending: list[bool], + sort_null_first: list[bool], sort_num_rows: int = None, ) -> dd.DataFrame: # when sort_values doesn't support lists of ascending / null @@ -79,8 +77,8 @@ def apply_sort( def topk_sort( df: dd.DataFrame, - sort_columns: List[str], - sort_ascending: List[bool], + sort_columns: list[str], + sort_ascending: list[bool], sort_num_rows: int = None, ): if sort_ascending[0]: @@ -91,9 +89,9 @@ def topk_sort( def sort_partition_func( partition: pd.DataFrame, - sort_columns: List[str], - sort_ascending: List[bool], - sort_null_first: List[bool], + sort_columns: list[str], + sort_ascending: list[bool], + sort_null_first: list[bool], **kwargs, ): if partition.empty: @@ -121,9 +119,9 @@ def sort_partition_func( def is_topk_optimizable( df: dd.DataFrame, - sort_columns: List[str], + sort_columns: list[str], single_ascending: bool, - sort_null_first: List[bool], + sort_null_first: list[bool], sort_num_rows: int = None, ): if ( diff --git a/dask_sql/physical/utils/statistics.py b/dask_sql/physical/utils/statistics.py index 4dc06b91a..1ada03de8 100644 --- a/dask_sql/physical/utils/statistics.py +++ b/dask_sql/physical/utils/statistics.py @@ -4,7 +4,6 @@ import logging from collections import defaultdict from functools import lru_cache -from typing import List import dask import dask.dataframe as dd @@ -21,10 +20,10 @@ def parquet_statistics( ddf: dd.DataFrame, - columns: List | None = None, + columns: list | None = None, parallel: int | False | None = None, **compute_kwargs, -) -> 
List[dict] | None: +) -> list[dict] | None: """Extract Parquet statistics from a Dask DataFrame collection WARNING: This API is experimental diff --git a/dask_sql/utils.py b/dask_sql/utils.py index 39c165597..7426ca64d 100644 --- a/dask_sql/utils.py +++ b/dask_sql/utils.py @@ -2,7 +2,7 @@ import logging from collections import defaultdict from datetime import datetime -from typing import Any, Dict +from typing import Any from uuid import uuid4 import dask.dataframe as dd @@ -142,8 +142,8 @@ def __str__(self): def convert_sql_kwargs( - sql_kwargs: Dict[str, str], -) -> Dict[str, Any]: + sql_kwargs: dict[str, str], +) -> dict[str, Any]: """ Convert the Rust Vec of key/value pairs into a Dict containing the keys and values """ diff --git a/tests/integration/test_cmd.py b/tests/integration/test_cmd.py index fdef31bd3..847936e22 100644 --- a/tests/integration/test_cmd.py +++ b/tests/integration/test_cmd.py @@ -1,6 +1,7 @@ +from unittest.mock import MagicMock, patch + import pytest from dask import config as dask_config -from mock import MagicMock, patch from prompt_toolkit.application import create_app_session from prompt_toolkit.input import create_pipe_input from prompt_toolkit.output import DummyOutput diff --git a/tests/integration/test_model.py b/tests/integration/test_model.py index 9980fc103..5d47e2ec4 100644 --- a/tests/integration/test_model.py +++ b/tests/integration/test_model.py @@ -360,7 +360,7 @@ def test_correct_argument_passing(c): boolean=False, array=[1, 2], dict={"a": 1}, - set=set([1, 2, 3]), + set={1, 2, 3}, ) diff --git a/tests/unit/test_ml_utils.py b/tests/unit/test_ml_utils.py index d092c824d..7130b2bed 100644 --- a/tests/unit/test_ml_utils.py +++ b/tests/unit/test_ml_utils.py @@ -61,7 +61,7 @@ def check_random_state(random_state): elif isinstance(random_state, da.random.RandomState): return random_state else: - raise TypeError("Unexpected type '{}'".format(type(random_state))) + raise TypeError(f"Unexpected type '{type(random_state)}'") def 
make_classification( From 95e8ff9fd9eb585863775265f55094cc5d76b980 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Tue, 30 May 2023 10:51:40 -0700 Subject: [PATCH 17/30] Continue marking test_dask_fsql as flaky --- tests/integration/test_fugue.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/integration/test_fugue.py b/tests/integration/test_fugue.py index a8c22aeb7..1e1bcd2c4 100644 --- a/tests/integration/test_fugue.py +++ b/tests/integration/test_fugue.py @@ -40,8 +40,7 @@ def test_fugue_fsql(client): assert_eq(return_df, pd.DataFrame({"a": [1], "b": ["world"]})) -# TODO: uncomment if flaky failures persist on python 3.9 -# @pytest.mark.flaky(reruns=4, condition="sys.version_info < (3, 9)") +@pytest.mark.flaky(reruns=4, condition="sys.version_info < (3, 10)") def test_dask_fsql(client): def assert_fsql(df: pd.DataFrame) -> None: assert_eq(df, pd.DataFrame({"a": [1]})) From 3a5842db42ba8f560083762871c0f664a89050a1 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Tue, 7 Nov 2023 11:53:18 -0800 Subject: [PATCH 18/30] More places to drop 3.8 --- .github/workflows/conda.yml | 2 +- Cargo.toml | 2 +- pyproject.toml | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index 63a67da6c..cdd1d5b27 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -31,7 +31,7 @@ jobs: strategy: fail-fast: false matrix: - python: ["3.8", "3.9", "3.10"] + python: ["3.9", "3.10", "3.11"] arch: ["linux-64", "linux-aarch64"] steps: - name: Manage disk space diff --git a/Cargo.toml b/Cargo.toml index 867727871..dfcb4f66f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,7 @@ async-trait = "0.1.74" datafusion-python = "28.0.0" env_logger = "0.10" log = "^0.4" -pyo3 = { version = "0.19.1", features = ["extension-module", "abi3", "abi3-py38"] } 
+pyo3 = { version = "0.19.1", features = ["extension-module", "abi3", "abi3-py39"] } pyo3-log = "0.9.0" [build-dependencies] diff --git a/pyproject.toml b/pyproject.toml index 3caa92ddb..14092b951 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,14 +17,14 @@ classifiers = [ "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Topic :: Scientific/Engineering", "Topic :: System :: Distributed Computing", ] readme = "README.md" -requires-python = ">=3.8" +requires-python = ">=3.9" dependencies = [ "dask[dataframe]>=2022.3.0", "distributed>=2022.3.0", From 7bf20570bb1a3358dd23a7eafb9d319a8bbee8be Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Mon, 8 Jan 2024 10:10:19 -0800 Subject: [PATCH 19/30] Try running tests on python 3.12 --- .github/workflows/conda.yml | 2 +- .github/workflows/test-upstream.yml | 2 +- .github/workflows/test.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index 3690698ea..549b3411a 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -31,7 +31,7 @@ jobs: strategy: fail-fast: false matrix: - python: ["3.9", "3.10", "3.11"] + python: ["3.9", "3.10", "3.11", "3.12"] arch: ["linux-64", "linux-aarch64"] steps: - name: Manage disk space diff --git a/.github/workflows/test-upstream.yml b/.github/workflows/test-upstream.yml index 7c228cfb0..91c8a3d34 100644 --- a/.github/workflows/test-upstream.yml +++ b/.github/workflows/test-upstream.yml @@ -42,7 +42,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macos-latest] - python: ["3.9", "3.10", "3.11"] + python: ["3.9", "3.10", "3.11", "3.12"] distributed: [false] include: # run 
tests on a distributed client diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 0dbb12269..f43e1b6fb 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -43,7 +43,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macos-latest] - python: ["3.9", "3.10", "3.11"] + python: ["3.9", "3.10", "3.11", "3.12"] distributed: [false] include: # run tests on a distributed client From 3e58de239d9ad12d9920998d517442edf973b362 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Mon, 8 Jan 2024 10:14:54 -0800 Subject: [PATCH 20/30] Add environment file --- continuous_integration/environment-3.12.yaml | 40 ++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 continuous_integration/environment-3.12.yaml diff --git a/continuous_integration/environment-3.12.yaml b/continuous_integration/environment-3.12.yaml new file mode 100644 index 000000000..ff34fab5b --- /dev/null +++ b/continuous_integration/environment-3.12.yaml @@ -0,0 +1,40 @@ +name: dask-sql +channels: +- conda-forge +dependencies: +- c-compiler +- dask>=2022.3.0,<=2023.11.0 +- fastapi>=0.92.0 +- fugue>=0.7.3 +- httpx>=0.24.1 +- intake>=0.6.0 +- jsonschema +- libprotobuf=3 +- lightgbm +- maturin>=1.3,<1.4 +# FIXME: mlflow 2.6.0 has import issues related to pydantic +# https://github.com/mlflow/mlflow/issues/9331 +- mlflow<2.6 +- mock +- numpy>=1.21.6 +- pandas>=1.4.0 +- pre-commit +- prompt_toolkit>=3.0.8 +- psycopg2 +- pyarrow>=6.0.2 +- pygments>=2.7.1 +- pyhive +- pytest-cov +- pytest-rerunfailures +- pytest-xdist +- pytest +- python=3.12 +- scikit-learn>=1.0.0 +- sphinx +- sqlalchemy<2 +- tpot>=0.12.0 +# FIXME: https://github.com/fugue-project/fugue/issues/526 +- triad<0.9.2 +- tzlocal>=2.1 +- uvicorn>=0.13.4 +- zlib From e96115ad4352f78ce0f00dd446ab1d35a2f35444 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca 
<20627856+charlesbluca@users.noreply.github.com> Date: Mon, 8 Jan 2024 10:23:28 -0800 Subject: [PATCH 21/30] Skip sasl installation --- .github/workflows/test-upstream.yml | 1 - .github/workflows/test.yml | 3 --- tests/integration/test_hive.py | 5 ++--- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/.github/workflows/test-upstream.yml b/.github/workflows/test-upstream.yml index 91c8a3d34..f9c08bade 100644 --- a/.github/workflows/test-upstream.yml +++ b/.github/workflows/test-upstream.yml @@ -75,7 +75,6 @@ jobs: - name: Install hive testing dependencies if: matrix.os == 'ubuntu-latest' run: | - mamba install -c conda-forge "sasl>=0.3.1" docker pull bde2020/hive:2.3.2-postgresql-metastore docker pull bde2020/hive-metastore-postgresql:2.3.0 - name: Install upstream dev Dask diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f43e1b6fb..be3a0199b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -74,12 +74,9 @@ jobs: run: | maturin develop - name: Install hive testing dependencies - # FIXME: sasl is not available on python 3.11 if: | matrix.os == 'ubuntu-latest' - && matrix.python != '3.11' run: | - mamba install -c conda-forge "sasl>=0.3.1" docker pull bde2020/hive:2.3.2-postgresql-metastore docker pull bde2020/hive-metastore-postgresql:2.3.0 - name: Optionally install upstream dev Dask diff --git a/tests/integration/test_hive.py b/tests/integration/test_hive.py index 4c4ba5053..042b6f1a0 100644 --- a/tests/integration/test_hive.py +++ b/tests/integration/test_hive.py @@ -9,9 +9,8 @@ from tests.utils import assert_eq pytestmark = pytest.mark.xfail( - # FIXME: sasl is not available on python 3.11 - condition="sys.platform in ('win32', 'darwin') or sys.version_info == (3, 11)", - reason="hive testing only supported on linux with python<3.11", + condition="sys.platform in ('win32', 'darwin')", + reason="hive testing not supported on Windows/macOS", ) docker = pytest.importorskip("docker") sqlalchemy = 
pytest.importorskip("sqlalchemy") From 025dbd06223557cf23f7b50c2ac601802da151b7 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Mon, 8 Jan 2024 10:55:38 -0800 Subject: [PATCH 22/30] Drop protoc build dep --- continuous_integration/environment-3.10.yaml | 1 - continuous_integration/environment-3.11.yaml | 1 - continuous_integration/environment-3.12.yaml | 1 - continuous_integration/environment-3.9.yaml | 1 - continuous_integration/gpuci/environment-3.10.yaml | 1 - continuous_integration/gpuci/environment-3.9.yaml | 1 - 6 files changed, 6 deletions(-) diff --git a/continuous_integration/environment-3.10.yaml b/continuous_integration/environment-3.10.yaml index f729b5b6d..908a80c60 100644 --- a/continuous_integration/environment-3.10.yaml +++ b/continuous_integration/environment-3.10.yaml @@ -9,7 +9,6 @@ dependencies: - httpx>=0.24.1 - intake>=0.6.0 - jsonschema -- libprotobuf=3 - lightgbm - maturin>=1.3,<1.4 # FIXME: mlflow 2.6.0 has import issues related to pydantic diff --git a/continuous_integration/environment-3.11.yaml b/continuous_integration/environment-3.11.yaml index ef75fce26..17aff1176 100644 --- a/continuous_integration/environment-3.11.yaml +++ b/continuous_integration/environment-3.11.yaml @@ -9,7 +9,6 @@ dependencies: - httpx>=0.24.1 - intake>=0.6.0 - jsonschema -- libprotobuf=3 - lightgbm - maturin>=1.3,<1.4 # FIXME: mlflow 2.6.0 has import issues related to pydantic diff --git a/continuous_integration/environment-3.12.yaml b/continuous_integration/environment-3.12.yaml index ff34fab5b..08f3da6d7 100644 --- a/continuous_integration/environment-3.12.yaml +++ b/continuous_integration/environment-3.12.yaml @@ -9,7 +9,6 @@ dependencies: - httpx>=0.24.1 - intake>=0.6.0 - jsonschema -- libprotobuf=3 - lightgbm - maturin>=1.3,<1.4 # FIXME: mlflow 2.6.0 has import issues related to pydantic diff --git a/continuous_integration/environment-3.9.yaml b/continuous_integration/environment-3.9.yaml index 
7d6318bd5..1c4a76b57 100644 --- a/continuous_integration/environment-3.9.yaml +++ b/continuous_integration/environment-3.9.yaml @@ -9,7 +9,6 @@ dependencies: - httpx=0.24.1 - intake=0.6.0 - jsonschema -- libprotobuf=3 - lightgbm - maturin>=1.3,<1.4 # FIXME: mlflow 2.6.0 has import issues related to pydantic diff --git a/continuous_integration/gpuci/environment-3.10.yaml b/continuous_integration/gpuci/environment-3.10.yaml index b926617e3..370577a54 100644 --- a/continuous_integration/gpuci/environment-3.10.yaml +++ b/continuous_integration/gpuci/environment-3.10.yaml @@ -14,7 +14,6 @@ dependencies: - httpx>=0.24.1 - intake>=0.6.0 - jsonschema -- libprotobuf=3 - lightgbm - maturin>=1.3,<1.4 # FIXME: mlflow 2.6.0 has import issues related to pydantic diff --git a/continuous_integration/gpuci/environment-3.9.yaml b/continuous_integration/gpuci/environment-3.9.yaml index ae10b6943..75f44c29b 100644 --- a/continuous_integration/gpuci/environment-3.9.yaml +++ b/continuous_integration/gpuci/environment-3.9.yaml @@ -14,7 +14,6 @@ dependencies: - httpx=0.24.1 - intake=0.6.0 - jsonschema -- libprotobuf=3 - lightgbm - maturin=1.3 # FIXME: mlflow 2.6.0 has import issues related to pydantic From 44ed2efee9f39bf7493de57e8341ceff46cbf418 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Mon, 8 Jan 2024 11:13:22 -0800 Subject: [PATCH 23/30] Drop mlflow constraint --- continuous_integration/environment-3.10.yaml | 4 +--- continuous_integration/environment-3.11.yaml | 4 +--- continuous_integration/environment-3.12.yaml | 4 +--- continuous_integration/environment-3.9.yaml | 4 +--- continuous_integration/gpuci/environment-3.10.yaml | 4 +--- continuous_integration/gpuci/environment-3.9.yaml | 4 +--- 6 files changed, 6 insertions(+), 18 deletions(-) diff --git a/continuous_integration/environment-3.10.yaml b/continuous_integration/environment-3.10.yaml index 908a80c60..92e939120 100644 --- 
a/continuous_integration/environment-3.10.yaml +++ b/continuous_integration/environment-3.10.yaml @@ -11,9 +11,7 @@ dependencies: - jsonschema - lightgbm - maturin>=1.3,<1.4 -# FIXME: mlflow 2.6.0 has import issues related to pydantic -# https://github.com/mlflow/mlflow/issues/9331 -- mlflow<2.6 +- mlflow - mock - numpy>=1.21.6 - pandas>=1.4.0 diff --git a/continuous_integration/environment-3.11.yaml b/continuous_integration/environment-3.11.yaml index 17aff1176..b1525afb2 100644 --- a/continuous_integration/environment-3.11.yaml +++ b/continuous_integration/environment-3.11.yaml @@ -11,9 +11,7 @@ dependencies: - jsonschema - lightgbm - maturin>=1.3,<1.4 -# FIXME: mlflow 2.6.0 has import issues related to pydantic -# https://github.com/mlflow/mlflow/issues/9331 -- mlflow<2.6 +- mlflow - mock - numpy>=1.21.6 - pandas>=1.4.0 diff --git a/continuous_integration/environment-3.12.yaml b/continuous_integration/environment-3.12.yaml index 08f3da6d7..6462e6e22 100644 --- a/continuous_integration/environment-3.12.yaml +++ b/continuous_integration/environment-3.12.yaml @@ -11,9 +11,7 @@ dependencies: - jsonschema - lightgbm - maturin>=1.3,<1.4 -# FIXME: mlflow 2.6.0 has import issues related to pydantic -# https://github.com/mlflow/mlflow/issues/9331 -- mlflow<2.6 +- mlflow - mock - numpy>=1.21.6 - pandas>=1.4.0 diff --git a/continuous_integration/environment-3.9.yaml b/continuous_integration/environment-3.9.yaml index 1c4a76b57..d06e943d7 100644 --- a/continuous_integration/environment-3.9.yaml +++ b/continuous_integration/environment-3.9.yaml @@ -11,9 +11,7 @@ dependencies: - jsonschema - lightgbm - maturin>=1.3,<1.4 -# FIXME: mlflow 2.6.0 has import issues related to pydantic -# https://github.com/mlflow/mlflow/issues/9331 -- mlflow<2.6 +- mlflow - mock - numpy=1.21.6 - pandas=1.4.0 diff --git a/continuous_integration/gpuci/environment-3.10.yaml b/continuous_integration/gpuci/environment-3.10.yaml index 
370577a54..05bcecfce 100644 --- a/continuous_integration/gpuci/environment-3.10.yaml +++ b/continuous_integration/gpuci/environment-3.10.yaml @@ -16,9 +16,7 @@ dependencies: - jsonschema - lightgbm - maturin>=1.3,<1.4 -# FIXME: mlflow 2.6.0 has import issues related to pydantic -# https://github.com/mlflow/mlflow/issues/9331 -- mlflow<2.6 +- mlflow - mock - numpy>=1.21.6 - pandas>=1.4.0 diff --git a/continuous_integration/gpuci/environment-3.9.yaml b/continuous_integration/gpuci/environment-3.9.yaml index 75f44c29b..d386b9ead 100644 --- a/continuous_integration/gpuci/environment-3.9.yaml +++ b/continuous_integration/gpuci/environment-3.9.yaml @@ -16,9 +16,7 @@ dependencies: - jsonschema - lightgbm - maturin=1.3 -# FIXME: mlflow 2.6.0 has import issues related to pydantic -# https://github.com/mlflow/mlflow/issues/9331 -- mlflow<2.6 +- mlflow - mock - numpy=1.21.6 - pandas=1.4.0 From cc7f20951a3746fbe710d2961ae8ebd7bc7b674b Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Mon, 8 Jan 2024 11:36:24 -0800 Subject: [PATCH 24/30] Set min version for mlflow --- continuous_integration/environment-3.10.yaml | 2 +- continuous_integration/environment-3.11.yaml | 2 +- continuous_integration/environment-3.12.yaml | 2 +- continuous_integration/environment-3.9.yaml | 4 ++-- continuous_integration/gpuci/environment-3.10.yaml | 2 +- continuous_integration/gpuci/environment-3.9.yaml | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/continuous_integration/environment-3.10.yaml b/continuous_integration/environment-3.10.yaml index 92e939120..e77fb05ef 100644 --- a/continuous_integration/environment-3.10.yaml +++ b/continuous_integration/environment-3.10.yaml @@ -11,7 +11,7 @@ dependencies: - jsonschema - lightgbm - maturin>=1.3,<1.4 -- mlflow +- mlflow>=2.0 - mock - numpy>=1.21.6 - pandas>=1.4.0 diff --git a/continuous_integration/environment-3.11.yaml 
b/continuous_integration/environment-3.11.yaml index b1525afb2..f873e505b 100644 --- a/continuous_integration/environment-3.11.yaml +++ b/continuous_integration/environment-3.11.yaml @@ -11,7 +11,7 @@ dependencies: - jsonschema - lightgbm - maturin>=1.3,<1.4 -- mlflow +- mlflow>=2.0 - mock - numpy>=1.21.6 - pandas>=1.4.0 diff --git a/continuous_integration/environment-3.12.yaml b/continuous_integration/environment-3.12.yaml index 6462e6e22..a453149f4 100644 --- a/continuous_integration/environment-3.12.yaml +++ b/continuous_integration/environment-3.12.yaml @@ -11,7 +11,7 @@ dependencies: - jsonschema - lightgbm - maturin>=1.3,<1.4 -- mlflow +- mlflow>=2.0 - mock - numpy>=1.21.6 - pandas>=1.4.0 diff --git a/continuous_integration/environment-3.9.yaml b/continuous_integration/environment-3.9.yaml index d06e943d7..a627318c1 100644 --- a/continuous_integration/environment-3.9.yaml +++ b/continuous_integration/environment-3.9.yaml @@ -10,8 +10,8 @@ dependencies: - intake=0.6.0 - jsonschema - lightgbm -- maturin>=1.3,<1.4 -- mlflow +- maturin=1.3 +- mlflow=2.0 - mock - numpy=1.21.6 - pandas=1.4.0 diff --git a/continuous_integration/gpuci/environment-3.10.yaml b/continuous_integration/gpuci/environment-3.10.yaml index 05bcecfce..baed484b2 100644 --- a/continuous_integration/gpuci/environment-3.10.yaml +++ b/continuous_integration/gpuci/environment-3.10.yaml @@ -16,7 +16,7 @@ dependencies: - jsonschema - lightgbm - maturin>=1.3,<1.4 -- mlflow +- mlflow>=2.0 - mock - numpy>=1.21.6 - pandas>=1.4.0 diff --git a/continuous_integration/gpuci/environment-3.9.yaml b/continuous_integration/gpuci/environment-3.9.yaml index d386b9ead..8a8b9387c 100644 --- a/continuous_integration/gpuci/environment-3.9.yaml +++ b/continuous_integration/gpuci/environment-3.9.yaml @@ -16,7 +16,7 @@ dependencies: - jsonschema - lightgbm - maturin=1.3 -- mlflow +- mlflow=2.0 - mock - numpy=1.21.6 - pandas=1.4.0 From 88ccaaa18b1acaa115703337bb391b14ea264440 Mon Sep 17 00:00:00 2001 From: Charles 
Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Mon, 8 Jan 2024 12:06:14 -0800 Subject: [PATCH 25/30] Drop mlflow from 3.12 tests for now --- continuous_integration/environment-3.12.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/continuous_integration/environment-3.12.yaml b/continuous_integration/environment-3.12.yaml index a453149f4..2403ba8ef 100644 --- a/continuous_integration/environment-3.12.yaml +++ b/continuous_integration/environment-3.12.yaml @@ -11,7 +11,8 @@ dependencies: - jsonschema - lightgbm - maturin>=1.3,<1.4 -- mlflow>=2.0 +# TODO: add once mlflow 3.12 builds are available +# - mlflow>=2.0 - mock - numpy>=1.21.6 - pandas>=1.4.0 From 0eeb3389bbb1a9a707e4a83aad66ef64e64521ad Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Mon, 8 Jan 2024 12:29:03 -0800 Subject: [PATCH 26/30] Relocate docker/server files to continuous_integration --- .github/workflows/docker.yml | 6 +++--- {docker => continuous_integration/docker}/cloud.dockerfile | 0 {docker => continuous_integration/docker}/conda.txt | 0 {docker => continuous_integration/docker}/main.dockerfile | 4 ++-- .../scripts}/startup_script.py | 0 docs/source/server.rst | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) rename {docker => continuous_integration/docker}/cloud.dockerfile (100%) rename {docker => continuous_integration/docker}/conda.txt (100%) rename {docker => continuous_integration/docker}/main.dockerfile (90%) rename {scripts => continuous_integration/scripts}/startup_script.py (100%) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 658dc93e6..8b59c589f 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -11,7 +11,7 @@ on: - Cargo.toml - Cargo.lock - pyproject.toml - - docker/** + - continuous_integration/docker/** - .github/workflows/docker.yml # When this workflow is queued, automatically cancel any previous running @@ 
-47,7 +47,7 @@ jobs: uses: docker/build-push-action@v5 with: context: . - file: ./docker/main.dockerfile + file: ./continuous_integration/docker/main.dockerfile build-args: DOCKER_META_VERSION=${{ steps.docker_meta_main.outputs.version }} platforms: ${{ matrix.platform }} tags: ${{ steps.docker_meta_main.outputs.tags }} @@ -68,7 +68,7 @@ jobs: uses: docker/build-push-action@v5 with: context: . - file: ./docker/cloud.dockerfile + file: ./continuous_integration/docker/cloud.dockerfile build-args: DOCKER_META_VERSION=${{ steps.docker_meta_main.outputs.version }} platforms: ${{ matrix.platform }} tags: ${{ steps.docker_meta_cloud.outputs.tags }} diff --git a/docker/cloud.dockerfile b/continuous_integration/docker/cloud.dockerfile similarity index 100% rename from docker/cloud.dockerfile rename to continuous_integration/docker/cloud.dockerfile diff --git a/docker/conda.txt b/continuous_integration/docker/conda.txt similarity index 100% rename from docker/conda.txt rename to continuous_integration/docker/conda.txt diff --git a/docker/main.dockerfile b/continuous_integration/docker/main.dockerfile similarity index 90% rename from docker/main.dockerfile rename to continuous_integration/docker/main.dockerfile index 98fb970b4..dfe2ed282 100644 --- a/docker/main.dockerfile +++ b/continuous_integration/docker/main.dockerfile @@ -11,7 +11,7 @@ RUN sh /rustup-init.sh -y --default-toolchain=stable --profile=minimal \ ENV PATH="/root/.cargo/bin:${PATH}" # Install conda dependencies for dask-sql -COPY docker/conda.txt /opt/dask_sql/ +COPY continuous_integration/docker/conda.txt /opt/dask_sql/ RUN mamba install -y \ # build requirements "maturin>=1.3,<1.4" \ @@ -44,7 +44,7 @@ RUN cd /opt/dask_sql/ \ && CONDA_PREFIX="/opt/conda/" maturin develop # Set the script to execute -COPY scripts/startup_script.py /opt/dask_sql/startup_script.py +COPY continuous_integration/scripts/startup_script.py /opt/dask_sql/startup_script.py EXPOSE 8080 ENTRYPOINT [ "/usr/bin/prepare.sh", 
"/opt/conda/bin/python", "/opt/dask_sql/startup_script.py" ] diff --git a/scripts/startup_script.py b/continuous_integration/scripts/startup_script.py similarity index 100% rename from scripts/startup_script.py rename to continuous_integration/scripts/startup_script.py diff --git a/docs/source/server.rst b/docs/source/server.rst index 70ad902e9..5e26b04c6 100644 --- a/docs/source/server.rst +++ b/docs/source/server.rst @@ -121,7 +121,7 @@ To run a standalone SQL server in your ``dask`` cluster, follow these three step FROM nbraun/dask-sql - COPY startup_script.py /opt/dask_sql/startup_script.py + COPY continuous_integration/docker/startup_script.py /opt/dask_sql/startup_script.py ENTRYPOINT [ "/opt/conda/bin/python", "/opt/dask_sql/startup_script.py" ] From 5d501c4570f024cd7a1b4598ee532b6cbf358190 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Tue, 9 Jan 2024 05:57:03 -0800 Subject: [PATCH 27/30] Unpin dask/distributed --- .github/workflows/test.yml | 3 +-- continuous_integration/docker/conda.txt | 2 +- continuous_integration/docker/main.dockerfile | 2 +- continuous_integration/environment-3.10.yaml | 2 +- continuous_integration/environment-3.11.yaml | 2 +- continuous_integration/environment-3.12.yaml | 2 +- continuous_integration/gpuci/environment-3.10.yaml | 11 +++++------ continuous_integration/gpuci/environment-3.9.yaml | 9 ++++----- continuous_integration/recipe/meta.yaml | 2 +- docs/environment.yml | 2 +- docs/requirements-docs.txt | 2 +- pyproject.toml | 4 ++-- 12 files changed, 20 insertions(+), 23 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index be3a0199b..ef1398881 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -74,8 +74,7 @@ jobs: run: | maturin develop - name: Install hive testing dependencies - if: | - matrix.os == 'ubuntu-latest' + if: matrix.os == 'ubuntu-latest' run: | docker pull bde2020/hive:2.3.2-postgresql-metastore 
docker pull bde2020/hive-metastore-postgresql:2.3.0 diff --git a/continuous_integration/docker/conda.txt b/continuous_integration/docker/conda.txt index 1687d0d50..270c2febd 100644 --- a/continuous_integration/docker/conda.txt +++ b/continuous_integration/docker/conda.txt @@ -1,5 +1,5 @@ python>=3.9 -dask>=2022.3.0,<=2023.11.0 +dask>=2022.3.0 pandas>=1.4.0 jpype1>=1.0.2 openjdk>=8 diff --git a/continuous_integration/docker/main.dockerfile b/continuous_integration/docker/main.dockerfile index dfe2ed282..78cd46938 100644 --- a/continuous_integration/docker/main.dockerfile +++ b/continuous_integration/docker/main.dockerfile @@ -16,7 +16,7 @@ RUN mamba install -y \ # build requirements "maturin>=1.3,<1.4" \ # core dependencies - "dask>=2022.3.0,<=2023.11.0" \ + "dask>=2022.3.0" \ "pandas>=1.4.0" \ "fastapi>=0.92.0" \ "httpx>=0.24.1" \ diff --git a/continuous_integration/environment-3.10.yaml b/continuous_integration/environment-3.10.yaml index e77fb05ef..b0557a915 100644 --- a/continuous_integration/environment-3.10.yaml +++ b/continuous_integration/environment-3.10.yaml @@ -3,7 +3,7 @@ channels: - conda-forge dependencies: - c-compiler -- dask>=2022.3.0,<=2023.11.0 +- dask>=2022.3.0 - fastapi>=0.92.0 - fugue>=0.7.3 - httpx>=0.24.1 diff --git a/continuous_integration/environment-3.11.yaml b/continuous_integration/environment-3.11.yaml index f873e505b..1bcf46d45 100644 --- a/continuous_integration/environment-3.11.yaml +++ b/continuous_integration/environment-3.11.yaml @@ -3,7 +3,7 @@ channels: - conda-forge dependencies: - c-compiler -- dask>=2022.3.0,<=2023.11.0 +- dask>=2022.3.0 - fastapi>=0.92.0 - fugue>=0.7.3 - httpx>=0.24.1 diff --git a/continuous_integration/environment-3.12.yaml b/continuous_integration/environment-3.12.yaml index 2403ba8ef..18a67409b 100644 --- a/continuous_integration/environment-3.12.yaml +++ b/continuous_integration/environment-3.12.yaml @@ -3,7 +3,7 @@ channels: - conda-forge dependencies: - c-compiler -- dask>=2022.3.0,<=2023.11.0 +- 
dask>=2022.3.0 - fastapi>=0.92.0 - fugue>=0.7.3 - httpx>=0.24.1 diff --git a/continuous_integration/gpuci/environment-3.10.yaml b/continuous_integration/gpuci/environment-3.10.yaml index baed484b2..2420e949f 100644 --- a/continuous_integration/gpuci/environment-3.10.yaml +++ b/continuous_integration/gpuci/environment-3.10.yaml @@ -8,7 +8,7 @@ channels: dependencies: - c-compiler - zlib -- dask>=2022.3.0,<=2023.11.0 +- dask>=2022.3.0 - fastapi>=0.92.0 - fugue>=0.7.3 - httpx>=0.24.1 @@ -49,8 +49,7 @@ dependencies: - ucx-py=0.36 - xgboost=*=rapidsai_py* - libxgboost=*=rapidsai_h* -# TODO: unpin after RAPIDS 24.02 release -# - pip -# - pip: -# - git+https://github.com/dask/dask -# - git+https://github.com/dask/distributed +- pip +- pip: + - git+https://github.com/dask/dask + - git+https://github.com/dask/distributed diff --git a/continuous_integration/gpuci/environment-3.9.yaml b/continuous_integration/gpuci/environment-3.9.yaml index 8a8b9387c..c7eceb01a 100644 --- a/continuous_integration/gpuci/environment-3.9.yaml +++ b/continuous_integration/gpuci/environment-3.9.yaml @@ -49,8 +49,7 @@ dependencies: - ucx-py=0.36 - xgboost=*=rapidsai_py* - libxgboost=*=rapidsai_h* -# TODO: unpin after RAPIDS 24.02 release -# - pip -# - pip: -# - git+https://github.com/dask/dask -# - git+https://github.com/dask/distributed +- pip +- pip: + - git+https://github.com/dask/dask + - git+https://github.com/dask/distributed diff --git a/continuous_integration/recipe/meta.yaml b/continuous_integration/recipe/meta.yaml index 16c943c38..60a5aa299 100644 --- a/continuous_integration/recipe/meta.yaml +++ b/continuous_integration/recipe/meta.yaml @@ -32,7 +32,7 @@ requirements: - xz # [linux64] run: - python - - dask >=2022.3.0,<=2023.11.0 + - dask >=2022.3.0 - pandas >=1.4.0 - fastapi >=0.92.0 - httpx >=0.24.1 diff --git a/docs/environment.yml 
b/docs/environment.yml index 7ca05ffcf..2d0e08ba0 100644 --- a/docs/environment.yml +++ b/docs/environment.yml @@ -6,7 +6,7 @@ dependencies: - sphinx>=4.0.0 - sphinx-tabs - dask-sphinx-theme>=2.0.3 - - dask>=2022.3.0,<=2023.11.0 + - dask>=2022.3.0 - pandas>=1.4.0 - fugue>=0.7.3 # FIXME: https://github.com/fugue-project/fugue/issues/526 diff --git a/docs/requirements-docs.txt b/docs/requirements-docs.txt index ed931135a..1f2052a92 100644 --- a/docs/requirements-docs.txt +++ b/docs/requirements-docs.txt @@ -1,7 +1,7 @@ sphinx>=4.0.0 sphinx-tabs dask-sphinx-theme>=3.0.0 -dask>=2022.3.0,<=2023.11.0 +dask>=2022.3.0 pandas>=1.4.0 fugue>=0.7.3 # FIXME: https://github.com/fugue-project/fugue/issues/526 diff --git a/pyproject.toml b/pyproject.toml index 2dba6bee4..67b1dd606 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,8 +26,8 @@ classifiers = [ readme = "README.md" requires-python = ">=3.9" dependencies = [ - "dask[dataframe]>=2022.3.0,<=2023.11.0", - "distributed>=2022.3.0,<=2023.11.0", + "dask[dataframe]>=2022.3.0", + "distributed>=2022.3.0", "pandas>=1.4.0", "fastapi>=0.92.0", "httpx>=0.24.1", From bdcb0e3315e5126a877df1a2cc252f4f6cb51e26 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Tue, 9 Jan 2024 06:52:52 -0800 Subject: [PATCH 28/30] unpin 3.9 gpu environment --- .../gpuci/environment-3.9.yaml | 41 ++++++++++--------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/continuous_integration/gpuci/environment-3.9.yaml b/continuous_integration/gpuci/environment-3.9.yaml index c7eceb01a..60e870bc2 100644 --- a/continuous_integration/gpuci/environment-3.9.yaml +++ b/continuous_integration/gpuci/environment-3.9.yaml @@ -8,37 +8,37 @@ channels: dependencies: - c-compiler - zlib -- dask=2022.3.0 -- fastapi=0.92.0 -- fugue=0.7.3 -- httpx=0.24.1 -- intake=0.6.0 +- dask>=2022.3.0 +- fastapi>=0.92.0 +- fugue>=0.7.3 +- httpx>=0.24.1 +- intake>=0.6.0 - jsonschema 
- lightgbm -- maturin=1.3 -- mlflow=2.0 +- maturin>=1.3,<1.4 +- mlflow>=2.0 - mock -- numpy=1.21.6 -- pandas=1.4.0 +- numpy>=1.21.6 +- pandas>=1.4.0 - pre-commit -- prompt_toolkit=3.0.8 +- prompt_toolkit>=3.0.8 - psycopg2 -- pyarrow=6.0.2 -- pygments=2.7.1 +- pyarrow>=6.0.2 +- pygments>=2.7.1 - pyhive - pytest-cov - pytest-rerunfailures - pytest-xdist - pytest - python=3.9 -- scikit-learn=1.0.0 +- scikit-learn>=1.0.0 - sphinx - sqlalchemy<2 -- tpot=0.12.0 +- tpot>=0.12.0 # FIXME: https://github.com/fugue-project/fugue/issues/526 - triad<0.9.2 -- tzlocal=2.1 -- uvicorn=0.13.4 +- tzlocal>=2.1 +- uvicorn>=0.13.4 # GPU-specific requirements - cudatoolkit=11.5 - cudf=24.02 @@ -49,7 +49,8 @@ dependencies: - ucx-py=0.36 - xgboost=*=rapidsai_py* - libxgboost=*=rapidsai_h* -- pip -- pip: - - git+https://github.com/dask/dask - - git+https://github.com/dask/distributed +# TODO: unpin after RAPIDS 24.02 release +# - pip +# - pip: +# - git+https://github.com/dask/dask +# - git+https://github.com/dask/distributed From 78df128317ee308e9afce66fdd3dea9883bbd75e Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Tue, 9 Jan 2024 06:57:34 -0800 Subject: [PATCH 29/30] add 3.12 to classifiers --- pyproject.toml | 1 + tests/integration/test_hive.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 67b1dd606..75ec4519f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,7 @@ classifiers = [ "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Topic :: Scientific/Engineering", "Topic :: System :: Distributed Computing", ] diff --git a/tests/integration/test_hive.py b/tests/integration/test_hive.py index 042b6f1a0..1a86082c1 100644 --- a/tests/integration/test_hive.py 
+++ b/tests/integration/test_hive.py @@ -1,4 +1,5 @@ import shutil +import sys import tempfile import time @@ -9,7 +10,7 @@ from tests.utils import assert_eq pytestmark = pytest.mark.xfail( - condition="sys.platform in ('win32', 'darwin')", + condition=sys.platform in ("win32", "darwin"), reason="hive testing not supported on Windows/macOS", ) docker = pytest.importorskip("docker") From ae174c178303bc7d82bb0000ad15c130a3cc8717 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Tue, 9 Jan 2024 07:21:52 -0800 Subject: [PATCH 30/30] unpin dask in gpuci 3.9 --- continuous_integration/gpuci/environment-3.9.yaml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/continuous_integration/gpuci/environment-3.9.yaml b/continuous_integration/gpuci/environment-3.9.yaml index 60e870bc2..f88cf57c7 100644 --- a/continuous_integration/gpuci/environment-3.9.yaml +++ b/continuous_integration/gpuci/environment-3.9.yaml @@ -49,8 +49,7 @@ dependencies: - ucx-py=0.36 - xgboost=*=rapidsai_py* - libxgboost=*=rapidsai_h* -# TODO: unpin after RAPIDS 24.02 release -# - pip -# - pip: -# - git+https://github.com/dask/dask -# - git+https://github.com/dask/distributed +- pip +- pip: + - git+https://github.com/dask/dask + - git+https://github.com/dask/distributed