Merged — changes from all 55 commits
23603de
Add basic predicate-pushdown optimization (#433)
rjzamora Mar 25, 2022
09c7bdf
Add workflow to keep datafusion dev branch up to date (#440)
charlesbluca Mar 25, 2022
1b0b6f7
Update gpuCI `RAPIDS_VER` to `22.06` (#434)
github-actions[bot] Apr 1, 2022
a05138d
Bump black to 22.3.0 (#443)
charlesbluca Apr 4, 2022
ab2aa5a
Check for ucx-py nightlies when updating gpuCI (#441)
charlesbluca Apr 5, 2022
a28f757
Add handling for newer `prompt_toolkit` versions in cmd tests (#447)
charlesbluca Apr 6, 2022
486fc66
Fix version for gha-find-replace (#446)
charlesbluca Apr 6, 2022
ce176e0
Update versions of Java dependencies (#445)
ayushdg Apr 7, 2022
50d95d2
Update jackson databind version (#449)
ayushdg Apr 7, 2022
37a3a61
Disable SQL server functionality (#448)
charlesbluca Apr 7, 2022
ffdc42f
Update dask pinnings for release (#450)
charlesbluca Apr 7, 2022
fa74aef
Add Java source code to source distribution (#451)
charlesbluca Apr 7, 2022
37ea6b6
Bump `httpclient` dependency (#453)
charlesbluca Apr 8, 2022
f19ee4d
Unpin Dask/distributed versions (#452)
charlesbluca Apr 11, 2022
1eb30c1
Add jsonschema to ci testing (#454)
ayushdg Apr 11, 2022
2bd1d18
Switch tests from `pd.testing.assert_frame_equal` to `dd.assert_eq` (…
charlesbluca Apr 11, 2022
95b0dd0
Set max pin on antlr4-python-runtime (#456)
ayushdg Apr 12, 2022
031c04c
Move / minimize number of cudf / dask-cudf imports (#480)
charlesbluca Apr 19, 2022
48eb983
Use `map_partitions` to compute LIMIT / OFFSET (#517)
charlesbluca May 13, 2022
7b4bc55
Use `dev` images for independent cluster testing (#518)
charlesbluca May 16, 2022
b58989f
Add documentation for FugueSQL integrations (#523)
charlesbluca May 16, 2022
cb3d903
Timestampdiff support (#495)
ayushdg May 17, 2022
8ec3ed5
Relax jsonschema testing dependency (#546)
charlesbluca May 20, 2022
ff4a8a5
Update upstream testing workflows (#536)
charlesbluca May 23, 2022
cb55c07
Fix pyarrow / cloudpickle failures in cluster testing (#553)
charlesbluca May 24, 2022
d8302e9
Use bash -l as default entrypoint for all jobs (#552)
charlesbluca May 24, 2022
0d0394a
Constrain dask/distributed for release (#563)
charlesbluca Jun 3, 2022
1e881ee
Unpin dask/distributed for development (#564)
charlesbluca Jun 3, 2022
243c809
update dask-sphinx-theme (#567)
scharlottej13 Jun 6, 2022
463ee3e
Introduce subquery.py to handle subquery expressions
jdye64 Jun 6, 2022
d5ded60
update ordering
jdye64 Jun 7, 2022
ec3d5da
Make sure scheduler has Dask nightlies in upstream cluster testing (#…
charlesbluca Jun 7, 2022
c19315a
Update gpuCI `RAPIDS_VER` to `22.08` (#565)
github-actions[bot] Jun 7, 2022
4832cff
Merge remote-tracking branch 'upstream/datafusion-sql-planner' into d…
jdye64 Jun 8, 2022
0ea3129
updates
jdye64 Jun 8, 2022
333d255
Merge with upstream/main after release
jdye64 Jun 13, 2022
39d66b5
Remove startswith function merged by mistake
jdye64 Jun 13, 2022
5256e55
[REVIEW] - Remove instance that are meant for the currently removed t…
jdye64 Jun 13, 2022
8f871f8
Merge remote-tracking branch 'upstream/datafusion-sql-planner' into d…
jdye64 Jun 14, 2022
416ef68
Merge remote-tracking branch 'upstream/datafusion-sql-planner' into d…
jdye64 Jun 14, 2022
bc1cadc
Modify test environment pinnings to cover minimum versions (#555)
charlesbluca Jun 15, 2022
0db4506
Don't move jar to local mvn repo (#579)
ksonj Jun 15, 2022
eb09e26
Add tests for intersection
jdye64 Jun 15, 2022
1ddac23
Add tests for intersection
jdye64 Jun 15, 2022
d899304
Add another intersection test, even more simple but for testing raw i…
jdye64 Jun 16, 2022
dbdc8c0
Use Timedelta when doing ReduceOperation(s) against datetime64 dtypes
jdye64 Jun 16, 2022
0e57dd4
Cleanup
jdye64 Jun 16, 2022
f033c56
Use an either/or strategy for converting to Timedelta objects
jdye64 Jun 16, 2022
0d48766
Support more than 2 operands for Timedelta conversions
jdye64 Jun 16, 2022
62703b5
Merge with datafusion-filter to address dtype issues
jdye64 Jun 17, 2022
1bdb97a
fix merge issues, is_frame() function of call.py was removed accident…
jdye64 Jun 17, 2022
fa418c5
Remove pytest that was testing Calcite exception messages. Calcite is…
jdye64 Jun 17, 2022
6ed2836
comment out gpu tests, will be enabled in datafusion-filter PR
jdye64 Jun 17, 2022
0fca3b9
Merge remote-tracking branch 'upstream/main' into datafusion-upstream…
charlesbluca Jun 17, 2022
77fa8df
Don't check dtype for failing test
charlesbluca Jun 17, 2022
21 changes: 21 additions & 0 deletions .github/cluster-upstream.yml
@@ -0,0 +1,21 @@
# Docker-compose setup used during tests
version: '3'
services:
  dask-scheduler:
    container_name: dask-scheduler
    image: daskdev/dask:dev-py3.9
    command: dask-scheduler
    environment:
      USE_MAMBA: "true"
      EXTRA_CONDA_PACKAGES: "dask/label/dev::dask cloudpickle>=2.1.0"
    ports:
      - "8786:8786"
  dask-worker:
    container_name: dask-worker
    image: daskdev/dask:dev-py3.9
    command: dask-worker dask-scheduler:8786
    environment:
      USE_MAMBA: "true"
      EXTRA_CONDA_PACKAGES: "dask/label/dev::dask cloudpickle>=2.1.0 pyarrow>=3.0.0 libstdcxx-ng>=12.1.0"
    volumes:
      - /tmp:/tmp
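For local debugging it can help to stand this cluster up by hand and run the test suite against it, mirroring what the CI jobs below do. A minimal sketch, assuming Docker Compose and the dask-sql test dependencies are installed locally:

# Start the upstream-dev scheduler/worker pair defined above
docker-compose -f .github/cluster-upstream.yml up -d

# Poll the worker logs until it connects to the scheduler
# (the same readiness check the CI workflows use)
timeout 2m bash -c 'until docker logs dask-worker 2>&1 | grep -q "Starting established connection"; do sleep 1; done'

# Run the tests against the independent cluster instead of a local one
DASK_SQL_TEST_SCHEDULER="tcp://127.0.0.1:8786" pytest tests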
6 changes: 3 additions & 3 deletions .github/docker-compose.yaml → .github/cluster.yml
@@ -3,7 +3,7 @@ version: '3'
services:
dask-scheduler:
container_name: dask-scheduler
image: daskdev/dask:dev
image: daskdev/dask:dev-py3.9
command: dask-scheduler
environment:
USE_MAMBA: "true"
@@ -12,10 +12,10 @@ services:
- "8786:8786"
dask-worker:
container_name: dask-worker
image: daskdev/dask:dev
image: daskdev/dask:dev-py3.9
command: dask-worker dask-scheduler:8786
environment:
USE_MAMBA: "true"
EXTRA_CONDA_PACKAGES: "pyarrow>=4.0.0" # required for parquet IO
EXTRA_CONDA_PACKAGES: "cloudpickle>=2.1.0 pyarrow>=3.0.0 libstdcxx-ng>=12.1.0"
volumes:
- /tmp:/tmp
30 changes: 30 additions & 0 deletions .github/workflows/datafusion-sync.yml
@@ -0,0 +1,30 @@
name: Keep datafusion branch up to date
on:
  push:
    branches:
      - main

# When this workflow is queued, automatically cancel any previous running
# or pending jobs
concurrency:
  group: datafusion-sync
  cancel-in-progress: true

jobs:
  sync-branches:
    runs-on: ubuntu-latest
    if: github.repository == 'dask-contrib/dask-sql'
    steps:
      - name: Checkout
        uses: actions/checkout@v2
      - name: Set up Node
        uses: actions/setup-node@v2
        with:
          node-version: 12
      - name: Opening pull request
        id: pull
        uses: tretuna/[email protected]
        with:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          FROM_BRANCH: main
          TO_BRANCH: datafusion-sql-planner
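The heavy lifting here is delegated to the tretuna/sync-branches action, which opens a pull request from main into datafusion-sql-planner whenever main moves. A rough manual equivalent — hypothetical, assuming an authenticated GitHub CLI — would be:

# Open the same sync PR by hand with the GitHub CLI
gh pr create \
  --repo dask-contrib/dask-sql \
  --base datafusion-sql-planner \
  --head main \
  --title "Keep datafusion-sql-planner up to date with main"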
107 changes: 100 additions & 7 deletions .github/workflows/test-upstream.yml
@@ -4,6 +4,11 @@ on:
- cron: "0 0 * * *" # Daily “At 00:00” UTC
workflow_dispatch: # allows you to trigger the workflow run manually

# Required shell entrypoint to have properly activated conda environments
defaults:
run:
shell: bash -l {0}

jobs:
test-dev:
name: "Test upstream dev (${{ matrix.os }}, python: ${{ matrix.python }})"
@@ -29,6 +34,7 @@ jobs:
use-mamba: true
python-version: ${{ matrix.python }}
channel-priority: strict
channels: dask/label/dev,conda-forge,nodefaults
activate-environment: dask-sql
environment-file: ${{ env.CONDA_FILE }}
- name: Install hive testing dependencies for Linux
@@ -39,23 +45,110 @@ jobs:
docker pull bde2020/hive-metastore-postgresql:2.3.0
- name: Install upstream dev Dask / dask-ml
run: |
python -m pip install --no-deps git+https://github.com/dask/dask
python -m pip install --no-deps git+https://github.com/dask/distributed
mamba update dask
python -m pip install --no-deps git+https://github.com/dask/dask-ml
- name: Test with pytest
run: |
pytest --junitxml=junit/test-results.xml --cov-report=xml -n auto tests --dist loadfile

cluster-dev:
name: "Test upstream dev in a dask cluster"
needs: build
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Cache local Maven repository
uses: actions/cache@v2
with:
path: ~/.m2/repository
key: ${{ runner.os }}-maven-v1-jdk11-${{ hashFiles('**/pom.xml') }}
- name: Set up Python
uses: conda-incubator/setup-miniconda@v2
with:
miniforge-variant: Mambaforge
use-mamba: true
python-version: "3.9"
channel-priority: strict
channels: dask/label/dev,conda-forge,nodefaults
activate-environment: dask-sql
environment-file: continuous_integration/environment-3.9-jdk11-dev.yaml
- name: Download the pre-build jar
uses: actions/download-artifact@v1
with:
name: jar
path: dask_sql/jar/
- name: Install cluster dependencies
run: |
mamba install python-blosc lz4 -c conda-forge

which python
pip list
mamba list
- name: Install upstream dev dask-ml
run: |
mamba update dask
python -m pip install --no-deps git+https://github.com/dask/dask-ml
- name: run a dask cluster
run: |
docker-compose -f .github/cluster-upstream.yml up -d

# periodically ping logs until a connection has been established; assume failure after 2 minutes
timeout 2m bash -c 'until docker logs dask-worker 2>&1 | grep -q "Starting established connection"; do sleep 1; done'

docker logs dask-scheduler
docker logs dask-worker
- name: Test with pytest while running an independent dask cluster
run: |
DASK_SQL_TEST_SCHEDULER="tcp://127.0.0.1:8786" pytest --junitxml=junit/test-cluster-results.xml --cov-report=xml -n auto tests --dist loadfile

import-dev:
name: "Test importing with bare requirements and upstream dev"
needs: build
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Cache local Maven repository
uses: actions/cache@v2
with:
path: ~/.m2/repository
key: ${{ runner.os }}-maven-v1-jdk11-${{ hashFiles('**/pom.xml') }}
- name: Set up Python
uses: conda-incubator/setup-miniconda@v2
with:
python-version: "3.8"
mamba-version: "*"
channels: dask/label/dev,conda-forge,nodefaults
channel-priority: strict
- name: Download the pre-build jar
uses: actions/download-artifact@v1
with:
name: jar
path: dask_sql/jar/
- name: Install upstream dev Dask / dask-ml
if: needs.detect-ci-trigger.outputs.triggered == 'true'
run: |
mamba update dask
python -m pip install --no-deps git+https://github.com/dask/dask-ml
- name: Install dependencies and nothing else
run: |
pip install -e .

which python
pip list
mamba list
- name: Try to import dask-sql
run: |
python -c "import dask_sql; print('ok')"

report-failures:
name: Open issue for upstream dev failures
needs: test-dev
needs: [test-dev, cluster-dev]
if: |
always()
&& needs.test-dev.result == 'failure'
&& (
needs.test-dev.result == 'failure' || needs.cluster-dev.result == 'failure'
)
runs-on: ubuntu-latest
defaults:
run:
shell: bash
steps:
- uses: actions/checkout@v2
- name: Report failures
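Besides the nightly cron trigger, the workflow_dispatch event means this suite can also be started on demand. A sketch using the GitHub CLI, assumed installed and authenticated:

# Trigger the upstream test suite manually
gh workflow run test-upstream.yml --repo dask-contrib/dask-sql

# Check on recent runs of the workflow
gh run list --repo dask-contrib/dask-sql --workflow=test-upstream.yml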
34 changes: 17 additions & 17 deletions .github/workflows/test.yml
@@ -57,6 +57,7 @@ jobs:
use-mamba: true
python-version: ${{ matrix.python }}
channel-priority: strict
channels: ${{ needs.detect-ci-trigger.outputs.triggered == 'true' && 'dask/label/dev,conda-forge,nodefaults' || 'conda-forge,nodefaults' }}
activate-environment: dask-sql
environment-file: ${{ env.CONDA_FILE }}
- name: Setup Rust Toolchain
@@ -77,8 +78,7 @@ jobs:
- name: Optionally install upstream dev Dask / dask-ml
if: needs.detect-ci-trigger.outputs.triggered == 'true'
run: |
python -m pip install --no-deps git+https://github.com/dask/dask
python -m pip install --no-deps git+https://github.com/dask/distributed
mamba update dask
python -m pip install --no-deps git+https://github.com/dask/dask-ml
- name: Test with pytest
run: |
@@ -107,10 +107,11 @@ jobs:
with:
miniforge-variant: Mambaforge
use-mamba: true
python-version: "3.8"
python-version: "3.9"
channel-priority: strict
channels: ${{ needs.detect-ci-trigger.outputs.triggered == 'true' && 'dask/label/dev,conda-forge,nodefaults' || 'conda-forge,nodefaults' }}
activate-environment: dask-sql
environment-file: continuous_integration/environment-3.8-dev.yaml
environment-file: continuous_integration/environment-3.9-dev.yaml
- name: Setup Rust Toolchain
uses: actions-rs/toolchain@v1
id: rust-toolchain
@@ -127,18 +128,23 @@ jobs:
which python
pip list
mamba list
- name: Optionally install upstream dev Dask / dask-ml
- name: Optionally install upstream dev dask-ml
if: needs.detect-ci-trigger.outputs.triggered == 'true'
run: |
python -m pip install --no-deps git+https://github.com/dask/dask
python -m pip install --no-deps git+https://github.com/dask/distributed
mamba update dask
python -m pip install --no-deps git+https://github.com/dask/dask-ml
- name: run a dask cluster
env:
UPSTREAM: ${{ needs.detect-ci-trigger.outputs.triggered }}
run: |
docker-compose -f .github/docker-compose.yaml up -d
if [[ $UPSTREAM == "true" ]]; then
docker-compose -f .github/cluster-upstream.yml up -d
else
docker-compose -f .github/cluster.yml up -d
fi

# Wait for installation
sleep 40
# periodically ping logs until a connection has been established; assume failure after 2 minutes
timeout 2m bash -c 'until docker logs dask-worker 2>&1 | grep -q "Starting established connection"; do sleep 1; done'

docker logs dask-scheduler
docker logs dask-worker
@@ -157,7 +163,7 @@ jobs:
with:
python-version: "3.8"
mamba-version: "*"
channels: conda-forge,defaults
channels: ${{ needs.detect-ci-trigger.outputs.triggered == 'true' && 'dask/label/dev,conda-forge,nodefaults' || 'conda-forge,nodefaults' }}
channel-priority: strict
- name: Install dependencies and nothing else
run: |
@@ -167,12 +173,6 @@ jobs:
which python
pip list
mamba list
- name: Optionally install upstream dev Dask / dask-ml
if: needs.detect-ci-trigger.outputs.triggered == 'true'
run: |
python -m pip install --no-deps git+https://github.com/dask/dask
python -m pip install --no-deps git+https://github.com/dask/distributed
python -m pip install --no-deps git+https://github.com/dask/dask-ml
- name: Try to import dask-sql
run: |
python -c "import dask_sql; print('ok')"
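The channels lines added above lean on GitHub Actions' expression syntax, where `cond && 'a' || 'b'` acts as a ternary: the dask dev channel is prepended only when the upstream trigger fired. An illustrative bash analogue of that selection logic (variable names hypothetical):

# Same select-one-of-two-strings pattern, expressed in bash
TRIGGERED="true"  # stands in for needs.detect-ci-trigger.outputs.triggered
CHANNELS=$([[ "$TRIGGERED" == "true" ]] \
  && echo "dask/label/dev,conda-forge,nodefaults" \
  || echo "conda-forge,nodefaults")
echo "$CHANNELS"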
57 changes: 29 additions & 28 deletions continuous_integration/environment-3.10-dev.yaml
@@ -3,40 +3,41 @@ channels:
- conda-forge
- nodefaults
dependencies:
- adagio>=0.2.3
- antlr4-python3-runtime>=4.9.2, <4.10.0 # Remove max pin after qpd(fugue dependency) updates their conda recipe
- black=22.3.0
- ciso8601>=2.2.0
- dask-ml>=2022.1.22
- dask>=2022.3.0
- fastapi>=0.61.1
- fs>=2.4.11
- fastapi>=0.69.0
- intake>=0.6.0
- isort=5.7.0
- jsonschema>=4.4.0
- lightgbm>=3.2.1
- mlflow>=1.19.0
- mock>=4.0.3
- nest-asyncio>=1.4.3
- pandas>=1.0.0 # below 1.0, there were no nullable ext. types
- pip=20.2.4
- pre-commit>=2.11.1
- prompt_toolkit>=3.0.8
- psycopg2>=2.9.1
- pygments>=2.7.1
- pyhive>=0.6.4
- pytest-cov>=2.10.1
- jsonschema
- lightgbm
- maturin>=0.12.8
- mlflow
- mock
- nest-asyncio
- pandas>=1.1.2
- pre-commit
- prompt_toolkit
- psycopg2
- pyarrow>=3.0.0
- pygments
- pyhive
- pytest-cov
- pytest-xdist
- pytest>=6.0.1
- pytest
- python=3.10
- scikit-learn>=0.24.2
- sphinx>=3.2.1
- tpot>=0.11.7
- triad>=0.5.4
- rust>=1.60.0
- scikit-learn>=1.0.0
- setuptools-rust>=1.1.2
- sphinx
- tpot
- tzlocal>=2.1
- uvicorn>=0.11.3
- maturin>=0.12.8
- setuptools-rust>=1.1.2
- rust>=1.60.0
# fugue dependencies; remove when we conda install fugue
- adagio
- antlr4-python3-runtime<4.10
- ciso8601
- fs
- pip
- qpd
- triad
- pip:
- fugue[sql]>=0.5.3
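To reproduce this CI environment locally, the file can be fed straight to the solver. A minimal sketch, assuming mamba is available; the environment name dask-sql is an assumption, since the file's name: field falls outside this hunk:

# Solve and activate the 3.10 dev environment
mamba env create -n dask-sql -f continuous_integration/environment-3.10-dev.yaml
conda activate dask-sql

# fugue arrives via the trailing pip: section rather than conda
python -c "import fugue; print(fugue.__version__)"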