diff --git a/dask_planner/.cargo/config.toml b/.cargo/config.toml similarity index 100% rename from dask_planner/.cargo/config.toml rename to .cargo/config.toml diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 527d01fa2..1ff63a673 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -2,4 +2,7 @@ * @ayushdg @charlesbluca @galipremsagar # rust codeowners -dask_planner/ @ayushdg @charlesbluca @galipremsagar @jdye64 +.cargo/ @ayushdg @charlesbluca @galipremsagar @jdye64 +src/ @ayushdg @charlesbluca @galipremsagar @jdye64 +Cargo.toml @ayushdg @charlesbluca @galipremsagar @jdye64 +Cargo.lock @ayushdg @charlesbluca @galipremsagar @jdye64 diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index 20294124a..833f37293 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -6,10 +6,9 @@ on: pull_request: paths: - setup.py - - dask_planner/Cargo.toml - - dask_planner/Cargo.lock - - dask_planner/pyproject.toml - - dask_planner/rust-toolchain.toml + - Cargo.toml + - Cargo.lock + - pyproject.toml - continuous_integration/recipe/** - .github/workflows/conda.yml schedule: @@ -34,7 +33,9 @@ jobs: fail-fast: false matrix: python: ["3.8", "3.9", "3.10"] - arch: ["linux-64", "linux-aarch64"] + # FIXME: aarch64 builds are consuming too much memory to run on GHA + # arch: ["linux-64", "linux-aarch64"] + arch: ["linux-64"] steps: - uses: actions/checkout@v3 with: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 3795b4823..6d63f6373 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -15,83 +15,135 @@ concurrency: env: upload: ${{ github.event_name == 'release' && github.repository == 'dask-contrib/dask-sql' }} -# Required shell entrypoint to have properly activated conda environments -defaults: - run: - shell: bash -l {0} - jobs: - wheels: - name: Build and publish py3.${{ matrix.python }} wheels on ${{ matrix.os }} - runs-on: ${{ matrix.os }} + linux: + name: Build and 
publish wheels for linux ${{ matrix.target }} + runs-on: ubuntu-latest strategy: fail-fast: false matrix: - os: [ubuntu-latest, windows-latest, macos-latest] - python: ["8", "9", "10"] # 3.x + target: [x86_64, aarch64] steps: - uses: actions/checkout@v3 + - name: Install Protoc + uses: arduino/setup-protoc@v1 + if: matrix.target == 'aarch64' + with: + version: '3.x' + repo-token: ${{ secrets.GITHUB_TOKEN }} + - uses: actions/setup-python@v4 + with: + python-version: '3.10' + - name: Build wheels for x86_64 + if: matrix.target == 'x86_64' + uses: PyO3/maturin-action@v1 + with: + target: ${{ matrix.target }} + args: --release --out dist + sccache: 'true' + manylinux: '2_17' + before-script-linux: > + DOWNLOAD_URL=$(curl --retry 6 --retry-delay 10 -s https://api.github.com/repos/protocolbuffers/protobuf/releases/latest | grep -o '"browser_download_url": "[^"]*' | cut -d'"' -f4 | grep "\linux-x86_64.zip$") && + curl --retry 6 --retry-delay 10 -LO $DOWNLOAD_URL && + unzip protoc-*-linux-x86_64.zip -d $HOME/.local + docker-options: --env PROTOC=/root/.local/bin/protoc + - name: Build wheels for aarch64 + if: matrix.target == 'aarch64' + uses: PyO3/maturin-action@v1 + with: + target: ${{ matrix.target }} + args: --release --out dist --zig + sccache: 'true' + manylinux: '2_17' + - name: Check dist files + run: | + pip install twine + + twine check dist/* + ls -lh dist/ + - name: Upload binary wheels + uses: actions/upload-artifact@v3 with: - fetch-depth: 0 + name: wheels for linux ${{ matrix.target }} + path: dist/* + - name: Publish package + if: env.upload == 'true' + env: + TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} + TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + run: twine upload dist/* + + windows: + name: Build and publish wheels for windows + runs-on: windows-latest + steps: + - uses: actions/checkout@v3 - name: Install Protoc - if: matrix.os != 'ubuntu-latest' uses: arduino/setup-protoc@v1 with: version: '3.x' repo-token: ${{ secrets.GITHUB_TOKEN }} - - name: 
Set up QEMU for linux-aarch64 - if: matrix.os == 'ubuntu-latest' - uses: docker/setup-qemu-action@v2 + - uses: actions/setup-python@v4 with: - platforms: arm64 - - name: Add rust toolchain target for macos-aarch64 - if: matrix.os == 'macos-latest' - run: rustup target add aarch64-apple-darwin + python-version: '3.10' + architecture: x64 - name: Build wheels - uses: pypa/cibuildwheel@v2.11.3 + uses: PyO3/maturin-action@v1 + with: + target: x64 + args: --release --out dist + sccache: 'true' + - name: Check dist files + run: | + pip install twine + + twine check dist/* + ls dist/ + - name: Upload binary wheels + uses: actions/upload-artifact@v3 + with: + name: wheels for windows + path: dist/* + - name: Publish package + if: env.upload == 'true' env: - CIBW_BUILD: 'cp3${{ matrix.python }}-*' - CIBW_SKIP: '*musllinux*' - CIBW_ARCHS_LINUX: 'aarch64 x86_64' - CIBW_ARCHS_WINDOWS: 'AMD64' - CIBW_ARCHS_MACOS: 'x86_64 arm64' - # Without CARGO_NET_GIT_FETCH_WITH_CLI we oom (https://github.com/rust-lang/cargo/issues/10583) - CIBW_ENVIRONMENT_LINUX: > - CARGO_NET_GIT_FETCH_WITH_CLI="true" - PATH="$HOME/.cargo/bin:$HOME/.local/bin:$PATH" - CIBW_ENVIRONMENT_WINDOWS: 'PATH="$UserProfile\.cargo\bin;$PATH"' - CIBW_BEFORE_BUILD: 'pip install -U setuptools-rust' - CIBW_BEFORE_BUILD_LINUX: > - ARCH=$([ $(uname -m) == x86_64 ] && echo x86_64 || echo aarch_64) && - DOWNLOAD_URL=$(curl --retry 6 --retry-delay 10 -s https://api.github.com/repos/protocolbuffers/protobuf/releases/latest | grep -o '"browser_download_url": "[^"]*' | cut -d'"' -f4 | grep "\linux-${ARCH}.zip$") && - curl --retry 6 --retry-delay 10 -LO $DOWNLOAD_URL && - unzip protoc-*-linux-$ARCH.zip -d $HOME/.local && - protoc --version && - pip install -U setuptools-rust && - pip list && - curl --retry 6 --retry-delay 10 https://sh.rustup.rs -sSf | sh -s -- --default-toolchain=stable --profile=minimal -y && - rustup show + TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} + TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + run: twine 
upload dist/* + + macos: + name: Build and publish wheels for macos ${{ matrix.target }} + runs-on: macos-latest + strategy: + fail-fast: false + matrix: + target: [x86_64, aarch64] + steps: + - uses: actions/checkout@v3 + - name: Install Protoc + uses: arduino/setup-protoc@v1 + with: + version: '3.x' + repo-token: ${{ secrets.GITHUB_TOKEN }} + - uses: actions/setup-python@v4 with: - package-dir: . - output-dir: dist - config-file: "dask_planner/pyproject.toml" - - name: Set up Python - uses: conda-incubator/setup-miniconda@v2.2.0 + python-version: '3.10' + - name: Build wheels + uses: PyO3/maturin-action@v1 with: - miniforge-variant: Mambaforge - use-mamba: true - python-version: "3.8" - channel-priority: strict + target: ${{ matrix.target }} + args: --release --out dist + sccache: 'true' - name: Check dist files run: | - mamba install twine + pip install twine twine check dist/* ls -lh dist/ - name: Upload binary wheels uses: actions/upload-artifact@v3 with: - name: wheels for py3.${{ matrix.python }} on ${{ matrix.os }} + name: wheels for macos ${{ matrix.target }} path: dist/* - name: Publish package if: env.upload == 'true' @@ -99,27 +151,23 @@ jobs: TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} run: twine upload dist/* + sdist: - name: Build and publish source distribution runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 + - name: Build sdist + uses: PyO3/maturin-action@v1 with: - fetch-depth: 0 - - name: Set up Python - uses: conda-incubator/setup-miniconda@v2.2.0 + command: sdist + args: --out dist + - uses: actions/setup-python@v4 with: - miniforge-variant: Mambaforge - use-mamba: true - python-version: "3.8" - channel-priority: strict - - name: Build source distribution - run: | - mamba install setuptools-rust twine - - python setup.py sdist + python-version: '3.10' - name: Check dist files run: | + pip install twine + twine check dist/* ls -lh dist/ - name: Publish source distribution diff --git 
a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 7e983172b..a9eeab1ab 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -51,7 +51,6 @@ jobs: - name: Optionally update upstream dependencies if: needs.detect-ci-trigger.outputs.triggered == 'true' run: | - cd dask_planner bash update-dependencies.sh - name: Install Protoc uses: arduino/setup-protoc@v1 @@ -60,11 +59,9 @@ jobs: repo-token: ${{ secrets.GITHUB_TOKEN }} - name: Check workspace in debug mode run: | - cd dask_planner cargo check - name: Check workspace in release mode run: | - cd dask_planner cargo check --release # test the crate @@ -84,7 +81,6 @@ jobs: - name: Optionally update upstream dependencies if: needs.detect-ci-trigger.outputs.triggered == 'true' run: | - cd dask_planner bash update-dependencies.sh - name: Install Protoc uses: arduino/setup-protoc@v1 @@ -93,5 +89,4 @@ jobs: repo-token: ${{ secrets.GITHUB_TOKEN }} - name: Run tests run: | - cd dask_planner cargo test diff --git a/.github/workflows/test-upstream.yml b/.github/workflows/test-upstream.yml index 0305a3ab4..e6c30bf1a 100644 --- a/.github/workflows/test-upstream.yml +++ b/.github/workflows/test-upstream.yml @@ -68,11 +68,10 @@ jobs: - name: Optionally update upstream cargo dependencies if: env.which_upstream == 'DataFusion' run: | - cd dask_planner bash update-dependencies.sh - name: Build the Rust DataFusion bindings run: | - python setup.py build install + maturin develop - name: Install hive testing dependencies if: matrix.os == 'ubuntu-latest' run: | @@ -124,11 +123,9 @@ jobs: env: UPDATE_ALL_CARGO_DEPS: false run: | - cd dask_planner bash update-dependencies.sh - name: Install dependencies and nothing else run: | - mamba install setuptools-rust pip install -e . 
-vv which python diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e9080cf2f..745cdcfe0 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -72,7 +72,7 @@ jobs: shared-key: test - name: Build the Rust DataFusion bindings run: | - python setup.py build install + maturin develop - name: Install hive testing dependencies if: matrix.os == 'ubuntu-latest' run: | @@ -118,7 +118,6 @@ jobs: repo-token: ${{ secrets.GITHUB_TOKEN }} - name: Install dependencies and nothing else run: | - mamba install "setuptools-rust>=1.5.2" pip install -e . -vv which python diff --git a/.gitignore b/.gitignore index 245817fc1..d41df8a68 100644 --- a/.gitignore +++ b/.gitignore @@ -46,23 +46,15 @@ venv # IDE .idea .vscode -planner/.classpath -planner/.project -planner/.settings/ -planner/.idea -planner/*.iml *.swp # project specific -planner/dependency-reduced-pom.xml -planner/target/ -dask_sql/jar -.next/ dask-worker-space/ node_modules/ docs/source/_build/ tests/unit/queries tests/unit/data +target/* # Ignore development specific local testing files dev_tests diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ed701014a..094c4ada1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -20,9 +20,9 @@ repos: rev: v1.0 hooks: - id: cargo-check - args: ['--manifest-path', './dask_planner/Cargo.toml', '--verbose', '--'] + args: ['--manifest-path', './Cargo.toml', '--verbose', '--'] - id: clippy - args: ['--manifest-path', './dask_planner/Cargo.toml', '--verbose', '--', '-D', 'warnings'] + args: ['--manifest-path', './Cargo.toml', '--verbose', '--', '-D', 'warnings'] - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.2.0 hooks: @@ -39,4 +39,4 @@ repos: entry: cargo +nightly fmt language: system types: [rust] - args: ['--manifest-path', './dask_planner/Cargo.toml', '--verbose', '--'] + args: ['--manifest-path', './Cargo.toml', '--verbose', '--'] diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 
9ab31230f..0f5adc85a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -39,17 +39,17 @@ DataFusion provides Dask-SQL with key functionality. ### Building Building the Dask-SQL Rust codebase is a straightforward process. If you create and activate the Dask-SQL Conda environment the Rust compiler and all necessary components will be installed for you during that process and therefore requires no further manual setup. -`setuptools-rust` is used by Dask-SQL for building and bundling the resulting Rust binaries. This helps make building and installing the Rust binaries feel much more like a native Python workflow. +`maturin` is used by Dask-SQL for building and bundling the resulting Rust binaries. This helps make building and installing the Rust binaries feel much more like a native Python workflow. -More details about the building setup can be found at [setup.py](setup.py) and searching for `rust_extensions` which is the hook for the Rust code build and inclusion. +More details about the building setup can be found in [pyproject.toml](pyproject.toml) and [Cargo.toml](Cargo.toml) -Note that while `setuptools-rust` is used by CI and should be used during your development cycle, if the need arises to do something more specific that is not yet supported by `setuptools-rust` you can opt to use `cargo` directly from the command line. +Note that while `maturin` is used by CI and should be used during your development cycle, if the need arises to do something more specific that is not yet supported by `maturin` you can opt to use `cargo` directly from the command line. #### Building with Python -Building Dask-SQL is straightforward with Python. To build run ```python setup.py install```. This will build both the Rust and Python codebase and install it into your locally activated conda environment. While not required, if you have updated dependencies for Rust you might prefer a clean build. 
To clean your setup run ```python setup.py clean``` and then run ```python setup.py install``` +Building Dask-SQL is straightforward with Python. To build run ```pip install .```. This will build both the Rust and Python codebase and install it into your locally activated conda environment; note that if your Rust dependencies have been updated, this command must be rerun to rebuild the Rust codebase. #### DataFusion Modules -DataFusion is broken down into a few modules. We consume those modules in our [Cargo.toml](dask_planner/Cargo.toml). The modules that we use currently are +DataFusion is broken down into a few modules. We consume those modules in our [Cargo.toml](Cargo.toml). The modules that we use currently are - `datafusion-common` - Datastructures and core logic - `datafusion-expr` - Expression based logic and operators @@ -57,9 +57,7 @@ DataFusion is broken down into a few modules. We consume those modules in our [C - `datafusion-optimizer` - Optimization logic and datastructures for modifying current plans into more efficient ones. #### Retrieving Upstream Dependencies -During development you might find yourself needing some upstream DataFusion changes not present in the projects current version. Luckily this can easily be achieved by updating [Cargo.toml](dask_planner/Cargo.toml) and changing the `rev` to the SHA of the version you need. Note that the same SHA should be used for all DataFusion modules. - -After updating the `Cargo.toml` file the codebase can be re-built to reflect those changes by running `python setup.py install` +During development you might find yourself needing some upstream DataFusion changes not present in the projects current version. Luckily this can easily be achieved by updating [Cargo.toml](Cargo.toml) and changing the `rev` to the SHA of the version you need. Note that the same SHA should be used for all DataFusion modules. 
#### Local Documentation Sometimes when building against the latest Github commits for DataFusion you may find that the features you are consuming do not have their documentation public yet. In this case it can be helpful to build the DataFusion documentation locally so that it can be referenced to assist with development. Here is a rough outline for building that documentation locally. @@ -72,40 +70,40 @@ Sometimes when building against the latest Github commits for DataFusion you may ### Datastructures While working in the Rust codebase there are a few datastructures that you should make yourself familiar with. This section does not aim to verbosely list out all of the datastructure with in the project but rather just the key datastructures that you are likely to encounter while working on almost any feature/issue. The aim is to give you a better overview of the codebase without having to manually dig through the all the source code. -- [`PyLogicalPlan`](dask_planner/src/sql/logical.rs) -> [DataFusion LogicalPlan](https://docs.rs/datafusion/latest/datafusion/logical_plan/enum.LogicalPlan.html) +- [`PyLogicalPlan`](src/sql/logical.rs) -> [DataFusion LogicalPlan](https://docs.rs/datafusion/latest/datafusion/logical_plan/enum.LogicalPlan.html) - Often encountered in Python code with variable name `rel` - Python serializable umbrella representation of the entire LogicalPlan that was generated by DataFusion - Provides access to `DaskTable` instances and type information for each table - Access to individual nodes in the logical plan tree. Ex: `TableScan` -- [`DaskSQLContext`](dask_planner/src/sql.rs) +- [`DaskSQLContext`](src/sql.rs) - Analogous to Python `Context` - Contains metadata about the tables, schemas, functions, operators, and configurations that are persent within the current execution context - When adding custom functions/UDFs this is the location that you would register them - Entry point for parsing SQL strings to sql node trees. 
This is the location Python will begin its interactions with Rust -- [`PyExpr`](dask_planner/src/expression.rs) -> [DataFusion Expr](https://docs.rs/datafusion/latest/datafusion/prelude/enum.Expr.html) +- [`PyExpr`](src/expression.rs) -> [DataFusion Expr](https://docs.rs/datafusion/latest/datafusion/prelude/enum.Expr.html) - Arguably where most of your time will be spent - Represents a single node in sql tree. Ex: `avg(age)` from `SELECT avg(age) FROM people` - Is associate with a single `RexType` - Can contain literal values or represent function calls, `avg()` for example - The expressions "index" in the tree can be retrieved by calling `PyExpr.index()` on an instance. This is useful when mapping frontend column names in Dask code to backend Dataframe columns - Certain `PyExpr`s contain operands. Ex: `2 + 2` would contain 3 operands. 1) A literal `PyExpr` instance with value 2 2) Another literal `PyExpr` instance with a value of 2. 3) A `+` `PyExpr` representing the addition of the 2 literals. 
-- [`DaskSqlOptimizer`](dask_planner/src/sql/optimizer.rs) +- [`DaskSqlOptimizer`](src/sql/optimizer.rs) - Registering location for all Dask-SQL specific logical plan optimizations - Optimizations that are written either custom or use from another source, DataFusion, are registered here in the order they are wished to be executed - Represents functions that modify/convert an original `PyLogicalPlan` into another `PyLogicalPlan` that would be more efficient when running in the underlying Dask framework -- [`RelDataType`](dask_planner/src/sql/types/rel_data_type.rs) +- [`RelDataType`](src/sql/types/rel_data_type.rs) - Not a fan of this name, was chosen to match existing Calcite logic - Represents a "row" in a table - Contains a list of "columns" that are present in that row - - [RelDataTypeField](dask_planner/src/sql/types/rel_data_type_field.rs) -- [RelDataTypeField](dask_planner/src/sql/types/rel_data_type_field.rs) + - [RelDataTypeField](src/sql/types/rel_data_type_field.rs) +- [RelDataTypeField](src/sql/types/rel_data_type_field.rs) - Represents an individual column in a table - Contains: - `qualifier` - schema the field belongs to - `name` - name of the column/field - `data_type` - `DaskTypeMap` instance containing information about the SQL type and underlying Arrow DataType - `index` - location of the field in the LogicalPlan -- [DaskTypeMap](dask_planner/src/sql/types.rs) +- [DaskTypeMap](src/sql/types.rs) - Maps a conventional SQL type to an underlying Arrow DataType diff --git a/dask_planner/Cargo.lock b/Cargo.lock similarity index 68% rename from dask_planner/Cargo.lock rename to Cargo.lock index ee67cfb89..c3f7d8600 100644 --- a/dask_planner/Cargo.lock +++ b/Cargo.lock @@ -29,9 +29,9 @@ dependencies = [ [[package]] name = "aho-corasick" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67fc08ce920c31afb70f013dcce1bfc3a3195de6a228474e45e1f145b36f8d04" +checksum = 
"43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41" dependencies = [ "memchr", ] @@ -51,6 +51,18 @@ dependencies = [ "alloc-no-stdlib", ] +[[package]] +name = "allocator-api2" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56fc6cf8dc8c4158eed8649f9b8b0ea1518eb62b544fe9490d66fa0b349eafe9" + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + [[package]] name = "android_system_properties" version = "0.1.5" @@ -85,8 +97,8 @@ dependencies = [ "serde", "serde_json", "snap", - "strum", - "strum_macros", + "strum 0.24.1", + "strum_macros 0.24.3", "thiserror", "typed-builder", "uuid", @@ -107,15 +119,15 @@ checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" [[package]] name = "arrayvec" -version = "0.7.2" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" +checksum = "8868f09ff8cea88b079da74ae569d9b8c62a23c68c746240b704ee6f7525c89c" [[package]] name = "arrow" -version = "36.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "990dfa1a9328504aa135820da1c95066537b69ad94c04881b785f64328e0fa6b" +checksum = "2feeebd77b34b0bc88f224e06d01c27da4733997cc4789a4e056196656cdc59a" dependencies = [ "ahash", "arrow-arith", @@ -136,9 +148,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "36.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2b2e52de0ab54173f9b08232b7184c26af82ee7ab4ac77c83396633c90199fa" +checksum = "7173f5dc49c0ecb5135f52565af33afd3fdc9a12d13bd6f9973e8b96305e4b2e" dependencies = [ "arrow-array", "arrow-buffer", @@ -151,9 +163,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "36.0.0" 
+version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e10849b60c17dbabb334be1f4ef7550701aa58082b71335ce1ed586601b2f423" +checksum = "63d7ea725f7d1f8bb2cffc53ef538557e95fc802e217d5be25122d402e22f3d0" dependencies = [ "ahash", "arrow-buffer", @@ -162,15 +174,15 @@ dependencies = [ "chrono", "chrono-tz", "half", - "hashbrown 0.13.2", + "hashbrown 0.14.0", "num", ] [[package]] name = "arrow-buffer" -version = "36.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0746ae991b186be39933147117f8339eb1c4bbbea1c8ad37e7bf5851a1a06ba" +checksum = "bdbe439e077f484e5000b9e1d47b5e4c0d15f2b311a8f5bcc682553d5d67a722" dependencies = [ "half", "num", @@ -178,9 +190,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "36.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b88897802515d7b193e38b27ddd9d9e43923d410a9e46307582d756959ee9595" +checksum = "93913cc14875770aa1eef5e310765e855effa352c094cb1c7c00607d0f37b4e1" dependencies = [ "arrow-array", "arrow-buffer", @@ -189,15 +201,16 @@ dependencies = [ "arrow-select", "chrono", "comfy-table", + "half", "lexical-core", "num", ] [[package]] name = "arrow-csv" -version = "36.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c8220d9741fc37961262710ceebd8451a5b393de57c464f0267ffdda1775c0a" +checksum = "ef55b67c55ed877e6fe7b923121c19dae5e31ca70249ea2779a17b58fb0fbd9a" dependencies = [ "arrow-array", "arrow-buffer", @@ -214,9 +227,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "36.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "533f937efa1aaad9dc86f6a0e382c2fa736a4943e2090c946138079bdf060cef" +checksum = "d4f4f4a3c54614126a71ab91f6631c9743eb4643d6e9318b74191da9dc6e028b" dependencies = [ "arrow-buffer", "arrow-schema", @@ -226,9 +239,9 @@ dependencies = [ 
[[package]] name = "arrow-ipc" -version = "36.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18b75296ff01833f602552dff26a423fc213db8e5049b540ca4a00b1c957e41c" +checksum = "d41a3659f984a524ef1c2981d43747b24d8eec78e2425267fcd0ef34ce71cd18" dependencies = [ "arrow-array", "arrow-buffer", @@ -240,9 +253,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "36.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e501d3de4d612c90677594896ca6c0fa075665a7ff980dc4189bb531c17e19f6" +checksum = "10b95faa95a378f56ef32d84cc0104ea998c39ef7cd1faaa6b4cebf8ea92846d" dependencies = [ "arrow-array", "arrow-buffer", @@ -251,17 +264,18 @@ dependencies = [ "arrow-schema", "chrono", "half", - "indexmap", + "indexmap 2.0.0", "lexical-core", "num", + "serde", "serde_json", ] [[package]] name = "arrow-ord" -version = "36.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33d2671eb3793f9410230ac3efb0e6d36307be8a2dac5fad58ac9abde8e9f01e" +checksum = "c68549a4284d9f8b39586afb8d5ff8158b8f0286353a4844deb1d11cf1ba1f26" dependencies = [ "arrow-array", "arrow-buffer", @@ -274,9 +288,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "36.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc11fa039338cebbf4e29cf709c8ac1d6a65c7540063d4a25f991ab255ca85c8" +checksum = "0a75a4a757afc301ce010adadff54d79d66140c4282ed3de565f6ccb716a5cf3" dependencies = [ "ahash", "arrow-array", @@ -284,23 +298,23 @@ dependencies = [ "arrow-data", "arrow-schema", "half", - "hashbrown 0.13.2", + "hashbrown 0.14.0", ] [[package]] name = "arrow-schema" -version = "36.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d04f17f7b86ded0b5baf98fe6123391c4343e031acc3ccc5fa604cc180bff220" +checksum = 
"2bebcb57eef570b15afbcf2d07d813eb476fde9f6dd69c81004d6476c197e87e" dependencies = [ - "bitflags 2.2.1", + "bitflags 2.3.2", ] [[package]] name = "arrow-select" -version = "36.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "163e35de698098ff5f5f672ada9dc1f82533f10407c7a11e2cd09f3bcf31d18a" +checksum = "f6e2943fa433a48921e914417173816af64eef61c0a3d448280e6c40a62df221" dependencies = [ "arrow-array", "arrow-buffer", @@ -311,24 +325,25 @@ dependencies = [ [[package]] name = "arrow-string" -version = "36.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfdfbed1b10209f0dc68e6aa4c43dc76079af65880965c7c3b73f641f23d4aba" +checksum = "bbc92ed638851774f6d7af1ad900b92bc1486746497511868b4298fcbcfa35af" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "arrow-select", + "num", "regex", - "regex-syntax 0.6.29", + "regex-syntax", ] [[package]] name = "async-compression" -version = "0.3.15" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "942c7cd7ae39e91bde4820d74132e9862e62c2f386c3aa90ccf55949f5bad63a" +checksum = "5b0122885821398cc923ece939e24d1056a2384ee719432397fa9db87230ff11" dependencies = [ "bzip2", "flate2", @@ -338,8 +353,8 @@ dependencies = [ "pin-project-lite", "tokio", "xz2", - "zstd 0.11.2+zstd.1.5.2", - "zstd-safe 5.0.2+zstd.1.5.2", + "zstd", + "zstd-safe", ] [[package]] @@ -372,9 +387,9 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "base64" -version = "0.21.0" +version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a4ddaa51a5bc52a6948f74c06d20aaaddb71924eab79b8c97a8c556e942d6a" +checksum = "604178f6c5c21f02dc555784810edfb88d34ac2c73b2eae109655649ee73ce3d" [[package]] name = "bitflags" @@ -384,9 +399,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" 
[[package]] name = "bitflags" -version = "2.2.1" +version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24a6904aef64d73cf10ab17ebace7befb918b82164785cb89907993be7f83813" +checksum = "6dbe3c979c178231552ecba20214a8272df4e09f232a87aef4320cf06539aded" [[package]] name = "blake2" @@ -399,9 +414,9 @@ dependencies = [ [[package]] name = "blake3" -version = "1.3.3" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42ae2468a89544a466886840aa467a25b766499f4f04bf7d9fcd10ecee9fccef" +checksum = "729b71f35bd3fa1a4c86b85d32c8b9069ea7fe14f7a53cfabb65f62d4265b888" dependencies = [ "arrayref", "arrayvec", @@ -441,32 +456,11 @@ dependencies = [ "alloc-stdlib", ] -[[package]] -name = "bstr" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3d4260bcc2e8fc9df1eac4919a720effeb63a3f0952f5bf4944adfa18897f09" -dependencies = [ - "memchr", - "once_cell", - "regex-automata", - "serde", -] - -[[package]] -name = "btoi" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dd6407f73a9b8b6162d8a2ef999fe6afd7cc15902ebf42c5cd296addf17e0ad" -dependencies = [ - "num-traits", -] - [[package]] name = "bumpalo" -version = "3.12.1" +version = "3.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b1ce199063694f33ffb7dd4e0ee620741495c32833cde5aa08f02a0bf96f0c8" +checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" [[package]] name = "byteorder" @@ -518,17 +512,14 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.24" +version = "0.4.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e3c5919066adf22df73762e50cffcde3a758f2a848b113b586d1f86728b673b" +checksum = "ec837a71355b28f6556dbd569b37b3f363091c0bd4b2e735674521b4c5fd9bc5" dependencies = [ + "android-tzdata", 
"iana-time-zone", - "js-sys", - "num-integer", "num-traits", "serde", - "time 0.1.45", - "wasm-bindgen", "winapi", ] @@ -554,30 +545,14 @@ dependencies = [ "phf_codegen", ] -[[package]] -name = "clru" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8191fa7302e03607ff0e237d4246cc043ff5b3cb9409d995172ba3bea16b807" - -[[package]] -name = "codespan-reporting" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" -dependencies = [ - "termcolor", - "unicode-width", -] - [[package]] name = "comfy-table" -version = "6.1.4" +version = "7.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e7b787b0dc42e8111badfdbe4c3059158ccb2db8780352fa1b01e8ccf45cc4d" +checksum = "9ab77dbd8adecaf3f0db40581631b995f312a8a5ae3aa9993188bb8f23d83a5b" dependencies = [ - "strum", - "strum_macros", + "strum 0.24.1", + "strum_macros 0.24.3", "unicode-width", ] @@ -605,9 +580,9 @@ dependencies = [ [[package]] name = "constant_time_eq" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13418e745008f7349ec7e449155f419a61b92b58a99cc3616942b926825ec76b" +checksum = "21a53c0a4d288377e7415b53dcfc3c04da5cdc2cc95c8d5ac178b58f0b861ad6" [[package]] name = "core-foundation-sys" @@ -651,9 +626,9 @@ dependencies = [ [[package]] name = "csv" -version = "1.2.1" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b015497079b9a9d69c02ad25de6c0a6edef051ea6360a327d0bd05802ef64ad" +checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086" dependencies = [ "csv-core", "itoa", @@ -670,50 +645,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "cxx" -version = "1.0.94" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f61f1b6389c3fe1c316bf8a4dccc90a38208354b330925bce1f74a6c4756eb93" -dependencies = [ - "cc", - "cxxbridge-flags", - "cxxbridge-macro", - "link-cplusplus", -] - -[[package]] -name = "cxx-build" -version = "1.0.94" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12cee708e8962df2aeb38f594aae5d827c022b6460ac71a7a3e2c3c2aae5a07b" -dependencies = [ - "cc", - "codespan-reporting", - "once_cell", - "proc-macro2", - "quote", - "scratch", - "syn 2.0.23", -] - -[[package]] -name = "cxxbridge-flags" -version = "1.0.94" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7944172ae7e4068c533afbb984114a56c46e9ccddda550499caa222902c7f7bb" - -[[package]] -name = "cxxbridge-macro" -version = "1.0.94" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2345488264226bf682893e25de0769f3360aac9957980ec49361b083ddaa5bc5" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.23", -] - [[package]] name = "dashmap" version = "5.4.0" @@ -728,27 +659,29 @@ dependencies = [ ] [[package]] -name = "dask_planner" -version = "0.1.0" +name = "dask-sql" +version = "2023.6.0" dependencies = [ "async-trait", "datafusion-python", "env_logger", "log", "pyo3", - "pyo3-build-config 0.19.1", + "pyo3-build-config", "pyo3-log", ] [[package]] name = "datafusion" -version = "22.0.0" +version = "28.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9bdb93fee4f30368f1f71bfd5cd28882ec9fab0183db7924827b76129d33227c" +checksum = "5ddbcb2dda5b5033537457992ebde78938014390b2b19f9f4282e3be0e18b0c3" dependencies = [ "ahash", "apache-avro", "arrow", + "arrow-array", + "arrow-schema", "async-compression", "async-trait", "bytes", @@ -760,14 +693,14 @@ dependencies = [ "datafusion-expr", "datafusion-optimizer", "datafusion-physical-expr", - "datafusion-row", "datafusion-sql", "flate2", "futures", "glob", - "hashbrown 0.13.2", - "indexmap", - "itertools", + "half", + "hashbrown 0.14.0", + "indexmap 2.0.0", 
+ "itertools 0.11.0", "lazy_static", "log", "num-traits", @@ -782,19 +715,18 @@ dependencies = [ "sqlparser", "tempfile", "tokio", - "tokio-stream", "tokio-util", "url", "uuid", "xz2", - "zstd 0.12.3+zstd.1.5.2", + "zstd", ] [[package]] name = "datafusion-common" -version = "22.0.0" +version = "28.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e82401ce129e601d406012b6d718f8978ba84c386e1c342fa155877120d68824" +checksum = "85fbb7b4da925031311743ab96662d55f0f7342d3692744f184f99b2257ef435" dependencies = [ "apache-avro", "arrow", @@ -809,14 +741,14 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "22.0.0" +version = "28.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b08b2078aed21a27239cd93f3015e492a58b0d50ebeeaf8d2236cf108ef583ce" +checksum = "5bb3617466d894eb0ad11d06bab1e6e89c571c0a27d660685d327d0c6e1e1ccd" dependencies = [ "dashmap", "datafusion-common", "datafusion-expr", - "hashbrown 0.13.2", + "hashbrown 0.14.0", "log", "object_store", "parking_lot", @@ -827,21 +759,24 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "22.0.0" +version = "28.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16b5b977ce9695fb4c67614266ec57f384fc11e9a9f9b3e6d0e62b9c5a9f2c1f" +checksum = "3bd8220a0dfcdfddcc785cd7e71770ef1ce54fbe1e08984e5adf537027ecb6de" dependencies = [ "ahash", "arrow", "datafusion-common", + "lazy_static", "sqlparser", + "strum 0.25.0", + "strum_macros 0.25.1", ] [[package]] name = "datafusion-optimizer" -version = "22.0.0" +version = "28.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0b2bb9e73ed778d1bc5af63a270f0154bf6eab5099c77668a6362296888e46b" +checksum = "1d685a100c66952aaadd0cbe766df46d1887d58fc8bcf3589e6387787f18492b" dependencies = [ "arrow", "async-trait", @@ -849,34 +784,37 @@ dependencies = [ "datafusion-common", "datafusion-expr", "datafusion-physical-expr", - "hashbrown 
0.13.2", - "itertools", + "hashbrown 0.14.0", + "itertools 0.11.0", "log", - "regex-syntax 0.6.29", + "regex-syntax", ] [[package]] name = "datafusion-physical-expr" -version = "22.0.0" +version = "28.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80cd8ea5ab0a07b1b2a3e17d5909f1b1035bd129ffeeb5c66842a32e682f8f79" +checksum = "0f2c635da9b05b4b4c6c8d935f46fd99f9b6225f834091cf4e3c8a045b68beab" dependencies = [ "ahash", "arrow", "arrow-array", "arrow-buffer", "arrow-schema", + "base64", "blake2", "blake3", "chrono", "datafusion-common", "datafusion-expr", - "datafusion-row", "half", - "hashbrown 0.13.2", - "indexmap", - "itertools", + "hashbrown 0.14.0", + "hex", + "indexmap 2.0.0", + "itertools 0.11.0", "lazy_static", + "libc", + "log", "md-5", "paste", "petgraph", @@ -889,8 +827,9 @@ dependencies = [ [[package]] name = "datafusion-python" -version = "22.0.0" -source = "git+https://github.com/apache/arrow-datafusion-python.git?rev=9493638#94936380e58a266f5dd5de6b70a06d3aa36fbe22" +version = "28.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a2441774e84875ae16a8b5277090ed6ab77ce94ab1820c315ed02cd3813de29" dependencies = [ "async-trait", "datafusion", @@ -903,33 +842,23 @@ dependencies = [ "mimalloc", "object_store", "parking_lot", + "prost", + "prost-types", "pyo3", - "pyo3-build-config 0.18.3", + "pyo3-build-config", "rand", - "regex-syntax 0.6.29", + "regex-syntax", "syn 2.0.23", "tokio", "url", "uuid", ] -[[package]] -name = "datafusion-row" -version = "22.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a95d6badab19fd6e9195fdc5209ac0a7e5ce9bcdedc67767b9ffc1b4e645760" -dependencies = [ - "arrow", - "datafusion-common", - "paste", - "rand", -] - [[package]] name = "datafusion-sql" -version = "22.0.0" +version = "28.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"37a78f8fc67123c4357e63bc0c87622a2a663d26f074958d749a633d0ecde90f" +checksum = "b3ef8abf4dd84d3f20c910822b52779c035ab7f4f2d5e7125ede3bae618e9de8" dependencies = [ "arrow", "arrow-schema", @@ -941,63 +870,38 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "22.0.0" +version = "28.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae6ed64a2005f0d78f2b1b3ec3f8148183f4523d5d364e5367115f8d8a82b7df" +checksum = "2c97d351bbd6bd6497e7c9606ddd3c00cd63e9d185d7ab96fc8a66cf3c449177" dependencies = [ "async-recursion", "chrono", "datafusion", - "itertools", + "itertools 0.11.0", "object_store", "prost", + "prost-types", "substrait", "tokio", ] [[package]] name = "digest" -version = "0.10.6" +version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8168378f4e5023e7218c89c891c0fd8ecdb5e5e4f18cb78f38cf245dd021e76f" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer", "crypto-common", "subtle", ] -[[package]] -name = "dirs" -version = "4.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca3aa72a6f96ea37bbc5aa912f6788242832f75369bdfdadcb0e38423f100059" -dependencies = [ - "dirs-sys", -] - -[[package]] -name = "dirs-sys" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b1d1d91c932ef41c0f2663aa8b0ca0342d444d842c06914aa0a7e352d0bada6" -dependencies = [ - "libc", - "redox_users", - "winapi", -] - [[package]] name = "doc-comment" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" -[[package]] -name = "dunce" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56ce8c6da7551ec6c462cbaf3bfbc75131ebbfa1c944aeaa9dab51ca1c5f0c3b" - [[package]] name = "dyn-clone" version = "1.0.11" @@ -1032,6 +936,12 @@ 
dependencies = [ "termcolor", ] +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + [[package]] name = "errno" version = "0.3.1" @@ -1040,7 +950,7 @@ checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a" dependencies = [ "errno-dragonfly", "libc", - "windows-sys 0.48.0", + "windows-sys", ] [[package]] @@ -1062,18 +972,6 @@ dependencies = [ "instant", ] -[[package]] -name = "filetime" -version = "0.2.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cbc844cecaee9d4443931972e1289c8ff485cb4cc2767cb03ca139ed6885153" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall 0.2.16", - "windows-sys 0.48.0", -] - [[package]] name = "fixedbitset" version = "0.4.2" @@ -1082,9 +980,9 @@ checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" [[package]] name = "flatbuffers" -version = "23.1.21" +version = "23.5.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77f5399c2c9c50ae9418e522842ad362f61ee48b346ac106807bd355a8a7c619" +checksum = "4dac53e22462d78c16d64a1cd22371b54cc3fe94aa15e7886a2fa6e5d1ab8640" dependencies = [ "bitflags 1.3.2", "rustc_version", @@ -1108,9 +1006,9 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] name = "form_urlencoded" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9c384f161156f5260c24a097c56119f9be8c798586aecc13afbcbe7b7e26bf8" +checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652" dependencies = [ "percent-encoding", ] @@ -1216,555 +1114,28 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.9" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c85e1d9ab2eadba7e5040d4e09cbd6d072b76a557ad64e797c2cb9d4da21d7e4" +checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" dependencies = [ "cfg-if", "libc", - "wasi 0.11.0+wasi-snapshot-preview1", -] - -[[package]] -name = "gix" -version = "0.43.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c256ea71cc1967faaefdaad15f334146b7c806f12460dcafd3afed845c8c78dd" -dependencies = [ - "gix-actor", - "gix-attributes", - "gix-config", - "gix-credentials", - "gix-date", - "gix-diff", - "gix-discover", - "gix-features 0.28.1", - "gix-glob", - "gix-hash 0.10.4", - "gix-hashtable", - "gix-index", - "gix-lock", - "gix-mailmap", - "gix-object", - "gix-odb", - "gix-pack", - "gix-path", - "gix-prompt", - "gix-ref", - "gix-refspec", - "gix-revision", - "gix-sec", - "gix-tempfile", - "gix-traverse", - "gix-url", - "gix-validate", - "gix-worktree", - "log", - "once_cell", - "signal-hook", - "smallvec", - "thiserror", - "unicode-normalization", -] - -[[package]] -name = "gix-actor" -version = "0.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc22b0cdc52237667c301dd7cdc6ead8f8f73c9f824e9942c8ebd6b764f6c0bf" -dependencies = [ - "bstr", - "btoi", - "gix-date", - "itoa", - "nom", - "thiserror", + "wasi", ] [[package]] -name = "gix-attributes" -version = "0.10.0" +name = "git2" +version = "0.17.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2231a25934a240d0a4b6f4478401c73ee81d8be52de0293eedbc172334abf3e1" -dependencies = [ - "bstr", - "gix-features 0.28.1", - "gix-glob", - "gix-path", - "gix-quote", - "thiserror", - "unicode-bom", -] - -[[package]] -name = "gix-bitmap" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55a95f4942360766c3880bdb2b4b57f1ef73b190fc424755e7fdf480430af618" -dependencies = [ - "thiserror", -] - -[[package]] -name = "gix-chunk" -version = "0.4.1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0d39583cab06464b8bf73b3f1707458270f0e7383cb24c3c9c1a16e6f792978" -dependencies = [ - "thiserror", -] - -[[package]] -name = "gix-command" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2c6f75c1e0f924de39e750880a6e21307194bb1ab773efe3c7d2d787277f8ab" -dependencies = [ - "bstr", -] - -[[package]] -name = "gix-config" -version = "0.20.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fbad5ce54a8fc997acc50febd89ec80fa6e97cb7f8d0654cb229936407489d8" -dependencies = [ - "bstr", - "gix-config-value", - "gix-features 0.28.1", - "gix-glob", - "gix-path", - "gix-ref", - "gix-sec", - "log", - "memchr", - "nom", - "once_cell", - "smallvec", - "thiserror", - "unicode-bom", -] - -[[package]] -name = "gix-config-value" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d09154c0c8677e4da0ec35e896f56ee3e338e741b9599fae06075edd83a4081c" +checksum = "7b989d6a7ca95a362cf2cfc5ad688b3a467be1f87e480b8dad07fee8c79b0044" dependencies = [ "bitflags 1.3.2", - "bstr", - "gix-path", "libc", - "thiserror", -] - -[[package]] -name = "gix-credentials" -version = "0.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "750b684197374518ea057e0a0594713e07683faa0a3f43c0f93d97f64130ad8d" -dependencies = [ - "bstr", - "gix-command", - "gix-config-value", - "gix-path", - "gix-prompt", - "gix-sec", - "gix-url", - "thiserror", -] - -[[package]] -name = "gix-date" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b96271912ce39822501616f177dea7218784e6c63be90d5f36322ff3a722aae2" -dependencies = [ - "bstr", - "itoa", - "thiserror", - "time 0.3.20", -] - -[[package]] -name = "gix-diff" -version = "0.28.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"103a0fa79b0d438f5ecb662502f052e530ace4fe1fe8e1c83c0c6da76d728e67" -dependencies = [ - "gix-hash 0.10.4", - "gix-object", - "imara-diff", - "thiserror", -] - -[[package]] -name = "gix-discover" -version = "0.16.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6eba8ba458cb8f4a6c33409b0fe650b1258655175a7ffd1d24fafd3ed31d880b" -dependencies = [ - "bstr", - "dunce", - "gix-hash 0.10.4", - "gix-path", - "gix-ref", - "gix-sec", - "thiserror", -] - -[[package]] -name = "gix-features" -version = "0.28.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b76f9a80f6dd7be66442ae86e1f534effad9546676a392acc95e269d0c21c22" -dependencies = [ - "crc32fast", - "flate2", - "gix-hash 0.10.4", - "libc", - "once_cell", - "prodash", - "sha1_smol", - "thiserror", - "walkdir", -] - -[[package]] -name = "gix-features" -version = "0.29.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf69b0f5c701cc3ae22d3204b671907668f6437ca88862d355eaf9bc47a4f897" -dependencies = [ - "gix-hash 0.11.1", - "libc", -] - -[[package]] -name = "gix-fs" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b37a1832f691fdc09910bd267f9a2e413737c1f9ec68c6e31f9e802616278a9" -dependencies = [ - "gix-features 0.29.0", -] - -[[package]] -name = "gix-glob" -version = "0.5.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93e43efd776bc543f46f0fd0ca3d920c37af71a764a16f2aebd89765e9ff2993" -dependencies = [ - "bitflags 1.3.2", - "bstr", -] - -[[package]] -name = "gix-hash" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a258595457bc192d1f1c59d0d168a1e34e2be9b97a614e14995416185de41a7" -dependencies = [ - "hex", - "thiserror", -] - -[[package]] -name = "gix-hash" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"078eec3ac2808cc03f0bddd2704cb661da5c5dc33b41a9d7947b141d499c7c42" -dependencies = [ - "hex", - "thiserror", -] - -[[package]] -name = "gix-hashtable" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4e55e40dfd694884f0eb78796c5bddcf2f8b295dace47039099dd7e76534973" -dependencies = [ - "gix-hash 0.10.4", - "hashbrown 0.13.2", - "parking_lot", -] - -[[package]] -name = "gix-index" -version = "0.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "717ab601ece7921f59fe86849dbe27d44a46ebb883b5885732c4f30df4996177" -dependencies = [ - "bitflags 1.3.2", - "bstr", - "btoi", - "filetime", - "gix-bitmap", - "gix-features 0.28.1", - "gix-hash 0.10.4", - "gix-lock", - "gix-object", - "gix-traverse", - "itoa", - "memmap2", - "smallvec", - "thiserror", -] - -[[package]] -name = "gix-lock" -version = "5.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c693d7f05730fa74a7c467150adc7cea393518410c65f0672f80226b8111555" -dependencies = [ - "gix-tempfile", - "gix-utils", - "thiserror", -] - -[[package]] -name = "gix-mailmap" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b66aea5e52875cd4915f4957a6f4b75831a36981e2ec3f5fad9e370e444fe1a" -dependencies = [ - "bstr", - "gix-actor", - "thiserror", -] - -[[package]] -name = "gix-object" -version = "0.28.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8df068db9180ee935fbb70504848369e270bdcb576b05c0faa8b9fd3b86fc017" -dependencies = [ - "bstr", - "btoi", - "gix-actor", - "gix-features 0.28.1", - "gix-hash 0.10.4", - "gix-validate", - "hex", - "itoa", - "nom", - "smallvec", - "thiserror", -] - -[[package]] -name = "gix-odb" -version = "0.43.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e83af2e3e36005bfe010927f0dff41fb5acc3e3d89c6f1174135b3a34086bda2" -dependencies = [ - "arc-swap", - "gix-features 0.28.1", - 
"gix-hash 0.10.4", - "gix-object", - "gix-pack", - "gix-path", - "gix-quote", - "parking_lot", - "tempfile", - "thiserror", -] - -[[package]] -name = "gix-pack" -version = "0.33.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9401911c7fe032ad7b31c6a6b5be59cb283d1d6c999417a8215056efe6d635f3" -dependencies = [ - "clru", - "gix-chunk", - "gix-diff", - "gix-features 0.28.1", - "gix-hash 0.10.4", - "gix-hashtable", - "gix-object", - "gix-path", - "gix-tempfile", - "gix-traverse", - "memmap2", - "parking_lot", - "smallvec", - "thiserror", -] - -[[package]] -name = "gix-path" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32370dce200bb951df013e03dff35b4233fc7a89458642b047629b91734a7e19" -dependencies = [ - "bstr", - "thiserror", -] - -[[package]] -name = "gix-prompt" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f3034d4d935aef2c7bf719aaa54b88c520e82413118d886ae880a31d5bdee57" -dependencies = [ - "gix-command", - "gix-config-value", - "nix", - "parking_lot", - "thiserror", -] - -[[package]] -name = "gix-quote" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a282f5a8d9ee0b09ec47390ac727350c48f2f5c76d803cd8da6b3e7ad56e0bcb" -dependencies = [ - "bstr", - "btoi", - "thiserror", -] - -[[package]] -name = "gix-ref" -version = "0.27.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4e909396ed3b176823991ccc391c276ae2a015e54edaafa3566d35123cfac9d" -dependencies = [ - "gix-actor", - "gix-features 0.28.1", - "gix-hash 0.10.4", - "gix-lock", - "gix-object", - "gix-path", - "gix-tempfile", - "gix-validate", - "memmap2", - "nom", - "thiserror", -] - -[[package]] -name = "gix-refspec" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aba332462bda2e8efeae4302b39a6ed01ad56ef772fd5b7ef197cf2798294d65" -dependencies = [ - 
"bstr", - "gix-hash 0.10.4", - "gix-revision", - "gix-validate", - "smallvec", - "thiserror", -] - -[[package]] -name = "gix-revision" -version = "0.12.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c6f6ff53f888858afc24bf12628446a14279ceec148df6194481f306f553ad2" -dependencies = [ - "bstr", - "gix-date", - "gix-hash 0.10.4", - "gix-hashtable", - "gix-object", - "thiserror", -] - -[[package]] -name = "gix-sec" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8ffa5bf0772f9b01de501c035b6b084cf9b8bb07dec41e3afc6a17336a65f47" -dependencies = [ - "bitflags 1.3.2", - "dirs", - "gix-path", - "libc", - "windows 0.43.0", -] - -[[package]] -name = "gix-tempfile" -version = "5.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71a0d32f34e71e86586124225caefd78dabc605d0486de580d717653addf182" -dependencies = [ - "gix-fs", - "libc", - "once_cell", - "parking_lot", - "signal-hook", - "signal-hook-registry", - "tempfile", -] - -[[package]] -name = "gix-traverse" -version = "0.24.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd9a4a07bb22168dc79c60e1a6a41919d198187ca83d8a5940ad8d7122a45df3" -dependencies = [ - "gix-hash 0.10.4", - "gix-hashtable", - "gix-object", - "thiserror", -] - -[[package]] -name = "gix-url" -version = "0.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6a22b4b32ad14d68f7b7fb6458fa58d44b01797d94c1b8f4db2d9c7b3c366b5" -dependencies = [ - "bstr", - "gix-features 0.28.1", - "gix-path", - "home", - "thiserror", + "libgit2-sys", + "log", "url", ] -[[package]] -name = "gix-utils" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c10b69beac219acb8df673187a1f07dde2d74092f974fb3f9eb385aeb667c909" -dependencies = [ - "fastrand", -] - -[[package]] -name = "gix-validate" -version = "0.7.4" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7bd629d3680773e1785e585d76fd4295b740b559cad9141517300d99a0c8c049" -dependencies = [ - "bstr", - "thiserror", -] - -[[package]] -name = "gix-worktree" -version = "0.15.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54ec9a000b4f24af706c3cc680c7cda235656cbe3216336522f5692773b8a301" -dependencies = [ - "bstr", - "gix-attributes", - "gix-features 0.28.1", - "gix-glob", - "gix-hash 0.10.4", - "gix-index", - "gix-object", - "gix-path", - "io-close", - "thiserror", -] - [[package]] name = "glob" version = "0.3.1" @@ -1773,9 +1144,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "h2" -version = "0.3.18" +version = "0.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17f8a914c2987b688368b5138aa05321db91f4090cf26118185672ad588bce21" +checksum = "d357c7ae988e7d2182f7d7871d0b963962420b0678b0997ce7de72001aeab782" dependencies = [ "bytes", "fnv", @@ -1783,7 +1154,7 @@ dependencies = [ "futures-sink", "futures-util", "http", - "indexmap", + "indexmap 1.9.3", "slab", "tokio", "tokio-util", @@ -1815,6 +1186,16 @@ dependencies = [ "ahash", ] +[[package]] +name = "hashbrown" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" +dependencies = [ + "ahash", + "allocator-api2", +] + [[package]] name = "heck" version = "0.4.1" @@ -1842,15 +1223,6 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" -[[package]] -name = "home" -version = "0.5.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5444c27eef6923071f7ebcc33e3444508466a76f7a2b93da00ed6e19f30c1ddb" -dependencies = [ - "windows-sys 0.48.0", -] - [[package]] name = "http" version = "0.2.9" @@ 
-1917,9 +1289,9 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.23.2" +version = "0.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1788965e61b367cd03a62950836d5cd41560c3577d90e40e0819373194d1661c" +checksum = "0646026eb1b3eea4cd9ba47912ea5ce9cc07713d105b1a14698f4e6433d348b7" dependencies = [ "http", "hyper", @@ -1930,56 +1302,55 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.56" +version = "0.1.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0722cd7114b7de04316e7ea5456a0bbb20e4adb46fd27a3697adb812cff0f37c" +checksum = "2fad5b825842d2b38bd206f3e81d6957625fd7f0a361e345c30e01a0ae2dd613" dependencies = [ "android_system_properties", "core-foundation-sys", "iana-time-zone-haiku", "js-sys", "wasm-bindgen", - "windows 0.48.0", + "windows", ] [[package]] name = "iana-time-zone-haiku" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0703ae284fc167426161c2e3f1da3ea71d94b21bedbcc9494e92b28e334e3dca" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" dependencies = [ - "cxx", - "cxx-build", + "cc", ] [[package]] name = "idna" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e14ddfc70884202db2244c223200c204c2bda1bc6e0998d11b5e024d657209e6" +checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" dependencies = [ "unicode-bidi", "unicode-normalization", ] [[package]] -name = "imara-diff" -version = "0.1.5" +name = "indexmap" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e98c1d0ad70fc91b8b9654b1f33db55e59579d3b3de2bffdced0fdb810570cb8" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ - "ahash", + "autocfg", "hashbrown 0.12.3", ] [[package]] name = "indexmap" -version = "1.9.3" +version = 
"2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d" dependencies = [ - "autocfg", - "hashbrown 0.12.3", + "equivalent", + "hashbrown 0.14.0", ] [[package]] @@ -2003,25 +1374,15 @@ version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" -[[package]] -name = "io-close" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cadcf447f06744f8ce713d2d6239bb5bde2c357a452397a9ed90c625da390bc" -dependencies = [ - "libc", - "winapi", -] - [[package]] name = "io-lifetimes" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c66c74d2ae7e79a5a8f7ac924adbe38ee42a859c6539ad869eb51f0b52dc220" +checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" dependencies = [ "hermit-abi 0.3.1", "libc", - "windows-sys 0.48.0", + "windows-sys", ] [[package]] @@ -2039,7 +1400,7 @@ dependencies = [ "hermit-abi 0.3.1", "io-lifetimes", "rustix", - "windows-sys 0.48.0", + "windows-sys", ] [[package]] @@ -2051,6 +1412,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.6" @@ -2068,9 +1438,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.61" +version = "0.3.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "445dde2150c55e483f3d8416706b97ec8e8237c307e5b7b4b8dd15e6af2a0730" +checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a" dependencies = [ "wasm-bindgen", ] @@ 
-2147,15 +1517,15 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.142" +version = "0.2.146" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a987beff54b60ffa6d51982e1aa1146bc42f19bd26be28b0586f252fccf5317" +checksum = "f92be4933c13fd498862a9e02a3055f8a8d9c039ce33db97306fd5a6caa7f29b" [[package]] name = "libflate" -version = "1.3.0" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97822bf791bd4d5b403713886a5fbe8bf49520fe78e323b0dc480ca1a03e50b0" +checksum = "5ff4ae71b685bbad2f2f391fe74f6b7659a34871c08b210fdc039e43bee07d18" dependencies = [ "adler32", "crc32fast", @@ -2171,11 +1541,23 @@ dependencies = [ "rle-decode-fast", ] +[[package]] +name = "libgit2-sys" +version = "0.15.2+1.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a80df2e11fb4a61f4ba2ab42dbe7f74468da143f1a75c74e11dee7c813f694fa" +dependencies = [ + "cc", + "libc", + "libz-sys", + "pkg-config", +] + [[package]] name = "libm" -version = "0.2.6" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb" +checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4" [[package]] name = "libmimalloc-sys" @@ -2188,25 +1570,28 @@ dependencies = [ ] [[package]] -name = "link-cplusplus" -version = "1.0.8" +name = "libz-sys" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ecd207c9c713c34f95a097a5b029ac2ce6010530c7b49d7fea24d977dede04f5" +checksum = "56ee889ecc9568871456d42f603d6a0ce59ff328d291063a45cbdf0036baf6db" dependencies = [ "cc", + "libc", + "pkg-config", + "vcpkg", ] [[package]] name = "linux-raw-sys" -version = "0.3.7" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ece97ea872ece730aed82664c424eb4c8291e1ff2480247ccf7409044bc6479f" +checksum = 
"ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" [[package]] name = "lock_api" -version = "0.4.9" +version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df" +checksum = "c1cc9717a20b1bb222f333e6a92fd32f7d8a18ddc5a3191a11af45dcbf4dcd16" dependencies = [ "autocfg", "scopeguard", @@ -2264,20 +1649,11 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" -[[package]] -name = "memmap2" -version = "0.5.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327" -dependencies = [ - "libc", -] - [[package]] name = "memoffset" -version = "0.8.0" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d61c719bcfbcf5d62b3a09efa6088de8c54bc0bfcd3ea7ae39fcc186108b8de1" +checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" dependencies = [ "autocfg", ] @@ -2297,12 +1673,6 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" -[[package]] -name = "minimal-lexical" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" - [[package]] name = "miniz_oxide" version = "0.7.1" @@ -2314,14 +1684,13 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.6" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b9d9a46eff5b4ff64b45a9e316a6d1e0bc719ef429cbec4dc630684212bfdf9" +checksum = "927a765cd3fc26206e66b296465fa9d3e5ab003e651c1b3c060e7956d96b19d2" dependencies = [ "libc", - "log", - "wasi 0.11.0+wasi-snapshot-preview1", - "windows-sys 
0.45.0", + "wasi", + "windows-sys", ] [[package]] @@ -2330,28 +1699,6 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" -[[package]] -name = "nix" -version = "0.26.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a" -dependencies = [ - "bitflags 1.3.2", - "cfg-if", - "libc", - "static_assertions", -] - -[[package]] -name = "nom" -version = "7.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" -dependencies = [ - "memchr", - "minimal-lexical", -] - [[package]] name = "num" version = "0.4.0" @@ -2439,27 +1786,20 @@ dependencies = [ "libc", ] -[[package]] -name = "num_threads" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2819ce041d2ee131036f4fc9d6ae7ae125a3a40e97ba64d04fe799ad9dabbb44" -dependencies = [ - "libc", -] - [[package]] name = "object_store" -version = "0.5.6" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec9cd6ca25e796a49fa242876d1c4de36a24a6da5258e9f0bc062dbf5e81c53b" +checksum = "27c776db4f332b571958444982ff641d2531417a326ca368995073b639205d58" dependencies = [ "async-trait", "base64", "bytes", "chrono", "futures", - "itertools", + "humantime", + "hyper", + "itertools 0.10.5", "parking_lot", "percent-encoding", "quick-xml", @@ -2478,9 +1818,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.17.1" +version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" [[package]] name = "ordered-float" @@ -2503,22 +1843,22 @@ dependencies = [ [[package]] name = 
"parking_lot_core" -version = "0.9.7" +version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9069cbb9f99e3a5083476ccb29ceb1de18b9118cafa53e90c9551235de2b9521" +checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.2.16", + "redox_syscall", "smallvec", - "windows-sys 0.45.0", + "windows-targets", ] [[package]] name = "parquet" -version = "36.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "321a15f8332645759f29875b07f8233d16ed8ec1b3582223de81625a9f8506b7" +checksum = "ec7267a9607c3f955d4d0ac41b88a67cecc0d8d009173ad3da390699a6cb3750" dependencies = [ "ahash", "arrow-array", @@ -2534,17 +1874,18 @@ dependencies = [ "chrono", "flate2", "futures", - "hashbrown 0.13.2", + "hashbrown 0.14.0", "lz4", "num", "num-bigint", + "object_store", "paste", "seq-macro", "snap", "thrift", "tokio", "twox-hash", - "zstd 0.12.3+zstd.1.5.2", + "zstd", ] [[package]] @@ -2564,9 +1905,9 @@ checksum = "9f746c4065a8fa3fe23974dd82f15431cc8d40779821001404d10d2e79ca7d79" [[package]] name = "percent-encoding" -version = "2.2.0" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e" +checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" [[package]] name = "petgraph" @@ -2575,7 +1916,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4dd7d28ee937e54fe3080c91faa1c3a46c06de6252988a7f4592ba2310ef22a4" dependencies = [ "fixedbitset", - "indexmap", + "indexmap 1.9.3", ] [[package]] @@ -2642,9 +1983,9 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "prettyplease" -version = "0.2.4" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1ceca8aaf45b5c46ec7ed39fff75f57290368c1846d33d24a122ca81416ab058" +checksum = "3b69d39aab54d069e7f2fe8cb970493e7834601ca2d8c65fd7bbd183578080d1" dependencies = [ "proc-macro2", "syn 2.0.23", @@ -2665,12 +2006,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "prodash" -version = "23.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9516b775656bc3e8985e19cd4b8c0c0de045095074e453d2c0a513b5f978392d" - [[package]] name = "prost" version = "0.11.9" @@ -2689,7 +2024,7 @@ checksum = "119533552c9a7ffacc21e099c24a0ac8bb19c2a2a3f363de84cd9b844feab270" dependencies = [ "bytes", "heck", - "itertools", + "itertools 0.10.5", "lazy_static", "log", "multimap", @@ -2708,7 +2043,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5d2d8d10f3c6ded6da8b05b5fb3b8a5082514344d56c9f871412d29b4e075b4" dependencies = [ "anyhow", - "itertools", + "itertools 0.10.5", "proc-macro2", "quote", "syn 1.0.109", @@ -2725,31 +2060,21 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.18.3" +version = "0.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3b1ac5b3731ba34fdaa9785f8d74d17448cd18f30cf19e0c7e7b1fdb5272109" +checksum = "ffb88ae05f306b4bfcde40ac4a51dc0b05936a9207a4b75b798c7729c4258a59" dependencies = [ "cfg-if", "indoc", "libc", "memoffset", "parking_lot", - "pyo3-build-config 0.18.3", + "pyo3-build-config", "pyo3-ffi", "pyo3-macros", "unindent", ] -[[package]] -name = "pyo3-build-config" -version = "0.18.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cb946f5ac61bb61a5014924910d936ebd2b23b705f7a4a3c40b05c720b079a3" -dependencies = [ - "once_cell", - "target-lexicon", -] - [[package]] name = "pyo3-build-config" version = "0.19.1" @@ -2762,12 +2087,12 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.18.3" +version = "0.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"fd4d7c5337821916ea2a1d21d1092e8443cf34879e53a0ac653fbb98f44ff65c" +checksum = "922ede8759e8600ad4da3195ae41259654b9c55da4f7eec84a0ccc7d067a70a4" dependencies = [ "libc", - "pyo3-build-config 0.18.3", + "pyo3-build-config", ] [[package]] @@ -2783,9 +2108,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.18.3" +version = "0.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9d39c55dab3fc5a4b25bbd1ac10a2da452c4aca13bb450f22818a002e29648d" +checksum = "8a5caec6a1dd355964a841fcbeeb1b89fe4146c87295573f94228911af3cc5a2" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -2795,9 +2120,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.18.3" +version = "0.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97daff08a4c48320587b5224cc98d609e3c27b6d437315bd40b605c98eeb5918" +checksum = "e0b78ccbb160db1556cdb6fd96c50334c5d4ec44dc5e0a968d0a1208fa0efa8b" dependencies = [ "proc-macro2", "quote", @@ -2859,15 +2184,6 @@ dependencies = [ "getrandom", ] -[[package]] -name = "redox_syscall" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" -dependencies = [ - "bitflags 1.3.2", -] - [[package]] name = "redox_syscall" version = "0.3.5" @@ -2877,51 +2193,28 @@ dependencies = [ "bitflags 1.3.2", ] -[[package]] -name = "redox_users" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" -dependencies = [ - "getrandom", - "redox_syscall 0.2.16", - "thiserror", -] - [[package]] name = "regex" -version = "1.8.1" +version = "1.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af83e617f331cc6ae2da5443c602dfa5af81e517212d9d611a5b3ba1777b5370" +checksum = "d0ab3ca65655bb1e41f2a8c8cd662eb4fb035e67c3f78da1d61dffe89d07300f" 
dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.7.1", + "regex-syntax", ] -[[package]] -name = "regex-automata" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" - -[[package]] -name = "regex-syntax" -version = "0.6.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" - [[package]] name = "regex-syntax" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5996294f19bd3aae0453a862ad728f60e6600695733dd5df01da90c54363a3c" +checksum = "436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78" [[package]] name = "regress" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d995d590bd8ec096d1893f414bf3f5e8b0ee4c9eed9a5642b9766ef2c8e2e8e9" +checksum = "82a9ecfa0cb04d0b04dddb99b8ccf4f66bc8dfd23df694b398570bd8ae3a50fb" dependencies = [ "hashbrown 0.13.2", "memchr", @@ -2929,9 +2222,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.11.17" +version = "0.11.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13293b639a097af28fc8a90f22add145a9c954e49d77da06263d58cf44d5fb91" +checksum = "cde824a14b7c14f85caff81225f411faacc04a2013f41670f41443742b1c1c55" dependencies = [ "base64", "bytes", @@ -3000,28 +2293,28 @@ dependencies = [ [[package]] name = "rustix" -version = "0.37.19" +version = "0.37.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acf8729d8542766f1b2cf77eb034d52f40d375bb8b615d0b147089946e16613d" +checksum = "b96e891d04aa506a6d1f318d2771bcb1c7dfda84e126660ace067c9b474bb2c0" dependencies = [ "bitflags 1.3.2", "errno", "io-lifetimes", "libc", "linux-raw-sys", - "windows-sys 0.48.0", + "windows-sys", ] [[package]] name = "rustls" -version = 
"0.20.8" +version = "0.21.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fff78fc74d175294f4e83b28343315ffcfb114b156f0185e9741cb5570f50e2f" +checksum = "c911ba11bc8433e811ce56fde130ccf32f5127cab0e0194e9c68c5a5b671791e" dependencies = [ "log", "ring", + "rustls-webpki", "sct", - "webpki", ] [[package]] @@ -3033,6 +2326,16 @@ dependencies = [ "base64", ] +[[package]] +name = "rustls-webpki" +version = "0.100.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6207cd5ed3d8dca7816f8f3725513a34609c0c765bf652b8c3cb4cfd87db46b" +dependencies = [ + "ring", + "untrusted", +] + [[package]] name = "rustversion" version = "1.0.12" @@ -3084,12 +2387,6 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" -[[package]] -name = "scratch" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1792db035ce95be60c3f8853017b3999209281c24e2ba5bc8e59bf97a0c590c1" - [[package]] name = "sct" version = "0.7.0" @@ -3114,18 +2411,18 @@ checksum = "e6b44e8fc93a14e66336d230954dda83d18b4605ccace8fe09bc7514a71ad0bc" [[package]] name = "serde" -version = "1.0.160" +version = "1.0.164" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb2f3770c8bce3bcda7e149193a069a0f4365bda1fa5cd88e03bca26afc1216c" +checksum = "9e8c8cf938e98f769bc164923b06dce91cea1751522f46f8466461af04c9027d" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.160" +version = "1.0.164" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "291a097c63d8497e00160b166a967a4a79c64f3facdd01cbd7502231688d77df" +checksum = "d9735b638ccc51c28bf6914d90a2e9725b377144fc612c49a611fddd1b631d68" dependencies = [ "proc-macro2", "quote", @@ -3156,13 +2453,14 @@ dependencies = [ [[package]] name = "serde_tokenstream" -version = "0.1.7" +version = 
"0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "797ba1d80299b264f3aac68ab5d12e5825a561749db4df7cd7c8083900c5d4e9" +checksum = "8a00ffd23fd882d096f09fcaae2a9de8329a328628e86027e049ee051dc1621f" dependencies = [ "proc-macro2", + "quote", "serde", - "syn 1.0.109", + "syn 2.0.23", ] [[package]] @@ -3183,19 +2481,13 @@ version = "0.9.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9d684e3ec7de3bf5466b32bd75303ac16f0736426e5a4e0d6e489559ce1249c" dependencies = [ - "indexmap", + "indexmap 1.9.3", "itoa", "ryu", "serde", "unsafe-libyaml", ] -[[package]] -name = "sha1_smol" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae1a47186c03a32177042e55dbc5fd5aee900b8e0069a8d70fba96a9375cd012" - [[package]] name = "sha2" version = "0.10.6" @@ -3207,25 +2499,6 @@ dependencies = [ "digest", ] -[[package]] -name = "signal-hook" -version = "0.3.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "732768f1176d21d09e076c23a93123d40bba92d50c4058da34d45c8de8e682b9" -dependencies = [ - "libc", - "signal-hook-registry", -] - -[[package]] -name = "signal-hook-registry" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1" -dependencies = [ - "libc", -] - [[package]] name = "siphasher" version = "0.3.10" @@ -3293,9 +2566,9 @@ checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" [[package]] name = "sqlparser" -version = "0.32.0" +version = "0.35.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0366f270dbabb5cc2e4c88427dc4c08bba144f81e32fbd459a013f26a4d16aa0" +checksum = "ca597d77c98894be1f965f2e4e2d2a61575d4998088e655476c73715c54b2b43" dependencies = [ "log", "sqlparser_derive", @@ -3324,6 +2597,15 @@ version = "0.24.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" +[[package]] +name = "strum" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" +dependencies = [ + "strum_macros 0.25.1", +] + [[package]] name = "strum_macros" version = "0.24.3" @@ -3337,13 +2619,26 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "strum_macros" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6069ca09d878a33f883cc06aaa9718ede171841d3832450354410b718b097232" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.23", +] + [[package]] name = "substrait" -version = "0.7.5" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3ae64fb7ad0670c7d6d53d57b1b91beb2212afc30e164cc8edb02d6b2cff32a" +checksum = "2ac1ce8315086b127ca0abf162c62279550942bb26ebf7946fe17fe114446472" dependencies = [ - "gix", + "git2", "heck", "prettyplease", "prost", @@ -3361,9 +2656,9 @@ dependencies = [ [[package]] name = "subtle" -version = "2.4.1" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" +checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" [[package]] name = "syn" @@ -3395,15 +2690,16 @@ checksum = "fd1ba337640d60c3e96bc6f0638a939b9c9a7f2c316a1598c279828b3d1dc8c5" [[package]] name = "tempfile" -version = "3.5.0" +version = "3.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9fbec84f381d5795b08656e4912bec604d162bff9291d6189a78f4c8ab87998" +checksum = "31c0432476357e58790aaa47a8efb0c5138f137343f3b5f23bd36a27e3b0a6d6" dependencies = [ + "autocfg", "cfg-if", "fastrand", - "redox_syscall 0.3.5", + "redox_syscall", "rustix", - 
"windows-sys 0.45.0", + "windows-sys", ] [[package]] @@ -3446,46 +2742,6 @@ dependencies = [ "ordered-float", ] -[[package]] -name = "time" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a" -dependencies = [ - "libc", - "wasi 0.10.0+wasi-snapshot-preview1", - "winapi", -] - -[[package]] -name = "time" -version = "0.3.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd0cbfecb4d19b5ea75bb31ad904eb5b9fa13f21079c3b92017ebdf4999a5890" -dependencies = [ - "itoa", - "libc", - "num_threads", - "serde", - "time-core", - "time-macros", -] - -[[package]] -name = "time-core" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e153e1f1acaef8acc537e68b44906d2db6436e2b35ac2c6b42640fff91f00fd" - -[[package]] -name = "time-macros" -version = "0.2.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd80a657e71da814b8e5d60d3374fc6d35045062245d80224748ae522dd76f36" -dependencies = [ - "time-core", -] - [[package]] name = "tiny-keccak" version = "2.0.2" @@ -3512,9 +2768,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.28.0" +version = "1.28.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3c786bf8134e5a3a166db9b29ab8f48134739014a3eca7bc6bfa95d673b136f" +checksum = "94d7b1cfd2aa4011f2de74c2c4c63665e27a71006b0a192dcd2710272e73dfa2" dependencies = [ "autocfg", "bytes", @@ -3525,7 +2781,7 @@ dependencies = [ "pin-project-lite", "socket2", "tokio-macros", - "windows-sys 0.48.0", + "windows-sys", ] [[package]] @@ -3541,24 +2797,12 @@ dependencies = [ [[package]] name = "tokio-rustls" -version = "0.23.4" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c43ee83903113e03984cb9e5cebe6c04a5116269e900e3ddba8f068a62adda59" 
+checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" dependencies = [ "rustls", "tokio", - "webpki", -] - -[[package]] -name = "tokio-stream" -version = "0.1.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "397c988d37662c7dda6d2208364a706264bf3d6138b11d436cbac0ad38832842" -dependencies = [ - "futures-core", - "pin-project-lite", - "tokio", ] [[package]] @@ -3606,9 +2850,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.30" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24eb03ba0eab1fd845050058ce5e616558e8f8d8fca633e6b163fe25c797213a" +checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a" dependencies = [ "once_cell", ] @@ -3648,9 +2892,9 @@ checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" [[package]] name = "typify" -version = "0.0.11" +version = "0.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30bfde96849e25d7feef1bbf652e9cfc51deb63203fdc07b115b8bc3bcfe20b9" +checksum = "be9bb640c0eece20cac2028ebbc2ca1a3d17e3b1ddd98540309c309ed178d158" dependencies = [ "typify-impl", "typify-macro", @@ -3658,9 +2902,9 @@ dependencies = [ [[package]] name = "typify-impl" -version = "0.0.11" +version = "0.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95d27d749378ceab6ec22188ed7ad102205c89ddb92ab662371c850ffc71aa1a" +checksum = "5c8d9ecedde2fd77e975c38eeb9ca40b34ad0247b2259c6e6bbd2a8d6cc2444f" dependencies = [ "heck", "log", @@ -3669,16 +2913,16 @@ dependencies = [ "regress", "schemars", "serde_json", - "syn 1.0.109", + "syn 2.0.23", "thiserror", "unicode-ident", ] [[package]] name = "typify-macro" -version = "0.0.11" +version = "0.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35db6fc2bd9220ecdac6eeb88158824b83610de3dda0c6d0f2142b49efd858b0" +checksum = 
"c08942cd65d458d2da15777a649cb6400cb545f17964f1ca965583f22e9cc3a9" dependencies = [ "proc-macro2", "quote", @@ -3686,7 +2930,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 1.0.109", + "syn 2.0.23", "typify-impl", ] @@ -3696,17 +2940,11 @@ version = "0.3.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" -[[package]] -name = "unicode-bom" -version = "1.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63ec69f541d875b783ca40184d655f2927c95f0bffd486faa83cd3ac3529ec32" - [[package]] name = "unicode-ident" -version = "1.0.8" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" +checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0" [[package]] name = "unicode-normalization" @@ -3749,9 +2987,9 @@ checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" [[package]] name = "url" -version = "2.3.1" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d68c799ae75762b8c3fe375feb6600ef5602c883c5d21eb51c09f22b83c4643" +checksum = "50bff7831e19200a85b17131d085c25d7811bc4e186efdaf54bbd132994a88cb" dependencies = [ "form_urlencoded", "idna", @@ -3760,14 +2998,20 @@ dependencies = [ [[package]] name = "uuid" -version = "1.3.2" +version = "1.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dad5567ad0cf5b760e5665964bec1b47dfd077ba8a2544b513f3556d3d239a2" +checksum = "0fa2982af2eec27de306107c027578ff7f423d65f7250e40ce0fea8f45248b81" dependencies = [ "getrandom", "serde", ] +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "version_check" 
version = "0.9.4" @@ -3794,12 +3038,6 @@ dependencies = [ "try-lock", ] -[[package]] -name = "wasi" -version = "0.10.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" - [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -3808,9 +3046,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.84" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31f8dcbc21f30d9b8f2ea926ecb58f6b91192c17e9d33594b3df58b2007ca53b" +checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -3818,24 +3056,24 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.84" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95ce90fd5bcc06af55a641a86428ee4229e44e07033963a2290a8e241607ccb9" +checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.23", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.34" +version = "0.4.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f219e0d211ba40266969f6dbdd90636da12f75bee4fc9d6c23d1260dadb51454" +checksum = "c02dbc21516f9f1f04f187958890d7e6026df8d16540b7ad9492bc34a67cea03" dependencies = [ "cfg-if", "js-sys", @@ -3845,9 +3083,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.84" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c21f77c0bedc37fd5dc21f897894a5ca01e7bb159884559461862ae90c0b4c5" +checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" dependencies = [ "quote", 
"wasm-bindgen-macro-support", @@ -3855,22 +3093,22 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.84" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6" +checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.23", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.84" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0046fef7e28c3804e5e38bfa31ea2a0f73905319b677e57ebe37e49358989b5d" +checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" [[package]] name = "wasm-streams" @@ -3887,9 +3125,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.61" +version = "0.3.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e33b99f4b23ba3eec1a53ac264e35a755f00e966e0065077d6027c0f575b0b97" +checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b" dependencies = [ "js-sys", "wasm-bindgen", @@ -3956,37 +3194,13 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" -[[package]] -name = "windows" -version = "0.43.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04662ed0e3e5630dfa9b26e4cb823b817f1a9addda855d973a9458c236556244" -dependencies = [ - "windows_aarch64_gnullvm 0.42.2", - "windows_aarch64_msvc 0.42.2", - "windows_i686_gnu 0.42.2", - "windows_i686_msvc 0.42.2", - "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm 0.42.2", - "windows_x86_64_msvc 0.42.2", -] - [[package]] name = "windows" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" dependencies = [ - "windows-targets 0.48.0", -] - -[[package]] -name = "windows-sys" -version = "0.45.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" -dependencies = [ - "windows-targets 0.42.2", + "windows-targets", ] [[package]] @@ -3995,22 +3209,7 @@ version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" dependencies = [ - "windows-targets 0.48.0", -] - -[[package]] -name = "windows-targets" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" -dependencies = [ - "windows_aarch64_gnullvm 0.42.2", - "windows_aarch64_msvc 0.42.2", - "windows_i686_gnu 0.42.2", - "windows_i686_msvc 0.42.2", - "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm 0.42.2", - "windows_x86_64_msvc 0.42.2", + "windows-targets", ] [[package]] @@ -4019,93 +3218,51 @@ version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5" dependencies = [ - "windows_aarch64_gnullvm 0.48.0", - "windows_aarch64_msvc 0.48.0", - "windows_i686_gnu 0.48.0", - "windows_i686_msvc 0.48.0", - "windows_x86_64_gnu 0.48.0", - "windows_x86_64_gnullvm 0.48.0", - "windows_x86_64_msvc 0.48.0", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", ] -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" - [[package]] name = "windows_aarch64_gnullvm" version = 
"0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" -[[package]] -name = "windows_aarch64_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" - [[package]] name = "windows_aarch64_msvc" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" -[[package]] -name = "windows_i686_gnu" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" - [[package]] name = "windows_i686_gnu" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" -[[package]] -name = "windows_i686_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" - [[package]] name = "windows_i686_msvc" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" -[[package]] -name = "windows_x86_64_gnu" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" - [[package]] name = "windows_x86_64_gnu" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" - [[package]] name = "windows_x86_64_gnullvm" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" -[[package]] -name = "windows_x86_64_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" - [[package]] name = "windows_x86_64_msvc" version = "0.48.0" @@ -4151,32 +3308,13 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "zstd" -version = "0.11.2+zstd.1.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" -dependencies = [ - "zstd-safe 5.0.2+zstd.1.5.2", -] - [[package]] name = "zstd" version = "0.12.3+zstd.1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76eea132fb024e0e13fd9c2f5d5d595d8a967aa72382ac2f9d39fcc95afd0806" dependencies = [ - "zstd-safe 6.0.5+zstd.1.5.4", -] - -[[package]] -name = "zstd-safe" -version = "5.0.2+zstd.1.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db" -dependencies = [ - "libc", - "zstd-sys", + "zstd-safe", ] [[package]] diff --git a/dask_planner/Cargo.toml b/Cargo.toml similarity index 53% rename from dask_planner/Cargo.toml rename to Cargo.toml index a3dce7aff..826a5df7f 100644 --- a/dask_planner/Cargo.toml +++ b/Cargo.toml @@ -1,23 +1,29 @@ [package] -name = "dask_planner" +name = "dask-sql" repository = "https://github.com/dask-contrib/dask-sql" -version = "0.1.0" +version = "2023.6.0" description = "Bindings for DataFusion used by Dask-SQL" readme = "README.md" license = "Apache-2.0" edition = "2021" rust-version = "1.65" +include = ["/src", "/dask_sql", "/LICENSE.txt", "pyproject.toml", "Cargo.toml", 
"Cargo.lock"] [dependencies] async-trait = "0.1.71" -datafusion-python = { git = "https://github.com/apache/arrow-datafusion-python.git", rev = "9493638" } +datafusion-python = "28.0.0" env_logger = "0.10" log = "^0.4" -pyo3 = { version = "0.18.3", features = ["extension-module", "abi3", "abi3-py38"] } +pyo3 = { version = "0.19.1", features = ["extension-module", "abi3", "abi3-py38"] } pyo3-log = "0.8.3" [build-dependencies] pyo3-build-config = "0.19.1" [lib] -crate-type = ["cdylib"] +name = "dask_sql" +crate-type = ["cdylib", "rlib"] + +[profile.release] +lto = true +codegen-units = 1 diff --git a/README.md b/README.md index e978fadf8..d08aa0328 100644 --- a/README.md +++ b/README.md @@ -110,10 +110,7 @@ After that, you can install the package in development mode pip install -e ".[dev]" The Rust DataFusion bindings are built as part of the `pip install`. -If changes are made to the Rust source in `dask_planner/`, another build/install must be run to recompile the bindings: - - python setup.py build install - +Note that if changes are made to the Rust source in `src/`, another build must be run to recompile the bindings. This repository uses [pre-commit](https://pre-commit.com/) hooks. 
To install them, call pre-commit install diff --git a/continuous_integration/environment-3.10-dev.yaml b/continuous_integration/environment-3.10-dev.yaml index 08af189eb..4589cc1b5 100644 --- a/continuous_integration/environment-3.10-dev.yaml +++ b/continuous_integration/environment-3.10-dev.yaml @@ -11,15 +11,17 @@ dependencies: - intake>=0.6.0 - jsonschema - lightgbm -- maturin>=0.12.8 -- mlflow +- maturin>=1.1,<1.2 +# FIXME: mlflow 2.6.0 has import issues related to pydantic +# https://github.com/mlflow/mlflow/issues/9331 +- mlflow<2.6 - mock - numpy>=1.21.6 - pandas>=1.4.0 - pre-commit - prompt_toolkit>=3.0.8 - psycopg2 -- pyarrow>=6.0.1 +- pyarrow>=6.0.2 - pygments>=2.7.1 - pyhive - pytest-cov @@ -28,10 +30,10 @@ dependencies: - pytest - python=3.10 - scikit-learn>=1.0.0 -- setuptools-rust>=1.5.2 - sphinx - sqlalchemy<2 - tpot>=0.12.0 - tzlocal>=2.1 - uvicorn>=0.13.4 - libprotobuf=3 +- zlib diff --git a/continuous_integration/environment-3.8-dev.yaml b/continuous_integration/environment-3.8-dev.yaml index 15abe79b5..5a9255390 100644 --- a/continuous_integration/environment-3.8-dev.yaml +++ b/continuous_integration/environment-3.8-dev.yaml @@ -11,15 +11,17 @@ dependencies: - intake=0.6.0 - jsonschema - lightgbm -- maturin=0.12.8 -- mlflow +- maturin=1.1 +# FIXME: mlflow 2.6.0 has import issues related to pydantic +# https://github.com/mlflow/mlflow/issues/9331 +- mlflow<2.6 - mock - numpy=1.21.6 - pandas=1.4.0 - pre-commit - prompt_toolkit=3.0.8 - psycopg2 -- pyarrow=6.0.1 +- pyarrow=6.0.2 - pygments=2.7.1 - pyhive - pytest-cov @@ -28,10 +30,10 @@ dependencies: - pytest - python=3.8 - scikit-learn=1.0.0 -- setuptools-rust=1.5.2 - sphinx - sqlalchemy<2 - tpot>=0.12.0 - tzlocal=2.1 - uvicorn=0.13.4 - libprotobuf=3 +- zlib diff --git a/continuous_integration/environment-3.9-dev.yaml b/continuous_integration/environment-3.9-dev.yaml index 47e64e8fc..f807d2e87 100644 --- a/continuous_integration/environment-3.9-dev.yaml +++ 
b/continuous_integration/environment-3.9-dev.yaml @@ -11,15 +11,17 @@ dependencies: - intake>=0.6.0 - jsonschema - lightgbm -- maturin>=0.12.8 -- mlflow +- maturin>=1.1,<1.2 +# FIXME: mlflow 2.6.0 has import issues related to pydantic +# https://github.com/mlflow/mlflow/issues/9331 +- mlflow<2.6 - mock - numpy>=1.21.6 - pandas>=1.4.0 - pre-commit - prompt_toolkit>=3.0.8 - psycopg2 -- pyarrow>=6.0.1 +- pyarrow>=6.0.2 - pygments>=2.7.1 - pyhive - pytest-cov @@ -28,10 +30,10 @@ dependencies: - pytest - python=3.9 - scikit-learn>=1.0.0 -- setuptools-rust>=1.5.2 - sphinx - sqlalchemy<2 - tpot>=0.12.0 - tzlocal>=2.1 - uvicorn>=0.13.4 - libprotobuf=3 +- zlib diff --git a/continuous_integration/gpuci/environment-3.10.yaml b/continuous_integration/gpuci/environment-3.10.yaml index d312844d3..6d8372da4 100644 --- a/continuous_integration/gpuci/environment-3.10.yaml +++ b/continuous_integration/gpuci/environment-3.10.yaml @@ -14,15 +14,17 @@ dependencies: - intake>=0.6.0 - jsonschema - lightgbm -- maturin>=0.12.8 -- mlflow +- maturin>=1.1,<1.2 +# FIXME: mlflow 2.6.0 has import issues related to pydantic +# https://github.com/mlflow/mlflow/issues/9331 +- mlflow<2.6 - mock - numpy>=1.21.6 - pandas>=1.4.0 - pre-commit - prompt_toolkit>=3.0.8 - psycopg2 -- pyarrow>=6.0.1 +- pyarrow>=6.0.2 - pygments>=2.7.1 - pyhive - pytest-cov @@ -31,7 +33,6 @@ dependencies: - pytest - python=3.10 - scikit-learn>=1.0.0 -- setuptools-rust>=1.5.2 - sphinx - sqlalchemy<2 - tpot>=0.12.0 @@ -45,4 +46,6 @@ dependencies: - dask-cuda=23.08 - ucx-proc=*=gpu - ucx-py=0.33 -- xgboost=*rapidsai23.08 +- xgboost=*=rapidsai_py* +- libxgboost=*=rapidsai_h* +- zlib diff --git a/continuous_integration/gpuci/environment-3.9.yaml b/continuous_integration/gpuci/environment-3.9.yaml index 565869411..52c82a1e2 100644 --- a/continuous_integration/gpuci/environment-3.9.yaml +++ b/continuous_integration/gpuci/environment-3.9.yaml @@ -14,15 +14,17 @@ dependencies: - intake>=0.6.0 - jsonschema - lightgbm -- maturin>=0.12.8 
-- mlflow +- maturin>=1.1,<1.2 +# FIXME: mlflow 2.6.0 has import issues related to pydantic +# https://github.com/mlflow/mlflow/issues/9331 +- mlflow<2.6 - mock - numpy>=1.21.6 - pandas>=1.4.0 - pre-commit - prompt_toolkit>=3.0.8 - psycopg2 -- pyarrow>=6.0.1 +- pyarrow>=6.0.2 - pygments>=2.7.1 - pyhive - pytest-cov @@ -31,7 +33,6 @@ dependencies: - pytest - python=3.9 - scikit-learn>=1.0.0 -- setuptools-rust>=1.5.2 - sphinx - sqlalchemy<2 - tpot>=0.12.0 @@ -45,4 +46,6 @@ dependencies: - dask-cuda=23.08 - ucx-proc=*=gpu - ucx-py=0.33 -- xgboost=*rapidsai23.08 +- xgboost=*=rapidsai_py* +- libxgboost=*=rapidsai_h* +- zlib diff --git a/continuous_integration/recipe/conda_build_config.yaml b/continuous_integration/recipe/conda_build_config.yaml index b1c3c40cc..df3cde4e1 100644 --- a/continuous_integration/recipe/conda_build_config.yaml +++ b/continuous_integration/recipe/conda_build_config.yaml @@ -4,5 +4,5 @@ rust_compiler_version: - 1.69 libprotobuf: - 3 -setuptools_rust: - - 1.5.2 +maturin: + - 1.1 diff --git a/continuous_integration/recipe/meta.yaml b/continuous_integration/recipe/meta.yaml index 4314efed4..48cc8b748 100644 --- a/continuous_integration/recipe/meta.yaml +++ b/continuous_integration/recipe/meta.yaml @@ -24,14 +24,17 @@ requirements: build: - python # [build_platform != target_platform] - cross-python_{{ target_platform }} # [build_platform != target_platform] + - maturin # [build_platform != target_platform] - libprotobuf # [build_platform != target_platform] + - zlib # [build_platform != target_platform] - {{ compiler('c') }} - {{ compiler('rust') }} host: - pip - python - - setuptools-rust + - maturin - libprotobuf + - zlib - xz # [linux64] run: - python diff --git a/dask_planner/update-dependencies.sh b/continuous_integration/scripts/update-dependencies.sh similarity index 100% rename from dask_planner/update-dependencies.sh rename to continuous_integration/scripts/update-dependencies.sh diff --git a/dask_planner/.classpath 
b/dask_planner/.classpath deleted file mode 100644 index b14b13a76..000000000 --- a/dask_planner/.classpath +++ /dev/null @@ -1,55 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/dask_planner/.gitignore b/dask_planner/.gitignore deleted file mode 100644 index c8f044299..000000000 --- a/dask_planner/.gitignore +++ /dev/null @@ -1,72 +0,0 @@ -/target - -# Byte-compiled / optimized / DLL files -__pycache__/ -.pytest_cache/ -*.py[cod] - -# C extensions -*.so - -# Distribution / packaging -.Python -.venv/ -env/ -bin/ -build/ -develop-eggs/ -dist/ -eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -include/ -man/ -venv/ -*.egg-info/ -.installed.cfg -*.egg - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt -pip-selfcheck.json - -# Unit test / coverage reports -htmlcov/ -.tox/ -.coverage -.cache -nosetests.xml -coverage.xml - -# Translations -*.mo - -# Mr Developer -.mr.developer.cfg -.project -.pydevproject - -# Rope -.ropeproject - -# Django stuff: -*.log -*.pot - -.DS_Store - -# Sphinx documentation -docs/_build/ - -# PyCharm -.idea/ - -# VSCode -.vscode/ - -# Pyenv -.python-version diff --git a/dask_planner/.settings/org.eclipse.core.resources.prefs b/dask_planner/.settings/org.eclipse.core.resources.prefs deleted file mode 100644 index 92920805e..000000000 --- a/dask_planner/.settings/org.eclipse.core.resources.prefs +++ /dev/null @@ -1,5 +0,0 @@ -eclipse.preferences.version=1 -encoding//src/main/java=UTF-8 -encoding//src/main/resources=UTF-8 -encoding//target/generated-sources/annotations=UTF-8 -encoding/=UTF-8 diff --git a/dask_planner/.settings/org.eclipse.jdt.apt.core.prefs b/dask_planner/.settings/org.eclipse.jdt.apt.core.prefs deleted file mode 100644 index d4313d4b2..000000000 --- a/dask_planner/.settings/org.eclipse.jdt.apt.core.prefs +++ /dev/null @@ -1,2 +0,0 @@ -eclipse.preferences.version=1 -org.eclipse.jdt.apt.aptEnabled=false diff --git 
a/dask_planner/.settings/org.eclipse.jdt.core.prefs b/dask_planner/.settings/org.eclipse.jdt.core.prefs deleted file mode 100644 index 1b6e1ef22..000000000 --- a/dask_planner/.settings/org.eclipse.jdt.core.prefs +++ /dev/null @@ -1,9 +0,0 @@ -eclipse.preferences.version=1 -org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 -org.eclipse.jdt.core.compiler.compliance=1.8 -org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled -org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning -org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=ignore -org.eclipse.jdt.core.compiler.processAnnotations=disabled -org.eclipse.jdt.core.compiler.release=disabled -org.eclipse.jdt.core.compiler.source=1.8 diff --git a/dask_planner/.settings/org.eclipse.m2e.core.prefs b/dask_planner/.settings/org.eclipse.m2e.core.prefs deleted file mode 100644 index f897a7f1c..000000000 --- a/dask_planner/.settings/org.eclipse.m2e.core.prefs +++ /dev/null @@ -1,4 +0,0 @@ -activeProfiles= -eclipse.preferences.version=1 -resolveWorkspaceProjects=true -version=1 diff --git a/dask_planner/MANIFEST.in b/dask_planner/MANIFEST.in deleted file mode 100644 index 7c68298bd..000000000 --- a/dask_planner/MANIFEST.in +++ /dev/null @@ -1,2 +0,0 @@ -include Cargo.toml -recursive-include src * diff --git a/dask_planner/README.md b/dask_planner/README.md deleted file mode 100644 index e69de29bb..000000000 diff --git a/dask_planner/pyproject.toml b/dask_planner/pyproject.toml deleted file mode 100644 index f153e3f5a..000000000 --- a/dask_planner/pyproject.toml +++ /dev/null @@ -1,11 +0,0 @@ -[build-system] -requires = ["setuptools", "wheel", "setuptools-rust"] - -[project] -name = "datafusion_planner" -requires-python = ">=3.8" -classifiers = [ - "Programming Language :: Rust", - "Programming Language :: Python :: Implementation :: CPython", - "Programming Language :: Python :: Implementation :: PyPy", -] diff --git a/dask_sql/__init__.py b/dask_sql/__init__.py index 
d923876b8..fd8339b5a 100644 --- a/dask_sql/__init__.py +++ b/dask_sql/__init__.py @@ -1,6 +1,6 @@ # FIXME: can we modify TLS model of Rust object to avoid aarch64 glibc bug? # https://github.com/dask-contrib/dask-sql/issues/1169 -import dask_planner.rust +from . import _datafusion_lib # isort:skip from . import _version, config from .cmd import cmd_loop diff --git a/dask_sql/context.py b/dask_sql/context.py index 17c6d0055..ab0c2ae71 100644 --- a/dask_sql/context.py +++ b/dask_sql/context.py @@ -10,7 +10,7 @@ from dask.base import optimize from dask.utils_test import hlg_layer -from dask_planner.rust import ( +from dask_sql._datafusion_lib import ( DaskSchema, DaskSQLContext, DaskTable, @@ -42,7 +42,7 @@ from dask_sql.mappings import python_to_sql_type from dask_sql.physical.rel import RelConverter, custom, logical from dask_sql.physical.rex import RexConverter, core -from dask_sql.utils import OptimizationException, ParsingException +from dask_sql.utils import ParsingException logger = logging.getLogger(__name__) @@ -831,8 +831,9 @@ def _get_ral(self, sql): try: rel = self.context.optimize_relational_algebra(nonOptimizedRel) except DFOptimizationException as oe: + # Use original plan and warn about inability to optimize plan rel = nonOptimizedRel - raise OptimizationException(str(oe)) from None + logger.warn(str(oe)) else: rel = nonOptimizedRel diff --git a/dask_sql/input_utils/hive.py b/dask_sql/input_utils/hive.py index 4d0eb9cce..14bc547f0 100644 --- a/dask_sql/input_utils/hive.py +++ b/dask_sql/input_utils/hive.py @@ -6,7 +6,7 @@ import dask.dataframe as dd -from dask_planner.rust import SqlTypeName +from dask_sql._datafusion_lib import SqlTypeName try: from pyhive import hive diff --git a/dask_sql/mappings.py b/dask_sql/mappings.py index 9ba22f797..ca0e23691 100644 --- a/dask_sql/mappings.py +++ b/dask_sql/mappings.py @@ -8,7 +8,7 @@ import numpy as np import pandas as pd -from dask_planner.rust import DaskTypeMap, SqlTypeName +from dask_sql._datafusion_lib 
import DaskTypeMap, SqlTypeName logger = logging.getLogger(__name__) diff --git a/dask_sql/physical/rel/base.py b/dask_sql/physical/rel/base.py index a1f378197..5f70cde4e 100644 --- a/dask_sql/physical/rel/base.py +++ b/dask_sql/physical/rel/base.py @@ -8,7 +8,7 @@ if TYPE_CHECKING: import dask_sql - from dask_planner.rust import LogicalPlan, RelDataType + from dask_sql._datafusion_lib import LogicalPlan, RelDataType logger = logging.getLogger(__name__) diff --git a/dask_sql/physical/rel/convert.py b/dask_sql/physical/rel/convert.py index 29ad8c327..6d2beceff 100644 --- a/dask_sql/physical/rel/convert.py +++ b/dask_sql/physical/rel/convert.py @@ -8,7 +8,7 @@ if TYPE_CHECKING: import dask_sql - from dask_planner.rust import LogicalPlan + from dask_sql._datafusion_lib import LogicalPlan logger = logging.getLogger(__name__) diff --git a/dask_sql/physical/rel/custom/alter.py b/dask_sql/physical/rel/custom/alter.py index 9c8a159b0..b29eb7737 100644 --- a/dask_sql/physical/rel/custom/alter.py +++ b/dask_sql/physical/rel/custom/alter.py @@ -7,7 +7,7 @@ if TYPE_CHECKING: import dask_sql - from dask_planner.rust import LogicalPlan + from dask_sql._datafusion_lib import LogicalPlan class AlterSchemaPlugin(BaseRelPlugin): diff --git a/dask_sql/physical/rel/custom/analyze_table.py b/dask_sql/physical/rel/custom/analyze_table.py index 69f734a54..49308cf3a 100644 --- a/dask_sql/physical/rel/custom/analyze_table.py +++ b/dask_sql/physical/rel/custom/analyze_table.py @@ -9,7 +9,7 @@ if TYPE_CHECKING: import dask_sql - from dask_planner.rust import LogicalPlan + from dask_sql._datafusion_lib import LogicalPlan class AnalyzeTablePlugin(BaseRelPlugin): diff --git a/dask_sql/physical/rel/custom/create_catalog_schema.py b/dask_sql/physical/rel/custom/create_catalog_schema.py index 52ed37b55..e55d31a90 100644 --- a/dask_sql/physical/rel/custom/create_catalog_schema.py +++ b/dask_sql/physical/rel/custom/create_catalog_schema.py @@ -5,7 +5,7 @@ if TYPE_CHECKING: import dask_sql - from 
dask_planner.rust import LogicalPlan + from dask_sql._datafusion_lib import LogicalPlan logger = logging.getLogger(__name__) diff --git a/dask_sql/physical/rel/custom/create_memory_table.py b/dask_sql/physical/rel/custom/create_memory_table.py index 760857563..3c829fb42 100644 --- a/dask_sql/physical/rel/custom/create_memory_table.py +++ b/dask_sql/physical/rel/custom/create_memory_table.py @@ -6,7 +6,7 @@ if TYPE_CHECKING: import dask_sql - from dask_planner import LogicalPlan + from dask_sql._datafusion_lib import LogicalPlan logger = logging.getLogger(__name__) diff --git a/dask_sql/physical/rel/custom/create_table.py b/dask_sql/physical/rel/custom/create_table.py index 36b165230..cbe61abf7 100644 --- a/dask_sql/physical/rel/custom/create_table.py +++ b/dask_sql/physical/rel/custom/create_table.py @@ -7,7 +7,7 @@ if TYPE_CHECKING: import dask_sql - from dask_planner.rust import LogicalPlan + from dask_sql._datafusion_lib import LogicalPlan logger = logging.getLogger(__name__) diff --git a/dask_sql/physical/rel/custom/describe_model.py b/dask_sql/physical/rel/custom/describe_model.py index d915a6b0b..422ac7c3b 100644 --- a/dask_sql/physical/rel/custom/describe_model.py +++ b/dask_sql/physical/rel/custom/describe_model.py @@ -8,7 +8,7 @@ if TYPE_CHECKING: import dask_sql - from dask_planner.rust import LogicalPlan + from dask_sql._datafusion_lib import LogicalPlan class DescribeModelPlugin(BaseRelPlugin): diff --git a/dask_sql/physical/rel/custom/distributeby.py b/dask_sql/physical/rel/custom/distributeby.py index c7ce70610..71ac114f2 100644 --- a/dask_sql/physical/rel/custom/distributeby.py +++ b/dask_sql/physical/rel/custom/distributeby.py @@ -7,7 +7,7 @@ if TYPE_CHECKING: import dask_sql - from dask_planner.rust import LogicalPlan + from dask_sql._datafusion_lib import LogicalPlan logger = logging.getLogger(__name__) diff --git a/dask_sql/physical/rel/custom/drop_schema.py b/dask_sql/physical/rel/custom/drop_schema.py index 444662e2b..5491fcaa4 100644 --- 
a/dask_sql/physical/rel/custom/drop_schema.py +++ b/dask_sql/physical/rel/custom/drop_schema.py @@ -5,7 +5,7 @@ if TYPE_CHECKING: import dask_sql - from dask_planner.rust import LogicalPlan + from dask_sql._datafusion_lib import LogicalPlan logger = logging.getLogger(__name__) diff --git a/dask_sql/physical/rel/custom/export_model.py b/dask_sql/physical/rel/custom/export_model.py index 07cf9979e..08446c43c 100644 --- a/dask_sql/physical/rel/custom/export_model.py +++ b/dask_sql/physical/rel/custom/export_model.py @@ -7,7 +7,7 @@ if TYPE_CHECKING: import dask_sql - from dask_planner.rust import LogicalPlan + from dask_sql._datafusion_lib import LogicalPlan logger = logging.getLogger(__name__) diff --git a/dask_sql/physical/rel/custom/predict_model.py b/dask_sql/physical/rel/custom/predict_model.py index 917d712c3..0bb5c79b4 100644 --- a/dask_sql/physical/rel/custom/predict_model.py +++ b/dask_sql/physical/rel/custom/predict_model.py @@ -10,7 +10,7 @@ if TYPE_CHECKING: import dask_sql - from dask_planner.rust import LogicalPlan + from dask_sql._datafusion_lib import LogicalPlan logger = logging.getLogger(__name__) diff --git a/dask_sql/physical/rel/custom/show_columns.py b/dask_sql/physical/rel/custom/show_columns.py index 6b0b94fe9..2da4f4535 100644 --- a/dask_sql/physical/rel/custom/show_columns.py +++ b/dask_sql/physical/rel/custom/show_columns.py @@ -9,7 +9,7 @@ if TYPE_CHECKING: import dask_sql - from dask_planner import LogicalPlan + from dask_sql._datafusion_lib import LogicalPlan class ShowColumnsPlugin(BaseRelPlugin): diff --git a/dask_sql/physical/rel/custom/show_models.py b/dask_sql/physical/rel/custom/show_models.py index 3f879dd38..28e495810 100644 --- a/dask_sql/physical/rel/custom/show_models.py +++ b/dask_sql/physical/rel/custom/show_models.py @@ -8,7 +8,7 @@ if TYPE_CHECKING: import dask_sql - from dask_planner.rust import LogicalPlan + from dask_sql._datafusion_lib import LogicalPlan class ShowModelsPlugin(BaseRelPlugin): diff --git 
a/dask_sql/physical/rel/custom/show_schemas.py b/dask_sql/physical/rel/custom/show_schemas.py index 98b9f8ab3..fb69c5359 100644 --- a/dask_sql/physical/rel/custom/show_schemas.py +++ b/dask_sql/physical/rel/custom/show_schemas.py @@ -8,7 +8,7 @@ if TYPE_CHECKING: import dask_sql - from dask_planner import LogicalPlan + from dask_sql._datafusion_lib import LogicalPlan class ShowSchemasPlugin(BaseRelPlugin): diff --git a/dask_sql/physical/rel/custom/show_tables.py b/dask_sql/physical/rel/custom/show_tables.py index d79b4052b..05fb8a66c 100644 --- a/dask_sql/physical/rel/custom/show_tables.py +++ b/dask_sql/physical/rel/custom/show_tables.py @@ -8,7 +8,7 @@ if TYPE_CHECKING: import dask_sql - from dask_planner import LogicalPlan + from dask_sql._datafusion_lib import LogicalPlan class ShowTablesPlugin(BaseRelPlugin): diff --git a/dask_sql/physical/rel/custom/use_schema.py b/dask_sql/physical/rel/custom/use_schema.py index 889dd2b1c..f5fc65b7d 100644 --- a/dask_sql/physical/rel/custom/use_schema.py +++ b/dask_sql/physical/rel/custom/use_schema.py @@ -5,7 +5,7 @@ if TYPE_CHECKING: import dask_sql - from dask_planner.rust import LogicalPlan + from dask_sql._datafusion_lib import LogicalPlan class UseSchemaPlugin(BaseRelPlugin): diff --git a/dask_sql/physical/rel/logical/aggregate.py b/dask_sql/physical/rel/logical/aggregate.py index 84c832177..dd2f9f41d 100644 --- a/dask_sql/physical/rel/logical/aggregate.py +++ b/dask_sql/physical/rel/logical/aggregate.py @@ -16,7 +16,7 @@ if TYPE_CHECKING: import dask_sql - from dask_planner.rust import LogicalPlan + from dask_sql._datafusion_lib import LogicalPlan logger = logging.getLogger(__name__) @@ -127,6 +127,7 @@ class DaskAggregatePlugin(BaseRelPlugin): "avg": AggregationSpecification("mean", AggregationOnPandas("mean")), "stddev": AggregationSpecification("std", AggregationOnPandas("std")), "stddevsamp": AggregationSpecification("std", AggregationOnPandas("std")), + "stddev_samp": AggregationSpecification("std", 
AggregationOnPandas("std")), "stddevpop": AggregationSpecification( dd.Aggregation( "stddevpop", @@ -142,6 +143,21 @@ class DaskAggregatePlugin(BaseRelPlugin): ** (1 / 2), ) ), + "stddev_pop": AggregationSpecification( + dd.Aggregation( + "stddev_pop", + lambda s: (s.count(), s.sum(), s.agg(lambda x: (x**2).sum())), + lambda count, sum, sum_of_squares: ( + count.sum(), + sum.sum(), + sum_of_squares.sum(), + ), + lambda count, sum, sum_of_squares: ( + (sum_of_squares / count) - (sum / count) ** 2 + ) + ** (1 / 2), + ) + ), "bit_and": AggregationSpecification( ReduceAggregation("bit_and", operator.and_) ), @@ -198,6 +214,20 @@ class DaskAggregatePlugin(BaseRelPlugin): ), ) ), + "variance_pop": AggregationSpecification( + dd.Aggregation( + "variance_pop", + lambda s: (s.count(), s.sum(), s.agg(lambda x: (x**2).sum())), + lambda count, sum, sum_of_squares: ( + count.sum(), + sum.sum(), + sum_of_squares.sum(), + ), + lambda count, sum, sum_of_squares: ( + (sum_of_squares / count) - (sum / count) ** 2 + ), + ) + ), } def convert(self, rel: "LogicalPlan", context: "dask_sql.Context") -> DataContainer: diff --git a/dask_sql/physical/rel/logical/cross_join.py b/dask_sql/physical/rel/logical/cross_join.py index 5f32d3257..d1c74c8cc 100644 --- a/dask_sql/physical/rel/logical/cross_join.py +++ b/dask_sql/physical/rel/logical/cross_join.py @@ -7,7 +7,7 @@ if TYPE_CHECKING: import dask_sql - from dask_planner.rust import LogicalPlan + from dask_sql._datafusion_lib import LogicalPlan logger = logging.getLogger(__name__) diff --git a/dask_sql/physical/rel/logical/empty.py b/dask_sql/physical/rel/logical/empty.py index 23f8d1cd3..453f63de5 100644 --- a/dask_sql/physical/rel/logical/empty.py +++ b/dask_sql/physical/rel/logical/empty.py @@ -9,7 +9,7 @@ if TYPE_CHECKING: import dask_sql - from dask_planner.rust import LogicalPlan + from dask_sql._datafusion_lib import LogicalPlan logger = logging.getLogger(__name__) diff --git a/dask_sql/physical/rel/logical/explain.py 
b/dask_sql/physical/rel/logical/explain.py index 69d20fca3..0e4875d0c 100644 --- a/dask_sql/physical/rel/logical/explain.py +++ b/dask_sql/physical/rel/logical/explain.py @@ -4,7 +4,7 @@ if TYPE_CHECKING: import dask_sql - from dask_planner.rust import LogicalPlan + from dask_sql._datafusion_lib import LogicalPlan class ExplainPlugin(BaseRelPlugin): diff --git a/dask_sql/physical/rel/logical/filter.py b/dask_sql/physical/rel/logical/filter.py index d3c3f5fd3..af3685a11 100644 --- a/dask_sql/physical/rel/logical/filter.py +++ b/dask_sql/physical/rel/logical/filter.py @@ -12,7 +12,7 @@ if TYPE_CHECKING: import dask_sql - from dask_planner.rust import LogicalPlan + from dask_sql._datafusion_lib import LogicalPlan logger = logging.getLogger(__name__) diff --git a/dask_sql/physical/rel/logical/join.py b/dask_sql/physical/rel/logical/join.py index c1c904af6..1657d2bf4 100644 --- a/dask_sql/physical/rel/logical/join.py +++ b/dask_sql/physical/rel/logical/join.py @@ -18,7 +18,7 @@ if TYPE_CHECKING: import dask_sql - from dask_planner.rust import Expression, LogicalPlan + from dask_sql._datafusion_lib import Expression, LogicalPlan logger = logging.getLogger(__name__) diff --git a/dask_sql/physical/rel/logical/limit.py b/dask_sql/physical/rel/logical/limit.py index 3e2fc6434..9bd2be562 100644 --- a/dask_sql/physical/rel/logical/limit.py +++ b/dask_sql/physical/rel/logical/limit.py @@ -12,7 +12,7 @@ if TYPE_CHECKING: import dask_sql - from dask_planner.rust import LogicalPlan + from dask_sql._datafusion_lib import LogicalPlan class DaskLimitPlugin(BaseRelPlugin): diff --git a/dask_sql/physical/rel/logical/project.py b/dask_sql/physical/rel/logical/project.py index b990e21b4..0a7637f59 100644 --- a/dask_sql/physical/rel/logical/project.py +++ b/dask_sql/physical/rel/logical/project.py @@ -1,7 +1,7 @@ import logging from typing import TYPE_CHECKING -from dask_planner.rust import RexType +from dask_sql._datafusion_lib import RexType from dask_sql.datacontainer import 
DataContainer from dask_sql.physical.rel.base import BaseRelPlugin from dask_sql.physical.rex import RexConverter @@ -9,7 +9,7 @@ if TYPE_CHECKING: import dask_sql - from dask_planner.rust import LogicalPlan + from dask_sql._datafusion_lib import LogicalPlan logger = logging.getLogger(__name__) diff --git a/dask_sql/physical/rel/logical/sort.py b/dask_sql/physical/rel/logical/sort.py index 2e1376d41..9dfccdc49 100644 --- a/dask_sql/physical/rel/logical/sort.py +++ b/dask_sql/physical/rel/logical/sort.py @@ -6,7 +6,7 @@ if TYPE_CHECKING: import dask_sql - from dask_planner.rust import LogicalPlan + from dask_sql._datafusion_lib import LogicalPlan class DaskSortPlugin(BaseRelPlugin): diff --git a/dask_sql/physical/rel/logical/subquery_alias.py b/dask_sql/physical/rel/logical/subquery_alias.py index 2473167d7..14be8928f 100644 --- a/dask_sql/physical/rel/logical/subquery_alias.py +++ b/dask_sql/physical/rel/logical/subquery_alias.py @@ -5,7 +5,7 @@ if TYPE_CHECKING: import dask_sql - from dask_planner.rust import LogicalPlan + from dask_sql._datafusion_lib import LogicalPlan class SubqueryAlias(BaseRelPlugin): diff --git a/dask_sql/physical/rel/logical/table_scan.py b/dask_sql/physical/rel/logical/table_scan.py index b4025ec97..53e1d29be 100644 --- a/dask_sql/physical/rel/logical/table_scan.py +++ b/dask_sql/physical/rel/logical/table_scan.py @@ -12,7 +12,7 @@ if TYPE_CHECKING: import dask_sql - from dask_planner.rust import LogicalPlan + from dask_sql._datafusion_lib import LogicalPlan logger = logging.getLogger(__name__) diff --git a/dask_sql/physical/rel/logical/union.py b/dask_sql/physical/rel/logical/union.py index 830f7f981..f31ced797 100644 --- a/dask_sql/physical/rel/logical/union.py +++ b/dask_sql/physical/rel/logical/union.py @@ -7,7 +7,7 @@ if TYPE_CHECKING: import dask_sql - from dask_planner.rust import LogicalPlan + from dask_sql._datafusion_lib import LogicalPlan def _extract_df(obj_cc, obj_df, output_field_names): diff --git 
a/dask_sql/physical/rel/logical/window.py b/dask_sql/physical/rel/logical/window.py index 331876c49..aba788bc3 100644 --- a/dask_sql/physical/rel/logical/window.py +++ b/dask_sql/physical/rel/logical/window.py @@ -17,7 +17,7 @@ if TYPE_CHECKING: import dask_sql - from dask_planner.rust import LogicalPlan + from dask_sql._datafusion_lib import LogicalPlan logger = logging.getLogger(__name__) diff --git a/dask_sql/physical/rex/base.py b/dask_sql/physical/rex/base.py index 5724a4536..d74ad6309 100644 --- a/dask_sql/physical/rex/base.py +++ b/dask_sql/physical/rex/base.py @@ -7,7 +7,7 @@ from dask_sql.datacontainer import DataContainer if TYPE_CHECKING: - from dask_planner.rust import Expression, LogicalPlan + from dask_sql._datafusion_lib import Expression, LogicalPlan logger = logging.getLogger(__name__) diff --git a/dask_sql/physical/rex/convert.py b/dask_sql/physical/rex/convert.py index 71431cbb4..1713e496d 100644 --- a/dask_sql/physical/rex/convert.py +++ b/dask_sql/physical/rex/convert.py @@ -9,7 +9,7 @@ if TYPE_CHECKING: import dask_sql - from dask_planner.rust import Expression, LogicalPlan + from dask_sql._datafusion_lib import Expression, LogicalPlan logger = logging.getLogger(__name__) diff --git a/dask_sql/physical/rex/core/alias.py b/dask_sql/physical/rex/core/alias.py index 40c373766..7486bc9c5 100644 --- a/dask_sql/physical/rex/core/alias.py +++ b/dask_sql/physical/rex/core/alias.py @@ -8,7 +8,7 @@ if TYPE_CHECKING: import dask_sql - from dask_planner.rust import Expression, LogicalPlan + from dask_sql._datafusion_lib import Expression, LogicalPlan class RexAliasPlugin(BaseRexPlugin): diff --git a/dask_sql/physical/rex/core/call.py b/dask_sql/physical/rex/core/call.py index 85d083d78..8db8ca048 100644 --- a/dask_sql/physical/rex/core/call.py +++ b/dask_sql/physical/rex/core/call.py @@ -1,11 +1,13 @@ import logging import operator import re +import warnings from datetime import datetime from functools import partial, reduce from typing import 
TYPE_CHECKING, Any, Callable, Union import dask.array as da +import dask.config as dask_config import dask.dataframe as dd import numpy as np import pandas as pd @@ -14,14 +16,15 @@ from dask.highlevelgraph import HighLevelGraph from dask.utils import random_state_data -from dask_planner.rust import SqlTypeName from dask_sql._compat import DASK_CUDF_TODATETIME_SUPPORT, PANDAS_GT_200 +from dask_sql._datafusion_lib import SqlTypeName from dask_sql.datacontainer import DataContainer from dask_sql.mappings import ( cast_column_to_type, sql_to_python_type, sql_to_python_value, ) +from dask_sql.physical.rel import RelConverter from dask_sql.physical.rex import RexConverter from dask_sql.physical.rex.base import BaseRexPlugin from dask_sql.physical.rex.core.literal import SargPythonImplementation @@ -35,7 +38,7 @@ if TYPE_CHECKING: import dask_sql - from dask_planner.rust import Expression, LogicalPlan + from dask_sql._datafusion_lib import Expression, LogicalPlan logger = logging.getLogger(__name__) SeriesOrScalar = Union[dd.Series, Any] @@ -45,7 +48,11 @@ def as_timelike(op): if isinstance(op, np.int64): return np.timedelta64(op, "D") elif isinstance(op, str): - return np.datetime64(op) + try: + return np.datetime64(op) + except ValueError: + op = datetime.strptime(op, "%Y-%m-%d") + return np.datetime64(op.strftime("%Y-%m-%d")) elif pd.api.types.is_datetime64_dtype(op) or isinstance(op, np.timedelta64): return op else: @@ -61,6 +68,12 @@ class Operation: # True, if the operation should also get the REX needs_rex = False + # True, if the operation should also needs the Context, possible subquery Relation expansion + needs_context = False + + # True, if the operation needs the original relation algebra + needs_rel = False + @staticmethod def op_needs_dc(op): return hasattr(op, "needs_dc") and op.needs_dc @@ -69,6 +82,14 @@ def op_needs_dc(op): def op_needs_rex(op): return hasattr(op, "needs_rex") and op.needs_rex + @staticmethod + def op_needs_context(op): + return 
hasattr(op, "needs_context") and op.needs_context + + @staticmethod + def op_needs_rel(op): + return hasattr(op, "needs_rel") and op.needs_rel + def __init__(self, f: Callable): """Init with the given function""" self.f = f @@ -82,6 +103,8 @@ def of(self, op: "Operation") -> "Operation": new_op = Operation(lambda *x, **kwargs: self(op(*x, **kwargs))) new_op.needs_dc = Operation.op_needs_dc(op) new_op.needs_rex = Operation.op_needs_rex(op) + new_op.needs_context = Operation.op_needs_context(op) + new_op.needs_rel = Operation.op_needs_rel(op) return new_op @@ -987,6 +1010,39 @@ def inList(self, series: dd.Series, *operands, rex=None): return ~result if rex.isNegated() else result +class InSubqueryOperation(Operation): + """ + Returns a boolean of whether an expression is/isn't in a Subquery Expression result + """ + + needs_rex = True + needs_context = True + needs_rel = True + + def __init__(self): + super().__init__(self.inSubquery) + + def inSubquery( + self, series: dd.Series, *operands, rel=None, rex=None, context=None + ): + sub_rel = rex.getSubqueryLogicalPlan() + dc = RelConverter.convert(sub_rel, context=context) + + # Extract the specified column/Series from the Dataframe + fq_column_name = rex.column_name(rel).split(".") + + # FIXME: dask's isin doesn't support dask frames as arguments + # so we need to compute here + col = dc.df[fq_column_name[-1]].compute() + + warnings.warn( + "Dask doesn't support Dask frames as input for .isin, so we must force an early computation", + ResourceWarning, + ) + + return series.isin(col) + + class RexCallPlugin(BaseRexPlugin): """ RexCall is used for expressions, which calculate something. 
@@ -1036,6 +1092,7 @@ class RexCallPlugin(BaseRexPlugin): "negative": NegativeOperation(), "not": NotOperation(), "in list": InListOperation(), + "in subquery": InSubqueryOperation(), "is null": IsNullOperation(), "is not null": NotOperation().of(IsNullOperation()), "is true": IsTrueOperation(), @@ -1077,6 +1134,9 @@ class RexCallPlugin(BaseRexPlugin): "characterlength": TensorScalarOperation( lambda x: x.str.len(), lambda x: len(x) ), + "character_length": TensorScalarOperation( + lambda x: x.str.len(), lambda x: len(x) + ), "upper": TensorScalarOperation(lambda x: x.str.upper(), lambda x: x.upper()), "lower": TensorScalarOperation(lambda x: x.str.lower(), lambda x: x.lower()), "position": PositionOperation(), @@ -1104,6 +1164,7 @@ class RexCallPlugin(BaseRexPlugin): "dsql_totimestamp": ToTimestampOperation(), # Temporary UDF functions that need to be moved after this POC "datepart": ExtractOperation(), + "date_part": ExtractOperation(), "year": YearOperation(), "timestampadd": TimeStampAddOperation(), "timestampceil": CeilFloorOperation("ceil"), @@ -1125,6 +1186,21 @@ def convert( for o in expr.getOperands() ] + # FIXME: cuDF doesn't support binops between decimal columns and numpy ints / floats + if dask_config.get("sql.mappings.decimal_support") == "cudf" and any( + str(getattr(o, "dtype", None)) == "decimal128" for o in operands + ): + from decimal import Decimal + + operands = [ + Decimal(str(o)) + if isinstance(o, float) + else o.item() + if np.isscalar(o) and pd.api.types.is_integer_dtype(o) + else o + for o in operands + ] + # Now use the operator name in the mapping schema_name = context.schema_name operator_name = expr.getOperatorName().lower() @@ -1135,7 +1211,9 @@ def convert( try: operation = context.schema[schema_name].functions[operator_name] except KeyError: # pragma: no cover - raise NotImplementedError(f"{operator_name} not (yet) implemented") + raise NotImplementedError( + f"RexCall operator '{operator_name}' not (yet) implemented" + ) 
logger.debug( f"Executing {operator_name} on {[str(LoggableDataFrame(df)) for df in operands]}" @@ -1147,6 +1225,10 @@ def convert( kwargs["dc"] = dc if Operation.op_needs_rex(operation): kwargs["rex"] = expr + if Operation.op_needs_context(operation): + kwargs["context"] = context + if Operation.op_needs_rel(operation): + kwargs["rel"] = rel return operation(*operands, **kwargs) # TODO: We have information on the typing here - we should use it diff --git a/dask_sql/physical/rex/core/input_ref.py b/dask_sql/physical/rex/core/input_ref.py index 4272c832e..4d2c0f929 100644 --- a/dask_sql/physical/rex/core/input_ref.py +++ b/dask_sql/physical/rex/core/input_ref.py @@ -7,7 +7,7 @@ if TYPE_CHECKING: import dask_sql - from dask_planner.rust import Expression, LogicalPlan + from dask_sql._datafusion_lib import Expression, LogicalPlan class RexInputRefPlugin(BaseRexPlugin): diff --git a/dask_sql/physical/rex/core/literal.py b/dask_sql/physical/rex/core/literal.py index 73e3b8185..da0eeb128 100644 --- a/dask_sql/physical/rex/core/literal.py +++ b/dask_sql/physical/rex/core/literal.py @@ -5,14 +5,14 @@ import dask.dataframe as dd import numpy as np -from dask_planner.rust import SqlTypeName +from dask_sql._datafusion_lib import SqlTypeName from dask_sql.datacontainer import DataContainer from dask_sql.mappings import sql_to_python_value from dask_sql.physical.rex.base import BaseRexPlugin if TYPE_CHECKING: import dask_sql - from dask_planner.rust import Expression, LogicalPlan + from dask_sql._datafusion_lib import Expression, LogicalPlan logger = logging.getLogger(__name__) diff --git a/dask_sql/physical/rex/core/subquery.py b/dask_sql/physical/rex/core/subquery.py index 5e0a33098..60a07c0b9 100644 --- a/dask_sql/physical/rex/core/subquery.py +++ b/dask_sql/physical/rex/core/subquery.py @@ -8,7 +8,7 @@ if TYPE_CHECKING: import dask_sql - from dask_planner.rust import Expression, LogicalPlan + from dask_sql._datafusion_lib import Expression, LogicalPlan class 
RexScalarSubqueryPlugin(BaseRexPlugin): diff --git a/dask_sql/physical/utils/filter.py b/dask_sql/physical/utils/filter.py index f99934c07..ae564244d 100644 --- a/dask_sql/physical/utils/filter.py +++ b/dask_sql/physical/utils/filter.py @@ -127,6 +127,22 @@ def attempt_predicate_pushdown( return ddf filters = filters.to_list_tuple() + # FIXME: pyarrow doesn't seem to like converting datetime64[D] to scalars + # so we must convert any we encounter to datetime64[ns] + filters = [ + [ + ( + col, + op, + val.astype("datetime64[ns]") + if isinstance(val, np.datetime64) and val.dtype == "datetime64[D]" + else val, + ) + for col, op, val in sublist + ] + for sublist in filters + ] + # Regenerate collection with filtered IO layer try: _regen_cache = {} @@ -388,6 +404,7 @@ def _regenerate_collection( regen_kwargs = self.creation_info.get("kwargs", {}).copy() regen_kwargs = {k: v for k, v in self.creation_info.get("kwargs", {}).items()} regen_kwargs.update((new_kwargs or {}).get(self.layer.output, {})) + result = func(*inputs, *regen_args, **regen_kwargs) _regen_cache[self.layer.output] = result return result diff --git a/dask_sql/utils.py b/dask_sql/utils.py index 39c165597..454eecb7f 100644 --- a/dask_sql/utils.py +++ b/dask_sql/utils.py @@ -9,7 +9,7 @@ import numpy as np import pandas as pd -from dask_planner.rust import SqlTypeName +from dask_sql._datafusion_lib import SqlTypeName from dask_sql.datacontainer import DataContainer from dask_sql.mappings import sql_to_python_value diff --git a/docker/conda.txt b/docker/conda.txt index 83b176843..5083ebd85 100644 --- a/docker/conda.txt +++ b/docker/conda.txt @@ -4,7 +4,7 @@ pandas>=1.4.0 jpype1>=1.0.2 openjdk>=8 maven>=3.6.0 -pytest>=6.0.1 +pytest>=6.0.2 pytest-cov>=2.10.1 pytest-xdist mock>=4.0.3 @@ -13,7 +13,7 @@ tzlocal>=2.1 fastapi>=0.92.0 httpx>=0.24.1 uvicorn>=0.13.4 -pyarrow>=6.0.1 +pyarrow>=6.0.2 prompt_toolkit>=3.0.8 pygments>=2.7.1 scikit-learn>=1.0.0 @@ -21,4 +21,4 @@ intake>=0.6.0 pre-commit>=2.11.1 black=22.10.0 
isort=5.12.0 -setuptools-rust>=1.5.2 +maturin>=1.1,<1.2 diff --git a/docker/main.dockerfile b/docker/main.dockerfile index c3dbbf8d2..5b56bb879 100644 --- a/docker/main.dockerfile +++ b/docker/main.dockerfile @@ -14,7 +14,7 @@ ENV PATH="/root/.cargo/bin:${PATH}" COPY docker/conda.txt /opt/dask_sql/ RUN mamba install -y \ # build requirements - "setuptools-rust>=1.5.2" \ + "maturin>=1.1,<1.2" \ # core dependencies "dask>=2022.3.0" \ "pandas>=1.4.0" \ @@ -26,17 +26,21 @@ RUN mamba install -y \ "pygments>=2.7.1" \ tabulate \ # additional dependencies - "pyarrow>=6.0.1" \ + "pyarrow>=6.0.2" \ "scikit-learn>=1.0.0" \ "intake>=0.6.0" \ && conda clean -ay # install dask-sql +COPY Cargo.toml /opt/dask_sql/ +COPY Cargo.lock /opt/dask_sql/ +COPY pyproject.toml /opt/dask_sql/ COPY setup.py /opt/dask_sql/ COPY setup.cfg /opt/dask_sql/ COPY versioneer.py /opt/dask_sql/ +COPY README.md /opt/dask_sql/ COPY .git /opt/dask_sql/.git -COPY dask_planner /opt/dask_sql/dask_planner +COPY src /opt/dask_sql/src COPY dask_sql /opt/dask_sql/dask_sql RUN cd /opt/dask_sql/ \ && pip install -e . 
-vv diff --git a/docs/environment.yml b/docs/environment.yml index e9366eaa4..ca9b23a34 100644 --- a/docs/environment.yml +++ b/docs/environment.yml @@ -17,6 +17,5 @@ dependencies: - prompt_toolkit>=3.0.8 - pygments>=2.7.1 - tabulate - - setuptools-rust>=1.5.2 - ucx-proc=*=cpu - rust>=1.65.0 diff --git a/docs/requirements-docs.txt b/docs/requirements-docs.txt index c039df54a..cce9cb599 100644 --- a/docs/requirements-docs.txt +++ b/docs/requirements-docs.txt @@ -11,4 +11,4 @@ tzlocal>=2.1 prompt_toolkit>=3.0.8 pygments>=2.7.1 tabulate -setuptools-rust>=1.5.2 +maturin>=1.1,<1.2 diff --git a/docs/source/how_does_it_work.rst b/docs/source/how_does_it_work.rst index 32c736431..67d2eab01 100644 --- a/docs/source/how_does_it_work.rst +++ b/docs/source/how_does_it_work.rst @@ -22,7 +22,7 @@ No matter of via the Python API (:ref:`api`), the command line client (:ref:`cmd This function will first give the SQL string to the dask_planner Rust crate via the ``PyO3`` library. Inside this crate, Apache Arrow DataFusion is used to first parse the SQL string and then turn it into a relational algebra. For this, DataFusion uses the SQL language description specified in the `sqlparser-rs library `_ -We also include `SQL extensions specific to Dask-SQL `_. They specify custom language features, such as the ``CREATE MODEL`` statement. +We also include `SQL extensions specific to Dask-SQL `_. They specify custom language features, such as the ``CREATE MODEL`` statement. 3. SQL is (maybe) optimized --------------------------- diff --git a/docs/source/installation.rst b/docs/source/installation.rst index 71ce17959..a2a3ee895 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -84,12 +84,7 @@ After that, you can install the package in development mode pip install -e ".[dev]" -To compile the Rust code (after changes), run - -.. code-block:: bash - - python setup.py build_ext - +To compile the Rust code (after changes), the above command must be rerun. 
You can run the tests (after installation) with .. code-block:: bash diff --git a/pyproject.toml b/pyproject.toml index dfed2ba50..3caa92ddb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,13 +1,86 @@ [build-system] -requires = ["setuptools", "wheel", "setuptools-rust"] +requires = ["maturin>=1.1,<1.2"] +build-backend = "maturin" -[tool.isort] -profile = "black" +[project] +name = "dask_sql" +description = "SQL query layer for Dask" +maintainers = [{name = "Nils Braun", email = "nilslennartbraun@gmail.com"}] +license = {text = "MIT"} +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Programming Language :: Rust", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Topic :: Scientific/Engineering", + "Topic :: System :: Distributed Computing", +] +readme = "README.md" +requires-python = ">=3.8" +dependencies = [ + "dask[dataframe]>=2022.3.0", + "distributed>=2022.3.0", + "pandas>=1.4.0", + "fastapi>=0.92.0", + "httpx>=0.24.1", + "uvicorn>=0.13.4", + "tzlocal>=2.1", + "prompt_toolkit>=3.0.8", + "pygments>=2.7.1", + "tabulate", +] +dynamic = ["version"] + +[project.urls] +Homepage = "https://github.com/dask-contrib/dask-sql" +Documentation = "https://dask-sql.readthedocs.io" +Source = "https://github.com/dask-contrib/dask-sql" + +[project.optional-dependencies] +dev = [ + "pytest>=6.0.1", + "pytest-cov>=2.10.1", + "mock>=4.0.3", + "sphinx>=3.2.1", + "pyarrow>=6.0.2", + "scikit-learn>=1.0.0", + "intake>=0.6.0", + "pre-commit", + "black==22.10.0", + "isort==5.12.0", +] +fugue = ["fugue>=0.7.3"] + +[project.entry-points."fugue.plugins"] +dasksql = 
"dask_sql.integrations.fugue:_register_engines[fugue]" + +[project.scripts] +dask-sql = "dask_sql.cmd:main" +dask-sql-server = "dask_sql.server.app:main" + +[tool.setuptools] +include-package-data = true +zip-safe = false +license-files = ["LICENSE.txt"] + +[tool.setuptools.packages] +find = {namespaces = false} [tool.maturin] +module-name = "dask_sql._datafusion_lib" include = [ { path = "Cargo.lock", format = "sdist" } ] -exclude = [".github/**", "ci/**", ".asf.yaml"] -# Require Cargo.lock is up to date +exclude = [".github/**", "continuous_integration/**"] locked = true + +[tool.isort] +profile = "black" diff --git a/setup.py b/setup.py index 387c93125..fcbb31faf 100644 --- a/setup.py +++ b/setup.py @@ -1,87 +1,8 @@ -import os -import sys - -from setuptools import find_packages, setup -from setuptools_rust import Binding, RustExtension +from setuptools import setup import versioneer -long_description = "" -if os.path.exists("README.md"): - with open("README.md") as f: - long_description = f.read() - -needs_sphinx = "build_sphinx" in sys.argv -sphinx_requirements = ["sphinx>=3.2.1", "sphinx_rtd_theme"] if needs_sphinx else [] -debug_build = "debug" in sys.argv - -cmdclass = versioneer.get_cmdclass() - setup( - name="dask_sql", version=versioneer.get_version(), - description="SQL query layer for Dask", - url="https://github.com/dask-contrib/dask-sql/", - maintainer="Nils Braun", - maintainer_email="nilslennartbraun@gmail.com", - license="MIT", - long_description=long_description, - long_description_content_type="text/markdown", - packages=find_packages( - include=["dask_sql", "dask_sql.*", "dask_planner", "dask_planner.*"] - ), - package_data={"dask_sql": ["sql*.yaml"]}, - rust_extensions=[ - RustExtension( - "dask_planner.rust", - binding=Binding.PyO3, - path="dask_planner/Cargo.toml", - debug=debug_build, - ) - ], - python_requires=">=3.8", - setup_requires=sphinx_requirements, - install_requires=[ - "dask[dataframe]>=2022.3.0", - "distributed>=2022.3.0", - 
"pandas>=1.4.0", - "fastapi>=0.92.0", - "httpx>=0.24.1", - "uvicorn>=0.13.4", - "tzlocal>=2.1", - "prompt_toolkit>=3.0.8", - "pygments>=2.7.1", - "tabulate", - ], - extras_require={ - "dev": [ - "pytest>=6.0.1", - "pytest-cov>=2.10.1", - "mock>=4.0.3", - "sphinx>=3.2.1", - "pyarrow>=6.0.1", - "scikit-learn>=1.0.0", - "intake>=0.6.0", - "pre-commit", - "black==22.10.0", - "isort==5.12.0", - ], - "fugue": ["fugue>=0.7.3"], - }, - entry_points={ - "console_scripts": [ - "dask-sql-server = dask_sql.server.app:main", - "dask-sql = dask_sql.cmd:main", - ], - "fugue.plugins": [ - "dasksql = dask_sql.integrations.fugue:_register_engines[fugue]" - ], - }, - zip_safe=False, - cmdclass=cmdclass, - command_options={ - "build_sphinx": { - "source_dir": ("setup.py", "docs"), - } - }, + cmdclass=versioneer.get_cmdclass(), ) diff --git a/dask_planner/src/dialect.rs b/src/dialect.rs similarity index 97% rename from dask_planner/src/dialect.rs rename to src/dialect.rs index 24f507dec..da4e213e1 100644 --- a/dask_planner/src/dialect.rs +++ b/src/dialect.rs @@ -77,6 +77,7 @@ impl Dialect for DaskDialect { over: None, distinct: false, special: false, + order_by: vec![], }))) } Token::Word(w) if w.value.to_lowercase() == "floor" => { @@ -108,6 +109,7 @@ impl Dialect for DaskDialect { over: None, distinct: false, special: false, + order_by: vec![], }))) } Token::Word(w) if w.value.to_lowercase() == "timestampadd" => { @@ -136,6 +138,7 @@ impl Dialect for DaskDialect { over: None, distinct: false, special: false, + order_by: vec![], }))) } Token::Word(w) if w.value.to_lowercase() == "timestampdiff" => { @@ -163,6 +166,7 @@ impl Dialect for DaskDialect { over: None, distinct: false, special: false, + order_by: vec![], }))) } Token::Word(w) if w.value.to_lowercase() == "to_timestamp" => { @@ -192,6 +196,7 @@ impl Dialect for DaskDialect { over: None, distinct: false, special: false, + order_by: vec![], }))) } Token::Word(w) if w.value.to_lowercase() == "extract" => { @@ -221,6 +226,7 @@ 
impl Dialect for DaskDialect { over: None, distinct: false, special: false, + order_by: vec![], }))) } _ => Ok(None), diff --git a/dask_planner/src/error.rs b/src/error.rs similarity index 100% rename from dask_planner/src/error.rs rename to src/error.rs diff --git a/dask_planner/src/expression.rs b/src/expression.rs similarity index 82% rename from dask_planner/src/expression.rs rename to src/expression.rs index aa1a60a9b..fccfa9d87 100644 --- a/dask_planner/src/expression.rs +++ b/src/expression.rs @@ -4,7 +4,21 @@ use datafusion_python::{ datafusion::arrow::datatypes::DataType, datafusion_common::{Column, DFField, DFSchema, ScalarValue}, datafusion_expr::{ - expr::{AggregateFunction, BinaryExpr, Cast, Sort, TryCast, WindowFunction}, + expr::{ + AggregateFunction, + AggregateUDF, + Alias, + BinaryExpr, + Cast, + Exists, + InList, + InSubquery, + ScalarFunction, + ScalarUDF, + Sort, + TryCast, + WindowFunction, + }, lit, utils::exprlist_to_fields, Between, @@ -30,7 +44,7 @@ use crate::{ }; /// An PyExpr that can be used on a DataFrame -#[pyclass(name = "Expression", module = "datafusion", subclass)] +#[pyclass(name = "Expression", module = "dask_sql", subclass)] #[derive(Debug, Clone)] pub struct PyExpr { pub expr: Expr, @@ -44,7 +58,7 @@ impl From for Expr { } } -#[pyclass(name = "ScalarValue", module = "datafusion", subclass)] +#[pyclass(name = "ScalarValue", module = "dask_sql", subclass)] #[derive(Debug, Clone)] pub struct PyScalarValue { pub scalar_value: ScalarValue, @@ -91,9 +105,10 @@ impl PyExpr { fn _rex_type(&self, expr: &Expr) -> RexType { match expr { Expr::Alias(..) => RexType::Alias, - Expr::Column(..) | Expr::QualifiedWildcard { .. } | Expr::GetIndexedField { .. } => { - RexType::Reference - } + Expr::Column(..) + | Expr::QualifiedWildcard { .. } + | Expr::GetIndexedField { .. } + | Expr::Wildcard => RexType::Reference, Expr::ScalarVariable(..) | Expr::Literal(..) => RexType::Literal, Expr::BinaryExpr { .. } | Expr::Not(..) 
@@ -101,7 +116,6 @@ impl PyExpr { | Expr::Negative(..) | Expr::IsNull(..) | Expr::Like { .. } - | Expr::ILike { .. } | Expr::SimilarTo { .. } | Expr::Between { .. } | Expr::Case { .. } @@ -113,7 +127,6 @@ impl PyExpr { | Expr::WindowFunction { .. } | Expr::AggregateUDF { .. } | Expr::InList { .. } - | Expr::Wildcard | Expr::ScalarUDF { .. } | Expr::Exists { .. } | Expr::InSubquery { .. } @@ -153,6 +166,9 @@ impl PyExpr { pub fn subquery_plan(&self) -> PyResult { match &self.expr { Expr::ScalarSubquery(subquery) => Ok(subquery.subquery.as_ref().clone().into()), + Expr::InSubquery(insubquery) => { + Ok(insubquery.subquery.subquery.as_ref().clone().into()) + } _ => Err(py_type_err(format!( "Attempted to extract a LogicalPlan instance from invalid Expr {:?}. Only Subquery and related variants are supported for this operation.", @@ -184,49 +200,61 @@ impl PyExpr { schema.merge(plan.schema().as_ref()); } let name = get_expr_name(&self.expr).map_err(py_runtime_err)?; - schema - .index_of_column(&Column::from_qualified_name(name.clone())) - .or_else(|_| { - // Handles cases when from_qualified_name doesn't format the Column correctly. - // "name" will always contain the name of the column. Anything in addition to - // that will be separated by a '.' and should be further referenced. - let parts = name.split('.').collect::>(); - let tbl_reference = match parts.len() { - // Single element means name contains just the column name so no TableReference - 1 => None, - // Tablename.column_name - 2 => Some( - TableReference::Bare { - table: Cow::Borrowed(parts[0]), - } - .to_owned_reference(), - ), - // Schema_name.table_name.column_name - 3 => Some( - TableReference::Partial { - schema: Cow::Borrowed(parts[0]), - table: Cow::Borrowed(parts[1]), + if name != "*" { + schema + .index_of_column(&Column::from_qualified_name(name.clone())) + .or_else(|_| { + // Handles cases when from_qualified_name doesn't format the Column correctly. 
+ // "name" will always contain the name of the column. Anything in addition to + // that will be separated by a '.' and should be further referenced. + match &self.expr { + Expr::Column(col) => { + schema.index_of_column(col).map_err(py_runtime_err) } - .to_owned_reference(), - ), - // catalog_name.schema_name.table_name.column_name - 4 => Some( - TableReference::Full { - catalog: Cow::Borrowed(parts[0]), - schema: Cow::Borrowed(parts[1]), - table: Cow::Borrowed(parts[2]), + _ => { + let parts = name.split('.').collect::>(); + let tbl_reference = match parts.len() { + // Single element means name contains just the column name so no TableReference + 1 => None, + // Tablename.column_name + 2 => Some( + TableReference::Bare { + table: Cow::Borrowed(parts[0]), + } + .to_owned_reference(), + ), + // Schema_name.table_name.column_name + 3 => Some( + TableReference::Partial { + schema: Cow::Borrowed(parts[0]), + table: Cow::Borrowed(parts[1]), + } + .to_owned_reference(), + ), + // catalog_name.schema_name.table_name.column_name + 4 => Some( + TableReference::Full { + catalog: Cow::Borrowed(parts[0]), + schema: Cow::Borrowed(parts[1]), + table: Cow::Borrowed(parts[2]), + } + .to_owned_reference(), + ), + _ => None, + }; + + let col = Column { + relation: tbl_reference.clone(), + name: parts[parts.len() - 1].to_string(), + }; + schema.index_of_column(&col).map_err(py_runtime_err) } - .to_owned_reference(), - ), - _ => None, - }; - - let col = Column { - relation: tbl_reference.clone(), - name: parts[parts.len() - 1].to_string(), - }; - schema.index_of_column(&col).map_err(py_runtime_err) - }) + } + }) + } else { + // Since this is wildcard any Column will do, just use first one + Ok(0) + } } _ => Err(py_runtime_err( "We need a valid LogicalPlan instance to get the Expr's index in the schema", @@ -271,7 +299,6 @@ impl PyExpr { | Expr::IsNotTrue(_) | Expr::IsNotFalse(_) | Expr::Like { .. } - | Expr::ILike { .. } | Expr::SimilarTo { .. 
} | Expr::IsNotUnknown(_) | Expr::Case { .. } @@ -315,8 +342,7 @@ impl PyExpr { } // Expr(s) that house the Expr instance to return in their bounded params - Expr::Alias(expr, ..) - | Expr::Not(expr) + Expr::Not(expr) | Expr::IsNull(expr) | Expr::IsNotNull(expr) | Expr::IsTrue(expr) @@ -330,15 +356,15 @@ impl PyExpr { | Expr::Cast(Cast { expr, .. }) | Expr::TryCast(TryCast { expr, .. }) | Expr::Sort(Sort { expr, .. }) - | Expr::InSubquery { expr, .. } => { + | Expr::InSubquery(InSubquery { expr, .. }) => { Ok(vec![PyExpr::from(*expr.clone(), self.input_plan.clone())]) } // Expr variants containing a collection of Expr(s) for operands Expr::AggregateFunction(AggregateFunction { args, .. }) - | Expr::AggregateUDF { args, .. } - | Expr::ScalarFunction { args, .. } - | Expr::ScalarUDF { args, .. } + | Expr::AggregateUDF(AggregateUDF { args, .. }) + | Expr::ScalarFunction(ScalarFunction { args, .. }) + | Expr::ScalarUDF(ScalarUDF { args, .. }) | Expr::WindowFunction(WindowFunction { args, .. }) => Ok(args .iter() .map(|arg| PyExpr::from(arg.clone(), self.input_plan.clone())) @@ -377,7 +403,10 @@ impl PyExpr { Ok(operands) } - Expr::InList { expr, list, .. } => { + Expr::Alias(Alias { expr, .. }) => { + Ok(vec![PyExpr::from(*expr.clone(), self.input_plan.clone())]) + } + Expr::InList(InList { expr, list, .. }) => { let mut operands: Vec = vec![PyExpr::from(*expr.clone(), self.input_plan.clone())]; for list_elem in list { @@ -394,10 +423,6 @@ impl PyExpr { PyExpr::from(*expr.clone(), self.input_plan.clone()), PyExpr::from(*pattern.clone(), self.input_plan.clone()), ]), - Expr::ILike(Like { expr, pattern, .. }) => Ok(vec![ - PyExpr::from(*expr.clone(), self.input_plan.clone()), - PyExpr::from(*pattern.clone(), self.input_plan.clone()), - ]), Expr::SimilarTo(Like { expr, pattern, .. 
}) => Ok(vec![ PyExpr::from(*expr.clone(), self.input_plan.clone()), PyExpr::from(*pattern.clone(), self.input_plan.clone()), @@ -412,11 +437,14 @@ impl PyExpr { PyExpr::from(*low.clone(), self.input_plan.clone()), PyExpr::from(*high.clone(), self.input_plan.clone()), ]), + Expr::Wildcard => Ok(vec![PyExpr::from( + self.expr.clone(), + self.input_plan.clone(), + )]), // Currently un-support/implemented Expr types for Rex Call operations Expr::GroupingSet(..) | Expr::OuterReferenceColumn(_, _) - | Expr::Wildcard | Expr::QualifiedWildcard { .. } | Expr::ScalarSubquery(..) | Expr::Placeholder { .. } @@ -435,8 +463,8 @@ impl PyExpr { op, right: _, }) => format!("{op}"), - Expr::ScalarFunction { fun, args: _ } => format!("{fun}"), - Expr::ScalarUDF { fun, .. } => fun.name.clone(), + Expr::ScalarFunction(ScalarFunction { fun, args: _ }) => format!("{fun}"), + Expr::ScalarUDF(ScalarUDF { fun, .. }) => fun.name.clone(), Expr::Cast { .. } => "cast".to_string(), Expr::Between { .. } => "between".to_string(), Expr::Case { .. } => "case".to_string(), @@ -449,21 +477,19 @@ impl PyExpr { Expr::IsNotFalse(_) => "is not false".to_string(), Expr::IsNotUnknown(_) => "is not unknown".to_string(), Expr::InList { .. } => "in list".to_string(), + Expr::InSubquery(..) => "in subquery".to_string(), Expr::Negative(..) => "negative".to_string(), Expr::Not(..) => "not".to_string(), - Expr::Like(Like { negated, .. }) => { - if *negated { - "not like".to_string() - } else { - "like".to_string() - } - } - Expr::ILike(Like { negated, .. }) => { - if *negated { - "not ilike".to_string() - } else { - "ilike".to_string() - } + Expr::Like(Like { + negated, + case_insensitive, + .. + }) => { + format!( + "{}{}like", + if *negated { "not " } else { "" }, + if *case_insensitive { "i" } else { "" } + ) } Expr::SimilarTo(Like { negated, .. }) => { if *negated { @@ -556,8 +582,13 @@ impl PyExpr { ScalarValue::List(..) => "List", ScalarValue::Struct(..) 
=> "Struct", ScalarValue::FixedSizeBinary(_, _) => "FixedSizeBinary", + ScalarValue::Fixedsizelist(..) => "Fixedsizelist", + ScalarValue::DurationSecond(..) => "DurationSecond", + ScalarValue::DurationMillisecond(..) => "DurationMillisecond", + ScalarValue::DurationMicrosecond(..) => "DurationMicrosecond", + ScalarValue::DurationNanosecond(..) => "DurationNanosecond", }, - Expr::ScalarFunction { fun, args: _ } => match fun { + Expr::ScalarFunction(ScalarFunction { fun, args: _ }) => match fun { BuiltinScalarFunction::Abs => "Abs", BuiltinScalarFunction::DatePart => "DatePart", _ => { @@ -637,9 +668,9 @@ impl PyExpr { pub fn get_filter_expr(&self) -> PyResult> { // TODO refactor to avoid duplication match &self.expr { - Expr::Alias(expr, _) => match expr.as_ref() { + Expr::Alias(Alias { expr, .. }) => match expr.as_ref() { Expr::AggregateFunction(AggregateFunction { filter, .. }) - | Expr::AggregateUDF { filter, .. } => match filter { + | Expr::AggregateUDF(AggregateUDF { filter, .. }) => match filter { Some(filter) => { Ok(Some(PyExpr::from(*filter.clone(), self.input_plan.clone()))) } @@ -650,7 +681,7 @@ impl PyExpr { )), }, Expr::AggregateFunction(AggregateFunction { filter, .. }) - | Expr::AggregateUDF { filter, .. } => match filter { + | Expr::AggregateUDF(AggregateUDF { filter, .. }) => match filter { Some(filter) => Ok(Some(PyExpr::from(*filter.clone(), self.input_plan.clone()))), None => Ok(None), }, @@ -739,7 +770,10 @@ impl PyExpr { ScalarValue::TimestampNanosecond(iv, tz) | ScalarValue::TimestampMicrosecond(iv, tz) | ScalarValue::TimestampMillisecond(iv, tz) - | ScalarValue::TimestampSecond(iv, tz) => Ok((*iv, tz.clone())), + | ScalarValue::TimestampSecond(iv, tz) => match tz { + Some(time_zone) => Ok((*iv, Some(time_zone.to_string()))), + None => Ok((*iv, None)), + }, other => Err(unexpected_literal_value(other)), } } @@ -790,9 +824,9 @@ impl PyExpr { pub fn is_negated(&self) -> PyResult { match &self.expr { Expr::Between(Between { negated, .. 
}) - | Expr::Exists { negated, .. } - | Expr::InList { negated, .. } - | Expr::InSubquery { negated, .. } => Ok(*negated), + | Expr::Exists(Exists { negated, .. }) + | Expr::InList(InList { negated, .. }) + | Expr::InSubquery(InSubquery { negated, .. }) => Ok(*negated), _ => Err(py_type_err(format!( "unknown Expr type {:?} encountered", &self.expr @@ -806,7 +840,7 @@ impl PyExpr { match &self.expr { Expr::AggregateFunction(funct) => Ok(funct.distinct), Expr::AggregateUDF { .. } => Ok(false), - Expr::Alias(expr, _) => match expr.as_ref() { + Expr::Alias(Alias { expr, .. }) => match expr.as_ref() { Expr::AggregateFunction(funct) => Ok(funct.distinct), Expr::AggregateUDF { .. } => Ok(false), _ => Err(py_type_err( @@ -847,9 +881,9 @@ impl PyExpr { #[pyo3(name = "getEscapeChar")] pub fn get_escape_char(&self) -> PyResult> { match &self.expr { - Expr::Like(Like { escape_char, .. }) - | Expr::ILike(Like { escape_char, .. }) - | Expr::SimilarTo(Like { escape_char, .. }) => Ok(*escape_char), + Expr::Like(Like { escape_char, .. }) | Expr::SimilarTo(Like { escape_char, .. }) => { + Ok(*escape_char) + } _ => Err(py_type_err(format!( "Provided Expr {:?} not one of Like/ILike/SimilarTo", &self.expr @@ -877,7 +911,11 @@ fn unexpected_literal_value(value: &ScalarValue) -> PyErr { fn get_expr_name(expr: &Expr) -> Result { match expr { - Expr::Alias(expr, _) => get_expr_name(expr), + Expr::Alias(Alias { expr, .. }) => get_expr_name(expr), + Expr::Wildcard => { + // 'Wildcard' means any and all columns. We get the first valid column name here + Ok("*".to_owned()) + } _ => Ok(expr.canonical_name()), } } @@ -890,6 +928,11 @@ pub fn expr_to_field(expr: &Expr, input_plan: &LogicalPlan) -> Result { // appear in projections) so we just delegate to the contained expression instead expr_to_field(expr, input_plan) } + Expr::Wildcard => { + // Any column will do. 
We use the first column to keep things consistent + Ok(input_plan.schema().field(0).clone()) + } + Expr::InSubquery(insubquery) => expr_to_field(&insubquery.expr, input_plan), _ => { let fields = exprlist_to_fields(&[expr.clone()], input_plan).map_err(DaskPlannerError::from)?; diff --git a/dask_planner/src/lib.rs b/src/lib.rs similarity index 90% rename from dask_planner/src/lib.rs rename to src/lib.rs index f5305d900..921478973 100644 --- a/dask_planner/src/lib.rs +++ b/src/lib.rs @@ -12,8 +12,7 @@ mod sql; /// The higher-level public API is defined in pure python files under the /// dask_planner directory. #[pymodule] -#[pyo3(name = "rust")] -fn rust(py: Python, m: &PyModule) -> PyResult<()> { +fn _datafusion_lib(py: Python, m: &PyModule) -> PyResult<()> { // Initialize the global Python logger instance pyo3_log::init(); @@ -41,7 +40,7 @@ fn rust(py: Python, m: &PyModule) -> PyResult<()> { py.get_type::(), )?; - debug!("dask_planner Python module loaded"); + debug!("dask_sql native library loaded"); Ok(()) } diff --git a/dask_planner/src/parser.rs b/src/parser.rs similarity index 95% rename from dask_planner/src/parser.rs rename to src/parser.rs index 3147e6309..100f9c137 100644 --- a/dask_planner/src/parser.rs +++ b/src/parser.rs @@ -30,7 +30,7 @@ pub enum CustomExpr { Nested(Vec<(String, PySqlArg)>), } -#[pyclass(name = "SqlArg", module = "datafusion")] +#[pyclass(name = "SqlArg", module = "dask_sql")] #[derive(Debug, Clone, PartialEq, Eq)] pub struct PySqlArg { expr: Option, @@ -1374,14 +1374,7 @@ mod test { let statements = DaskParser::parse_sql(sql).unwrap(); assert_eq!(1, statements.len()); let actual = format!("{:?}", statements[0]); - let expected = "projection: [\ - UnnamedExpr(Function(Function { name: ObjectName([Ident { value: \"timestampadd\", quote_style: None }]), \ - args: [\ - Unnamed(Expr(Value(SingleQuotedString(\"YEAR\")))), \ - Unnamed(Expr(Value(Number(\"2\", false)))), \ - Unnamed(Expr(Identifier(Ident { value: \"d\", quote_style: None 
})))\ - ], over: None, distinct: false, special: false }))\ - ]"; + let expected = "Statement(Query(Query { with: None, body: Select(Select { distinct: None, top: None, projection: [UnnamedExpr(Function(Function { name: ObjectName([Ident { value: \"timestampadd\", quote_style: None }]), args: [Unnamed(Expr(Value(SingleQuotedString(\"YEAR\")))), Unnamed(Expr(Value(Number(\"2\", false)))), Unnamed(Expr(Identifier(Ident { value: \"d\", quote_style: None })))], over: None, distinct: false, special: false, order_by: [] }))], into: None, from: [TableWithJoins { relation: Table { name: ObjectName([Ident { value: \"t\", quote_style: None }]), alias: None, args: None, with_hints: [] }, joins: [] }], lateral_views: [], selection: None, group_by: [], cluster_by: [], distribute_by: [], sort_by: [], having: None, named_window: [], qualify: None }), order_by: [], limit: None, offset: None, fetch: None, locks: [] }))"; assert!(actual.contains(expected)); } @@ -1391,26 +1384,16 @@ mod test { let statements1 = DaskParser::parse_sql(sql1).unwrap(); assert_eq!(1, statements1.len()); let actual1 = format!("{:?}", statements1[0]); - let expected1 = "projection: [\ - UnnamedExpr(Function(Function { name: ObjectName([Ident { value: \"dsql_totimestamp\", quote_style: None }]), \ - args: [\ - Unnamed(Expr(Identifier(Ident { value: \"d\", quote_style: None }))), \ - Unnamed(Expr(Value(SingleQuotedString(\"%Y-%m-%d %H:%M:%S\"))))\ - ], over: None, distinct: false, special: false }))\ - ]"; + let expected1 = "Statement(Query(Query { with: None, body: Select(Select { distinct: None, top: None, projection: [UnnamedExpr(Function(Function { name: ObjectName([Ident { value: \"dsql_totimestamp\", quote_style: None }]), args: [Unnamed(Expr(Identifier(Ident { value: \"d\", quote_style: None }))), Unnamed(Expr(Value(SingleQuotedString(\"%Y-%m-%d %H:%M:%S\"))))], over: None, distinct: false, special: false, order_by: [] }))], into: None, from: [TableWithJoins { relation: Table { name: ObjectName([Ident 
{ value: \"t\", quote_style: None }]), alias: None, args: None, with_hints: [] }, joins: [] }], lateral_views: [], selection: None, group_by: [], cluster_by: [], distribute_by: [], sort_by: [], having: None, named_window: [], qualify: None }), order_by: [], limit: None, offset: None, fetch: None, locks: [] }))"; + assert!(actual1.contains(expected1)); let sql2 = "SELECT TO_TIMESTAMP(d, \"%d/%m/%Y\") FROM t"; let statements2 = DaskParser::parse_sql(sql2).unwrap(); assert_eq!(1, statements2.len()); let actual2 = format!("{:?}", statements2[0]); - let expected2 = "projection: [\ - UnnamedExpr(Function(Function { name: ObjectName([Ident { value: \"dsql_totimestamp\", quote_style: None }]), \ - args: [\ - Unnamed(Expr(Identifier(Ident { value: \"d\", quote_style: None }))), \ - Unnamed(Expr(Value(SingleQuotedString(\"\\\"%d/%m/%Y\\\"\"))))\ - ], over: None, distinct: false, special: false }))\ - ]"; + let expected2 = "Statement(Query(Query { with: None, body: Select(Select { distinct: None, top: None, projection: [UnnamedExpr(Function(Function { name: ObjectName([Ident { value: \"dsql_totimestamp\", quote_style: None }]), args: [Unnamed(Expr(Identifier(Ident { value: \"d\", quote_style: None }))), Unnamed(Expr(Value(SingleQuotedString(\"\\\"%d/%m/%Y\\\"\"))))], over: None, distinct: false, special: false, order_by: [] }))], into: None, from: [TableWithJoins { relation: Table { name: ObjectName([Ident { value: \"t\", quote_style: None }]), alias: None, args: None, with_hints: [] }, joins: [] }], lateral_views: [], selection: None, group_by: [], cluster_by: [], distribute_by: [], sort_by: [], having: None, named_window: [], qualify: None }), order_by: [], limit: None, offset: None, fetch: None, locks: [] }))"; + assert!(actual2.contains(expected2)); } diff --git a/dask_planner/src/sql.rs b/src/sql.rs similarity index 96% rename from dask_planner/src/sql.rs rename to src/sql.rs index a0e238727..c9a600225 100644 --- a/dask_planner/src/sql.rs +++ b/src/sql.rs @@ -21,7 +21,7 
@@ use datafusion_python::{ }, datafusion_expr::{ logical_plan::Extension, - AccumulatorFunctionImplementation, + AccumulatorFactoryFunction, AggregateUDF, LogicalPlan, ReturnTypeFunction, @@ -78,21 +78,7 @@ use crate::{ /// /// The following example demonstrates how to generate an optimized LogicalPlan /// from SQL using DaskSQLContext. -/// -/// ``` -/// use datafusion_python::datafusion::prelude::*; -/// -/// # use datafusion_python::datafusion_common::Result; -/// # #[tokio::main] -/// # async fn main() -> Result<()> { -/// let mut ctx = DaskSQLContext::new(); -/// let parsed_sql = ctx.parse_sql("SELECT COUNT(*) FROM test_table"); -/// let nonOptimizedRelAlgebra = ctx.logical_relational_algebra(parsed_sql); -/// let optmizedRelAlg = ctx.optimizeRelationalAlgebra(nonOptimizedRelAlgebra); -/// # Ok(()) -/// # } -/// ``` -#[pyclass(name = "DaskSQLContext", module = "dask_planner", subclass)] +#[pyclass(name = "DaskSQLContext", module = "dask_sql", subclass)] #[derive(Debug, Clone)] pub struct DaskSQLContext { current_catalog: String, @@ -385,7 +371,7 @@ impl ContextProvider for DaskSQLContext { } fn get_aggregate_meta(&self, name: &str) -> Option> { - let acc: AccumulatorFunctionImplementation = + let acc: AccumulatorFactoryFunction = Arc::new(|_return_type| Err(DataFusionError::NotImplemented("".to_string()))); let st: StateTypeFunction = @@ -478,6 +464,13 @@ impl ContextProvider for DaskSQLContext { fn options(&self) -> &ConfigOptions { &self.options } + + fn get_window_meta( + &self, + _name: &str, + ) -> Option> { + unimplemented!("RUST: get_window_meta is not yet implemented for DaskSQLContext") + } } #[pymethods] @@ -592,14 +585,19 @@ impl DaskSQLContext { current_node: None, }) .map_err(py_optimization_exp); - if self.dynamic_partition_pruning { - optimizer::DaskSqlOptimizer::dynamic_partition_pruner() - .optimize_once(optimized_plan.unwrap().original_plan) - .map(|k| PyLogicalPlan { - original_plan: k, - current_node: None, - }) - 
.map_err(py_optimization_exp) + + if let Ok(optimized_plan) = optimized_plan { + if self.dynamic_partition_pruning { + optimizer::DaskSqlOptimizer::dynamic_partition_pruner() + .optimize_once(optimized_plan.original_plan) + .map(|k| PyLogicalPlan { + original_plan: k, + current_node: None, + }) + .map_err(py_optimization_exp) + } else { + Ok(optimized_plan) + } } else { optimized_plan } diff --git a/dask_planner/src/sql/column.rs b/src/sql/column.rs similarity index 91% rename from dask_planner/src/sql/column.rs rename to src/sql/column.rs index 63f043901..32250c382 100644 --- a/dask_planner/src/sql/column.rs +++ b/src/sql/column.rs @@ -1,7 +1,7 @@ use datafusion_python::datafusion_common::Column; use pyo3::prelude::*; -#[pyclass(name = "Column", module = "dask_planner", subclass)] +#[pyclass(name = "Column", module = "dask_sql", subclass)] #[derive(Debug, Clone)] pub struct PyColumn { /// Original Column instance diff --git a/dask_planner/src/sql/exceptions.rs b/src/sql/exceptions.rs similarity index 100% rename from dask_planner/src/sql/exceptions.rs rename to src/sql/exceptions.rs diff --git a/dask_planner/src/sql/function.rs b/src/sql/function.rs similarity index 93% rename from dask_planner/src/sql/function.rs rename to src/sql/function.rs index 39fa7635e..4169d386c 100644 --- a/dask_planner/src/sql/function.rs +++ b/src/sql/function.rs @@ -5,7 +5,7 @@ use pyo3::prelude::*; use super::types::PyDataType; -#[pyclass(name = "DaskFunction", module = "dask_planner", subclass)] +#[pyclass(name = "DaskFunction", module = "dask_sql", subclass)] #[derive(Debug, Clone)] pub struct DaskFunction { #[pyo3(get, set)] diff --git a/dask_planner/src/sql/logical.rs b/src/sql/logical.rs similarity index 95% rename from dask_planner/src/sql/logical.rs rename to src/sql/logical.rs index d2096ba9b..e8f5f9f6f 100644 --- a/dask_planner/src/sql/logical.rs +++ b/src/sql/logical.rs @@ -37,7 +37,7 @@ pub mod window; use datafusion_python::{ datafusion_common::{DFSchemaRef, 
DataFusionError}, - datafusion_expr::LogicalPlan, + datafusion_expr::{DdlStatement, LogicalPlan}, }; use pyo3::prelude::*; @@ -62,7 +62,7 @@ use self::{ }; use crate::{error::Result, sql::exceptions::py_type_err}; -#[pyclass(name = "LogicalPlan", module = "dask_planner", subclass)] +#[pyclass(name = "LogicalPlan", module = "dask_sql", subclass)] #[derive(Debug, Clone)] pub struct PyLogicalPlan { /// The original LogicalPlan that was parsed by DataFusion from the input SQL @@ -315,18 +315,19 @@ impl PyLogicalPlan { LogicalPlan::TableScan(_table_scan) => "TableScan", LogicalPlan::EmptyRelation(_empty_relation) => "EmptyRelation", LogicalPlan::Limit(_limit) => "Limit", - LogicalPlan::CreateExternalTable(_create_external_table) => "CreateExternalTable", - LogicalPlan::CreateMemoryTable(_create_memory_table) => "CreateMemoryTable", - LogicalPlan::DropTable(_drop_table) => "DropTable", - LogicalPlan::DropView(_drop_view) => "DropView", + LogicalPlan::Ddl(DdlStatement::CreateExternalTable { .. }) => "CreateExternalTable", + LogicalPlan::Ddl(DdlStatement::CreateMemoryTable { .. }) => "CreateMemoryTable", + LogicalPlan::Ddl(DdlStatement::DropTable { .. }) => "DropTable", + LogicalPlan::Ddl(DdlStatement::DropView { .. }) => "DropView", LogicalPlan::Values(_values) => "Values", LogicalPlan::Explain(_explain) => "Explain", LogicalPlan::Analyze(_analyze) => "Analyze", LogicalPlan::Subquery(_sub_query) => "Subquery", LogicalPlan::SubqueryAlias(_sqalias) => "SubqueryAlias", - LogicalPlan::CreateCatalogSchema(_create) => "CreateCatalogSchema", - LogicalPlan::CreateCatalog(_create_catalog) => "CreateCatalog", - LogicalPlan::CreateView(_create_view) => "CreateView", + LogicalPlan::Ddl(DdlStatement::CreateCatalogSchema { .. }) => "CreateCatalogSchema", + LogicalPlan::Ddl(DdlStatement::DropCatalogSchema { .. }) => "DropCatalogSchema", + LogicalPlan::Ddl(DdlStatement::CreateCatalog { .. }) => "CreateCatalog", + LogicalPlan::Ddl(DdlStatement::CreateView { .. 
}) => "CreateView", LogicalPlan::Statement(_) => "Statement", // Further examine and return the name that is a possible Dask-SQL Extension type LogicalPlan::Extension(extension) => { diff --git a/dask_planner/src/sql/logical/aggregate.rs b/src/sql/logical/aggregate.rs similarity index 87% rename from dask_planner/src/sql/logical/aggregate.rs rename to src/sql/logical/aggregate.rs index 0acc8b86e..1c4074239 100644 --- a/dask_planner/src/sql/logical/aggregate.rs +++ b/src/sql/logical/aggregate.rs @@ -1,5 +1,5 @@ use datafusion_python::datafusion_expr::{ - expr::AggregateFunction, + expr::{AggregateFunction, AggregateUDF, Alias}, logical_plan::{Aggregate, Distinct}, Expr, LogicalPlan, @@ -11,7 +11,7 @@ use crate::{ sql::exceptions::py_type_err, }; -#[pyclass(name = "Aggregate", module = "dask_planner", subclass)] +#[pyclass(name = "Aggregate", module = "dask_sql", subclass)] #[derive(Clone)] pub struct PyAggregate { aggregate: Option, @@ -73,9 +73,9 @@ impl PyAggregate { impl PyAggregate { fn _aggregation_arguments(&self, expr: &Expr) -> PyResult> { match expr { - Expr::Alias(expr, _) => self._aggregation_arguments(expr.as_ref()), + Expr::Alias(Alias { expr, .. }) => self._aggregation_arguments(expr.as_ref()), Expr::AggregateFunction(AggregateFunction { fun: _, args, .. }) - | Expr::AggregateUDF { fun: _, args, .. } => match &self.aggregate { + | Expr::AggregateUDF(AggregateUDF { fun: _, args, .. }) => match &self.aggregate { Some(e) => py_expr_list(&e.input, args), None => Ok(vec![]), }, @@ -88,9 +88,9 @@ impl PyAggregate { fn _agg_func_name(expr: &Expr) -> PyResult { match expr { - Expr::Alias(expr, _) => _agg_func_name(expr.as_ref()), + Expr::Alias(Alias { expr, .. }) => _agg_func_name(expr.as_ref()), Expr::AggregateFunction(AggregateFunction { fun, .. }) => Ok(fun.to_string()), - Expr::AggregateUDF { fun, .. } => Ok(fun.name.clone()), + Expr::AggregateUDF(AggregateUDF { fun, .. 
}) => Ok(fun.name.clone()), _ => Err(py_type_err( "Encountered a non Aggregate type in agg_func_name", )), @@ -99,7 +99,7 @@ fn _agg_func_name(expr: &Expr) -> PyResult { fn _distinct_agg_expr(expr: &Expr) -> PyResult { match expr { - Expr::Alias(expr, _) => _distinct_agg_expr(expr.as_ref()), + Expr::Alias(Alias { expr, .. }) => _distinct_agg_expr(expr.as_ref()), Expr::AggregateFunction(AggregateFunction { distinct, .. }) => Ok(*distinct), Expr::AggregateUDF { .. } => { // DataFusion does not support DISTINCT in UDAFs diff --git a/dask_planner/src/sql/logical/alter_schema.rs b/src/sql/logical/alter_schema.rs similarity index 98% rename from dask_planner/src/sql/logical/alter_schema.rs rename to src/sql/logical/alter_schema.rs index 742ae513f..a7a8696b8 100644 --- a/dask_planner/src/sql/logical/alter_schema.rs +++ b/src/sql/logical/alter_schema.rs @@ -96,7 +96,7 @@ impl UserDefinedLogicalNode for AlterSchemaPlanNode { } } -#[pyclass(name = "AlterSchema", module = "dask_planner", subclass)] +#[pyclass(name = "AlterSchema", module = "dask_sql", subclass)] pub struct PyAlterSchema { pub(crate) alter_schema: AlterSchemaPlanNode, } diff --git a/dask_planner/src/sql/logical/alter_table.rs b/src/sql/logical/alter_table.rs similarity index 98% rename from dask_planner/src/sql/logical/alter_table.rs rename to src/sql/logical/alter_table.rs index 7f51a15c3..d6b49315b 100644 --- a/dask_planner/src/sql/logical/alter_table.rs +++ b/src/sql/logical/alter_table.rs @@ -102,7 +102,7 @@ impl UserDefinedLogicalNode for AlterTablePlanNode { } } -#[pyclass(name = "AlterTable", module = "dask_planner", subclass)] +#[pyclass(name = "AlterTable", module = "dask_sql", subclass)] pub struct PyAlterTable { pub(crate) alter_table: AlterTablePlanNode, } diff --git a/dask_planner/src/sql/logical/analyze_table.rs b/src/sql/logical/analyze_table.rs similarity index 98% rename from dask_planner/src/sql/logical/analyze_table.rs rename to src/sql/logical/analyze_table.rs index 9fa7fb219..6876c3704 
100644 --- a/dask_planner/src/sql/logical/analyze_table.rs +++ b/src/sql/logical/analyze_table.rs @@ -99,7 +99,7 @@ impl UserDefinedLogicalNode for AnalyzeTablePlanNode { } } -#[pyclass(name = "AnalyzeTable", module = "dask_planner", subclass)] +#[pyclass(name = "AnalyzeTable", module = "dask_sql", subclass)] pub struct PyAnalyzeTable { pub(crate) analyze_table: AnalyzeTablePlanNode, } diff --git a/dask_planner/src/sql/logical/create_catalog_schema.rs b/src/sql/logical/create_catalog_schema.rs similarity index 98% rename from dask_planner/src/sql/logical/create_catalog_schema.rs rename to src/sql/logical/create_catalog_schema.rs index bc89b02ce..82a1426af 100644 --- a/dask_planner/src/sql/logical/create_catalog_schema.rs +++ b/src/sql/logical/create_catalog_schema.rs @@ -95,7 +95,7 @@ impl UserDefinedLogicalNode for CreateCatalogSchemaPlanNode { } } -#[pyclass(name = "CreateCatalogSchema", module = "dask_planner", subclass)] +#[pyclass(name = "CreateCatalogSchema", module = "dask_sql", subclass)] pub struct PyCreateCatalogSchema { pub(crate) create_catalog_schema: CreateCatalogSchemaPlanNode, } diff --git a/dask_planner/src/sql/logical/create_experiment.rs b/src/sql/logical/create_experiment.rs similarity index 98% rename from dask_planner/src/sql/logical/create_experiment.rs rename to src/sql/logical/create_experiment.rs index 313357d75..06fe9d856 100644 --- a/dask_planner/src/sql/logical/create_experiment.rs +++ b/src/sql/logical/create_experiment.rs @@ -105,7 +105,7 @@ impl UserDefinedLogicalNode for CreateExperimentPlanNode { } } -#[pyclass(name = "CreateExperiment", module = "dask_planner", subclass)] +#[pyclass(name = "CreateExperiment", module = "dask_sql", subclass)] pub struct PyCreateExperiment { pub(crate) create_experiment: CreateExperimentPlanNode, } diff --git a/dask_planner/src/sql/logical/create_memory_table.rs b/src/sql/logical/create_memory_table.rs similarity index 89% rename from dask_planner/src/sql/logical/create_memory_table.rs rename to 
src/sql/logical/create_memory_table.rs index 668295e0f..53ff9432e 100644 --- a/dask_planner/src/sql/logical/create_memory_table.rs +++ b/src/sql/logical/create_memory_table.rs @@ -1,12 +1,13 @@ use datafusion_python::datafusion_expr::{ logical_plan::{CreateMemoryTable, CreateView}, + DdlStatement, LogicalPlan, }; use pyo3::prelude::*; use crate::sql::{exceptions::py_type_err, logical::PyLogicalPlan}; -#[pyclass(name = "CreateMemoryTable", module = "dask_planner", subclass)] +#[pyclass(name = "CreateMemoryTable", module = "dask_sql", subclass)] #[derive(Clone)] pub struct PyCreateMemoryTable { create_memory_table: Option, @@ -85,13 +86,13 @@ impl TryFrom for PyCreateMemoryTable { fn try_from(logical_plan: LogicalPlan) -> Result { Ok(match logical_plan { - LogicalPlan::CreateMemoryTable(create_memory_table) => PyCreateMemoryTable { - create_memory_table: Some(create_memory_table), + LogicalPlan::Ddl(DdlStatement::CreateMemoryTable(cmt)) => PyCreateMemoryTable { + create_memory_table: Some(cmt), create_view: None, }, - LogicalPlan::CreateView(create_view) => PyCreateMemoryTable { + LogicalPlan::Ddl(DdlStatement::CreateView(cv)) => PyCreateMemoryTable { create_memory_table: None, - create_view: Some(create_view), + create_view: Some(cv), }, _ => return Err(py_type_err("unexpected plan")), }) diff --git a/dask_planner/src/sql/logical/create_model.rs b/src/sql/logical/create_model.rs similarity index 98% rename from dask_planner/src/sql/logical/create_model.rs rename to src/sql/logical/create_model.rs index 782fe3325..7dbcdff95 100644 --- a/dask_planner/src/sql/logical/create_model.rs +++ b/src/sql/logical/create_model.rs @@ -101,7 +101,7 @@ impl UserDefinedLogicalNode for CreateModelPlanNode { } } -#[pyclass(name = "CreateModel", module = "dask_planner", subclass)] +#[pyclass(name = "CreateModel", module = "dask_sql", subclass)] pub struct PyCreateModel { pub(crate) create_model: CreateModelPlanNode, } diff --git a/dask_planner/src/sql/logical/create_table.rs 
b/src/sql/logical/create_table.rs similarity index 98% rename from dask_planner/src/sql/logical/create_table.rs rename to src/sql/logical/create_table.rs index 9271130c7..1c423415f 100644 --- a/dask_planner/src/sql/logical/create_table.rs +++ b/src/sql/logical/create_table.rs @@ -100,7 +100,7 @@ impl UserDefinedLogicalNode for CreateTablePlanNode { } } -#[pyclass(name = "CreateTable", module = "dask_planner", subclass)] +#[pyclass(name = "CreateTable", module = "dask_sql", subclass)] pub struct PyCreateTable { pub(crate) create_table: CreateTablePlanNode, } diff --git a/dask_planner/src/sql/logical/describe_model.rs b/src/sql/logical/describe_model.rs similarity index 97% rename from dask_planner/src/sql/logical/describe_model.rs rename to src/sql/logical/describe_model.rs index cb2087376..3e3563fe1 100644 --- a/dask_planner/src/sql/logical/describe_model.rs +++ b/src/sql/logical/describe_model.rs @@ -89,7 +89,7 @@ impl UserDefinedLogicalNode for DescribeModelPlanNode { } } -#[pyclass(name = "DescribeModel", module = "dask_planner", subclass)] +#[pyclass(name = "DescribeModel", module = "dask_sql", subclass)] pub struct PyDescribeModel { pub(crate) describe_model: DescribeModelPlanNode, } diff --git a/dask_planner/src/sql/logical/drop_model.rs b/src/sql/logical/drop_model.rs similarity index 98% rename from dask_planner/src/sql/logical/drop_model.rs rename to src/sql/logical/drop_model.rs index 71074905d..2715cb067 100644 --- a/dask_planner/src/sql/logical/drop_model.rs +++ b/src/sql/logical/drop_model.rs @@ -92,7 +92,7 @@ impl UserDefinedLogicalNode for DropModelPlanNode { } } -#[pyclass(name = "DropModel", module = "dask_planner", subclass)] +#[pyclass(name = "DropModel", module = "dask_sql", subclass)] pub struct PyDropModel { pub(crate) drop_model: DropModelPlanNode, } diff --git a/dask_planner/src/sql/logical/drop_schema.rs b/src/sql/logical/drop_schema.rs similarity index 97% rename from dask_planner/src/sql/logical/drop_schema.rs rename to 
src/sql/logical/drop_schema.rs index 2022a61c9..78d252d11 100644 --- a/dask_planner/src/sql/logical/drop_schema.rs +++ b/src/sql/logical/drop_schema.rs @@ -88,7 +88,7 @@ impl UserDefinedLogicalNode for DropSchemaPlanNode { } } -#[pyclass(name = "DropSchema", module = "dask_planner", subclass)] +#[pyclass(name = "DropSchema", module = "dask_sql", subclass)] pub struct PyDropSchema { pub(crate) drop_schema: DropSchemaPlanNode, } diff --git a/dask_planner/src/sql/logical/drop_table.rs b/src/sql/logical/drop_table.rs similarity index 71% rename from dask_planner/src/sql/logical/drop_table.rs rename to src/sql/logical/drop_table.rs index 7d58e8a47..504a104c1 100644 --- a/dask_planner/src/sql/logical/drop_table.rs +++ b/src/sql/logical/drop_table.rs @@ -1,9 +1,12 @@ -use datafusion_python::datafusion_expr::logical_plan::{DropTable, LogicalPlan}; +use datafusion_python::datafusion_expr::{ + logical_plan::{DropTable, LogicalPlan}, + DdlStatement, +}; use pyo3::prelude::*; use crate::sql::exceptions::py_type_err; -#[pyclass(name = "DropTable", module = "dask_planner", subclass)] +#[pyclass(name = "DropTable", module = "dask_sql", subclass)] #[derive(Clone)] pub struct PyDropTable { drop_table: DropTable, @@ -27,7 +30,7 @@ impl TryFrom for PyDropTable { fn try_from(logical_plan: LogicalPlan) -> Result { match logical_plan { - LogicalPlan::DropTable(drop_table) => Ok(PyDropTable { drop_table }), + LogicalPlan::Ddl(DdlStatement::DropTable(drop_table)) => Ok(PyDropTable { drop_table }), _ => Err(py_type_err("unexpected plan")), } } diff --git a/dask_planner/src/sql/logical/empty_relation.rs b/src/sql/logical/empty_relation.rs similarity index 94% rename from dask_planner/src/sql/logical/empty_relation.rs rename to src/sql/logical/empty_relation.rs index 5bd6659ce..6356f9c85 100644 --- a/dask_planner/src/sql/logical/empty_relation.rs +++ b/src/sql/logical/empty_relation.rs @@ -3,7 +3,7 @@ use pyo3::prelude::*; use crate::sql::exceptions::py_type_err; -#[pyclass(name = 
"EmptyRelation", module = "dask_planner", subclass)] +#[pyclass(name = "EmptyRelation", module = "dask_sql", subclass)] #[derive(Clone)] pub struct PyEmptyRelation { empty_relation: EmptyRelation, diff --git a/dask_planner/src/sql/logical/explain.rs b/src/sql/logical/explain.rs similarity index 93% rename from dask_planner/src/sql/logical/explain.rs rename to src/sql/logical/explain.rs index 17f1e4ee2..839a731d8 100644 --- a/dask_planner/src/sql/logical/explain.rs +++ b/src/sql/logical/explain.rs @@ -3,7 +3,7 @@ use pyo3::prelude::*; use crate::sql::exceptions::py_type_err; -#[pyclass(name = "Explain", module = "dask_planner", subclass)] +#[pyclass(name = "Explain", module = "dask_sql", subclass)] #[derive(Clone)] pub struct PyExplain { explain: Explain, diff --git a/dask_planner/src/sql/logical/export_model.rs b/src/sql/logical/export_model.rs similarity index 98% rename from dask_planner/src/sql/logical/export_model.rs rename to src/sql/logical/export_model.rs index e38551b58..58b5f7fad 100644 --- a/dask_planner/src/sql/logical/export_model.rs +++ b/src/sql/logical/export_model.rs @@ -95,7 +95,7 @@ impl UserDefinedLogicalNode for ExportModelPlanNode { } } -#[pyclass(name = "ExportModel", module = "dask_planner", subclass)] +#[pyclass(name = "ExportModel", module = "dask_sql", subclass)] pub struct PyExportModel { pub(crate) export_model: ExportModelPlanNode, } diff --git a/dask_planner/src/sql/logical/filter.rs b/src/sql/logical/filter.rs similarity index 93% rename from dask_planner/src/sql/logical/filter.rs rename to src/sql/logical/filter.rs index a50d508ff..f2dc2e702 100644 --- a/dask_planner/src/sql/logical/filter.rs +++ b/src/sql/logical/filter.rs @@ -3,7 +3,7 @@ use pyo3::prelude::*; use crate::{expression::PyExpr, sql::exceptions::py_type_err}; -#[pyclass(name = "Filter", module = "dask_planner", subclass)] +#[pyclass(name = "Filter", module = "dask_sql", subclass)] #[derive(Clone)] pub struct PyFilter { filter: Filter, diff --git 
a/dask_planner/src/sql/logical/join.rs b/src/sql/logical/join.rs similarity index 98% rename from dask_planner/src/sql/logical/join.rs rename to src/sql/logical/join.rs index d6c31b55b..3261e9217 100644 --- a/dask_planner/src/sql/logical/join.rs +++ b/src/sql/logical/join.rs @@ -15,7 +15,7 @@ use crate::{ sql::{column, exceptions::py_type_err}, }; -#[pyclass(name = "Join", module = "dask_planner", subclass)] +#[pyclass(name = "Join", module = "dask_sql", subclass)] #[derive(Clone)] pub struct PyJoin { join: Join, diff --git a/dask_planner/src/sql/logical/limit.rs b/src/sql/logical/limit.rs similarity index 95% rename from dask_planner/src/sql/logical/limit.rs rename to src/sql/logical/limit.rs index 189fdeea0..04d783fdd 100644 --- a/dask_planner/src/sql/logical/limit.rs +++ b/src/sql/logical/limit.rs @@ -6,7 +6,7 @@ use pyo3::prelude::*; use crate::{expression::PyExpr, sql::exceptions::py_type_err}; -#[pyclass(name = "Limit", module = "dask_planner", subclass)] +#[pyclass(name = "Limit", module = "dask_sql", subclass)] #[derive(Clone)] pub struct PyLimit { limit: Limit, diff --git a/dask_planner/src/sql/logical/predict_model.rs b/src/sql/logical/predict_model.rs similarity index 98% rename from dask_planner/src/sql/logical/predict_model.rs rename to src/sql/logical/predict_model.rs index e8d723d2c..3f68ffdb4 100644 --- a/dask_planner/src/sql/logical/predict_model.rs +++ b/src/sql/logical/predict_model.rs @@ -89,7 +89,7 @@ impl UserDefinedLogicalNode for PredictModelPlanNode { } } -#[pyclass(name = "PredictModel", module = "dask_planner", subclass)] +#[pyclass(name = "PredictModel", module = "dask_sql", subclass)] pub struct PyPredictModel { pub(crate) predict_model: PredictModelPlanNode, } diff --git a/dask_planner/src/sql/logical/projection.rs b/src/sql/logical/projection.rs similarity index 83% rename from dask_planner/src/sql/logical/projection.rs rename to src/sql/logical/projection.rs index 99ed0d684..56e5e28d8 100644 --- 
a/dask_planner/src/sql/logical/projection.rs +++ b/src/sql/logical/projection.rs @@ -1,9 +1,14 @@ -use datafusion_python::datafusion_expr::{logical_plan::Projection, Expr, LogicalPlan}; +use datafusion_python::datafusion_expr::{ + expr::Alias, + logical_plan::Projection, + Expr, + LogicalPlan, +}; use pyo3::prelude::*; use crate::{expression::PyExpr, sql::exceptions::py_type_err}; -#[pyclass(name = "Projection", module = "dask_planner", subclass)] +#[pyclass(name = "Projection", module = "dask_sql", subclass)] #[derive(Clone)] pub struct PyProjection { pub(crate) projection: Projection, @@ -14,7 +19,7 @@ impl PyProjection { fn projected_expressions(&mut self, local_expr: &PyExpr) -> Vec { let mut projs: Vec = Vec::new(); match &local_expr.expr { - Expr::Alias(expr, _name) => { + Expr::Alias(Alias { expr, .. }) => { let py_expr: PyExpr = PyExpr::from(*expr.clone(), Some(vec![self.projection.input.clone()])); projs.extend_from_slice(self.projected_expressions(&py_expr).as_slice()); @@ -35,9 +40,9 @@ impl PyProjection { PyExpr::from(expression, Some(vec![self.projection.input.clone()])); for expr in self.projected_expressions(&py_expr) { match expr.expr { - Expr::Alias(ex, name) => named.push(( + Expr::Alias(Alias { expr, name }) => named.push(( name.to_string(), - PyExpr::from(*ex, Some(vec![self.projection.input.clone()])), + PyExpr::from(*expr, Some(vec![self.projection.input.clone()])), )), _ => { if let Ok(name) = expr._column_name(&self.projection.input) { diff --git a/dask_planner/src/sql/logical/repartition_by.rs b/src/sql/logical/repartition_by.rs similarity index 96% rename from dask_planner/src/sql/logical/repartition_by.rs rename to src/sql/logical/repartition_by.rs index e931b88e7..687958571 100644 --- a/dask_planner/src/sql/logical/repartition_by.rs +++ b/src/sql/logical/repartition_by.rs @@ -10,7 +10,7 @@ use crate::{ sql::{exceptions::py_type_err, logical}, }; -#[pyclass(name = "RepartitionBy", module = "dask_planner", subclass)] +#[pyclass(name = 
"RepartitionBy", module = "dask_sql", subclass)] pub struct PyRepartitionBy { pub(crate) repartition: Repartition, } diff --git a/dask_planner/src/sql/logical/show_columns.rs b/src/sql/logical/show_columns.rs similarity index 98% rename from dask_planner/src/sql/logical/show_columns.rs rename to src/sql/logical/show_columns.rs index adfb584ef..cdd844127 100644 --- a/dask_planner/src/sql/logical/show_columns.rs +++ b/src/sql/logical/show_columns.rs @@ -92,7 +92,7 @@ impl UserDefinedLogicalNode for ShowColumnsPlanNode { } } -#[pyclass(name = "ShowColumns", module = "dask_planner", subclass)] +#[pyclass(name = "ShowColumns", module = "dask_sql", subclass)] pub struct PyShowColumns { pub(crate) show_columns: ShowColumnsPlanNode, } diff --git a/dask_planner/src/sql/logical/show_models.rs b/src/sql/logical/show_models.rs similarity index 97% rename from dask_planner/src/sql/logical/show_models.rs rename to src/sql/logical/show_models.rs index 026a179a5..a228769de 100644 --- a/dask_planner/src/sql/logical/show_models.rs +++ b/src/sql/logical/show_models.rs @@ -85,7 +85,7 @@ impl UserDefinedLogicalNode for ShowModelsPlanNode { } } -#[pyclass(name = "ShowModels", module = "dask_planner", subclass)] +#[pyclass(name = "ShowModels", module = "dask_sql", subclass)] pub struct PyShowModels { pub(crate) show_models: ShowModelsPlanNode, } diff --git a/dask_planner/src/sql/logical/show_schemas.rs b/src/sql/logical/show_schemas.rs similarity index 98% rename from dask_planner/src/sql/logical/show_schemas.rs rename to src/sql/logical/show_schemas.rs index 3e3ed4783..454afb51d 100644 --- a/dask_planner/src/sql/logical/show_schemas.rs +++ b/src/sql/logical/show_schemas.rs @@ -91,7 +91,7 @@ impl UserDefinedLogicalNode for ShowSchemasPlanNode { } } -#[pyclass(name = "ShowSchema", module = "dask_planner", subclass)] +#[pyclass(name = "ShowSchema", module = "dask_sql", subclass)] pub struct PyShowSchema { pub(crate) show_schema: ShowSchemasPlanNode, } diff --git 
a/dask_planner/src/sql/logical/show_tables.rs b/src/sql/logical/show_tables.rs similarity index 98% rename from dask_planner/src/sql/logical/show_tables.rs rename to src/sql/logical/show_tables.rs index 987f2546e..c01022828 100644 --- a/dask_planner/src/sql/logical/show_tables.rs +++ b/src/sql/logical/show_tables.rs @@ -95,7 +95,7 @@ impl UserDefinedLogicalNode for ShowTablesPlanNode { } } -#[pyclass(name = "ShowTables", module = "dask_planner", subclass)] +#[pyclass(name = "ShowTables", module = "dask_sql", subclass)] pub struct PyShowTables { pub(crate) show_tables: ShowTablesPlanNode, } diff --git a/dask_planner/src/sql/logical/sort.rs b/src/sql/logical/sort.rs similarity index 93% rename from dask_planner/src/sql/logical/sort.rs rename to src/sql/logical/sort.rs index 9abcd3906..5a1f862a1 100644 --- a/dask_planner/src/sql/logical/sort.rs +++ b/src/sql/logical/sort.rs @@ -6,7 +6,7 @@ use crate::{ sql::exceptions::py_type_err, }; -#[pyclass(name = "Sort", module = "dask_planner", subclass)] +#[pyclass(name = "Sort", module = "dask_sql", subclass)] #[derive(Clone)] pub struct PySort { sort: Sort, diff --git a/dask_planner/src/sql/logical/subquery_alias.rs b/src/sql/logical/subquery_alias.rs similarity index 85% rename from dask_planner/src/sql/logical/subquery_alias.rs rename to src/sql/logical/subquery_alias.rs index 1b23e5dc4..e98c78203 100644 --- a/dask_planner/src/sql/logical/subquery_alias.rs +++ b/src/sql/logical/subquery_alias.rs @@ -3,7 +3,7 @@ use pyo3::prelude::*; use crate::sql::exceptions::py_type_err; -#[pyclass(name = "SubqueryAlias", module = "dask_planner", subclass)] +#[pyclass(name = "SubqueryAlias", module = "dask_sql", subclass)] #[derive(Clone)] pub struct PySubqueryAlias { subquery_alias: SubqueryAlias, @@ -14,7 +14,7 @@ impl PySubqueryAlias { /// Returns a Vec of the sort expressions #[pyo3(name = "getAlias")] pub fn alias(&self) -> PyResult { - Ok(self.subquery_alias.alias.clone()) + Ok(self.subquery_alias.alias.clone().to_string()) } } 
diff --git a/dask_planner/src/sql/logical/table_scan.rs b/src/sql/logical/table_scan.rs similarity index 95% rename from dask_planner/src/sql/logical/table_scan.rs rename to src/sql/logical/table_scan.rs index 3b7a89e6e..c9cb92ebd 100644 --- a/dask_planner/src/sql/logical/table_scan.rs +++ b/src/sql/logical/table_scan.rs @@ -2,7 +2,12 @@ use std::{sync::Arc, vec}; use datafusion_python::{ datafusion_common::{DFSchema, ScalarValue}, - datafusion_expr::{logical_plan::TableScan, Expr, LogicalPlan}, + datafusion_expr::{ + expr::{Alias, InList}, + logical_plan::TableScan, + Expr, + LogicalPlan, + }, }; use pyo3::prelude::*; @@ -12,7 +17,7 @@ use crate::{ sql::exceptions::py_type_err, }; -#[pyclass(name = "TableScan", module = "dask_planner", subclass)] +#[pyclass(name = "TableScan", module = "dask_sql", subclass)] #[derive(Clone)] pub struct PyTableScan { pub(crate) table_scan: TableScan, @@ -20,7 +25,7 @@ pub struct PyTableScan { } type FilterTuple = (String, String, Option>); -#[pyclass(name = "FilteredResult", module = "dask_planner", subclass)] +#[pyclass(name = "FilteredResult", module = "dask_sql", subclass)] #[derive(Debug, Clone)] pub struct PyFilteredResult { // Certain Expr(s) do not have supporting logic in pyarrow for IO filtering @@ -52,11 +57,11 @@ impl PyTableScan { let mut filter_tuple: Vec<(PyExpr, FilterTuple)> = Vec::new(); match filter { - Expr::InList { + Expr::InList(InList { expr, list, negated, - } => { + }) => { // Only handle simple Expr(s) for InList operations for now if PyTableScan::_valid_expr_type(list) { // While ANSI SQL would not allow for anything other than a Column or Literal @@ -64,7 +69,7 @@ impl PyTableScan { // IF it is something else it is returned to Dask to handle let ident = match *expr.clone() { Expr::Column(col) => Ok(col.name), - Expr::Alias(_, name) => Ok(name), + Expr::Alias(Alias { name, .. 
}) => Ok(name), Expr::Literal(val) => Ok(format!("{}", val)), _ => Err(DaskPlannerError::InvalidIOFilter(format!( "Invalid InList Expr type `{}`. using in Dask instead", @@ -77,7 +82,7 @@ impl PyTableScan { .iter() .map(|f| match f { Expr::Column(col) => Ok(col.name.clone().into_py(py)), - Expr::Alias(_, name) => Ok(name.clone().into_py(py)), + Expr::Alias(Alias { name, ..}) => Ok(name.clone().into_py(py)), Expr::Literal(val) => match val { ScalarValue::Boolean(val) => Ok(val.unwrap().into_py(py)), ScalarValue::Float32(val) => Ok(val.unwrap().into_py(py)), diff --git a/dask_planner/src/sql/logical/use_schema.rs b/src/sql/logical/use_schema.rs similarity index 97% rename from dask_planner/src/sql/logical/use_schema.rs rename to src/sql/logical/use_schema.rs index 7c2206310..0f804ce7a 100644 --- a/dask_planner/src/sql/logical/use_schema.rs +++ b/src/sql/logical/use_schema.rs @@ -85,7 +85,7 @@ impl UserDefinedLogicalNode for UseSchemaPlanNode { } } -#[pyclass(name = "UseSchema", module = "dask_planner", subclass)] +#[pyclass(name = "UseSchema", module = "dask_sql", subclass)] pub struct PyUseSchema { pub(crate) use_schema: UseSchemaPlanNode, } diff --git a/dask_planner/src/sql/logical/window.rs b/src/sql/logical/window.rs similarity index 96% rename from dask_planner/src/sql/logical/window.rs rename to src/sql/logical/window.rs index e104ccdb3..3dd9d8c0d 100644 --- a/dask_planner/src/sql/logical/window.rs +++ b/src/sql/logical/window.rs @@ -17,19 +17,19 @@ use crate::{ sql::exceptions::py_type_err, }; -#[pyclass(name = "Window", module = "dask_planner", subclass)] +#[pyclass(name = "Window", module = "dask_sql", subclass)] #[derive(Clone)] pub struct PyWindow { window: Window, } -#[pyclass(name = "WindowFrame", module = "dask_planner", subclass)] +#[pyclass(name = "WindowFrame", module = "dask_sql", subclass)] #[derive(Clone)] pub struct PyWindowFrame { window_frame: WindowFrame, } -#[pyclass(name = "WindowFrameBound", module = "dask_planner", subclass)] 
+#[pyclass(name = "WindowFrameBound", module = "dask_sql", subclass)] #[derive(Clone)] pub struct PyWindowFrameBound { frame_bound: WindowFrameBound, diff --git a/dask_planner/src/sql/optimizer.rs b/src/sql/optimizer.rs similarity index 92% rename from dask_planner/src/sql/optimizer.rs rename to src/sql/optimizer.rs index bdaa30ea7..85f335572 100644 --- a/dask_planner/src/sql/optimizer.rs +++ b/src/sql/optimizer.rs @@ -1,11 +1,16 @@ +// Declare optimizer modules +pub mod decorrelate_where_exists; +pub mod decorrelate_where_in; +pub mod dynamic_partition_pruning; +pub mod join_reorder; +pub mod utils; + use std::sync::Arc; use datafusion_python::{ datafusion_common::DataFusionError, datafusion_expr::LogicalPlan, datafusion_optimizer::{ - decorrelate_where_exists::DecorrelateWhereExists, - decorrelate_where_in::DecorrelateWhereIn, eliminate_cross_join::EliminateCrossJoin, eliminate_limit::EliminateLimit, eliminate_outer_join::EliminateOuterJoin, @@ -22,13 +27,11 @@ use datafusion_python::{ OptimizerContext, }, }; -use log::{debug, trace}; - -mod dynamic_partition_pruning; +use decorrelate_where_exists::DecorrelateWhereExists; +use decorrelate_where_in::DecorrelateWhereIn; use dynamic_partition_pruning::DynamicPartitionPruning; - -mod join_reorder; use join_reorder::JoinReorder; +use log::{debug, trace}; /// Houses the optimization logic for Dask-SQL. 
This optimization controls the optimizations /// and their ordering in regards to their impact on the underlying `LogicalPlan` instance @@ -151,17 +154,7 @@ mod tests { AND (cast('2002-05-08' as date) + interval '5 days')\ )"; let plan = test_sql(sql)?; - let expected = r#"Projection: test.col_int32 - Filter: CAST(test.col_int32 AS Float64) > __scalar_sq_1.__value - CrossJoin: - TableScan: test projection=[col_int32] - SubqueryAlias: __scalar_sq_1 - Projection: AVG(test.col_int32) AS __value - Aggregate: groupBy=[[]], aggr=[[AVG(test.col_int32)]] - Projection: test.col_int32 - Filter: test.col_utf8 >= Utf8("2002-05-08") AND test.col_utf8 <= Utf8("2002-05-13") - TableScan: test projection=[col_int32, col_utf8]"#; - assert_eq!(expected, format!("{:?}", plan)); + assert!(format!("{:?}", plan).contains(r#"<= Date32("11820")"#)); Ok(()) } @@ -234,6 +227,13 @@ mod tests { fn get_variable_type(&self, _variable_names: &[String]) -> Option { None } + + fn get_window_meta( + &self, + _name: &str, + ) -> Option> { + None + } } struct MyTableSource { diff --git a/src/sql/optimizer/decorrelate_where_exists.rs b/src/sql/optimizer/decorrelate_where_exists.rs new file mode 100644 index 000000000..5944c83ae --- /dev/null +++ b/src/sql/optimizer/decorrelate_where_exists.rs @@ -0,0 +1,228 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use datafusion_python::{ + datafusion_common::{Column, DataFusionError, Result}, + datafusion_expr::{ + expr::Exists, + logical_plan::{Distinct, Filter, JoinType, Subquery}, + Expr, + LogicalPlan, + LogicalPlanBuilder, + }, + datafusion_optimizer::optimizer::{ApplyOrder, OptimizerConfig, OptimizerRule}, +}; + +use crate::sql::optimizer::utils::{ + collect_subquery_cols, + conjunction, + extract_join_filters, + split_conjunction, +}; + +/// Optimizer rule for rewriting subquery filters to joins +#[derive(Default)] +pub struct DecorrelateWhereExists {} + +impl DecorrelateWhereExists { + #[allow(missing_docs)] + pub fn new() -> Self { + Self {} + } + + /// Finds expressions that have an `EXISTS` subquery (and recurses when found) + /// + /// # Arguments + /// + /// * `predicate` - A conjunction to split and search + /// * `config` - Optimizer configuration passed through to the recursive `try_optimize` call + /// + /// Returns a tuple (subqueries, non-subquery expressions) + fn extract_subquery_exprs( + &self, + predicate: &Expr, + config: &dyn OptimizerConfig, + ) -> Result<(Vec, Vec)> { + let filters = split_conjunction(predicate); + + let mut subqueries = vec![]; + let mut others = vec![]; + for it in filters.iter() { + match it { + Expr::Exists(Exists { subquery, negated }) => { + let subquery_plan = self + .try_optimize(&subquery.subquery, config)?
+ .map(Arc::new) + .unwrap_or_else(|| subquery.subquery.clone()); + let new_subquery = subquery.with_plan(subquery_plan); + subqueries.push(SubqueryInfo::new(new_subquery, *negated)); + } + _ => others.push((*it).clone()), + } + } + + Ok((subqueries, others)) + } +} + +impl OptimizerRule for DecorrelateWhereExists { + fn try_optimize( + &self, + plan: &LogicalPlan, + config: &dyn OptimizerConfig, + ) -> Result> { + match plan { + LogicalPlan::Filter(filter) => { + let (subqueries, other_exprs) = + self.extract_subquery_exprs(&filter.predicate, config)?; + if subqueries.is_empty() { + // regular filter, no subquery exists clause here + return Ok(None); + } + + // iterate through all exists clauses in predicate, turning each into a join + let mut cur_input = filter.input.as_ref().clone(); + for subquery in subqueries { + if let Some(x) = optimize_exists(&subquery, &cur_input)? { + cur_input = x; + } else { + return Ok(None); + } + } + + let expr = conjunction(other_exprs); + if let Some(expr) = expr { + let new_filter = Filter::try_new(expr, Arc::new(cur_input))?; + cur_input = LogicalPlan::Filter(new_filter); + } + + Ok(Some(cur_input)) + } + _ => Ok(None), + } + } + + fn name(&self) -> &str { + "decorrelate_where_exists" + } + + fn apply_order(&self) -> Option { + Some(ApplyOrder::TopDown) + } +} + +/// Takes a query like: +/// +/// SELECT t1.id +/// FROM t1 +/// WHERE exists +/// ( +/// SELECT t2.id FROM t2 WHERE t1.id = t2.id +/// ) +/// +/// and optimizes it into: +/// +/// SELECT t1.id +/// FROM t1 LEFT SEMI +/// JOIN t2 +/// ON t1.id = t2.id +/// +/// # Arguments +/// +/// * query_info - The subquery and negated(exists/not exists) info. +/// * outer_input - The non-subquery portion (relation t1) +fn optimize_exists( + query_info: &SubqueryInfo, + outer_input: &LogicalPlan, +) -> Result> { + let subquery = query_info.query.subquery.as_ref(); + if let Some((join_filter, optimized_subquery)) = optimize_subquery(subquery)? 
{ + // join our sub query into the main plan + let join_type = match query_info.negated { + true => JoinType::LeftAnti, + false => JoinType::LeftSemi, + }; + + let new_plan = LogicalPlanBuilder::from(outer_input.clone()) + .join( + optimized_subquery, + join_type, + (Vec::::new(), Vec::::new()), + Some(join_filter), + )? + .build()?; + + Ok(Some(new_plan)) + } else { + Ok(None) + } +} +/// Optimize the subquery and extract the possible join filter. +/// This function can't optimize non-correlated subquery, and will return None. +fn optimize_subquery(subquery: &LogicalPlan) -> Result> { + match subquery { + LogicalPlan::Distinct(subqry_distinct) => { + let distinct_input = &subqry_distinct.input; + let optimized_plan = optimize_subquery(distinct_input)?.map(|(filters, right)| { + ( + filters, + LogicalPlan::Distinct(Distinct { + input: Arc::new(right), + }), + ) + }); + Ok(optimized_plan) + } + LogicalPlan::Projection(projection) => { + // extract join filters + let (join_filters, subquery_input) = extract_join_filters(&projection.input)?; + // cannot optimize non-correlated subquery + if join_filters.is_empty() { + return Ok(None); + } + let input_schema = subquery_input.schema(); + let project_exprs: Vec = + collect_subquery_cols(&join_filters, input_schema.clone())? + .into_iter() + .map(Expr::Column) + .collect(); + let right = LogicalPlanBuilder::from(subquery_input) + .project(project_exprs)? + .build()?; + + // join_filters is not empty. 
+ let join_filter = conjunction(join_filters).ok_or_else(|| { + DataFusionError::Internal("join filters should not be empty".to_string()) + })?; + Ok(Some((join_filter, right))) + } + _ => Ok(None), + } +} + +struct SubqueryInfo { + query: Subquery, + negated: bool, +} + +impl SubqueryInfo { + pub fn new(query: Subquery, negated: bool) -> Self { + Self { query, negated } + } +} diff --git a/src/sql/optimizer/decorrelate_where_in.rs b/src/sql/optimizer/decorrelate_where_in.rs new file mode 100644 index 000000000..014f22092 --- /dev/null +++ b/src/sql/optimizer/decorrelate_where_in.rs @@ -0,0 +1,258 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::sync::Arc; + +use datafusion_python::{ + datafusion_common::{alias::AliasGenerator, context, Column, Result}, + datafusion_expr::{ + expr::InSubquery, + expr_rewriter::unnormalize_col, + logical_plan::{JoinType, Projection, Subquery}, + Expr, + Filter, + LogicalPlan, + LogicalPlanBuilder, + }, + datafusion_optimizer::optimizer::{ApplyOrder, OptimizerConfig, OptimizerRule}, +}; +use log::debug; + +use crate::sql::optimizer::utils::{ + collect_subquery_cols, + conjunction, + extract_join_filters, + only_or_err, + replace_qualified_name, + split_conjunction, +}; + +#[derive(Default)] +pub struct DecorrelateWhereIn { + alias: AliasGenerator, +} + +impl DecorrelateWhereIn { + #[allow(missing_docs)] + pub fn new() -> Self { + Self::default() + } + + /// Finds expressions that have a where in subquery (and recurses when found) + /// + /// # Arguments + /// + /// * `predicate` - A conjunction to split and search + /// * `optimizer_config` - For generating unique subquery aliases + /// + /// Returns a tuple (subqueries, non-subquery expressions) + fn extract_subquery_exprs( + &self, + predicate: &Expr, + config: &dyn OptimizerConfig, + ) -> Result<(Vec, Vec)> { + let filters = split_conjunction(predicate); // TODO: disjunctions + + let mut subqueries = vec![]; + let mut others = vec![]; + for it in filters.iter() { + match it { + Expr::InSubquery(InSubquery { + expr, + subquery, + negated, + }) => { + let subquery_plan = self + .try_optimize(&subquery.subquery, config)? 
+ .map(Arc::new) + .unwrap_or_else(|| subquery.subquery.clone()); + let new_subquery = subquery.with_plan(subquery_plan); + subqueries.push(SubqueryInfo::new(new_subquery, (**expr).clone(), *negated)); + // TODO: if subquery doesn't get optimized, optimized children are lost + } + _ => others.push((*it).clone()), + } + } + + Ok((subqueries, others)) + } +} + +impl OptimizerRule for DecorrelateWhereIn { + fn try_optimize( + &self, + plan: &LogicalPlan, + config: &dyn OptimizerConfig, + ) -> Result> { + match plan { + LogicalPlan::Filter(filter) => { + let (subqueries, other_exprs) = + self.extract_subquery_exprs(&filter.predicate, config)?; + if subqueries.is_empty() { + // regular filter, no subquery exists clause here + return Ok(None); + } + + // iterate through all exists clauses in predicate, turning each into a join + let mut cur_input = filter.input.as_ref().clone(); + for subquery in subqueries { + cur_input = optimize_where_in(&subquery, &cur_input, &self.alias)?; + } + + let expr = conjunction(other_exprs); + if let Some(expr) = expr { + let new_filter = Filter::try_new(expr, Arc::new(cur_input))?; + cur_input = LogicalPlan::Filter(new_filter); + } + + Ok(Some(cur_input)) + } + _ => Ok(None), + } + } + + fn name(&self) -> &str { + "decorrelate_where_in" + } + + fn apply_order(&self) -> Option { + Some(ApplyOrder::TopDown) + } +} + +/// Optimize the where in subquery to left-anti/left-semi join. +/// If the subquery is a correlated subquery, we need extract the join predicate from the subquery. 
+/// +/// For example, given a query like: +/// `select t1.a, t1.b from t1 where t1.a in (select t2.a from t2 where t1.b = t2.b and t1.c > t2.c)` +/// +/// The optimized plan will be: +/// +/// ```text +/// Projection: t1.a, t1.b +/// LeftSemi Join: Filter: t1.a = __correlated_sq_1.a AND t1.b = __correlated_sq_1.b AND t1.c > __correlated_sq_1.c +/// TableScan: t1 +/// SubqueryAlias: __correlated_sq_1 +/// Projection: t2.a AS a, t2.b, t2.c +/// TableScan: t2 +/// ``` +fn optimize_where_in( + query_info: &SubqueryInfo, + left: &LogicalPlan, + alias: &AliasGenerator, +) -> Result { + let projection = Projection::try_from_plan(&query_info.query.subquery) + .map_err(|e| context!("a projection is required", e))?; + let subquery_input = projection.input.clone(); + // TODO add the validate logic to Analyzer + let subquery_expr = only_or_err(projection.expr.as_slice()) + .map_err(|e| context!("single expression projection required", e))?; + + // extract join filters + let (join_filters, subquery_input) = extract_join_filters(subquery_input.as_ref())?; + + // in_predicate may also be included in the join filters; remove it from the join filters. + let in_predicate = Expr::eq(query_info.where_in_expr.clone(), subquery_expr.clone()); + let join_filters = remove_duplicated_filter(join_filters, in_predicate); + + // replace qualified name with subquery alias.
+ let subquery_alias = alias.next("__correlated_sq"); + let input_schema = subquery_input.schema(); + let mut subquery_cols = collect_subquery_cols(&join_filters, input_schema.clone())?; + let join_filter = conjunction(join_filters).map_or(Ok(None), |filter| { + replace_qualified_name(filter, &subquery_cols, &subquery_alias).map(Option::Some) + })?; + + // add projection + if let Expr::Column(col) = subquery_expr { + subquery_cols.remove(col); + } + let subquery_expr_name = format!("{:?}", unnormalize_col(subquery_expr.clone())); + let first_expr = subquery_expr.clone().alias(subquery_expr_name.clone()); + let projection_exprs: Vec = [first_expr] + .into_iter() + .chain(subquery_cols.into_iter().map(Expr::Column)) + .collect(); + + let right = LogicalPlanBuilder::from(subquery_input) + .project(projection_exprs)? + .alias(subquery_alias.clone())? + .build()?; + + // join our sub query into the main plan + let join_type = match query_info.negated { + true => JoinType::LeftAnti, + false => JoinType::LeftSemi, + }; + let right_join_col = Column::new(Some(subquery_alias), subquery_expr_name); + let in_predicate = Expr::eq( + query_info.where_in_expr.clone(), + Expr::Column(right_join_col), + ); + let join_filter = join_filter + .map(|filter| in_predicate.clone().and(filter)) + .unwrap_or_else(|| in_predicate); + + let new_plan = LogicalPlanBuilder::from(left.clone()) + .join( + right, + join_type, + (Vec::::new(), Vec::::new()), + Some(join_filter), + )? 
+ .build()?; + + debug!("where in optimized:\n{}", new_plan.display_indent()); + Ok(new_plan) +} + +fn remove_duplicated_filter(filters: Vec, in_predicate: Expr) -> Vec { + filters + .into_iter() + .filter(|filter| { + if filter == &in_predicate { + return false; + } + + // ignore the binary order + !match (filter, &in_predicate) { + (Expr::BinaryExpr(a_expr), Expr::BinaryExpr(b_expr)) => { + (a_expr.op == b_expr.op) + && (a_expr.left == b_expr.left && a_expr.right == b_expr.right) + || (a_expr.left == b_expr.right && a_expr.right == b_expr.left) + } + _ => false, + } + }) + .collect::>() +} + +struct SubqueryInfo { + query: Subquery, + where_in_expr: Expr, + negated: bool, +} + +impl SubqueryInfo { + pub fn new(query: Subquery, expr: Expr, negated: bool) -> Self { + Self { + query, + where_in_expr: expr, + negated, + } + } +} diff --git a/dask_planner/src/sql/optimizer/dynamic_partition_pruning.rs b/src/sql/optimizer/dynamic_partition_pruning.rs similarity index 98% rename from dask_planner/src/sql/optimizer/dynamic_partition_pruning.rs rename to src/sql/optimizer/dynamic_partition_pruning.rs index 0ff48a682..d7e1a8be5 100644 --- a/dask_planner/src/sql/optimizer/dynamic_partition_pruning.rs +++ b/src/sql/optimizer/dynamic_partition_pruning.rs @@ -22,6 +22,7 @@ use datafusion_python::{ }, datafusion_common::{Column, Result, ScalarValue}, datafusion_expr::{ + expr::InList, logical_plan::LogicalPlan, utils::from_plan, Expr, @@ -433,13 +434,13 @@ fn gather_aliases(plan: &LogicalPlan) -> HashMap { if let LogicalPlan::SubqueryAlias(ref s) = current_plan { match *s.input { LogicalPlan::TableScan(ref t) => { - aliases.insert(s.alias.clone(), t.table_name.to_string().clone()); + aliases.insert(s.alias.to_string(), t.table_name.to_string().clone()); } // Sometimes a TableScan is immediately followed by a Projection, so we can // still use the alias for the table LogicalPlan::Projection(ref p) => { if let LogicalPlan::TableScan(ref t) = *p.input { - 
aliases.insert(s.alias.clone(), t.table_name.to_string().clone()); + aliases.insert(s.alias.to_string(), t.table_name.to_string().clone()); } } _ => (), @@ -536,7 +537,7 @@ fn read_table( .project(projection.clone()) .ok(); if let Some(row_iter) = row_iter_result { - rows.extend(row_iter); + rows.extend(row_iter.map(|r| r.expect("Parquet error encountered"))); } else { // TODO: Investigate cases when this would happen rows.clear(); @@ -781,6 +782,9 @@ fn satisfies_int64(long_value: Option, filter: Expr) -> bool { Expr::Literal(ScalarValue::Int32(i)) => i64::from(i.unwrap()), Expr::Literal(ScalarValue::Float64(i)) => i.unwrap() as i64, Expr::Literal(ScalarValue::TimestampNanosecond(i, None)) => i.unwrap(), + Expr::Literal(ScalarValue::Date32(i)) => i64::from(i.unwrap()), + // TODO: Add logic to check if the string can be converted to a timestamp + Expr::Literal(ScalarValue::Utf8(_)) => return false, _ => { panic!("Unknown ScalarValue type {filter_value}"); } @@ -1053,11 +1057,11 @@ fn format_inlist_expr( if list.is_empty() { None } else { - Some(Expr::InList { + Some(Expr::InList(InList { expr, list, negated: false, - }) + })) } } diff --git a/dask_planner/src/sql/optimizer/join_reorder.rs b/src/sql/optimizer/join_reorder.rs similarity index 100% rename from dask_planner/src/sql/optimizer/join_reorder.rs rename to src/sql/optimizer/join_reorder.rs diff --git a/src/sql/optimizer/utils.rs b/src/sql/optimizer/utils.rs new file mode 100644 index 000000000..f72bbe5c3 --- /dev/null +++ b/src/sql/optimizer/utils.rs @@ -0,0 +1,516 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Collection of utility functions that are leveraged by the query optimizer rules + +use std::{ + collections::{BTreeSet, HashMap}, + sync::Arc, +}; + +use datafusion_python::{ + datafusion_common::{Column, DFSchema, DFSchemaRef, Result}, + datafusion_expr::{ + and, + expr::{Alias, BinaryExpr}, + expr_rewriter::{replace_col, strip_outer_reference}, + logical_plan::{Filter, LogicalPlan}, + Expr, + LogicalPlanBuilder, + Operator, + }, + datafusion_optimizer::optimizer::{OptimizerConfig, OptimizerRule}, +}; +use log::{debug, trace}; + +#[allow(dead_code)] +/// Convenience rule for writing optimizers: recursively invoke +/// optimize on plan's children and then return a node of the same +/// type. Useful for optimizer rules which want to leave the type +/// of plan unchanged but still apply to the children. +/// This also handles the case when the `plan` is a [`LogicalPlan::Explain`]. +/// +/// Returning `Ok(None)` indicates that the plan can't be optimized by the `optimizer`. 
+pub fn optimize_children( + optimizer: &impl OptimizerRule, + plan: &LogicalPlan, + config: &dyn OptimizerConfig, +) -> Result> { + let mut new_inputs = Vec::with_capacity(plan.inputs().len()); + let mut plan_is_changed = false; + for input in plan.inputs() { + let new_input = optimizer.try_optimize(input, config)?; + plan_is_changed = plan_is_changed || new_input.is_some(); + new_inputs.push(new_input.unwrap_or_else(|| input.clone())) + } + if plan_is_changed { + Ok(Some(plan.with_new_inputs(&new_inputs)?)) + } else { + Ok(None) + } +} + +/// Splits a conjunctive [`Expr`] such as `A AND B AND C` => `[A, B, C]` +/// +/// See [`split_conjunction_owned`] for more details and an example. +pub fn split_conjunction(expr: &Expr) -> Vec<&Expr> { + split_conjunction_impl(expr, vec![]) +} + +fn split_conjunction_impl<'a>(expr: &'a Expr, mut exprs: Vec<&'a Expr>) -> Vec<&'a Expr> { + match expr { + Expr::BinaryExpr(BinaryExpr { + right, + op: Operator::And, + left, + }) => { + let exprs = split_conjunction_impl(left, exprs); + split_conjunction_impl(right, exprs) + } + Expr::Alias(Alias { expr, .. }) => split_conjunction_impl(expr, exprs), + other => { + exprs.push(other); + exprs + } + } +} + +/// Extract join predicates from the correlated subquery. +/// The join predicate means that the expression references columns +/// from both the subquery and outer table or only from the outer table. +/// +/// Returns join predicates and subquery(extracted). +pub(crate) fn extract_join_filters(maybe_filter: &LogicalPlan) -> Result<(Vec, LogicalPlan)> { + if let LogicalPlan::Filter(plan_filter) = maybe_filter { + let subquery_filter_exprs = split_conjunction(&plan_filter.predicate); + let (join_filters, subquery_filters) = find_join_exprs(subquery_filter_exprs)?; + // if the subquery still has filter expressions, restore them. + let mut plan = LogicalPlanBuilder::from((*plan_filter.input).clone()); + if let Some(expr) = conjunction(subquery_filters) { + plan = plan.filter(expr)?
+ } + + Ok((join_filters, plan.build()?)) + } else { + Ok((vec![], maybe_filter.clone())) + } +} + +#[allow(dead_code)] +/// Splits an owned conjunctive [`Expr`] such as `A AND B AND C` => `[A, B, C]` +/// +/// This is often used to "split" filter expressions such as `col1 = 5 +/// AND col2 = 10` into [`col1 = 5`, `col2 = 10`]; +/// +/// # Example +/// ``` +/// # use datafusion_python::datafusion_expr::{col, lit}; +/// # use datafusion_python::datafusion_optimizer::utils::split_conjunction_owned; +/// // a=1 AND b=2 +/// let expr = col("a").eq(lit(1)).and(col("b").eq(lit(2))); +/// +/// // [a=1, b=2] +/// let split = vec![ +/// col("a").eq(lit(1)), +/// col("b").eq(lit(2)), +/// ]; +/// +/// // use split_conjunction_owned to split them +/// assert_eq!(split_conjunction_owned(expr), split); +/// ``` +pub fn split_conjunction_owned(expr: Expr) -> Vec { + split_binary_owned(expr, Operator::And) +} + +#[allow(dead_code)] +/// Splits an owned binary operator tree [`Expr`] such as `A B C` => `[A, B, C]` +/// +/// This is often used to "split" expressions such as `col1 = 5 +/// AND col2 = 10` into [`col1 = 5`, `col2 = 10`]; +/// +/// # Example +/// ``` +/// # use datafusion_python::datafusion_expr::{col, lit, Operator}; +/// # use datafusion_python::datafusion_optimizer::utils::split_binary_owned; +/// # use std::ops::Add; +/// // a=1 + b=2 +/// let expr = col("a").eq(lit(1)).add(col("b").eq(lit(2))); +/// +/// // [a=1, b=2] +/// let split = vec![ +/// col("a").eq(lit(1)), +/// col("b").eq(lit(2)), +/// ]; +/// +/// // use split_binary_owned to split them +/// assert_eq!(split_binary_owned(expr, Operator::Plus), split); +/// ``` +pub fn split_binary_owned(expr: Expr, op: Operator) -> Vec { + split_binary_owned_impl(expr, op, vec![]) +} + +#[allow(dead_code)] +fn split_binary_owned_impl(expr: Expr, operator: Operator, mut exprs: Vec) -> Vec { + match expr { + Expr::BinaryExpr(BinaryExpr { right, op, left }) if op == operator => { + let exprs = 
split_binary_owned_impl(*left, operator, exprs); + split_binary_owned_impl(*right, operator, exprs) + } + Expr::Alias(Alias { expr, .. }) => split_binary_owned_impl(*expr, operator, exprs), + other => { + exprs.push(other); + exprs + } + } +} + +#[allow(dead_code)] +/// Splits a binary operator tree [`Expr`] such as `A B C` => `[A, B, C]` +/// +/// See [`split_binary_owned`] for more details and an example. +pub fn split_binary(expr: &Expr, op: Operator) -> Vec<&Expr> { + split_binary_impl(expr, op, vec![]) +} + +#[allow(dead_code)] +fn split_binary_impl<'a>( + expr: &'a Expr, + operator: Operator, + mut exprs: Vec<&'a Expr>, +) -> Vec<&'a Expr> { + match expr { + Expr::BinaryExpr(BinaryExpr { right, op, left }) if *op == operator => { + let exprs = split_binary_impl(left, operator, exprs); + split_binary_impl(right, operator, exprs) + } + Expr::Alias(Alias { expr, .. }) => split_binary_impl(expr, operator, exprs), + other => { + exprs.push(other); + exprs + } + } +} + +/// Combines an array of filter expressions into a single filter +/// expression consisting of the input filter expressions joined with +/// logical AND. +/// +/// Returns None if the filters array is empty. +/// +/// # Example +/// ``` +/// # use datafusion_python::datafusion_expr::{col, lit}; +/// # use datafusion_python::datafusion_optimizer::utils::conjunction; +/// // a=1 AND b=2 +/// let expr = col("a").eq(lit(1)).and(col("b").eq(lit(2))); +/// +/// // [a=1, b=2] +/// let split = vec![ +/// col("a").eq(lit(1)), +/// col("b").eq(lit(2)), +/// ]; +/// +/// // use conjunction to join them together with `AND` +/// assert_eq!(conjunction(split), Some(expr)); +/// ``` +pub fn conjunction(filters: impl IntoIterator) -> Option { + filters.into_iter().reduce(|accum, expr| accum.and(expr)) +} + +#[allow(dead_code)] +/// Combines an array of filter expressions into a single filter +/// expression consisting of the input filter expressions joined with +/// logical OR.
+/// +/// Returns None if the filters array is empty. +pub fn disjunction(filters: impl IntoIterator) -> Option { + filters.into_iter().reduce(|accum, expr| accum.or(expr)) +} + +/// returns a new [LogicalPlan] that wraps `plan` in a [LogicalPlan::Filter] with +/// its predicate be all `predicates` ANDed. +#[allow(dead_code)] +pub fn add_filter(plan: LogicalPlan, predicates: &[&Expr]) -> Result { + // reduce filters to a single filter with an AND + let predicate = predicates + .iter() + .skip(1) + .fold(predicates[0].clone(), |acc, predicate| { + and(acc, (*predicate).to_owned()) + }); + + Ok(LogicalPlan::Filter(Filter::try_new( + predicate, + Arc::new(plan), + )?)) +} + +/// Looks for correlating expressions: for example, a binary expression with one field from the subquery, and +/// one not in the subquery (closed upon from outer scope) +/// +/// # Arguments +/// +/// * `exprs` - List of expressions that may or may not be joins +/// +/// # Return value +/// +/// Tuple of (expressions containing joins, remaining non-join expressions) +pub fn find_join_exprs(exprs: Vec<&Expr>) -> Result<(Vec, Vec)> { + let mut joins = vec![]; + let mut others = vec![]; + for filter in exprs.into_iter() { + // If the expression contains correlated predicates, add it to join filters + if filter.contains_outer() { + if !matches!(filter, Expr::BinaryExpr(BinaryExpr{ left, op: Operator::Eq, right }) if left.eq(right)) + { + joins.push(strip_outer_reference((*filter).clone())); + } + } else { + others.push((*filter).clone()); + } + } + + Ok((joins, others)) +} + +/// Returns the first (and only) element in a slice, or an error +/// +/// # Arguments +/// +/// * `slice` - The slice to extract from +/// +/// # Return value +/// +/// The first element, or an error +pub fn only_or_err(slice: &[T]) -> Result<&T> { + match slice { + [it] => Ok(it), + [] => Err(datafusion_python::datafusion_common::DataFusionError::Plan( + "No items found!".to_owned(), + )), + _ => 
Err(datafusion_python::datafusion_common::DataFusionError::Plan( + "More than one item found!".to_owned(), + )), + } +} + +/// merge inputs schema into a single schema. +#[allow(dead_code)] +pub fn merge_schema(inputs: Vec<&LogicalPlan>) -> DFSchema { + if inputs.len() == 1 { + inputs[0].schema().clone().as_ref().clone() + } else { + inputs + .iter() + .map(|input| input.schema()) + .fold(DFSchema::empty(), |mut lhs, rhs| { + lhs.merge(rhs); + lhs + }) + } +} + +pub(crate) fn collect_subquery_cols( + exprs: &[Expr], + subquery_schema: DFSchemaRef, +) -> Result> { + exprs.iter().try_fold(BTreeSet::new(), |mut cols, expr| { + let mut using_cols: Vec = vec![]; + for col in expr.to_columns()?.into_iter() { + if subquery_schema.has_column(&col) { + using_cols.push(col); + } + } + + cols.extend(using_cols); + Result::<_>::Ok(cols) + }) +} + +pub(crate) fn replace_qualified_name( + expr: Expr, + cols: &BTreeSet, + subquery_alias: &str, +) -> Result { + let alias_cols: Vec = cols + .iter() + .map(|col| Column::from_qualified_name(format!("{}.{}", subquery_alias, col.name))) + .collect(); + let replace_map: HashMap<&Column, &Column> = cols.iter().zip(alias_cols.iter()).collect(); + + replace_col(expr, &replace_map) +} + +#[allow(dead_code)] +/// Log the plan in debug/tracing mode after some part of the optimizer runs +pub fn log_plan(description: &str, plan: &LogicalPlan) { + debug!("{description}:\n{}\n", plan.display_indent()); + trace!("{description}::\n{}\n", plan.display_indent_schema()); +} + +#[cfg(test)] +mod tests { + use std::collections::HashSet; + + use datafusion_python::{ + datafusion::arrow::datatypes::DataType, + datafusion_common::Column, + datafusion_expr::{col, expr::Cast, lit, utils::expr_to_columns}, + }; + + use super::*; + + #[test] + fn test_split_conjunction() { + let expr = col("a"); + let result = split_conjunction(&expr); + assert_eq!(result, vec![&expr]); + } + + #[test] + fn test_split_conjunction_two() { + let expr = 
col("a").eq(lit(5)).and(col("b")); + let expr1 = col("a").eq(lit(5)); + let expr2 = col("b"); + + let result = split_conjunction(&expr); + assert_eq!(result, vec![&expr1, &expr2]); + } + + #[test] + fn test_split_conjunction_alias() { + let expr = col("a").eq(lit(5)).and(col("b").alias("the_alias")); + let expr1 = col("a").eq(lit(5)); + let expr2 = col("b"); // has no alias + + let result = split_conjunction(&expr); + assert_eq!(result, vec![&expr1, &expr2]); + } + + #[test] + fn test_split_conjunction_or() { + let expr = col("a").eq(lit(5)).or(col("b")); + let result = split_conjunction(&expr); + assert_eq!(result, vec![&expr]); + } + + #[test] + fn test_split_binary_owned() { + let expr = col("a"); + assert_eq!(split_binary_owned(expr.clone(), Operator::And), vec![expr]); + } + + #[test] + fn test_split_binary_owned_two() { + assert_eq!( + split_binary_owned(col("a").eq(lit(5)).and(col("b")), Operator::And), + vec![col("a").eq(lit(5)), col("b")] + ); + } + + #[test] + fn test_split_binary_owned_different_op() { + let expr = col("a").eq(lit(5)).or(col("b")); + assert_eq!( + // expr is connected by OR, but pass in AND + split_binary_owned(expr.clone(), Operator::And), + vec![expr] + ); + } + + #[test] + fn test_split_conjunction_owned() { + let expr = col("a"); + assert_eq!(split_conjunction_owned(expr.clone()), vec![expr]); + } + + #[test] + fn test_split_conjunction_owned_two() { + assert_eq!( + split_conjunction_owned(col("a").eq(lit(5)).and(col("b"))), + vec![col("a").eq(lit(5)), col("b")] + ); + } + + #[test] + fn test_split_conjunction_owned_alias() { + assert_eq!( + split_conjunction_owned(col("a").eq(lit(5)).and(col("b").alias("the_alias"))), + vec![ + col("a").eq(lit(5)), + // no alias on b + col("b"), + ] + ); + } + + #[test] + fn test_conjunction_empty() { + assert_eq!(conjunction(vec![]), None); + } + + #[test] + fn test_conjunction() { + // `[A, B, C]` + let expr = conjunction(vec![col("a"), col("b"), col("c")]); + + // --> `(A AND B) AND C` + 
assert_eq!(expr, Some(col("a").and(col("b")).and(col("c")))); + + // which is different than `A AND (B AND C)` + assert_ne!(expr, Some(col("a").and(col("b").and(col("c"))))); + } + + #[test] + fn test_disjunction_empty() { + assert_eq!(disjunction(vec![]), None); + } + + #[test] + fn test_disjunction() { + // `[A, B, C]` + let expr = disjunction(vec![col("a"), col("b"), col("c")]); + + // --> `(A OR B) OR C` + assert_eq!(expr, Some(col("a").or(col("b")).or(col("c")))); + + // which is different than `A OR (B OR C)` + assert_ne!(expr, Some(col("a").or(col("b").or(col("c"))))); + } + + #[test] + fn test_split_conjunction_owned_or() { + let expr = col("a").eq(lit(5)).or(col("b")); + assert_eq!(split_conjunction_owned(expr.clone()), vec![expr]); + } + + #[test] + fn test_collect_expr() -> Result<()> { + let mut accum: HashSet = HashSet::new(); + expr_to_columns( + &Expr::Cast(Cast::new(Box::new(col("a")), DataType::Float64)), + &mut accum, + )?; + expr_to_columns( + &Expr::Cast(Cast::new(Box::new(col("a")), DataType::Float64)), + &mut accum, + )?; + assert_eq!(1, accum.len()); + assert!(accum.contains(&Column::from_name("a"))); + Ok(()) + } +} diff --git a/dask_planner/src/sql/parser_utils.rs b/src/sql/parser_utils.rs similarity index 100% rename from dask_planner/src/sql/parser_utils.rs rename to src/sql/parser_utils.rs diff --git a/dask_planner/src/sql/schema.rs b/src/sql/schema.rs similarity index 95% rename from dask_planner/src/sql/schema.rs rename to src/sql/schema.rs index 0975391f4..804db700f 100644 --- a/dask_planner/src/sql/schema.rs +++ b/src/sql/schema.rs @@ -6,7 +6,7 @@ use pyo3::prelude::*; use super::types::PyDataType; use crate::sql::{function::DaskFunction, table}; -#[pyclass(name = "DaskSchema", module = "dask_planner", subclass)] +#[pyclass(name = "DaskSchema", module = "dask_sql", subclass)] #[derive(Debug, Clone)] pub struct DaskSchema { #[pyo3(get, set)] diff --git a/dask_planner/src/sql/statement.rs b/src/sql/statement.rs similarity index 88% 
rename from dask_planner/src/sql/statement.rs rename to src/sql/statement.rs index f8fabc109..40fc9f268 100644 --- a/dask_planner/src/sql/statement.rs +++ b/src/sql/statement.rs @@ -2,7 +2,7 @@ use pyo3::prelude::*; use crate::parser::DaskStatement; -#[pyclass(name = "Statement", module = "dask_planner", subclass)] +#[pyclass(name = "Statement", module = "dask_sql", subclass)] #[derive(Debug, Clone)] pub struct PyStatement { pub statement: DaskStatement, diff --git a/dask_planner/src/sql/table.rs b/src/sql/table.rs similarity index 97% rename from dask_planner/src/sql/table.rs rename to src/sql/table.rs index abe71733a..1c2585bef 100644 --- a/dask_planner/src/sql/table.rs +++ b/src/sql/table.rs @@ -2,7 +2,7 @@ use std::{any::Any, sync::Arc}; use async_trait::async_trait; use datafusion_python::{ - datafusion::arrow::datatypes::{DataType, Field, SchemaRef}, + datafusion::arrow::datatypes::{DataType, Fields, SchemaRef}, datafusion_common::DFField, datafusion_expr::{Expr, LogicalPlan, TableProviderFilterPushDown, TableSource}, datafusion_optimizer::utils::split_conjunction, @@ -90,7 +90,7 @@ fn is_supported_push_down_expr(_expr: &Expr) -> bool { true } -#[pyclass(name = "DaskStatistics", module = "dask_planner", subclass)] +#[pyclass(name = "DaskStatistics", module = "dask_sql", subclass)] #[derive(Debug, Clone)] pub struct DaskStatistics { row_count: f64, @@ -109,7 +109,7 @@ impl DaskStatistics { } } -#[pyclass(name = "DaskTable", module = "dask_planner", subclass)] +#[pyclass(name = "DaskTable", module = "dask_sql", subclass)] #[derive(Debug, Clone)] pub struct DaskTable { pub(crate) schema_name: Option, @@ -194,7 +194,7 @@ pub(crate) fn table_from_logical_plan( // Get the TableProvider for this Table instance let tbl_provider: Arc = table_scan.source.clone(); let tbl_schema: SchemaRef = tbl_provider.schema(); - let fields: &Vec = tbl_schema.fields(); + let fields: &Fields = tbl_schema.fields(); let mut cols: Vec<(String, DaskTypeMap)> = Vec::new(); for field in 
fields { diff --git a/dask_planner/src/sql/types.rs b/src/sql/types.rs similarity index 95% rename from dask_planner/src/sql/types.rs rename to src/sql/types.rs index ceff904a6..34af22342 100644 --- a/dask_planner/src/sql/types.rs +++ b/src/sql/types.rs @@ -1,6 +1,8 @@ pub mod rel_data_type; pub mod rel_data_type_field; +use std::sync::Arc; + use datafusion_python::{ datafusion::arrow::datatypes::{DataType, IntervalUnit, TimeUnit}, datafusion_sql::sqlparser::{ast::DataType as SQLType, parser::Parser, tokenizer::Tokenizer}, @@ -10,7 +12,7 @@ use pyo3::{prelude::*, types::PyDict}; use crate::{dialect::DaskDialect, error::DaskPlannerError, sql::exceptions::py_type_err}; #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[pyclass(name = "RexType", module = "datafusion")] +#[pyclass(name = "RexType", module = "dask_sql")] pub enum RexType { Alias, Literal, @@ -21,7 +23,7 @@ pub enum RexType { } #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[pyclass(name = "DaskTypeMap", module = "datafusion", subclass)] +#[pyclass(name = "DaskTypeMap", module = "dask_sql", subclass)] /// Represents a Python Data Type. 
This is needed instead of simple /// Enum instances because PyO3 can only support unit variants as /// of version 0.16 which means Enums like `DataType::TIMESTAMP_WITH_LOCAL_TIME_ZONE` @@ -54,10 +56,12 @@ impl DaskTypeMap { SqlTypeName::TIMESTAMP_WITH_LOCAL_TIME_ZONE => { let (unit, tz) = match py_kwargs { Some(dict) => { - let tz: Option = match dict.get_item("tz") { + let tz: Option> = match dict.get_item("tz") { Some(e) => { let res: PyResult = e.extract(); - Some(res.unwrap()) + Some(Arc::from(>::as_ref( + &res.unwrap(), + ))) } None => None, }; @@ -85,10 +89,12 @@ impl DaskTypeMap { SqlTypeName::TIMESTAMP => { let (unit, tz) = match py_kwargs { Some(dict) => { - let tz: Option = match dict.get_item("tz") { + let tz: Option> = match dict.get_item("tz") { Some(e) => { let res: PyResult = e.extract(); - Some(res.unwrap()) + Some(Arc::from(>::as_ref( + &res.unwrap(), + ))) } None => None, }; @@ -161,7 +167,7 @@ impl DaskTypeMap { } #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[pyclass(name = "PyDataType", module = "datafusion", subclass)] +#[pyclass(name = "PyDataType", module = "dask_sql", subclass)] pub struct PyDataType { data_type: DataType, } @@ -204,7 +210,7 @@ impl From for PyDataType { #[allow(non_camel_case_types)] #[allow(clippy::upper_case_acronyms)] #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[pyclass(name = "SqlTypeName", module = "datafusion")] +#[pyclass(name = "SqlTypeName", module = "dask_sql")] pub enum SqlTypeName { ANY, ARRAY, diff --git a/dask_planner/src/sql/types/rel_data_type.rs b/src/sql/types/rel_data_type.rs similarity index 98% rename from dask_planner/src/sql/types/rel_data_type.rs rename to src/sql/types/rel_data_type.rs index 1ae3646b0..59cb0fb7c 100644 --- a/dask_planner/src/sql/types/rel_data_type.rs +++ b/src/sql/types/rel_data_type.rs @@ -8,7 +8,7 @@ const PRECISION_NOT_SPECIFIED: i32 = i32::MIN; const SCALE_NOT_SPECIFIED: i32 = -1; /// RelDataType represents the type of a scalar expression 
or entire row returned from a relational expression. -#[pyclass(name = "RelDataType", module = "dask_planner", subclass)] +#[pyclass(name = "RelDataType", module = "dask_sql", subclass)] #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct RelDataType { nullable: bool, diff --git a/dask_planner/src/sql/types/rel_data_type_field.rs b/src/sql/types/rel_data_type_field.rs similarity index 98% rename from dask_planner/src/sql/types/rel_data_type_field.rs rename to src/sql/types/rel_data_type_field.rs index 13f036d0e..3694d0bce 100644 --- a/dask_planner/src/sql/types/rel_data_type_field.rs +++ b/src/sql/types/rel_data_type_field.rs @@ -12,7 +12,7 @@ use crate::{ }; /// RelDataTypeField represents the definition of a field in a structured RelDataType. -#[pyclass(name = "RelDataTypeField", module = "dask_planner", subclass)] +#[pyclass(name = "RelDataTypeField", module = "dask_sql", subclass)] #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct RelDataTypeField { qualifier: Option, diff --git a/tests/integration/test_join.py b/tests/integration/test_join.py index c46cec101..3f19a3211 100644 --- a/tests/integration/test_join.py +++ b/tests/integration/test_join.py @@ -377,7 +377,7 @@ def test_intersect(c): limit 100 """ ) - assert actual_df["COUNT(UInt8(1))"].compute()[0] == 3 + assert actual_df["COUNT(*)"].compute()[0] == 3 # Join df_simple against itself, and then that result against df_wide. 
Nothing should match so therefore result should be 0 actual_df = c.sql( @@ -392,7 +392,7 @@ def test_intersect(c): limit 100 """ ) - assert len(actual_df["COUNT(UInt8(1))"]) == 0 + assert len(actual_df["COUNT(*)"]) == 0 actual_df = c.sql( """ diff --git a/tests/integration/test_rex.py b/tests/integration/test_rex.py index b49a687d2..e099a3ddb 100644 --- a/tests/integration/test_rex.py +++ b/tests/integration/test_rex.py @@ -407,8 +407,7 @@ def test_coalesce(c, gpu): COALESCE(NULL, 'hi') as c3, COALESCE(NULL, NULL, 'bye', 5/0) as c4, COALESCE(NULL, 3/2, NULL, 'fly') as c5, - COALESCE(SUM(b), 'why', 2.2) as c6, - COALESCE(NULL, MEAN(b), MEAN(a), 4/0) as c7 + COALESCE(NULL, MEAN(b), MEAN(a), 4/0) as c6 FROM df """ ) @@ -419,9 +418,8 @@ def test_coalesce(c, gpu): "c2": [np.nan], "c3": ["hi"], "c4": ["bye"], - "c5": ["1"], - "c6": ["why"], - "c7": [2.0], + "c5": ["1.5"], + "c6": [2.0], } ) diff --git a/tests/integration/test_select.py b/tests/integration/test_select.py index 9c4331d77..53ebdc224 100644 --- a/tests/integration/test_select.py +++ b/tests/integration/test_select.py @@ -272,3 +272,15 @@ def test_multiple_column_projection(c, parquet_ddf, input_cols): "read-parquet", ).columns ) == sorted(input_cols) + + +def test_wildcard_select(c): + result_df = c.sql("SELECT COUNT(*) FROM df") + + expected_df = pd.DataFrame( + { + "COUNT(*)": [700], + } + ) + + assert_eq(result_df, expected_df) diff --git a/tests/unit/test_mapping.py b/tests/unit/test_mapping.py index b49ed1aae..98f065bf8 100644 --- a/tests/unit/test_mapping.py +++ b/tests/unit/test_mapping.py @@ -4,7 +4,7 @@ import pandas as pd import pytest -from dask_planner.rust import SqlTypeName +from dask_sql._datafusion_lib import SqlTypeName from dask_sql.mappings import python_to_sql_type, similar_type, sql_to_python_value diff --git a/tests/unit/test_queries.py b/tests/unit/test_queries.py index 67120df82..bfaedfcee 100644 --- a/tests/unit/test_queries.py +++ b/tests/unit/test_queries.py @@ -4,9 +4,7 @@ 
XFAIL_QUERIES = ( 5, - 6, 8, - 9, 10, 14, 16, @@ -21,13 +19,10 @@ 39, 41, 44, - 45, 47, 49, 51, - 54, 57, - 58, 62, 67, 69,