From 6cccc5074571beeb4be35a7a7f5ae60a5f3b9abf Mon Sep 17 00:00:00 2001 From: Jonathan Date: Tue, 2 Sep 2025 18:23:00 -0400 Subject: [PATCH 1/3] fix: Remove duplicate filter from `CrossJoin` unparsing --- Cargo.lock | 2 ++ datafusion/sql/Cargo.toml | 2 ++ datafusion/sql/src/unparser/plan.rs | 7 ---- datafusion/sql/tests/cases/plan_to_sql.rs | 44 +++++++++++++++++++++++ 4 files changed, 48 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2344ddf99b9cc..4fd8e9143218d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2654,6 +2654,7 @@ dependencies = [ "arrow", "bigdecimal", "ctor", + "datafusion", "datafusion-common", "datafusion-expr", "datafusion-functions", @@ -2669,6 +2670,7 @@ dependencies = [ "regex", "rstest", "sqlparser", + "tokio", ] [[package]] diff --git a/datafusion/sql/Cargo.toml b/datafusion/sql/Cargo.toml index eca40c553280b..9fd8ae86cf703 100644 --- a/datafusion/sql/Cargo.toml +++ b/datafusion/sql/Cargo.toml @@ -61,6 +61,7 @@ sqlparser = { workspace = true } [dev-dependencies] ctor = { workspace = true } # please do not move these dependencies to the main dependencies section +datafusion = { workspace = true } datafusion-functions = { workspace = true, default-features = true } datafusion-functions-aggregate = { workspace = true } datafusion-functions-nested = { workspace = true } @@ -69,3 +70,4 @@ env_logger = { workspace = true } insta = { workspace = true } paste = "^1.0" rstest = { workspace = true } +tokio = { workspace = true } diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index befea3fe28c18..3826ef9feab28 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -696,13 +696,6 @@ impl Unparser<'_> { join_filters.as_ref(), )?; - self.select_to_sql_recursively( - right_plan.as_ref(), - query, - select, - &mut right_relation, - )?; - let right_projection: Option> = if !already_projected { Some(select.pop_projections()) diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index a35836420c6e2..4b82195208ebb 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -16,6 +16,8 @@ // under the License. use arrow::datatypes::{DataType, Field, Schema}; +use datafusion::catalog::MemTable; +use datafusion::prelude::SessionContext; use datafusion_common::{ assert_contains, Column, DFSchema, DFSchemaRef, DataFusionError, Result, TableReference, @@ -2659,3 +2661,45 @@ fn test_struct_expr3() { @r#"SELECT test.c1."metadata".product."name" FROM (SELECT {"metadata": {product: {"name": 'Product Name'}}} AS c1) AS test"# ); } + +// Issue #17359: https://github.com/apache/datafusion/issues/17359 +#[tokio::test] +async fn unparse_cross_join() -> Result<()> { + let ctx = SessionContext::new(); + + let j1_schema = Arc::new(Schema::new(vec![ + Field::new("j1_id", DataType::Int32, true), + Field::new("j1_string", DataType::Utf8, true), + ])); + let j2_schema = Arc::new(Schema::new(vec![ + Field::new("j2_id", DataType::Int32, true), + Field::new("j2_string", DataType::Utf8, true), + ])); + + ctx.register_table("j1", Arc::new(MemTable::try_new(j1_schema, vec![vec![]])?))?; + ctx.register_table("j2", Arc::new(MemTable::try_new(j2_schema, vec![vec![]])?))?; + + let df = ctx + .sql( + r#" + select j1.j1_id, j2.j2_string + from j1, j2 + where j2.j2_id = 0 + "#, + ) + .await?; + + let unopt_sql = plan_to_sql(df.logical_plan())?; + assert_snapshot!(unopt_sql, @r#" + SELECT j1.j1_id, j2.j2_string FROM j1 CROSS JOIN j2 WHERE (j2.j2_id = 0) + "#); + + let optimized_plan = df.into_optimized_plan()?; + + let opt_sql = plan_to_sql(&optimized_plan)?; + assert_snapshot!(opt_sql, @r#" + SELECT j1.j1_id, j2.j2_string FROM j1 CROSS JOIN j2 WHERE (j2.j2_id = 0) + "#); + + Ok(()) +} From 73a07c79f90b36492fd623b5130967f95fdae552 Mon Sep 17 00:00:00 2001 From: Jonathan Date: Tue, 2 Sep 2025 19:21:06 -0400 Subject: [PATCH 2/3] move test --- Cargo.lock | 2 - datafusion/core/tests/sql/joins.rs | 43 ++++++++++++++++++++++ datafusion/sql/Cargo.toml | 2 - datafusion/sql/tests/cases/plan_to_sql.rs | 45 +---------------------- 4 files changed, 44 insertions(+), 48 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1c1576359738e..dc54296fbae9c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2654,7 +2654,6 @@ dependencies = [ "arrow", "bigdecimal", "ctor", - "datafusion", "datafusion-common", "datafusion-expr", "datafusion-functions", @@ -2670,7 +2669,6 @@ dependencies = [ "regex", "rstest", "sqlparser", - "tokio", ] [[package]] diff --git a/datafusion/core/tests/sql/joins.rs b/datafusion/core/tests/sql/joins.rs index fbe7e3a00f542..c5ff8002990d5 100644 --- a/datafusion/core/tests/sql/joins.rs +++ b/datafusion/core/tests/sql/joins.rs @@ -20,6 +20,7 @@ use insta::assert_snapshot; use datafusion::assert_batches_eq; use datafusion::datasource::stream::{FileStreamProvider, StreamConfig, StreamTable}; use datafusion::test_util::register_unbounded_file_with_ordering; +use datafusion_sql::unparser::plan_to_sql; use super::*; @@ -264,3 +265,45 @@ async fn join_using_uppercase_column() -> Result<()> { Ok(()) } + +// Issue #17359: https://github.com/apache/datafusion/issues/17359 +#[tokio::test] +async fn unparse_cross_join() -> Result<()> { + let ctx = SessionContext::new(); + + let j1_schema = Arc::new(Schema::new(vec![ + Field::new("j1_id", DataType::Int32, true), + Field::new("j1_string", DataType::Utf8, true), + ])); + let j2_schema = Arc::new(Schema::new(vec![ + Field::new("j2_id", DataType::Int32, true), + Field::new("j2_string", DataType::Utf8, true), + ])); + + ctx.register_table("j1", Arc::new(MemTable::try_new(j1_schema, vec![vec![]])?))?; + ctx.register_table("j2", Arc::new(MemTable::try_new(j2_schema, vec![vec![]])?))?; + + let df = ctx + .sql( + r#" + select j1.j1_id, j2.j2_string + from j1, j2 + where j2.j2_id = 0 + "#, + ) + .await?; + + let unopt_sql = plan_to_sql(df.logical_plan())?; + assert_snapshot!(unopt_sql, @r#" + SELECT j1.j1_id, j2.j2_string FROM j1 CROSS JOIN j2 WHERE (j2.j2_id = 0) + "#); + + let optimized_plan = df.into_optimized_plan()?; + + let opt_sql = plan_to_sql(&optimized_plan)?; + assert_snapshot!(opt_sql, @r#" + SELECT j1.j1_id, j2.j2_string FROM j1 CROSS JOIN j2 WHERE (j2.j2_id = 0) + "#); + + Ok(()) +} diff --git a/datafusion/sql/Cargo.toml b/datafusion/sql/Cargo.toml index 9fd8ae86cf703..eca40c553280b 100644 --- a/datafusion/sql/Cargo.toml +++ b/datafusion/sql/Cargo.toml @@ -61,7 +61,6 @@ sqlparser = { workspace = true } [dev-dependencies] ctor = { workspace = true } # please do not move these dependencies to the main dependencies section -datafusion = { workspace = true } datafusion-functions = { workspace = true, default-features = true } datafusion-functions-aggregate = { workspace = true } datafusion-functions-nested = { workspace = true } @@ -70,4 +69,3 @@ env_logger = { workspace = true } insta = { workspace = true } paste = "^1.0" rstest = { workspace = true } -tokio = { workspace = true } diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index 4b82195208ebb..349933b8ec340 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -16,8 +16,7 @@ // under the License. use arrow::datatypes::{DataType, Field, Schema}; -use datafusion::catalog::MemTable; -use datafusion::prelude::SessionContext; + use datafusion_common::{ assert_contains, Column, DFSchema, DFSchemaRef, DataFusionError, Result, TableReference, @@ -2661,45 +2660,3 @@ fn test_struct_expr3() { @r#"SELECT test.c1."metadata".product."name" FROM (SELECT {"metadata": {product: {"name": 'Product Name'}}} AS c1) AS test"# ); } - -// Issue #17359: https://github.com/apache/datafusion/issues/17359 -#[tokio::test] -async fn unparse_cross_join() -> Result<()> { - let ctx = SessionContext::new(); - - let j1_schema = Arc::new(Schema::new(vec![ - Field::new("j1_id", DataType::Int32, true), - Field::new("j1_string", DataType::Utf8, true), - ])); - let j2_schema = Arc::new(Schema::new(vec![ - Field::new("j2_id", DataType::Int32, true), - Field::new("j2_string", DataType::Utf8, true), - ])); - - ctx.register_table("j1", Arc::new(MemTable::try_new(j1_schema, vec![vec![]])?))?; - ctx.register_table("j2", Arc::new(MemTable::try_new(j2_schema, vec![vec![]])?))?; - - let df = ctx - .sql( - r#" - select j1.j1_id, j2.j2_string - from j1, j2 - where j2.j2_id = 0 - "#, - ) - .await?; - - let unopt_sql = plan_to_sql(df.logical_plan())?; - assert_snapshot!(unopt_sql, @r#" - SELECT j1.j1_id, j2.j2_string FROM j1 CROSS JOIN j2 WHERE (j2.j2_id = 0) - "#); - - let optimized_plan = df.into_optimized_plan()?; - - let opt_sql = plan_to_sql(&optimized_plan)?; - assert_snapshot!(opt_sql, @r#" - SELECT j1.j1_id, j2.j2_string FROM j1 CROSS JOIN j2 WHERE (j2.j2_id = 0) - "#); - - Ok(()) -} From 982f6447ff6c0df8b4521e8228cc9737eeb155ae Mon Sep 17 00:00:00 2001 From: Jonathan Date: Tue, 2 Sep 2025 19:21:21 -0400 Subject: [PATCH 3/3] add --- datafusion/core/tests/sql/joins.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/datafusion/core/tests/sql/joins.rs b/datafusion/core/tests/sql/joins.rs index c5ff8002990d5..7a59834475920 100644 --- a/datafusion/core/tests/sql/joins.rs +++ b/datafusion/core/tests/sql/joins.rs @@ -18,6 +18,7 @@ use insta::assert_snapshot; use datafusion::assert_batches_eq; +use datafusion::catalog::MemTable; use datafusion::datasource::stream::{FileStreamProvider, StreamConfig, StreamTable}; use datafusion::test_util::register_unbounded_file_with_ordering; use datafusion_sql::unparser::plan_to_sql;