|
18 | 18 | use std::any::Any; |
19 | 19 | use std::fmt::{Display, Formatter}; |
20 | 20 | use std::ops::Deref; |
| 21 | + |
21 | 22 | use std::sync::Arc; |
22 | 23 | use std::vec; |
23 | 24 |
|
@@ -1736,3 +1737,46 @@ async fn roundtrip_physical_plan_node() { |
1736 | 1737 |
|
1737 | 1738 | let _ = plan.execute(0, ctx.task_ctx()).unwrap(); |
1738 | 1739 | } |
| 1740 | + |
| 1741 | +// Failing due to https://github.com/apache/datafusion/pull/16662 |
| 1742 | +#[ignore] |
| 1743 | +#[tokio::test] |
| 1744 | +async fn test_tpch_part_in_list_query_with_real_parquet_data() -> Result<()> { |
| 1745 | + // Test the specific query: SELECT p_size FROM part WHERE p_size IN (14, 6, 5, 31) |
| 1746 | + // |
| 1747 | + // NOTE: This test uses a minimal subset of TPC-H part.parquet data (tpch_part_small.parquet) |
| 1748 | + // which contains only 20 rows with p_size values in [14, 6, 5, 31] to reproduce the bug. |
| 1749 | + // Using alltypes_plain.parquet does NOT reproduce the issue, suggesting the bug |
| 1750 | + // is specific to certain characteristics of TPC-H parquet files or their schema. |
| 1751 | + |
| 1752 | + use datafusion_common::test_util::datafusion_test_data; |
| 1753 | + |
| 1754 | + let ctx = SessionContext::new(); |
| 1755 | + |
| 1756 | + // Register the TPC-H part table using the local test data |
| 1757 | + let test_data = datafusion_test_data(); |
| 1758 | + let table_sql = format!( |
| 1759 | + "CREATE EXTERNAL TABLE part STORED AS PARQUET LOCATION '{test_data}/tpch_part_small.parquet'" |
| 1760 | + |
| 1761 | + ); |
| 1762 | + ctx.sql(&table_sql).await.map_err(|e| { |
| 1763 | + DataFusionError::External(format!("Failed to create part table: {e}").into()) |
| 1764 | + })?; |
| 1765 | + |
| 1766 | + // Test the exact problematic query |
| 1767 | + let sql = "SELECT p_size FROM part WHERE p_size IN (14, 6, 5, 31)"; |
| 1768 | + |
| 1769 | + let logical_plan = ctx.sql(sql).await?.into_unoptimized_plan(); |
| 1770 | + let optimized_plan = ctx.state().optimize(&logical_plan)?; |
| 1771 | + let physical_plan = ctx.state().create_physical_plan(&optimized_plan).await?; |
| 1772 | + |
| 1773 | + // Serialize the physical plan - bug may happen here already but not necessarily manifests |
| 1774 | + let codec = DefaultPhysicalExtensionCodec {}; |
| 1775 | + let proto = PhysicalPlanNode::try_from_physical_plan(physical_plan.clone(), &codec)?; |
| 1776 | + |
| 1777 | + // This will fail with the bug, but should succeed when fixed |
| 1778 | + let _deserialized_plan = |
| 1779 | + proto.try_into_physical_plan(&ctx, ctx.runtime_env().as_ref(), &codec)?; |
| 1780 | + |
| 1781 | + Ok(()) |
| 1782 | +} |
0 commit comments