From 38c98f8dee0694e94ef496858b984a21ca1dd98f Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 16 Jul 2024 12:14:19 -0400 Subject: [PATCH 01/10] Test + workaround for SanityCheck plan --- .../src/physical_optimizer/sanity_checker.rs | 10 +++++ datafusion/sqllogictest/test_files/union.slt | 37 +++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/datafusion/core/src/physical_optimizer/sanity_checker.rs b/datafusion/core/src/physical_optimizer/sanity_checker.rs index e392105fbcb78..123d61d94a215 100644 --- a/datafusion/core/src/physical_optimizer/sanity_checker.rs +++ b/datafusion/core/src/physical_optimizer/sanity_checker.rs @@ -34,6 +34,8 @@ use datafusion_physical_plan::joins::SymmetricHashJoinExec; use datafusion_physical_plan::{get_plan_string, ExecutionPlanProperties}; use datafusion_physical_optimizer::PhysicalOptimizerRule; +use datafusion_physical_plan::sorts::sort::SortExec; +use datafusion_physical_plan::union::UnionExec; use itertools::izip; /// The SanityCheckPlan rule rejects the following query plans: @@ -125,6 +127,14 @@ pub fn check_plan_sanity( plan.required_input_ordering().iter(), plan.required_input_distribution().iter() ) { + // TEMP HACK WORKAROUND https://github.com/apache/datafusion/issues/11492 + if child.as_any().downcast_ref::().is_some() { + continue; + } + if child.as_any().downcast_ref::().is_some() { + continue; + } + let child_eq_props = child.equivalence_properties(); if let Some(sort_req) = sort_req { if !child_eq_props.ordering_satisfy_requirement(sort_req) { diff --git a/datafusion/sqllogictest/test_files/union.slt b/datafusion/sqllogictest/test_files/union.slt index a3d0ff4383ae6..12eb551ff8760 100644 --- a/datafusion/sqllogictest/test_files/union.slt +++ b/datafusion/sqllogictest/test_files/union.slt @@ -538,6 +538,9 @@ physical_plan # Clean up after the test ######## +statement ok +drop table t + statement ok drop table t1; @@ -761,3 +764,37 @@ SELECT NULL WHERE FALSE; ---- 0.5 1 + +### +# Test for https://github.com/apache/datafusion/issues/11492 +### + +# Input data is +# a,b,c +# 1,2,3 + +statement ok +CREATE EXTERNAL TABLE t ( + a INT, + b INT, + c INT +) +STORED AS CSV +LOCATION '../core/tests/data/example.csv' +WITH ORDER (a ASC) +OPTIONS ('format.has_header' 'true'); + +query T +SELECT (SELECT a from t ORDER BY a) UNION ALL (SELECT 'bar' as a from t) ORDER BY a; +---- +1 +bar + +query I +SELECT (SELECT a from t ORDER BY a) UNION ALL (SELECT NULL as a from t) ORDER BY a; +---- +1 +NULL + +statement ok +drop table t From 76b145e8b77d991029e3fb87b13fba25b5b3a18c Mon Sep 17 00:00:00 2001 From: wiedld Date: Tue, 1 Oct 2024 00:03:30 -1000 Subject: [PATCH 02/10] Provide field and schema metadata missing on distinct aggregations. (#12691) * test(12687): reproducer of missing metadata bug * fix(12687): minimum change needed to fix the missing metadata --- .../physical-plan/src/aggregates/mod.rs | 25 +++++++----- datafusion/physical-plan/src/projection.rs | 2 +- .../sqllogictest/test_files/metadata.slt | 38 +++++++++++++++++++ 3 files changed, 55 insertions(+), 10 deletions(-) diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index c3bc7b042e655..1dc41059a7ee5 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -26,6 +26,7 @@ use crate::aggregates::{ topk_stream::GroupedTopKAggregateStream, }; use crate::metrics::{ExecutionPlanMetricsSet, MetricsSet}; +use crate::projection::get_field_metadata; use crate::windows::get_ordered_partition_by_indices; use crate::{ DisplayFormatType, Distribution, ExecutionPlan, InputOrderMode, @@ -793,14 +794,17 @@ fn create_schema( ) -> Result { let mut fields = Vec::with_capacity(group_expr.len() + aggr_expr.len()); for (index, (expr, name)) in group_expr.iter().enumerate() { - fields.push(Field::new( - name, - expr.data_type(input_schema)?, - // In cases where we have multiple grouping sets, we will use NULL expressions in - // order to align the grouping sets. So the field must be nullable even if the underlying - // schema field is not. - group_expr_nullable[index] || expr.nullable(input_schema)?, - )) + fields.push( + Field::new( + name, + expr.data_type(input_schema)?, + // In cases where we have multiple grouping sets, we will use NULL expressions in + // order to align the grouping sets. So the field must be nullable even if the underlying + // schema field is not. + group_expr_nullable[index] || expr.nullable(input_schema)?, + ) + .with_metadata(get_field_metadata(expr, input_schema).unwrap_or_default()), + ) } match mode { @@ -821,7 +825,10 @@ fn create_schema( } } - Ok(Schema::new(fields)) + Ok(Schema::new_with_metadata( + fields, + input_schema.metadata().clone(), + )) } fn group_schema(schema: &Schema, group_count: usize) -> SchemaRef { diff --git a/datafusion/physical-plan/src/projection.rs b/datafusion/physical-plan/src/projection.rs index f1b9cdaf728ff..4c889d1fc88c4 100644 --- a/datafusion/physical-plan/src/projection.rs +++ b/datafusion/physical-plan/src/projection.rs @@ -237,7 +237,7 @@ impl ExecutionPlan for ProjectionExec { /// If e is a direct column reference, returns the field level /// metadata for that field, if any. Otherwise returns None -fn get_field_metadata( +pub(crate) fn get_field_metadata( e: &Arc, input_schema: &Schema, ) -> Option> { diff --git a/datafusion/sqllogictest/test_files/metadata.slt b/datafusion/sqllogictest/test_files/metadata.slt index 3b2b219244f55..f38281abc5abd 100644 --- a/datafusion/sqllogictest/test_files/metadata.slt +++ b/datafusion/sqllogictest/test_files/metadata.slt @@ -58,5 +58,43 @@ WHERE "data"."id" = "samples"."id"; 1 3 + + +# Regression test: prevent field metadata loss per https://github.com/apache/datafusion/issues/12687 +query I +select count(distinct name) from table_with_metadata; +---- +2 + +# Regression test: prevent field metadata loss per https://github.com/apache/datafusion/issues/12687 +query I +select approx_median(distinct id) from table_with_metadata; +---- +2 + +# Regression test: prevent field metadata loss per https://github.com/apache/datafusion/issues/12687 +statement ok +select array_agg(distinct id) from table_with_metadata; + +query I +select distinct id from table_with_metadata order by id; +---- +1 +3 +NULL + +query I +select count(id) from table_with_metadata; +---- +2 + +query I +select count(id) cnt from table_with_metadata group by name order by cnt; +---- +0 +1 +1 + + statement ok drop table table_with_metadata; From ccdfa9a729dc36123606123185c3d0f37f837b33 Mon Sep 17 00:00:00 2001 From: wiedld Date: Fri, 4 Oct 2024 08:28:12 -0700 Subject: [PATCH 03/10] Provide field and schema metadata missing on cross joins, and union with null fields. (#12729) * test: reproducer for missing schema metadata on cross join * fix: pass thru schema metadata on cross join * fix: preserve metadata when transforming to view types * test: reproducer for missing field metadata in left hand NULL field of union * fix: preserve field metadata from right side of union * chore: safe indexing --- .../core/src/datasource/file_format/mod.rs | 18 +++++------ .../physical-plan/src/joins/cross_join.rs | 13 ++++++-- datafusion/physical-plan/src/union.rs | 11 ++++++- datafusion/sqllogictest/src/test_context.rs | 8 ++++- .../sqllogictest/test_files/metadata.slt | 31 ++++++++++++++++++- 5 files changed, 65 insertions(+), 16 deletions(-) diff --git a/datafusion/core/src/datasource/file_format/mod.rs b/datafusion/core/src/datasource/file_format/mod.rs index a503e36adbeb1..2a4fa1d64ecbc 100644 --- a/datafusion/core/src/datasource/file_format/mod.rs +++ b/datafusion/core/src/datasource/file_format/mod.rs @@ -241,16 +241,14 @@ pub fn transform_schema_to_view(schema: &Schema) -> Schema { .fields .iter() .map(|field| match field.data_type() { - DataType::Utf8 | DataType::LargeUtf8 => Arc::new(Field::new( - field.name(), - DataType::Utf8View, - field.is_nullable(), - )), - DataType::Binary | DataType::LargeBinary => Arc::new(Field::new( - field.name(), - DataType::BinaryView, - field.is_nullable(), - )), + DataType::Utf8 | DataType::LargeUtf8 => Arc::new( + Field::new(field.name(), DataType::Utf8View, field.is_nullable()) + .with_metadata(field.metadata().to_owned()), + ), + DataType::Binary | DataType::LargeBinary => Arc::new( + Field::new(field.name(), DataType::BinaryView, field.is_nullable()) + .with_metadata(field.metadata().to_owned()), + ), _ => field.clone(), }) .collect(); diff --git a/datafusion/physical-plan/src/joins/cross_join.rs b/datafusion/physical-plan/src/joins/cross_join.rs index 11153556f2538..a70645f3d6c0c 100644 --- a/datafusion/physical-plan/src/joins/cross_join.rs +++ b/datafusion/physical-plan/src/joins/cross_join.rs @@ -69,15 +69,22 @@ impl CrossJoinExec { /// Create a new [CrossJoinExec]. pub fn new(left: Arc, right: Arc) -> Self { // left then right - let all_columns: Fields = { + let (all_columns, metadata) = { let left_schema = left.schema(); let right_schema = right.schema(); let left_fields = left_schema.fields().iter(); let right_fields = right_schema.fields().iter(); - left_fields.chain(right_fields).cloned().collect() + + let mut metadata = left_schema.metadata().clone(); + metadata.extend(right_schema.metadata().clone()); + + ( + left_fields.chain(right_fields).cloned().collect::(), + metadata, + ) }; - let schema = Arc::new(Schema::new(all_columns)); + let schema = Arc::new(Schema::new(all_columns).with_metadata(metadata)); let cache = Self::compute_properties(&left, &right, Arc::clone(&schema)); CrossJoinExec { left, diff --git a/datafusion/physical-plan/src/union.rs b/datafusion/physical-plan/src/union.rs index 78b25686054d8..1cf22060b62ab 100644 --- a/datafusion/physical-plan/src/union.rs +++ b/datafusion/physical-plan/src/union.rs @@ -474,7 +474,16 @@ fn union_schema(inputs: &[Arc]) -> SchemaRef { .iter() .filter_map(|input| { if input.schema().fields().len() > i { - Some(input.schema().field(i).clone()) + let field = input.schema().field(i).clone(); + let right_hand_metdata = inputs + .get(1) + .map(|right_input| { + right_input.schema().field(i).metadata().clone() + }) + .unwrap_or_default(); + let mut metadata = field.metadata().clone(); + metadata.extend(right_hand_metdata); + Some(field.with_metadata(metadata)) } else { None } diff --git a/datafusion/sqllogictest/src/test_context.rs b/datafusion/sqllogictest/src/test_context.rs index 19016d328f4cf..76ded12ef1561 100644 --- a/datafusion/sqllogictest/src/test_context.rs +++ b/datafusion/sqllogictest/src/test_context.rs @@ -313,8 +313,13 @@ pub async fn register_metadata_tables(ctx: &SessionContext) { String::from("metadata_key"), String::from("the name field"), )])); + let l_name = + Field::new("l_name", DataType::Utf8, true).with_metadata(HashMap::from([( + String::from("metadata_key"), + String::from("the l_name field"), + )])); - let schema = Schema::new(vec![id, name]).with_metadata(HashMap::from([( + let schema = Schema::new(vec![id, name, l_name]).with_metadata(HashMap::from([( String::from("metadata_key"), String::from("the entire schema"), )])); @@ -324,6 +329,7 @@ pub async fn register_metadata_tables(ctx: &SessionContext) { vec![ Arc::new(Int32Array::from(vec![Some(1), None, Some(3)])) as _, Arc::new(StringArray::from(vec![None, Some("bar"), Some("baz")])) as _, + Arc::new(StringArray::from(vec![None, Some("l_bar"), Some("l_baz")])) as _, ], ) .unwrap(); diff --git a/datafusion/sqllogictest/test_files/metadata.slt b/datafusion/sqllogictest/test_files/metadata.slt index f38281abc5abd..d0853b9e4983b 100644 --- a/datafusion/sqllogictest/test_files/metadata.slt +++ b/datafusion/sqllogictest/test_files/metadata.slt @@ -25,7 +25,7 @@ ## with metadata in SQL. query IT -select * from table_with_metadata; +select id, name from table_with_metadata; ---- 1 NULL NULL bar @@ -96,5 +96,34 @@ select count(id) cnt from table_with_metadata group by name order by cnt; 1 + +# Regression test: missing schema metadata, when aggregate on cross join +query I +SELECT count("data"."id") +FROM + ( + SELECT "id" FROM "table_with_metadata" + ) as "data", + ( + SELECT "id" FROM "table_with_metadata" + ) as "samples"; +---- +6 + +# Regression test: missing field metadata, from the NULL field on the left side of the union +query ITT +(SELECT id, NULL::string as name, l_name FROM "table_with_metadata") + UNION +(SELECT id, name, NULL::string as l_name FROM "table_with_metadata") +ORDER BY id, name, l_name; +---- +1 NULL NULL +3 baz NULL +3 NULL l_baz +NULL bar NULL +NULL NULL l_bar + + + statement ok drop table table_with_metadata; From add65db69b8697cc7455765e201255fd6369e0f4 Mon Sep 17 00:00:00 2001 From: wiedld Date: Wed, 9 Oct 2024 09:59:06 -0700 Subject: [PATCH 04/10] fix(iox-11401): temporary patch to permit count col be nullable --- datafusion/functions-aggregate/src/count.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/functions-aggregate/src/count.rs b/datafusion/functions-aggregate/src/count.rs index 417e28e72a71f..10233b2505281 100644 --- a/datafusion/functions-aggregate/src/count.rs +++ b/datafusion/functions-aggregate/src/count.rs @@ -122,7 +122,7 @@ impl AggregateUDFImpl for Count { } fn is_nullable(&self) -> bool { - false + true } fn state_fields(&self, args: StateFieldsArgs) -> Result> { From a166cd0cbd435891495be0bb60e06d384a707014 Mon Sep 17 00:00:00 2001 From: wiedld Date: Fri, 11 Oct 2024 12:30:18 -0700 Subject: [PATCH 05/10] fix: when extracting metadata from expr, handle CastExpr --- datafusion/physical-plan/src/projection.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/datafusion/physical-plan/src/projection.rs b/datafusion/physical-plan/src/projection.rs index 4c889d1fc88c4..0f4d06130d8aa 100644 --- a/datafusion/physical-plan/src/projection.rs +++ b/datafusion/physical-plan/src/projection.rs @@ -40,7 +40,7 @@ use datafusion_common::stats::Precision; use datafusion_common::Result; use datafusion_execution::TaskContext; use datafusion_physical_expr::equivalence::ProjectionMapping; -use datafusion_physical_expr::expressions::Literal; +use datafusion_physical_expr::expressions::{CastExpr, Literal}; use futures::stream::{Stream, StreamExt}; use log::trace; @@ -241,6 +241,10 @@ pub(crate) fn get_field_metadata( e: &Arc, input_schema: &Schema, ) -> Option> { + if let Some(cast) = e.as_any().downcast_ref::() { + return get_field_metadata(cast.expr(), input_schema); + } + // Look up field by index in schema (not NAME as there can be more than one // column with the same name) e.as_any() From bfbaf0acf2816f89465fe9783e191d9f8e920e9f Mon Sep 17 00:00:00 2001 From: wiedld Date: Fri, 11 Oct 2024 12:31:34 -0700 Subject: [PATCH 06/10] fix: handle when the left side of the union has no fields (e.g. an empty projection) --- datafusion/physical-plan/src/union.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/datafusion/physical-plan/src/union.rs b/datafusion/physical-plan/src/union.rs index 1cf22060b62ab..d6eab89f2fdd5 100644 --- a/datafusion/physical-plan/src/union.rs +++ b/datafusion/physical-plan/src/union.rs @@ -468,7 +468,13 @@ pub fn can_interleave>>( } fn union_schema(inputs: &[Arc]) -> SchemaRef { - let fields: Vec = (0..inputs[0].schema().fields().len()) + let fields: Vec = (0..std::cmp::max( + inputs[0].schema().fields().len(), + inputs + .get(1) + .map(|l| l.schema().fields().len()) + .unwrap_or_default(), + )) .map(|i| { inputs .iter() From dd3b48bd5d96d142c033d6b2aa4d1e1cb6fda7ec Mon Sep 17 00:00:00 2001 From: wiedld Date: Fri, 11 Oct 2024 12:33:06 -0700 Subject: [PATCH 07/10] fix: because either the left or right fields may be chosen, add metadata from both to each other --- datafusion/physical-plan/src/union.rs | 29 +++++++++++++-------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/datafusion/physical-plan/src/union.rs b/datafusion/physical-plan/src/union.rs index d6eab89f2fdd5..8956478643840 100644 --- a/datafusion/physical-plan/src/union.rs +++ b/datafusion/physical-plan/src/union.rs @@ -478,21 +478,20 @@ fn union_schema(inputs: &[Arc]) -> SchemaRef { .map(|i| { inputs .iter() - .filter_map(|input| { - if input.schema().fields().len() > i { - let field = input.schema().field(i).clone(); - let right_hand_metdata = inputs - .get(1) - .map(|right_input| { - right_input.schema().field(i).metadata().clone() - }) - .unwrap_or_default(); - let mut metadata = field.metadata().clone(); - metadata.extend(right_hand_metdata); - Some(field.with_metadata(metadata)) - } else { - None - } + .enumerate() + .filter_map(|(input_idx, input)| { + let field = input.schema().field(i).clone(); + let mut metadata = field.metadata().clone(); + + let other_side_metdata = inputs + .get(input_idx ^ (1 << 0)) + .map(|other_input| { + other_input.schema().field(i).metadata().clone() + }) + .unwrap_or_default(); + + metadata.extend(other_side_metdata); + Some(field.with_metadata(metadata)) }) .find_or_first(|f| f.is_nullable()) .unwrap() From e9723c93b3d6099be132d13c218743e5ffc50a80 Mon Sep 17 00:00:00 2001 From: wiedld Date: Tue, 15 Oct 2024 10:19:41 -0700 Subject: [PATCH 08/10] fix: now() UDF is not nullable --- datafusion/functions/src/datetime/now.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/datafusion/functions/src/datetime/now.rs b/datafusion/functions/src/datetime/now.rs index b2221215b94b7..74eb5aea4255c 100644 --- a/datafusion/functions/src/datetime/now.rs +++ b/datafusion/functions/src/datetime/now.rs @@ -21,7 +21,7 @@ use arrow::datatypes::DataType; use arrow::datatypes::DataType::Timestamp; use arrow::datatypes::TimeUnit::Nanosecond; -use datafusion_common::{internal_err, Result, ScalarValue}; +use datafusion_common::{internal_err, ExprSchema, Result, ScalarValue}; use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; use datafusion_expr::{ColumnarValue, Expr, ScalarUDFImpl, Signature, Volatility}; @@ -84,4 +84,8 @@ impl ScalarUDFImpl for NowFunc { ScalarValue::TimestampNanosecond(now_ts, Some("+00:00".into())), ))) } + + fn is_nullable(&self, _args: &[Expr], _schema: &dyn ExprSchema) -> bool { + false + } } From 78b910bb1bccd1e598183083bfde962c8be47534 Mon Sep 17 00:00:00 2001 From: wiedld Date: Tue, 15 Oct 2024 12:51:48 -0700 Subject: [PATCH 09/10] chore: temp patch to convert errors to logged warnings, for schema mismatch in aggregates --- datafusion/core/src/physical_planner.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 84d285fc2509d..6e4de9f60bd1c 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -673,7 +673,7 @@ impl DefaultPhysicalPlanner { logical_input_schema.as_ref().clone().into(); if physical_input_schema != physical_input_schema_from_logical { - return internal_err!("Physical input schema should be the same as the one converted from logical input schema."); + log::warn!("Physical input schema should be the same as the one converted from logical input schema, but did not match for logical plan:\n{}", input.display_indent()); } let groups = self.create_grouping_physical_expr( From c27d5f2356a21ee6224c149ee971c89c2cc13e18 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 30 Oct 2024 12:08:29 -0400 Subject: [PATCH 10/10] Apply projection to `Statistics` in `FilterExec` --- datafusion/common/src/stats.rs | 20 ++++++++ datafusion/physical-plan/src/filter.rs | 7 ++- .../sqllogictest/test_files/parquet.slt | 48 +++++++++++++++++++ 3 files changed, 74 insertions(+), 1 deletion(-) diff --git a/datafusion/common/src/stats.rs b/datafusion/common/src/stats.rs index d8e62b3045f93..1c774a95d0e8b 100644 --- a/datafusion/common/src/stats.rs +++ b/datafusion/common/src/stats.rs @@ -258,6 +258,26 @@ impl Statistics { self } + /// Project the statistics to the given column indices. + /// + /// For example, if we had statistics for columns `{"a", "b", "c"}`, + /// projecting to `vec![2, 1]` would return statistics for columns `{"c", + /// "b"}`. + pub fn project(mut self, projection: Option<&Vec>) -> Self { + let Some(projection) = projection else { + return self; + }; + + // todo: it would be nice to avoid cloning column statistics if + // possible (e.g. if the projection did not contain duplicates) + self.column_statistics = projection + .iter() + .map(|&i| self.column_statistics[i].clone()) + .collect(); + + self + } + /// Calculates the statistics after `fetch` and `skip` operations apply. /// Here, `self` denotes per-partition statistics. Use the `n_partitions` /// parameter to compute global statistics in a multi-partition setting. diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs index 3da0f21156d94..31cf29d01d1d1 100644 --- a/datafusion/physical-plan/src/filter.rs +++ b/datafusion/physical-plan/src/filter.rs @@ -370,7 +370,12 @@ impl ExecutionPlan for FilterExec { /// The output statistics of a filtering operation can be estimated if the /// predicate's selectivity value can be determined for the incoming data. fn statistics(&self) -> Result { - Self::statistics_helper(&self.input, self.predicate(), self.default_selectivity) + let stats = Self::statistics_helper( + &self.input, + self.predicate(), + self.default_selectivity, + )?; + Ok(stats.project(self.projection.as_ref())) } } diff --git a/datafusion/sqllogictest/test_files/parquet.slt b/datafusion/sqllogictest/test_files/parquet.slt index f8b163adc7967..031eb9f0ff389 100644 --- a/datafusion/sqllogictest/test_files/parquet.slt +++ b/datafusion/sqllogictest/test_files/parquet.slt @@ -348,3 +348,51 @@ DROP TABLE list_columns; # Clean up statement ok DROP TABLE listing_table; + +## Tests for https://github.com/apache/datafusion/issues/13186 +statement ok +create table cpu (time timestamp, usage_idle float, usage_user float, cpu int); + +statement ok +insert into cpu values ('1970-01-01 00:00:00', 1.0, 2.0, 3); + +# must put it into a parquet file to get statistics +statement ok +copy (select * from cpu) to 'test_files/scratch/parquet/cpu.parquet'; + +# Run queries against parquet files +statement ok +create external table cpu_parquet +stored as parquet +location 'test_files/scratch/parquet/cpu.parquet'; + +# Double filtering +# +# Expect 1 row for both queries +query PI +select time, rn +from ( + select time, row_number() OVER (ORDER BY usage_idle, time) as rn + from cpu + where cpu = 3 +) where rn > 0; +---- +1970-01-01T00:00:00 1 + +query PI +select time, rn +from ( + select time, row_number() OVER (ORDER BY usage_idle, time) as rn + from cpu_parquet + where cpu = 3 +) where rn > 0; +---- +1970-01-01T00:00:00 1 + + +# Clean up +statement ok +drop table cpu; + +statement ok +drop table cpu_parquet;