Skip to content

Commit 40353fe

Browse files
jonahgao and alamb authored
GROUP-BY prioritizes input columns in case of ambiguity (#9228)
* GROUP-BY prioritizes input columns in case of ambiguity
* Update datafusion/sqllogictest/test_files/aggregate.slt
  Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
* Update datafusion/sqllogictest/test_files/aggregate.slt
  Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
---------
Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
1 parent e4f4031 commit 40353fe

2 files changed

Lines changed: 25 additions & 2 deletions

File tree

datafusion/sql/src/select.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,11 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
8888

8989
// having and group by clause may reference aliases defined in select projection
9090
let projected_plan = self.project(base_plan.clone(), select_exprs.clone())?;
91-
let mut combined_schema = (**projected_plan.schema()).clone();
92-
combined_schema.merge(base_plan.schema());
91+
// Place the fields of the base plan at the front so that when there are references
92+
// with the same name, the fields of the base plan will be searched first.
93+
// See https://github.com/apache/arrow-datafusion/issues/9162
94+
let mut combined_schema = base_plan.schema().as_ref().clone();
95+
combined_schema.merge(projected_plan.schema());
9396

9497
// this alias map is resolved and looked up in both having exprs and group by exprs
9598
let alias_map = extract_aliases(&select_exprs);

datafusion/sqllogictest/test_files/aggregate.slt

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3179,3 +3179,23 @@ NULL
31793179
statement ok
31803180
DROP TABLE t;
31813181

3182+
3183+
# Test for the case when the column name is ambiguous
3184+
statement ok
3185+
CREATE TABLE t(a BIGINT) AS VALUES(1), (2), (3);
3186+
3187+
# The column name referenced by GROUP-BY is ambiguous, prefer the column in base plan
3188+
query I
3189+
SELECT 0 as "t.a" FROM t GROUP BY t.a;
3190+
----
3191+
0
3192+
0
3193+
0
3194+
3195+
# The column name referenced by HAVING is ambiguous, prefer the column in the base plan
3196+
query I
3197+
SELECT 0 AS "t.a" FROM t HAVING MAX(t.a) = 0;
3198+
----
3199+
3200+
statement ok
3201+
DROP TABLE t;

0 commit comments

Comments (0)