[SPARK-28391][PYTHON][SQL][TESTS][FOLLOW-UP] Add UDF cases into groupby clause in 'pgSQL/select_implicit.sql' #25350

Udbhav30 · 2019-08-04T17:30:00Z

What changes were proposed in this pull request?

This PR adds UDF test cases to the GROUP BY clauses of the queries in 'pgSQL/select_implicit.sql'.

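Diff of the new 'udf/pgSQL/udf-select_implicit.sql.out' results against the existing 'pgSQL/select_implicit.sql.out' results (lines prefixed with '-' are the UDF version; lines prefixed with '+' are the original):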

diff --git a/home/root1/src/spark/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-select_implicit.sql.out b/home/root1/src/spark/sql/core/src/test/resources/sql-tests/results/pgSQL/select_implicit.sql.out
index 17303b2..0675820 100755
--- a/home/root1/src/spark/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-select_implicit.sql.out
+++ b/home/root1/src/spark/sql/core/src/test/resources/sql-tests/results/pgSQL/select_implicit.sql.out
@@ -91,11 +91,9 @@ struct<>
 
 
 -- !query 11
-SELECT udf(c), udf(count(*)) FROM test_missing_target GROUP BY
-udf(test_missing_target.c)
-ORDER BY udf(c)
+SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c
 -- !query 11 schema
-struct<CAST(udf(cast(c as string)) AS STRING):string,CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
+struct<c:string,count(1):bigint>
 -- !query 11 output
 ABAB	2
 BBBB	2
@@ -106,10 +104,9 @@ cccc	2
 
 
 -- !query 12
-SELECT udf(count(*)) FROM test_missing_target GROUP BY udf(test_missing_target.c)
-ORDER BY udf(c)
+SELECT count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c
 -- !query 12 schema
-struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
+struct<count(1):bigint>
 -- !query 12 output
 2
 2
@@ -120,18 +117,18 @@ struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
 
 
 -- !query 13
-SELECT udf(count(*)) FROM test_missing_target GROUP BY udf(a) ORDER BY udf(b)
+SELECT count(*) FROM test_missing_target GROUP BY a ORDER BY b
 -- !query 13 schema
 struct<>
 -- !query 13 output
 org.apache.spark.sql.AnalysisException
-cannot resolve '`b`' given input columns: [CAST(udf(cast(count(1) as string)) AS BIGINT)]; line 1 pos 75
+cannot resolve '`b`' given input columns: [count(1)]; line 1 pos 61
 
 
 -- !query 14
-SELECT udf(count(*)) FROM test_missing_target GROUP BY udf(b) ORDER BY udf(b)
+SELECT count(*) FROM test_missing_target GROUP BY b ORDER BY b
 -- !query 14 schema
-struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
+struct<count(1):bigint>
 -- !query 14 output
 1
 2
@@ -140,10 +137,10 @@ struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
 
 
 -- !query 15
-SELECT udf(test_missing_target.b), udf(count(*))
-  FROM test_missing_target GROUP BY udf(b) ORDER BY udf(b)
+SELECT test_missing_target.b, count(*)
+  FROM test_missing_target GROUP BY b ORDER BY b
 -- !query 15 schema
-struct<CAST(udf(cast(b as string)) AS INT):int,CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
+struct<b:int,count(1):bigint>
 -- !query 15 output
 1	1
 2	2
@@ -152,9 +149,9 @@ struct<CAST(udf(cast(b as string)) AS INT):int,CAST(udf(cast(count(1) as string)
 
 
 -- !query 16
-SELECT udf(c) FROM test_missing_target ORDER BY udf(a)
+SELECT c FROM test_missing_target ORDER BY a
 -- !query 16 schema
-struct<CAST(udf(cast(c as string)) AS STRING):string>
+struct<c:string>
 -- !query 16 output
 XXXX
 ABAB
@@ -169,10 +166,9 @@ CCCC
 
 
 -- !query 17
-SELECT udf(count(*)) FROM test_missing_target GROUP BY udf(b) ORDER BY udf(b)
-desc
+SELECT count(*) FROM test_missing_target GROUP BY b ORDER BY b desc
 -- !query 17 schema
-struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
+struct<count(1):bigint>
 -- !query 17 output
 4
 3
@@ -181,17 +177,17 @@ struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
 
 
 -- !query 18
-SELECT udf(count(*)) FROM test_missing_target ORDER BY udf(1) desc
+SELECT count(*) FROM test_missing_target ORDER BY 1 desc
 -- !query 18 schema
-struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
+struct<count(1):bigint>
 -- !query 18 output
 10
 
 
 -- !query 19
-SELECT udf(c), udf(count(*)) FROM test_missing_target GROUP BY 1 ORDER BY 1
+SELECT c, count(*) FROM test_missing_target GROUP BY 1 ORDER BY 1
 -- !query 19 schema
-struct<CAST(udf(cast(c as string)) AS STRING):string,CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
+struct<c:string,count(1):bigint>
 -- !query 19 output
 ABAB	2
 BBBB	2
@@ -202,30 +198,30 @@ cccc	2
 
 
 -- !query 20
-SELECT udf(c), udf(count(*)) FROM test_missing_target GROUP BY 3
+SELECT c, count(*) FROM test_missing_target GROUP BY 3
 -- !query 20 schema
 struct<>
 -- !query 20 output
 org.apache.spark.sql.AnalysisException
-GROUP BY position 3 is not in select list (valid range is [1, 2]); line 1 pos 63
+GROUP BY position 3 is not in select list (valid range is [1, 2]); line 1 pos 53
 
 
 -- !query 21
-SELECT udf(count(*)) FROM test_missing_target x, test_missing_target y
-	WHERE udf(x.a) = udf(y.a)
-	GROUP BY udf(b) ORDER BY udf(b)
+SELECT count(*) FROM test_missing_target x, test_missing_target y
+	WHERE x.a = y.a
+	GROUP BY b ORDER BY b
 -- !query 21 schema
 struct<>
 -- !query 21 output
 org.apache.spark.sql.AnalysisException
-Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 14
+Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 10
 
 
 -- !query 22
-SELECT udf(a), udf(a) FROM test_missing_target
-	ORDER BY udf(a)
+SELECT a, a FROM test_missing_target
+	ORDER BY a
 -- !query 22 schema
-struct<CAST(udf(cast(a as string)) AS INT):int,CAST(udf(cast(a as string)) AS INT):int>
+struct<a:int,a:int>
 -- !query 22 output
 0	0
 1	1
@@ -240,10 +236,10 @@ struct<CAST(udf(cast(a as string)) AS INT):int,CAST(udf(cast(a as string)) AS IN
 
 
 -- !query 23
-SELECT udf(udf(a)/2), udf(udf(a)/2) FROM test_missing_target
-	ORDER BY udf(udf(a)/2)
+SELECT a/2, a/2 FROM test_missing_target
+	ORDER BY a/2
 -- !query 23 schema
-struct<CAST(udf(cast((cast(udf(cast(a as string)) as int) div 2) as string)) AS INT):int,CAST(udf(cast((cast(udf(cast(a as string)) as int) div 2) as string)) AS INT):int>
+struct<(a div 2):int,(a div 2):int>
 -- !query 23 output
 0	0
 0	0
@@ -258,10 +254,10 @@ struct<CAST(udf(cast((cast(udf(cast(a as string)) as int) div 2) as string)) AS
 
 
 -- !query 24
-SELECT udf(a/2), udf(a/2) FROM test_missing_target
-	GROUP BY udf(a/2) ORDER BY udf(a/2)
+SELECT a/2, a/2 FROM test_missing_target
+	GROUP BY a/2 ORDER BY a/2
 -- !query 24 schema
-struct<CAST(udf(cast((a div 2) as string)) AS INT):int,CAST(udf(cast((a div 2) as string)) AS INT):int>
+struct<(a div 2):int,(a div 2):int>
 -- !query 24 output
 0	0
 1	1
@@ -271,11 +267,11 @@ struct<CAST(udf(cast((a div 2) as string)) AS INT):int,CAST(udf(cast((a div 2) a
 
 
 -- !query 25
-SELECT udf(x.b), udf(count(*)) FROM test_missing_target x, test_missing_target y
-	WHERE udf(x.a) = udf(y.a)
-	GROUP BY udf(x.b) ORDER BY udf(x.b)
+SELECT x.b, count(*) FROM test_missing_target x, test_missing_target y
+	WHERE x.a = y.a
+	GROUP BY x.b ORDER BY x.b
 -- !query 25 schema
-struct<CAST(udf(cast(b as string)) AS INT):int,CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
+struct<b:int,count(1):bigint>
 -- !query 25 output
 1	1
 2	2
@@ -284,11 +280,11 @@ struct<CAST(udf(cast(b as string)) AS INT):int,CAST(udf(cast(count(1) as string)
 
 
 -- !query 26
-SELECT udf(count(*)) FROM test_missing_target x, test_missing_target y
-	WHERE udf(x.a) = udf(y.a)
-	GROUP BY udf(x.b) ORDER BY udf(x.b)
+SELECT count(*) FROM test_missing_target x, test_missing_target y
+	WHERE x.a = y.a
+	GROUP BY x.b ORDER BY x.b
 -- !query 26 schema
-struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
+struct<count(1):bigint>
 -- !query 26 output
 1
 2
@@ -297,22 +293,22 @@ struct<CAST(udf(cast(count(1) as string)) AS BIGINT):bigint>
 
 
 -- !query 27
-SELECT udf(a%2), udf(count(udf(b))) FROM test_missing_target
-GROUP BY udf(test_missing_target.a%2)
-ORDER BY udf(test_missing_target.a%2)
+SELECT a%2, count(b) FROM test_missing_target
+GROUP BY test_missing_target.a%2
+ORDER BY test_missing_target.a%2
 -- !query 27 schema
-struct<CAST(udf(cast((a % 2) as string)) AS INT):int,CAST(udf(cast(count(cast(udf(cast(b as string)) as int)) as string)) AS BIGINT):bigint>
+struct<(a % 2):int,count(b):bigint>
 -- !query 27 output
 0	5
 1	5
 
 
 -- !query 28
-SELECT udf(count(c)) FROM test_missing_target
-GROUP BY udf(lower(test_missing_target.c))
-ORDER BY udf(lower(test_missing_target.c))
+SELECT count(c) FROM test_missing_target
+GROUP BY lower(test_missing_target.c)
+ORDER BY lower(test_missing_target.c)
 -- !query 28 schema
-struct<CAST(udf(cast(count(c) as string)) AS BIGINT):bigint>
+struct<count(c):bigint>
 -- !query 28 output
 2
 3
@@ -321,18 +317,18 @@ struct<CAST(udf(cast(count(c) as string)) AS BIGINT):bigint>
 
 
 -- !query 29
-SELECT udf(count(udf(a))) FROM test_missing_target GROUP BY udf(a) ORDER BY udf(b)
+SELECT count(a) FROM test_missing_target GROUP BY a ORDER BY b
 -- !query 29 schema
 struct<>
 -- !query 29 output
 org.apache.spark.sql.AnalysisException
-cannot resolve '`b`' given input columns: [CAST(udf(cast(count(cast(udf(cast(a as string)) as int)) as string)) AS BIGINT)]; line 1 pos 80
+cannot resolve '`b`' given input columns: [count(a)]; line 1 pos 61
 
 
 -- !query 30
-SELECT udf(count(b)) FROM test_missing_target GROUP BY udf(b/2) ORDER BY udf(b/2)
+SELECT count(b) FROM test_missing_target GROUP BY b/2 ORDER BY b/2
 -- !query 30 schema
-struct<CAST(udf(cast(count(b) as string)) AS BIGINT):bigint>
+struct<count(b):bigint>
 -- !query 30 output
 1
 5
@@ -340,10 +336,10 @@ struct<CAST(udf(cast(count(b) as string)) AS BIGINT):bigint>
 
 
 -- !query 31
-SELECT udf(lower(test_missing_target.c)), udf(count(udf(c)))
-  FROM test_missing_target GROUP BY udf(lower(c)) ORDER BY udf(lower(c))
+SELECT lower(test_missing_target.c), count(c)
+  FROM test_missing_target GROUP BY lower(c) ORDER BY lower(c)
 -- !query 31 schema
-struct<CAST(udf(cast(lower(c) as string)) AS STRING):string,CAST(udf(cast(count(cast(udf(cast(c as string)) as string)) as string)) AS BIGINT):bigint>
+struct<lower(c):string,count(c):bigint>
 -- !query 31 output
 abab	2
 bbbb	3
@@ -352,9 +348,9 @@ xxxx	1
 
 
 -- !query 32
-SELECT udf(a) FROM test_missing_target ORDER BY udf(upper(udf(d)))
+SELECT a FROM test_missing_target ORDER BY upper(d)
 -- !query 32 schema
-struct<CAST(udf(cast(a as string)) AS INT):int>
+struct<a:int>
 -- !query 32 output
 0
 1
@@ -369,33 +365,32 @@ struct<CAST(udf(cast(a as string)) AS INT):int>
 
 
 -- !query 33
-SELECT udf(count(b)) FROM test_missing_target
-	GROUP BY udf((b + 1) / 2) ORDER BY udf((b + 1) / 2) desc
+SELECT count(b) FROM test_missing_target
+	GROUP BY (b + 1) / 2 ORDER BY (b + 1) / 2 desc
 -- !query 33 schema
-struct<CAST(udf(cast(count(b) as string)) AS BIGINT):bigint>
+struct<count(b):bigint>
 -- !query 33 output
 7
 3
 
 
 -- !query 34
-SELECT udf(count(udf(x.a))) FROM test_missing_target x, test_missing_target y
-	WHERE udf(x.a) = udf(y.a)
-	GROUP BY udf(b/2) ORDER BY udf(b/2)
+SELECT count(x.a) FROM test_missing_target x, test_missing_target y
+	WHERE x.a = y.a
+	GROUP BY b/2 ORDER BY b/2
 -- !query 34 schema
 struct<>
 -- !query 34 output
 org.apache.spark.sql.AnalysisException
-Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 14
+Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 10
 
 
 -- !query 35
-SELECT udf(x.b/2), udf(count(udf(x.b))) FROM test_missing_target x,
-test_missing_target y
-	WHERE udf(x.a) = udf(y.a)
-	GROUP BY udf(x.b/2) ORDER BY udf(x.b/2)
+SELECT x.b/2, count(x.b) FROM test_missing_target x, test_missing_target y
+	WHERE x.a = y.a
+	GROUP BY x.b/2 ORDER BY x.b/2
 -- !query 35 schema
-struct<CAST(udf(cast((b div 2) as string)) AS INT):int,CAST(udf(cast(count(cast(udf(cast(b as string)) as int)) as string)) AS BIGINT):bigint>
+struct<(b div 2):int,count(b):bigint>
 -- !query 35 output
 0	1
 1	5
@@ -403,14 +398,14 @@ struct<CAST(udf(cast((b div 2) as string)) AS INT):int,CAST(udf(cast(count(cast(
 
 
 -- !query 36
-SELECT udf(count(udf(b))) FROM test_missing_target x, test_missing_target y
-	WHERE udf(x.a) = udf(y.a)
-	GROUP BY udf(x.b/2)
+SELECT count(b) FROM test_missing_target x, test_missing_target y
+	WHERE x.a = y.a
+	GROUP BY x.b/2
 -- !query 36 schema
 struct<>
 -- !query 36 output
 org.apache.spark.sql.AnalysisException
-Reference 'b' is ambiguous, could be: x.b, y.b.; line 1 pos 21
+Reference 'b' is ambiguous, could be: x.b, y.b.; line 1 pos 13
 
 
 -- !query 37

How was this patch tested?

Tested as guided in SPARK-27921.

… by clause in 'pgSQL/select_implicit.sql'

Udbhav30 · 2019-08-04T17:31:03Z

cc @HyukjinKwon

HyukjinKwon · 2019-08-04T23:50:39Z

ok to test

SparkQA · 2019-08-05T02:17:27Z

Test build #108633 has finished for PR 25350 at commit 344513a.

This patch fails Spark unit tests.
This patch merges cleanly.
This patch adds no public classes.

HyukjinKwon · 2019-08-05T10:55:41Z

@Udbhav30, it seems the tests failed. Could you please fix them?

Udbhav30 · 2019-08-05T17:09:57Z

@HyukjinKwon done!

SparkQA · 2019-08-05T20:35:39Z

Test build #108675 has finished for PR 25350 at commit 62c6727.

This patch passes all tests.
This patch merges cleanly.
This patch adds no public classes.

HyukjinKwon · 2019-08-06T06:13:59Z

Thanks, @Udbhav30.

Merged to master.

[SPARK-28391][PYTHON][SQL][TESTS][FOLLOW-UP] Add UDF cases into group…

13a17e5

… by clause in 'pgSQL/select_implicit.sql'

removed TODO comments

344513a

HyukjinKwon changed the title ~~[SPARK-28391][PYTHON][SQL][TESTS][FOLLOW-UP] Add UDF cases into group…~~ [SPARK-28391][PYTHON][SQL][TESTS][FOLLOW-UP] Add UDF cases into groupby clause in 'pgSQL/select_implicit.sql' Aug 5, 2019

dongjoon-hyun added the SQL label Aug 5, 2019

testcase correction

62c6727

HyukjinKwon closed this in 150dbc5 Aug 6, 2019

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[SPARK-28391][PYTHON][SQL][TESTS][FOLLOW-UP] Add UDF cases into groupby clause in 'pgSQL/select_implicit.sql' #25350

[SPARK-28391][PYTHON][SQL][TESTS][FOLLOW-UP] Add UDF cases into groupby clause in 'pgSQL/select_implicit.sql' #25350

Uh oh!

Udbhav30 commented Aug 4, 2019 •

edited by HyukjinKwon

Loading

Uh oh!

Udbhav30 commented Aug 4, 2019

Uh oh!

HyukjinKwon commented Aug 4, 2019

Uh oh!

SparkQA commented Aug 5, 2019

Uh oh!

HyukjinKwon commented Aug 5, 2019

Uh oh!

Udbhav30 commented Aug 5, 2019

Uh oh!

SparkQA commented Aug 5, 2019

Uh oh!

HyukjinKwon commented Aug 6, 2019

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants

[SPARK-28391][PYTHON][SQL][TESTS][FOLLOW-UP] Add UDF cases into groupby clause in 'pgSQL/select_implicit.sql' #25350

[SPARK-28391][PYTHON][SQL][TESTS][FOLLOW-UP] Add UDF cases into groupby clause in 'pgSQL/select_implicit.sql' #25350

Uh oh!

Conversation

Udbhav30 commented Aug 4, 2019 • edited by HyukjinKwon Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

What changes were proposed in this pull request?

How was this patch tested?

Uh oh!

Udbhav30 commented Aug 4, 2019

Uh oh!

HyukjinKwon commented Aug 4, 2019

Uh oh!

SparkQA commented Aug 5, 2019

Uh oh!

HyukjinKwon commented Aug 5, 2019

Uh oh!

Udbhav30 commented Aug 5, 2019

Uh oh!

SparkQA commented Aug 5, 2019

Uh oh!

HyukjinKwon commented Aug 6, 2019

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants

Udbhav30 commented Aug 4, 2019 •

edited by HyukjinKwon

Loading