Skip to content

Commit dabae9b

Browse files
committed
fix add UT
1 parent 5b17cd9 commit dabae9b

File tree

4 files changed

+243
-27
lines changed

4 files changed

+243
-27
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -753,15 +753,18 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
753753
// expects a seq of pairs in which the old parsers' token names are used as keys.
754754
// Transforming the result of visitRowFormatDelimited would be quite a bit messier than
755755
// retrieving the key value pairs ourselves.
756-
def entry(key: String, value: Token): Seq[(String, String)] = {
757-
Option(value).map(t => key -> t.getText).toSeq
758-
}
759-
760756
val entries = entry("TOK_TABLEROWFORMATFIELD", ctx.fieldsTerminatedBy) ++
761757
entry("TOK_TABLEROWFORMATCOLLITEMS", ctx.collectionItemsTerminatedBy) ++
762758
entry("TOK_TABLEROWFORMATMAPKEYS", ctx.keysTerminatedBy) ++
763-
entry("TOK_TABLEROWFORMATLINES", ctx.linesSeparatedBy) ++
764-
entry("TOK_TABLEROWFORMATNULL", ctx.nullDefinedAs)
759+
entry("TOK_TABLEROWFORMATNULL", ctx.nullDefinedAs) ++
760+
Option(ctx.linesSeparatedBy).toSeq.map { token =>
761+
val value = string(token)
762+
validate(
763+
value == "\n",
764+
s"LINES TERMINATED BY only supports newline '\\n' right now: $value",
765+
ctx)
766+
"TOK_TABLEROWFORMATLINES" -> value
767+
}
765768

766769
(entries, None, Seq.empty, None)
767770
}

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1111,16 +1111,16 @@ class PlanParserSuite extends AnalysisTest {
11111111
AttributeReference("c", StringType)()),
11121112
UnresolvedRelation(TableIdentifier("testData")),
11131113
ScriptInputOutputSchema(
1114-
Seq(("TOK_TABLEROWFORMATFIELD", "'\\t'"),
1115-
("TOK_TABLEROWFORMATCOLLITEMS", "'\u0002'"),
1116-
("TOK_TABLEROWFORMATMAPKEYS", "'\u0003'"),
1117-
("TOK_TABLEROWFORMATLINES", "'\\n'"),
1118-
("TOK_TABLEROWFORMATNULL", "'null'")),
1119-
Seq(("TOK_TABLEROWFORMATFIELD", "'\\t'"),
1120-
("TOK_TABLEROWFORMATCOLLITEMS", "'\u0004'"),
1121-
("TOK_TABLEROWFORMATMAPKEYS", "'\u0005'"),
1122-
("TOK_TABLEROWFORMATLINES", "'\\n'"),
1123-
("TOK_TABLEROWFORMATNULL", "'NULL'")), None, None,
1114+
Seq(("TOK_TABLEROWFORMATFIELD", "\t"),
1115+
("TOK_TABLEROWFORMATCOLLITEMS", "\u0002"),
1116+
("TOK_TABLEROWFORMATMAPKEYS", "\u0003"),
1117+
("TOK_TABLEROWFORMATNULL", "null"),
1118+
("TOK_TABLEROWFORMATLINES", "\n")),
1119+
Seq(("TOK_TABLEROWFORMATFIELD", "\t"),
1120+
("TOK_TABLEROWFORMATCOLLITEMS", "\u0004"),
1121+
("TOK_TABLEROWFORMATMAPKEYS", "\u0005"),
1122+
("TOK_TABLEROWFORMATNULL", "NULL"),
1123+
("TOK_TABLEROWFORMATLINES", "\n")), None, None,
11241124
List.empty, List.empty, None, None, false)))
11251125

11261126
// verify with ROW FORMAT SERDE

sql/core/src/test/resources/sql-tests/inputs/transform.sql

Lines changed: 85 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -91,24 +91,105 @@ REDUCE a, b USING 'cat' AS (a, b) FROM t;
9191
-- transform with defined row format delimit
9292
SELECT TRANSFORM(a, b, c, null)
9393
ROW FORMAT DELIMITED
94-
FIELDS TERMINATED BY '|'
94+
FIELDS TERMINATED BY '@'
9595
LINES TERMINATED BY '\n'
9696
NULL DEFINED AS 'NULL'
9797
USING 'cat' AS (a, b, c, d)
9898
ROW FORMAT DELIMITED
99-
FIELDS TERMINATED BY '|'
99+
FIELDS TERMINATED BY '@'
100100
LINES TERMINATED BY '\n'
101101
NULL DEFINED AS 'NULL'
102102
FROM t;
103103

104104
SELECT TRANSFORM(a, b, c, null)
105105
ROW FORMAT DELIMITED
106-
FIELDS TERMINATED BY '|'
106+
FIELDS TERMINATED BY '@'
107107
LINES TERMINATED BY '\n'
108108
NULL DEFINED AS 'NULL'
109109
USING 'cat' AS (d)
110110
ROW FORMAT DELIMITED
111-
FIELDS TERMINATED BY '||'
111+
FIELDS TERMINATED BY '@'
112112
LINES TERMINATED BY '\n'
113113
NULL DEFINED AS 'NULL'
114114
FROM t;
115+
116+
-- transform with ROW FORMAT DELIMITED: output schema declared with the correct types
117+
SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM (
118+
SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l)
119+
ROW FORMAT DELIMITED
120+
FIELDS TERMINATED BY ','
121+
LINES TERMINATED BY '\n'
122+
NULL DEFINED AS 'NULL'
123+
USING 'cat' AS (
124+
a string,
125+
b boolean,
126+
c binary,
127+
d tinyint,
128+
e int,
129+
f smallint,
130+
g long,
131+
h float,
132+
i double,
133+
j decimal(38, 18),
134+
k timestamp,
135+
l date)
136+
ROW FORMAT DELIMITED
137+
FIELDS TERMINATED BY ','
138+
LINES TERMINATED BY '\n'
139+
NULL DEFINED AS 'NULL'
140+
FROM t
141+
) tmp;
142+
143+
-- transform with ROW FORMAT DELIMITED: output schema declared with the wrong types
144+
SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM (
145+
SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l)
146+
ROW FORMAT DELIMITED
147+
FIELDS TERMINATED BY ','
148+
LINES TERMINATED BY '\n'
149+
NULL DEFINED AS 'NULL'
150+
USING 'cat' AS (
151+
a string,
152+
b long,
153+
c binary,
154+
d tinyint,
155+
e int,
156+
f smallint,
157+
g long,
158+
h float,
159+
i double,
160+
j decimal(38, 18),
161+
k int,
162+
l long)
163+
ROW FORMAT DELIMITED
164+
FIELDS TERMINATED BY ','
165+
LINES TERMINATED BY '\n'
166+
NULL DEFINED AS 'NULL'
167+
FROM t
168+
) tmp;
169+
170+
-- transform with ROW FORMAT DELIMITED: LINES TERMINATED BY only supports '\n'
171+
SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM (
172+
SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l)
173+
ROW FORMAT DELIMITED
174+
FIELDS TERMINATED BY ','
175+
LINES TERMINATED BY '@'
176+
NULL DEFINED AS 'NULL'
177+
USING 'cat' AS (
178+
a string,
179+
b string,
180+
c string,
181+
d string,
182+
e string,
183+
f string,
184+
g string,
185+
h string,
186+
i string,
187+
j string,
188+
k string,
189+
l string)
190+
ROW FORMAT DELIMITED
191+
FIELDS TERMINATED BY ','
192+
LINES TERMINATED BY '@'
193+
NULL DEFINED AS 'NULL'
194+
FROM t
195+
) tmp;

sql/core/src/test/resources/sql-tests/results/transform.sql.out

Lines changed: 139 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
-- Automatically generated by SQLQueryTestSuite
2-
-- Number of queries: 15
2+
-- Number of queries: 18
33

44

55
-- !query
@@ -189,31 +189,32 @@ struct<a:string,b:string>
189189
-- !query
190190
SELECT TRANSFORM(a, b, c, null)
191191
ROW FORMAT DELIMITED
192-
FIELDS TERMINATED BY '|'
192+
FIELDS TERMINATED BY '@'
193193
LINES TERMINATED BY '\n'
194194
NULL DEFINED AS 'NULL'
195195
USING 'cat' AS (a, b, c, d)
196196
ROW FORMAT DELIMITED
197-
FIELDS TERMINATED BY '|'
197+
FIELDS TERMINATED BY '@'
198198
LINES TERMINATED BY '\n'
199199
NULL DEFINED AS 'NULL'
200200
FROM t
201201
-- !query schema
202202
struct<a:string,b:string,c:string,d:string>
203203
-- !query output
204-
1 | true |
205-
2 | false |
204+
1 true Spark SQL null
205+
2 false Spark SQL null
206+
3 true Spark SQL null
206207

207208

208209
-- !query
209210
SELECT TRANSFORM(a, b, c, null)
210211
ROW FORMAT DELIMITED
211-
FIELDS TERMINATED BY '|'
212+
FIELDS TERMINATED BY '@'
212213
LINES TERMINATED BY '\n'
213214
NULL DEFINED AS 'NULL'
214215
USING 'cat' AS (d)
215216
ROW FORMAT DELIMITED
216-
FIELDS TERMINATED BY '||'
217+
FIELDS TERMINATED BY '@'
217218
LINES TERMINATED BY '\n'
218219
NULL DEFINED AS 'NULL'
219220
FROM t
@@ -222,3 +223,134 @@ struct<d:string>
222223
-- !query output
223224
1
224225
2
226+
3
227+
228+
229+
-- !query
230+
SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM (
231+
SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l)
232+
ROW FORMAT DELIMITED
233+
FIELDS TERMINATED BY ','
234+
LINES TERMINATED BY '\n'
235+
NULL DEFINED AS 'NULL'
236+
USING 'cat' AS (
237+
a string,
238+
b boolean,
239+
c binary,
240+
d tinyint,
241+
e int,
242+
f smallint,
243+
g long,
244+
h float,
245+
i double,
246+
j decimal(38, 18),
247+
k timestamp,
248+
l date)
249+
ROW FORMAT DELIMITED
250+
FIELDS TERMINATED BY ','
251+
LINES TERMINATED BY '\n'
252+
NULL DEFINED AS 'NULL'
253+
FROM t
254+
) tmp
255+
-- !query schema
256+
struct<a:string,b:boolean,decode(c, UTF-8):string,d:tinyint,e:int,f:smallint,g:bigint,h:float,i:double,j:decimal(38,18),k:timestamp,l:date>
257+
-- !query output
258+
1 true Spark SQL 1 1 100 1 1.0 1.0 1.000000000000000000 1997-01-02 00:00:00 2000-04-01
259+
2 false Spark SQL 2 2 200 2 2.0 2.0 2.000000000000000000 1997-01-02 03:04:05 2000-04-02
260+
3 true Spark SQL 3 3 300 3 3.0 3.0 3.000000000000000000 1997-02-10 17:32:01 2000-04-03
261+
262+
263+
-- !query
264+
SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM (
265+
SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l)
266+
ROW FORMAT DELIMITED
267+
FIELDS TERMINATED BY ','
268+
LINES TERMINATED BY '\n'
269+
NULL DEFINED AS 'NULL'
270+
USING 'cat' AS (
271+
a string,
272+
b long,
273+
c binary,
274+
d tinyint,
275+
e int,
276+
f smallint,
277+
g long,
278+
h float,
279+
i double,
280+
j decimal(38, 18),
281+
k int,
282+
l long)
283+
ROW FORMAT DELIMITED
284+
FIELDS TERMINATED BY ','
285+
LINES TERMINATED BY '\n'
286+
NULL DEFINED AS 'NULL'
287+
FROM t
288+
) tmp
289+
-- !query schema
290+
struct<a:string,b:bigint,decode(c, UTF-8):string,d:tinyint,e:int,f:smallint,g:bigint,h:float,i:double,j:decimal(38,18),k:int,l:bigint>
291+
-- !query output
292+
1 NULL Spark SQL 1 1 100 1 1.0 1.0 1.000000000000000000 NULL NULL
293+
2 NULL Spark SQL 2 2 200 2 2.0 2.0 2.000000000000000000 NULL NULL
294+
3 NULL Spark SQL 3 3 300 3 3.0 3.0 3.000000000000000000 NULL NULL
295+
296+
297+
-- !query
298+
SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM (
299+
SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l)
300+
ROW FORMAT DELIMITED
301+
FIELDS TERMINATED BY ','
302+
LINES TERMINATED BY '@'
303+
NULL DEFINED AS 'NULL'
304+
USING 'cat' AS (
305+
a string,
306+
b string,
307+
c string,
308+
d string,
309+
e string,
310+
f string,
311+
g string,
312+
h string,
313+
i string,
314+
j string,
315+
k string,
316+
l string)
317+
ROW FORMAT DELIMITED
318+
FIELDS TERMINATED BY ','
319+
LINES TERMINATED BY '@'
320+
NULL DEFINED AS 'NULL'
321+
FROM t
322+
) tmp
323+
-- !query schema
324+
struct<>
325+
-- !query output
326+
org.apache.spark.sql.catalyst.parser.ParseException
327+
328+
LINES TERMINATED BY only supports newline '\n' right now: @(line 3, pos 4)
329+
330+
== SQL ==
331+
SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM (
332+
SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l)
333+
ROW FORMAT DELIMITED
334+
----^^^
335+
FIELDS TERMINATED BY ','
336+
LINES TERMINATED BY '@'
337+
NULL DEFINED AS 'NULL'
338+
USING 'cat' AS (
339+
a string,
340+
b string,
341+
c string,
342+
d string,
343+
e string,
344+
f string,
345+
g string,
346+
h string,
347+
i string,
348+
j string,
349+
k string,
350+
l string)
351+
ROW FORMAT DELIMITED
352+
FIELDS TERMINATED BY ','
353+
LINES TERMINATED BY '@'
354+
NULL DEFINED AS 'NULL'
355+
FROM t
356+
) tmp

0 commit comments

Comments
 (0)