Skip to content

Commit fc4b49c

Browse files
cloud-fan authored and dongjoon-hyun committed
[SPARK-31503][SQL] fix the SQL string of the TRIM functions
### What changes were proposed in this pull request?
Override the `sql` method of `StringTrim`, `StringTrimLeft` and `StringTrimRight` to use the standard SQL syntax.

### Why are the changes needed?
The current implementation is wrong: it produces a SQL string that returns a different result.

### Does this PR introduce any user-facing change?
No

### How was this patch tested?
New tests

Closes #28281 from cloud-fan/sql.

Authored-by: Wenchen Fan <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
(cherry picked from commit b209b5f)
Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent c5eac36 commit fc4b49c

4 files changed

Lines changed: 34 additions & 25 deletions

File tree

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala

Lines changed: 15 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -683,11 +683,22 @@ case class FindInSet(left: Expression, right: Expression) extends BinaryExpressi
683683

684684
trait String2TrimExpression extends Expression with ImplicitCastInputTypes {
685685

686+
protected def srcStr: Expression
687+
protected def trimStr: Option[Expression]
688+
protected def direction: String
689+
690+
override def children: Seq[Expression] = srcStr +: trimStr.toSeq
686691
override def dataType: DataType = StringType
687692
override def inputTypes: Seq[AbstractDataType] = Seq.fill(children.size)(StringType)
688693

689694
override def nullable: Boolean = children.exists(_.nullable)
690695
override def foldable: Boolean = children.forall(_.foldable)
696+
697+
override def sql: String = if (trimStr.isDefined) {
698+
s"TRIM($direction ${trimStr.get.sql} FROM ${srcStr.sql})"
699+
} else {
700+
super.sql
701+
}
691702
}
692703

693704
object StringTrim {
@@ -769,11 +780,8 @@ case class StringTrim(
769780

770781
override def prettyName: String = "trim"
771782

772-
override def children: Seq[Expression] = if (trimStr.isDefined) {
773-
srcStr :: trimStr.get :: Nil
774-
} else {
775-
srcStr :: Nil
776-
}
783+
override protected def direction: String = "BOTH"
784+
777785
override def eval(input: InternalRow): Any = {
778786
val srcString = srcStr.eval(input).asInstanceOf[UTF8String]
779787
if (srcString == null) {
@@ -865,11 +873,7 @@ case class StringTrimLeft(
865873

866874
override def prettyName: String = "ltrim"
867875

868-
override def children: Seq[Expression] = if (trimStr.isDefined) {
869-
srcStr :: trimStr.get :: Nil
870-
} else {
871-
srcStr :: Nil
872-
}
876+
override protected def direction: String = "LEADING"
873877

874878
override def eval(input: InternalRow): Any = {
875879
val srcString = srcStr.eval(input).asInstanceOf[UTF8String]
@@ -964,11 +968,7 @@ case class StringTrimRight(
964968

965969
override def prettyName: String = "rtrim"
966970

967-
override def children: Seq[Expression] = if (trimStr.isDefined) {
968-
srcStr :: trimStr.get :: Nil
969-
} else {
970-
srcStr :: Nil
971-
}
971+
override protected def direction: String = "TRAILING"
972972

973973
override def eval(input: InternalRow): Any = {
974974
val srcString = srcStr.eval(input).asInstanceOf[UTF8String]

sql/core/src/test/resources/sql-tests/inputs/string-functions.sql

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,7 @@ SELECT substring('Spark SQL' from -3);
4040
SELECT substring('Spark SQL' from 5 for 1);
4141

4242
-- trim
43+
SELECT trim(" xyz "), ltrim(" xyz "), rtrim(" xyz ");
4344
SELECT trim(BOTH 'xyz' FROM 'yxTomxx'), trim('xyz' FROM 'yxTomxx');
4445
SELECT trim(BOTH 'x' FROM 'xxxbarxxx'), trim('x' FROM 'xxxbarxxx');
4546
SELECT trim(LEADING 'xyz' FROM 'zzzytest');

sql/core/src/test/resources/sql-tests/results/postgreSQL/strings.sql.out

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -977,7 +977,7 @@ struct<repeat(Pg, -4):string>
977977
-- !query
978978
SELECT trim(binary('\\000') from binary('\\000Tom\\000'))
979979
-- !query schema
980-
struct<trim(CAST(CAST(\000Tom\000 AS BINARY) AS STRING), CAST(CAST(\000 AS BINARY) AS STRING)):string>
980+
struct<TRIM(BOTH CAST(CAST(\000 AS BINARY) AS STRING) FROM CAST(CAST(\000Tom\000 AS BINARY) AS STRING)):string>
981981
-- !query output
982982
Tom
983983

sql/core/src/test/resources/sql-tests/results/string-functions.sql.out

Lines changed: 17 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
-- Automatically generated by SQLQueryTestSuite
2-
-- Number of queries: 33
2+
-- Number of queries: 34
33

44

55
-- !query
@@ -204,65 +204,73 @@ struct<substring(Spark SQL, 5, 1):string>
204204
k
205205

206206

207+
-- !query
208+
SELECT trim(" xyz "), ltrim(" xyz "), rtrim(" xyz ")
209+
-- !query schema
210+
struct<trim( xyz ):string,ltrim( xyz ):string,rtrim( xyz ):string>
211+
-- !query output
212+
xyz xyz xyz
213+
214+
207215
-- !query
208216
SELECT trim(BOTH 'xyz' FROM 'yxTomxx'), trim('xyz' FROM 'yxTomxx')
209217
-- !query schema
210-
struct<trim(yxTomxx, xyz):string,trim(yxTomxx, xyz):string>
218+
struct<TRIM(BOTH xyz FROM yxTomxx):string,TRIM(BOTH xyz FROM yxTomxx):string>
211219
-- !query output
212220
Tom Tom
213221

214222

215223
-- !query
216224
SELECT trim(BOTH 'x' FROM 'xxxbarxxx'), trim('x' FROM 'xxxbarxxx')
217225
-- !query schema
218-
struct<trim(xxxbarxxx, x):string,trim(xxxbarxxx, x):string>
226+
struct<TRIM(BOTH x FROM xxxbarxxx):string,TRIM(BOTH x FROM xxxbarxxx):string>
219227
-- !query output
220228
bar bar
221229

222230

223231
-- !query
224232
SELECT trim(LEADING 'xyz' FROM 'zzzytest')
225233
-- !query schema
226-
struct<ltrim(zzzytest, xyz):string>
234+
struct<TRIM(LEADING xyz FROM zzzytest):string>
227235
-- !query output
228236
test
229237

230238

231239
-- !query
232240
SELECT trim(LEADING 'xyz' FROM 'zzzytestxyz')
233241
-- !query schema
234-
struct<ltrim(zzzytestxyz, xyz):string>
242+
struct<TRIM(LEADING xyz FROM zzzytestxyz):string>
235243
-- !query output
236244
testxyz
237245

238246

239247
-- !query
240248
SELECT trim(LEADING 'xy' FROM 'xyxXxyLAST WORD')
241249
-- !query schema
242-
struct<ltrim(xyxXxyLAST WORD, xy):string>
250+
struct<TRIM(LEADING xy FROM xyxXxyLAST WORD):string>
243251
-- !query output
244252
XxyLAST WORD
245253

246254

247255
-- !query
248256
SELECT trim(TRAILING 'xyz' FROM 'testxxzx')
249257
-- !query schema
250-
struct<rtrim(testxxzx, xyz):string>
258+
struct<TRIM(TRAILING xyz FROM testxxzx):string>
251259
-- !query output
252260
test
253261

254262

255263
-- !query
256264
SELECT trim(TRAILING 'xyz' FROM 'xyztestxxzx')
257265
-- !query schema
258-
struct<rtrim(xyztestxxzx, xyz):string>
266+
struct<TRIM(TRAILING xyz FROM xyztestxxzx):string>
259267
-- !query output
260268
xyztest
261269

262270

263271
-- !query
264272
SELECT trim(TRAILING 'xy' FROM 'TURNERyxXxy')
265273
-- !query schema
266-
struct<rtrim(TURNERyxXxy, xy):string>
274+
struct<TRIM(TRAILING xy FROM TURNERyxXxy):string>
267275
-- !query output
268276
TURNERyxX

0 commit comments

Comments
 (0)