Skip to content

Commit e7d5344

Browse files
turboFeimaropu
authored andcommitted
[SPARK-33100][SQL][3.0] Ignore a semicolon inside a bracketed comment in spark-sql
### What changes were proposed in this pull request? Now the spark-sql does not support parse the sql statements with bracketed comments. For the sql statements: ``` /* SELECT 'test'; */ SELECT 'test'; ``` Would be split to two statements: The first one: `/* SELECT 'test'` The second one: `*/ SELECT 'test'` Then it would throw an exception because the first one is illegal. In this PR, we ignore the content in bracketed comments while splitting the sql statements. Besides, we ignore the comment without any content. NOTE: This backport comes from #29982 ### Why are the changes needed? Spark-sql might split the statements inside bracketed comments and it is not correct. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added UT. Closes #31033 from turboFei/SPARK-33100. Authored-by: fwang12 <fwang12@ebay.com> Signed-off-by: Takeshi Yamamuro <yamamuro@apache.org>
1 parent c9c3d6f commit e7d5344

2 files changed

Lines changed: 65 additions & 8 deletions

File tree

sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala

Lines changed: 42 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -518,15 +518,32 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
518518
// Note: [SPARK-31595] if there is a `'` in a double quoted string, or a `"` in a single quoted
519519
// string, the origin implementation from Hive will not drop the trailing semicolon as expected,
520520
// hence we refined this function a little bit.
521+
// Note: [SPARK-33100] Ignore a semicolon inside a bracketed comment in spark-sql.
521522
private def splitSemiColon(line: String): JList[String] = {
522523
var insideSingleQuote = false
523524
var insideDoubleQuote = false
524-
var insideComment = false
525+
var insideSimpleComment = false
526+
var bracketedCommentLevel = 0
525527
var escape = false
526528
var beginIndex = 0
529+
var leavingBracketedComment = false
530+
var isStatement = false
527531
val ret = new JArrayList[String]
528532

533+
def insideBracketedComment: Boolean = bracketedCommentLevel > 0
534+
def insideComment: Boolean = insideSimpleComment || insideBracketedComment
535+
def statementInProgress(index: Int): Boolean = isStatement || (!insideComment &&
536+
index > beginIndex && !s"${line.charAt(index)}".trim.isEmpty)
537+
529538
for (index <- 0 until line.length) {
539+
// Checks if we need to decrement a bracketed comment level; the last character '/' of
540+
// bracketed comments is still inside the comment, so `insideBracketedComment` must keep true
541+
// in the previous loop and we decrement the level here if needed.
542+
if (leavingBracketedComment) {
543+
bracketedCommentLevel -= 1
544+
leavingBracketedComment = false
545+
}
546+
530547
if (line.charAt(index) == '\'' && !insideComment) {
531548
// take a look to see if it is escaped
532549
// See the comment above about SPARK-31595
@@ -549,21 +566,34 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
549566
// Sample query: select "quoted value --"
550567
// ^^ avoids starting a comment if it's inside quotes.
551568
} else if (hasNext && line.charAt(index + 1) == '-') {
552-
// ignore quotes and ;
553-
insideComment = true
569+
// ignore quotes and ; in simple comment
570+
insideSimpleComment = true
554571
}
555572
} else if (line.charAt(index) == ';') {
556573
if (insideSingleQuote || insideDoubleQuote || insideComment) {
557574
// do not split
558575
} else {
559-
// split, do not include ; itself
560-
ret.add(line.substring(beginIndex, index))
576+
if (isStatement) {
577+
// split, do not include ; itself
578+
ret.add(line.substring(beginIndex, index))
579+
}
561580
beginIndex = index + 1
581+
isStatement = false
562582
}
563583
} else if (line.charAt(index) == '\n') {
564-
// with a new line the inline comment should end.
584+
// with a new line the inline simple comment should end.
565585
if (!escape) {
566-
insideComment = false
586+
insideSimpleComment = false
587+
}
588+
} else if (line.charAt(index) == '/' && !insideSimpleComment) {
589+
val hasNext = index + 1 < line.length
590+
if (insideSingleQuote || insideDoubleQuote) {
591+
// Ignores '/' in any case of quotes
592+
} else if (insideBracketedComment && line.charAt(index - 1) == '*' ) {
593+
// Decrements `bracketedCommentLevel` at the beginning of the next loop
594+
leavingBracketedComment = true
595+
} else if (hasNext && !insideBracketedComment && line.charAt(index + 1) == '*') {
596+
bracketedCommentLevel += 1
567597
}
568598
}
569599
// set the escape
@@ -572,8 +602,12 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
572602
} else if (line.charAt(index) == '\\') {
573603
escape = true
574604
}
605+
606+
isStatement = statementInProgress(index)
607+
}
608+
if (isStatement) {
609+
ret.add(line.substring(beginIndex))
575610
}
576-
ret.add(line.substring(beginIndex))
577611
ret
578612
}
579613
}

sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -551,4 +551,27 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging {
551551
errorResponses = Seq("AnalysisException"))(
552552
("", "Error in query: The second argument of 'date_sub' function needs to be an integer."))
553553
}
554+
555+
test("SPARK-33100: Ignore a semicolon inside a bracketed comment in spark-sql") {
556+
runCliWithin(4.minute)(
557+
"/* SELECT 'test';*/ SELECT 'test';" -> "test",
558+
";;/* SELECT 'test';*/ SELECT 'test';" -> "test",
559+
"/* SELECT 'test';*/;; SELECT 'test';" -> "test",
560+
"SELECT 'test'; -- SELECT 'test';" -> "test",
561+
"SELECT 'test'; /* SELECT 'test';*/;" -> "test",
562+
"/*$meta chars{^\\;}*/ SELECT 'test';" -> "test",
563+
"/*\nmulti-line\n*/ SELECT 'test';" -> "test",
564+
"/*/* multi-level bracketed*/ SELECT 'test';" -> "test"
565+
)
566+
}
567+
568+
test("SPARK-33100: test sql statements with hint in bracketed comment") {
569+
runCliWithin(2.minute)(
570+
"CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES(1, 2) AS t1(k, v);" -> "",
571+
"CREATE TEMPORARY VIEW t2 AS SELECT * FROM VALUES(2, 1) AS t2(k, v);" -> "",
572+
"EXPLAIN SELECT /*+ MERGEJOIN(t1) */ t1.* FROM t1 JOIN t2 ON t1.k = t2.v;" -> "SortMergeJoin",
573+
"EXPLAIN SELECT /* + MERGEJOIN(t1) */ t1.* FROM t1 JOIN t2 ON t1.k = t2.v;"
574+
-> "BroadcastHashJoin"
575+
)
576+
}
554577
}

0 commit comments

Comments
 (0)