Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -519,14 +519,22 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
// Note: [SPARK-31595] if there is a `'` in a double quoted string, or a `"` in a single quoted
// string, the original implementation from Hive will not drop the trailing semicolon as expected,
// hence we refined this function a little bit.
// Note: [SPARK-33100] Ignore a semicolon inside a bracketed comment in spark-sql.
private def splitSemiColon(line: String): JList[String] = {
var insideSingleQuote = false
var insideDoubleQuote = false
var insideComment = false
var insideSimpleComment = false
var bracketedCommentLevel = 0
var escape = false
var beginIndex = 0
var includingStatement = false
val ret = new JArrayList[String]

// True while the scanner sits inside a `/* ... */` comment.
def insideBracketedComment: Boolean = bracketedCommentLevel >= 1

// True inside either comment flavour: a `--` simple comment or a bracketed one.
def insideComment: Boolean = insideBracketedComment || insideSimpleComment

// Reports whether a statement is in progress at `index`: either one was already
// in progress, or the character at `index` is a non-blank character outside any
// comment and past `beginIndex`.
def statementBegin(index: Int): Boolean = {
  if (includingStatement) {
    true
  } else {
    // `String.trim` strips every char <= ' ', so comparing against ' ' is the same
    // "not blank" check the one-character-string form performs.
    !insideComment && beginIndex < index && line.charAt(index) > ' '
  }
}

for (index <- 0 until line.length) {
if (line.charAt(index) == '\'' && !insideComment) {
// take a look to see if it is escaped
Expand All @@ -550,21 +558,33 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
// Sample query: select "quoted value --"
// ^^ avoids starting a comment if it's inside quotes.
} else if (hasNext && line.charAt(index + 1) == '-') {
// ignore quotes and ;
insideComment = true
// ignore quotes and ; in simple comment
insideSimpleComment = true
}
} else if (line.charAt(index) == ';') {
if (insideSingleQuote || insideDoubleQuote || insideComment) {
// do not split
} else {
// split, do not include ; itself
ret.add(line.substring(beginIndex, index))
if (includingStatement) {
// split, do not include ; itself
ret.add(line.substring(beginIndex, index))
}
beginIndex = index + 1
includingStatement = false
}
} else if (line.charAt(index) == '\n') {
// with a new line the inline comment should end.
// with a new line the inline simple comment should end.
if (!escape) {
insideComment = false
insideSimpleComment = false
}
} else if (line.charAt(index) == '/' && !insideSimpleComment) {
val hasNext = index + 1 < line.length
if (insideSingleQuote || insideDoubleQuote) {
// Ignores '/' in any case of quotes
} else if (insideBracketedComment && line.charAt(index - 1) == '*' ) {
bracketedCommentLevel -= 1
} else if (hasNext && !insideBracketedComment && line.charAt(index + 1) == '*') {
bracketedCommentLevel += 1
}
}
// set the escape
Expand All @@ -573,8 +593,12 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
} else if (line.charAt(index) == '\\') {
escape = true
}

includingStatement = statementBegin(index)
}
if (includingStatement) {
ret.add(line.substring(beginIndex))
}
ret.add(line.substring(beginIndex))
ret
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -573,4 +573,27 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging {
// the date formatter for `java.sql.LocalDate` must output negative years with sign.
runCliWithin(1.minute)("SELECT MAKE_DATE(-44, 3, 15);" -> "-0044-03-15")
}

// Regression cases for semicolons that appear inside comments.
// NOTE(review): each pair is presumably "input line -> text expected in the CLI output";
// confirm against the definition of runCliWithin, which is not visible here.
test("SPARK-33100: Ignore a semicolon inside a bracketed comment in spark-sql") {
runCliWithin(4.minute)(
// Bracketed comment containing a ';' placed before the real statement.
"/* SELECT 'test';*/ SELECT 'test';" -> "test",
// Same, preceded by empty statements (";;").
";;/* SELECT 'test';*/ SELECT 'test';" -> "test",
// Empty statements between the comment and the real statement.
"/* SELECT 'test';*/;; SELECT 'test';" -> "test",
// Trailing `--` comment that itself contains a ';'.
"SELECT 'test'; -- SELECT 'test';" -> "",
// Trailing bracketed comment followed by a lone ';'.
"SELECT 'test'; /* SELECT 'test';*/;" -> "",
// Meta characters and a backslash-escaped ';' inside the comment.
"/*$meta chars{^\\;}*/ SELECT 'test';" -> "test",
// Bracketed comment spanning several lines.
"/*\nmulti-line\n*/ SELECT 'test';" -> "test",
// A second "/*" inside an already-open comment, closed by a single "*/".
"/*/* multi-level bracketed*/ SELECT 'test';" -> "test"
)
}

// Checks that bracketed-comment handling does not interfere with `/*+ ... */` hints.
// Two temporary views are created, then the same join is EXPLAINed twice:
// once with `/*+ MERGEJOIN(t1) */` and once with `/* + MERGEJOIN(t1) */`.
// NOTE(review): judging by the expected outputs, the first form is honoured as a hint
// (SortMergeJoin) while the form with a space after "/*" is treated as a plain comment
// (BroadcastHashJoin) -- confirm against the SQL parser's hint rules.
test("SPARK-33100: test sql statements with hint in bracketed comment") {
runCliWithin(2.minute)(
"CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES(1, 2) AS t1(k, v);" -> "",
"CREATE TEMPORARY VIEW t2 AS SELECT * FROM VALUES(2, 1) AS t2(k, v);" -> "",
"EXPLAIN SELECT /*+ MERGEJOIN(t1) */ t1.* FROM t1 JOIN t2 ON t1.k = t2.v;" -> "SortMergeJoin",
"EXPLAIN SELECT /* + MERGEJOIN(t1) */ t1.* FROM t1 JOIN t2 ON t1.k = t2.v;"
-> "BroadcastHashJoin"
)
}
}