Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,18 @@ package object util {
def toCommentSafeString(str: String): String = {
val len = math.min(str.length, 128)
val suffix = if (str.length > len) "..." else ""
str.substring(0, len).replace("*/", "\\*\\/").replace("\\u", "\\\\u") + suffix

Copy link
Contributor

@davies davies May 16, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We only need to make sure that the comment string does not have */ in it, *\/ will be OK, one simpler solution could be

str.substring(0, len).replaceAll("(\\*|(u002A))(/|(\\\\u002F))", "$1\\\\/") + suffix

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the advice.
I think "\u" should be escaped too otherwise, the compilation will fail when invalid unicode characters, like \u002X or \u001, are in literals.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point, LGTM

// Unicode literals, like \u0022, should be escaped before
// they are put in code comment to avoid codegen breaking.
// To escape them, single "\" should be prepended to a series of "\" just before "u"
// only when the number of "\" is odd.
// For example, \u0022 should become to \\u0022
// but \\u0022 should not become to \\\u0022 because the first backslash escapes the second one,
// and \u0022 will remain, means not escaped.
// Otherwise, the runtime Java compiler will fail to compile or code injection can be allowed.
// For details, see SPARK-15165.
str.substring(0, len).replace("*/", "*\\/")
.replaceAll("(^|[^\\\\])(\\\\(\\\\\\\\)*u)", "$1\\\\$2") + suffix
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about we also have a comment at here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, make sense.
I've added.

Copy link

@mhseiden mhseiden May 10, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the implementation of org.apache.commons.lang3.StringEscapeUtils.escapeJava(...) sufficient to cover this case, instead of a custom regex?

Copy link
Member Author

@sarutak sarutak May 10, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the suggestion @mhseiden .
I tried escaping by escapeJava and it may fix this issue but I noticed it escapes all of "", means the number of "" will be doubled.
For example, \\\u0022 will be \\\\\\u0022 but I expects only "" just before "u" will be escaped if the number of "" is odd.

}

/* FIX ME
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -194,4 +194,48 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper {
true,
InternalRow(UTF8String.fromString("\\u")))
}

test("check compilation error doesn't occur caused by specific literal") {
// The end of comment (*/) should be escaped.
GenerateUnsafeProjection.generate(
Literal.create("*/Compilation error occurs/*", StringType) :: Nil)

// `\u002A` is `*` and `\u002F` is `/`
// so if the end of comment consists of those characters in queries, we need to escape them.
GenerateUnsafeProjection.generate(
Literal.create("\\u002A/Compilation error occurs/*", StringType) :: Nil)
GenerateUnsafeProjection.generate(
Literal.create("\\\\u002A/Compilation error occurs/*", StringType) :: Nil)
GenerateUnsafeProjection.generate(
Literal.create("\\u002a/Compilation error occurs/*", StringType) :: Nil)
GenerateUnsafeProjection.generate(
Literal.create("\\\\u002a/Compilation error occurs/*", StringType) :: Nil)
GenerateUnsafeProjection.generate(
Literal.create("*\\u002FCompilation error occurs/*", StringType) :: Nil)
GenerateUnsafeProjection.generate(
Literal.create("*\\\\u002FCompilation error occurs/*", StringType) :: Nil)
GenerateUnsafeProjection.generate(
Literal.create("*\\002fCompilation error occurs/*", StringType) :: Nil)
GenerateUnsafeProjection.generate(
Literal.create("*\\\\002fCompilation error occurs/*", StringType) :: Nil)
GenerateUnsafeProjection.generate(
Literal.create("\\002A\\002FCompilation error occurs/*", StringType) :: Nil)
GenerateUnsafeProjection.generate(
Literal.create("\\\\002A\\002FCompilation error occurs/*", StringType) :: Nil)
GenerateUnsafeProjection.generate(
Literal.create("\\002A\\\\002FCompilation error occurs/*", StringType) :: Nil)

// \ u002X is an invalid unicode literal so it should be escaped.
GenerateUnsafeProjection.generate(
Literal.create("\\u002X/Compilation error occurs", StringType) :: Nil)
GenerateUnsafeProjection.generate(
Literal.create("\\\\u002X/Compilation error occurs", StringType) :: Nil)

// \ u001 is an invalid unicode literal so it should be escaped.
GenerateUnsafeProjection.generate(
Literal.create("\\u001/Compilation error occurs", StringType) :: Nil)
GenerateUnsafeProjection.generate(
Literal.create("\\\\u001/Compilation error occurs", StringType) :: Nil)

}
}
264 changes: 264 additions & 0 deletions sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2496,4 +2496,268 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
}
}

test("check code injection is prevented") {
// The end of comment (*/) should be escaped.
var literal =
"""|*/
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
var expected =
"""|*/
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
checkAnswer(
sql(s"SELECT '$literal' AS DUMMY"),
Row(s"$expected") :: Nil)

// `\u002A` is `*` and `\u002F` is `/`
// so if the end of comment consists of those characters in queries, we need to escape them.
literal =
"""|\\u002A/
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
expected =
s"""|${"\\u002A/"}
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
checkAnswer(
sql(s"SELECT '$literal' AS DUMMY"),
Row(s"$expected") :: Nil)

literal =
"""|\\\\u002A/
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
expected =
"""|\\u002A/
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
checkAnswer(
sql(s"SELECT '$literal' AS DUMMY"),
Row(s"$expected") :: Nil)

literal =
"""|\\u002a/
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
expected =
s"""|${"\\u002a/"}
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
checkAnswer(
sql(s"SELECT '$literal' AS DUMMY"),
Row(s"$expected") :: Nil)

literal =
"""|\\\\u002a/
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
expected =
"""|\\u002a/
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
checkAnswer(
sql(s"SELECT '$literal' AS DUMMY"),
Row(s"$expected") :: Nil)

literal =
"""|*\\u002F
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
expected =
s"""|${"*\\u002F"}
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
checkAnswer(
sql(s"SELECT '$literal' AS DUMMY"),
Row(s"$expected") :: Nil)

literal =
"""|*\\\\u002F
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
expected =
"""|*\\u002F
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
checkAnswer(
sql(s"SELECT '$literal' AS DUMMY"),
Row(s"$expected") :: Nil)

literal =
"""|*\\u002f
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
expected =
s"""|${"*\\u002f"}
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
checkAnswer(
sql(s"SELECT '$literal' AS DUMMY"),
Row(s"$expected") :: Nil)

literal =
"""|*\\\\u002f
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
expected =
"""|*\\u002f
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
checkAnswer(
sql(s"SELECT '$literal' AS DUMMY"),
Row(s"$expected") :: Nil)

literal =
"""|\\u002A\\u002F
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
expected =
s"""|${"\\u002A\\u002F"}
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
checkAnswer(
sql(s"SELECT '$literal' AS DUMMY"),
Row(s"$expected") :: Nil)

literal =
"""|\\\\u002A\\u002F
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
expected =
s"""|${"\\\\u002A\\u002F"}
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
checkAnswer(
sql(s"SELECT '$literal' AS DUMMY"),
Row(s"$expected") :: Nil)

literal =
"""|\\u002A\\\\u002F
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
expected =
s"""|${"\\u002A\\\\u002F"}
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
checkAnswer(
sql(s"SELECT '$literal' AS DUMMY"),
Row(s"$expected") :: Nil)

literal =
"""|\\\\u002A\\\\u002F
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
expected =
"""|\\u002A\\u002F
|{
| new Object() {
| void f() { throw new RuntimeException("This exception is injected."); }
| }.f();
|}
|/*""".stripMargin
checkAnswer(
sql(s"SELECT '$literal' AS DUMMY"),
Row(s"$expected") :: Nil)
}
}