From 62bf87fbdb29f33d7c5bd0fad2bec117889f9acd Mon Sep 17 00:00:00 2001 From: ulysses Date: Tue, 17 Nov 2020 23:37:27 +0800 Subject: [PATCH 1/7] init --- .../catalyst/expressions/stringExpressions.scala | 5 ++++- .../expressions/StringExpressionsSuite.scala | 15 +++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 16e22940495f..c89bbebfe2d5 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -1404,7 +1404,10 @@ case class ParseUrl(children: Seq[Expression]) try { new URI(url.toString) } catch { - case e: URISyntaxException => null + // We fail on error if in ansi mode. + case e: URISyntaxException if SQLConf.get.ansiEnabled => + throw new IllegalArgumentException(s"Find an invaild url string ${url.toString}", e) + case _: URISyntaxException => null } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala index a1b6cec24f23..e74731ae05e0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala @@ -943,6 +943,21 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { GenerateUnsafeProjection.generate(ParseUrl(Seq(Literal("\"quote"), Literal("\"quote"))) :: Nil) } + test("SPARK-33468: ParseUrl should fail if input string is not a valid url") { + // fail on error if in ansi mode. + withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { + val msg = intercept[IllegalArgumentException] { + evaluateWithoutCodegen( + ParseUrl(Seq("https://a.b.c/index.php?params1=a|b¶ms2=x", "HOST"))) + }.getMessage + assert(msg.contains("Find an invaild url string")) + } + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + checkEvaluation( + ParseUrl(Seq("https://a.b.c/index.php?params1=a|b¶ms2=x", "HOST")), null) + } + } + test("Sentences") { val nullString = Literal.create(null, StringType) checkEvaluation(Sentences(nullString, nullString, nullString), null) From 5aec76a8934ecbffb27c3a6b5927effc34c2f57c Mon Sep 17 00:00:00 2001 From: ulysses Date: Wed, 18 Nov 2020 01:26:09 +0800 Subject: [PATCH 2/7] remove comment --- .../spark/sql/catalyst/expressions/stringExpressions.scala | 1 - .../spark/sql/catalyst/expressions/StringExpressionsSuite.scala | 1 - 2 files changed, 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index c89bbebfe2d5..951c4eae17fe 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -1404,7 +1404,6 @@ case class ParseUrl(children: Seq[Expression]) try { new URI(url.toString) } catch { - // We fail on error if in ansi mode. case e: URISyntaxException if SQLConf.get.ansiEnabled => throw new IllegalArgumentException(s"Find an invaild url string ${url.toString}", e) case _: URISyntaxException => null diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala index e74731ae05e0..a5cbe36981aa 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala @@ -944,7 +944,6 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { } test("SPARK-33468: ParseUrl should fail if input string is not a valid url") { - // fail on error if in ansi mode. withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { val msg = intercept[IllegalArgumentException] { evaluateWithoutCodegen( From a408734cbbedf7830d15e606cbe09f019275173f Mon Sep 17 00:00:00 2001 From: ulysses Date: Wed, 18 Nov 2020 01:27:01 +0800 Subject: [PATCH 3/7] update title --- .../spark/sql/catalyst/expressions/StringExpressionsSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala index a5cbe36981aa..730574a4b984 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala @@ -943,7 +943,7 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { GenerateUnsafeProjection.generate(ParseUrl(Seq(Literal("\"quote"), Literal("\"quote"))) :: Nil) } - test("SPARK-33468: ParseUrl should fail if input string is not a valid url") { + test("SPARK-33468: ParseUrl in ANSI mode should fail if input string is not a valid url") { withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { val msg = intercept[IllegalArgumentException] { evaluateWithoutCodegen( From 2202ca4bb4294083691e1af774f167f87404ae40 Mon Sep 17 00:00:00 2001 From: ulysses Date: Wed, 18 Nov 2020 12:13:25 +0800 Subject: [PATCH 4/7] doc --- docs/sql-ref-ansi-compliance.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index 41921265b7c7..be427fddbaf9 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -114,6 +114,7 @@ The behavior of some SQL functions can be different under ANSI mode (`spark.sql. - `element_at`: This function throws `ArrayIndexOutOfBoundsException` if using invalid indices. - `element_at`: This function throws `NoSuchElementException` if key does not exist in map. - `elt`: This function throws `ArrayIndexOutOfBoundsException` if using invalid indices. + - `parse_url`: This function throws `IllegalArgumentException` if input string is not a valid url. ### SQL Operators From 497015cdbb27410ad9a3cae49e8b72d0807e3849 Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Fri, 20 Nov 2020 08:15:04 +0800 Subject: [PATCH 5/7] add parameter --- .../spark/sql/catalyst/expressions/stringExpressions.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 951c4eae17fe..7598ce86e5e0 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -1357,7 +1357,7 @@ object ParseUrl { 1 """, since = "2.0.0") -case class ParseUrl(children: Seq[Expression]) +case class ParseUrl(children: Seq[Expression], failOnError: Boolean = SQLConf.get.ansiEnabled) extends Expression with ExpectsInputTypes with CodegenFallback { override def nullable: Boolean = true @@ -1404,7 +1404,7 @@ case class ParseUrl(children: Seq[Expression]) try { new URI(url.toString) } catch { - case e: URISyntaxException if SQLConf.get.ansiEnabled => + case e: URISyntaxException if failOnError => throw new IllegalArgumentException(s"Find an invaild url string ${url.toString}", e) case _: URISyntaxException => null } From ecd21436c00881c0a3ae7dc1e797bc4d5135064d Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Fri, 20 Nov 2020 09:16:51 +0800 Subject: [PATCH 6/7] doc --- docs/sql-ref-ansi-compliance.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index be427fddbaf9..865b3fc440f1 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -114,7 +114,7 @@ The behavior of some SQL functions can be different under ANSI mode (`spark.sql. - `element_at`: This function throws `ArrayIndexOutOfBoundsException` if using invalid indices. - `element_at`: This function throws `NoSuchElementException` if key does not exist in map. - `elt`: This function throws `ArrayIndexOutOfBoundsException` if using invalid indices. - - `parse_url`: This function throws `IllegalArgumentException` if input string is not a valid url. + - `parse_url`: This function throws `IllegalArgumentException` if an input string is not a valid url. ### SQL Operators From d7e437b326dc9564a9460946bdbc0856e6876322 Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Fri, 20 Nov 2020 10:42:17 +0800 Subject: [PATCH 7/7] add constructor --- .../spark/sql/catalyst/expressions/stringExpressions.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 7598ce86e5e0..9f92181b34df 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -1359,6 +1359,7 @@ object ParseUrl { since = "2.0.0") case class ParseUrl(children: Seq[Expression], failOnError: Boolean = SQLConf.get.ansiEnabled) extends Expression with ExpectsInputTypes with CodegenFallback { + def this(children: Seq[Expression]) = this(children, SQLConf.get.ansiEnabled) override def nullable: Boolean = true override def inputTypes: Seq[DataType] = Seq.fill(children.size)(StringType)