diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index 41921265b7c70..865b3fc440f12 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -114,6 +114,7 @@ The behavior of some SQL functions can be different under ANSI mode (`spark.sql. - `element_at`: This function throws `ArrayIndexOutOfBoundsException` if using invalid indices. - `element_at`: This function throws `NoSuchElementException` if key does not exist in map. - `elt`: This function throws `ArrayIndexOutOfBoundsException` if using invalid indices. + - `parse_url`: This function throws `IllegalArgumentException` if an input string is not a valid url. ### SQL Operators diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 16e22940495f1..9f92181b34df1 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -1357,8 +1357,9 @@ object ParseUrl { 1 """, since = "2.0.0") -case class ParseUrl(children: Seq[Expression]) +case class ParseUrl(children: Seq[Expression], failOnError: Boolean = SQLConf.get.ansiEnabled) extends Expression with ExpectsInputTypes with CodegenFallback { + def this(children: Seq[Expression]) = this(children, SQLConf.get.ansiEnabled) override def nullable: Boolean = true override def inputTypes: Seq[DataType] = Seq.fill(children.size)(StringType) @@ -1404,7 +1405,9 @@ case class ParseUrl(children: Seq[Expression]) try { new URI(url.toString) } catch { - case e: URISyntaxException => null + case e: URISyntaxException if failOnError => + throw new IllegalArgumentException(s"Find an invaild url string ${url.toString}", e) + case _: URISyntaxException => null } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala index a1b6cec24f23f..730574a4b9846 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala @@ -943,6 +943,20 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { GenerateUnsafeProjection.generate(ParseUrl(Seq(Literal("\"quote"), Literal("\"quote"))) :: Nil) } + test("SPARK-33468: ParseUrl in ANSI mode should fail if input string is not a valid url") { + withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { + val msg = intercept[IllegalArgumentException] { + evaluateWithoutCodegen( + ParseUrl(Seq("https://a.b.c/index.php?params1=a|b¶ms2=x", "HOST"))) + }.getMessage + assert(msg.contains("Find an invaild url string")) + } + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + checkEvaluation( + ParseUrl(Seq("https://a.b.c/index.php?params1=a|b¶ms2=x", "HOST")), null) + } + } + test("Sentences") { val nullString = Literal.create(null, StringType) checkEvaluation(Sentences(nullString, nullString, nullString), null)