From eff6d77073b9dc24fdbdabdd837cfffff7fe2022 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 6 Jun 2024 16:51:31 +0800 Subject: [PATCH 01/12] [SPARK-48548][SQL] Perf improvement for escapePathName --- .../catalog/ExternalCatalogUtils.scala | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala index eb649c4d4796..727538b04516 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala @@ -63,22 +63,26 @@ object ExternalCatalogUtils { bitSet } - def needsEscaping(c: Char): Boolean = { + @inline private final def needsEscaping(c: Char): Boolean = { c < charToEscape.size() && charToEscape.get(c) } def escapePathName(path: String): String = { - val builder = new StringBuilder() - path.foreach { c => - if (needsEscaping(c)) { - builder.append('%') - builder.append(f"${c.asInstanceOf[Int]}%02X") - } else { - builder.append(c) + val firstIndex = path.indexWhere(needsEscaping) + if (firstIndex == -1) { + path + } else { + val builder = new StringBuilder(path.substring(0, firstIndex)) + path.substring(firstIndex).foreach { c => + if (needsEscaping(c)) { + builder.append('%') + builder.append(f"${c.asInstanceOf[Int]}%02X") + } else { + builder.append(c) + } } + builder.toString() } - - builder.toString() } From 019cfb26461d35ee8022b5b820ed7b4918b7bbbd Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 7 Jun 2024 14:24:48 +0800 Subject: [PATCH 02/12] [SPARK-48548][SQL] Perf improvement for escapePathName --- .../catalog/ExternalCatalogUtils.scala | 28 +++++---- .../spark/sql/EscapePathBenchmark.scala | 63 +++++++++++++++++++ 2 files changed, 80 insertions(+), 11 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/EscapePathBenchmark.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala index 727538b04516..dcd491f1fe2b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala @@ -40,7 +40,7 @@ object ExternalCatalogUtils { // The following string escaping code is mainly copied from Hive (o.a.h.h.common.FileUtils). ////////////////////////////////////////////////////////////////////////////////////////////////// - val charToEscape = { + final val charToEscape = { val bitSet = new java.util.BitSet(128) /** @@ -63,29 +63,35 @@ object ExternalCatalogUtils { bitSet } - @inline private final def needsEscaping(c: Char): Boolean = { + private final val HEX_CHARS = "0123456789ABCDEF".toCharArray + + @inline final def needsEscaping(c: Char): Boolean = { c < charToEscape.size() && charToEscape.get(c) } def escapePathName(path: String): String = { - val firstIndex = path.indexWhere(needsEscaping) - if (firstIndex == -1) { + var firstIndex = 0 + val length = path.length + while (firstIndex < length && !needsEscaping(path.charAt(firstIndex))) { + firstIndex += 1 + } + if (firstIndex == 0) { path } else { - val builder = new StringBuilder(path.substring(0, firstIndex)) - path.substring(firstIndex).foreach { c => + val sb = new StringBuilder(path.substring(0, firstIndex)) + while(firstIndex < length) { + val c = path.charAt(firstIndex) if (needsEscaping(c)) { - builder.append('%') - builder.append(f"${c.asInstanceOf[Int]}%02X") + sb.append('%').append(HEX_CHARS((c & 0xF0) >> 4)).append(HEX_CHARS(c & 0x0F)) } else { - builder.append(c) + sb.append(c) } + firstIndex += 1 } - builder.toString() + sb.toString() } } - def unescapePathName(path: String): String = { val sb = new StringBuilder var i = 0 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/EscapePathBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/EscapePathBenchmark.scala new file mode 100644 index 000000000000..c94bb31b7ce9 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/EscapePathBenchmark.scala @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} +import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils + +object EscapePathBenchmark extends BenchmarkBase { + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { + val N = 1000000 + runBenchmark("Escape") { + val benchmark = new Benchmark("Escape Tests", N, 10, output = output) + val paths = Seq( + "https://issues.apache.org/jira/browse/SPARK-48551", + "https...issues.apache.org/jira/browse/SPARK-48551", + "https...issues.apache.org.jira/browse/SPARK-48551", + "https...issues.apache.org.jira.browse/SPARK-48551", + "https...issues.apache.org.jira.browse.SPARK-48551") + benchmark.addCase("Legacy") { _ => + (1 to N).foreach(_ => paths.foreach(escapePathNameLegacy)) + } + + benchmark.addCase("New") { _ => + (1 to N).foreach(_ => { + paths.foreach(ExternalCatalogUtils.escapePathName) + }) + } + benchmark.run() + } + } + + /** + * Legacy implementation of escapePathName before Spark 4.0 + */ + def escapePathNameLegacy(path: String): String = { + val builder = new StringBuilder() + path.foreach { c => + if (ExternalCatalogUtils.needsEscaping(c)) { + builder.append('%') + builder.append(f"${c.asInstanceOf[Int]}%02X") + } else { + builder.append(c) + } + } + + builder.toString() + } +} From d04b589ff9bb82ece9da5aad9d35fc92def0021c Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 7 Jun 2024 14:40:22 +0800 Subject: [PATCH 03/12] Update sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala Co-authored-by: Josh Rosen --- .../spark/sql/catalyst/catalog/ExternalCatalogUtils.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala index dcd491f1fe2b..0787281fc1a2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala @@ -78,7 +78,8 @@ object ExternalCatalogUtils { if (firstIndex == 0) { path } else { - val sb = new StringBuilder(path.substring(0, firstIndex)) + val sb = new java.lang.StringBuilder(length + 16) + sb.append(path.substring(0, firstIndex)) while(firstIndex < length) { val c = path.charAt(firstIndex) if (needsEscaping(c)) { From 3fac8fc90f61d9187e8e20b5d0b759fa8828b3d4 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 7 Jun 2024 15:07:23 +0800 Subject: [PATCH 04/12] Update sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala Co-authored-by: Josh Rosen --- .../spark/sql/catalyst/catalog/ExternalCatalogUtils.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala index 0787281fc1a2..98b1d7335cca 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala @@ -79,7 +79,7 @@ object ExternalCatalogUtils { path } else { val sb = new java.lang.StringBuilder(length + 16) - sb.append(path.substring(0, firstIndex)) + sb.append(path, 0, firstIndex) while(firstIndex < length) { val c = path.charAt(firstIndex) if (needsEscaping(c)) { From e3e783d1fad4519f71c7bdc0f8bc7095d52ddd8f Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 7 Jun 2024 15:27:53 +0800 Subject: [PATCH 05/12] tests --- .../catalog/ExternalCatalogUtils.scala | 2 +- .../catalog/ExternalCatalogUtilsSuite.scala | 37 +++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtilsSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala index 98b1d7335cca..3e84ca4cfa13 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala @@ -75,7 +75,7 @@ object ExternalCatalogUtils { while (firstIndex < length && !needsEscaping(path.charAt(firstIndex))) { firstIndex += 1 } - if (firstIndex == 0) { + if (firstIndex == 0 && !needsEscaping(path.charAt(firstIndex))) { path } else { val sb = new java.lang.StringBuilder(length + 16) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtilsSuite.scala new file mode 100644 index 000000000000..e03c81eaa9e7 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtilsSuite.scala @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.catalog + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils.escapePathName + +class ExternalCatalogUtilsSuite extends SparkFunSuite { + + test("SPARK-48551: escapePathName") { + ExternalCatalogUtils.charToEscape.stream().toArray.map(_.asInstanceOf[Char]).foreach { c => + // Check parity with old conversion technique: + assert(escapePathName(c.toString) === "%" + f"$c%02X", + s"wrong escaping for $c") + } + assert(escapePathName("a b") === "a b") + assert(escapePathName("a:b") === "a%3Ab") + assert(escapePathName("a%b") === "a%25b") + assert(escapePathName("a,b") === "a,b") + assert(escapePathName("a/b") === "a%2Fb") + } +} From 072a957b0863ded5fa04eb635a4639f2683a56ff Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 7 Jun 2024 15:42:40 +0800 Subject: [PATCH 06/12] nit --- .../sql/catalyst/catalog/ExternalCatalogUtils.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala index 3e84ca4cfa13..0f3f353cb011 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala @@ -40,7 +40,7 @@ object ExternalCatalogUtils { // The following string escaping code is mainly copied from Hive (o.a.h.h.common.FileUtils). ////////////////////////////////////////////////////////////////////////////////////////////////// - final val charToEscape = { + final val (charToEscape, sizeOfCharToEscape) = { val bitSet = new java.util.BitSet(128) /** @@ -60,22 +60,22 @@ object ExternalCatalogUtils { Array(' ', '<', '>', '|').foreach(bitSet.set(_)) } - bitSet + (bitSet, bitSet.size) } private final val HEX_CHARS = "0123456789ABCDEF".toCharArray @inline final def needsEscaping(c: Char): Boolean = { - c < charToEscape.size() && charToEscape.get(c) + c < sizeOfCharToEscape && charToEscape.get(c) } def escapePathName(path: String): String = { - var firstIndex = 0 val length = path.length + var firstIndex = 0 while (firstIndex < length && !needsEscaping(path.charAt(firstIndex))) { firstIndex += 1 } - if (firstIndex == 0 && !needsEscaping(path.charAt(firstIndex))) { + if (firstIndex == 0 && !needsEscaping(path.charAt(0))) { path } else { val sb = new java.lang.StringBuilder(length + 16) From e1189178f5c9690632d6aa4c65e955dccf8405c4 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 7 Jun 2024 15:47:32 +0800 Subject: [PATCH 07/12] address comments --- .../org/apache/spark/sql/catalyst}/EscapePathBenchmark.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) rename sql/{core/src/test/scala/org/apache/spark/sql => catalyst/src/test/scala/org/apache/spark/sql/catalyst}/EscapePathBenchmark.scala (98%) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/EscapePathBenchmark.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/EscapePathBenchmark.scala similarity index 98% rename from sql/core/src/test/scala/org/apache/spark/sql/EscapePathBenchmark.scala rename to sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/EscapePathBenchmark.scala index c94bb31b7ce9..5ec93592ec1e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/EscapePathBenchmark.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/EscapePathBenchmark.scala @@ -14,8 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - -package org.apache.spark.sql +package org.apache.spark.sql.catalyst import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils From bac8b9d0875400b58894117272183db75dbc3c16 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 7 Jun 2024 16:11:59 +0800 Subject: [PATCH 08/12] update golden files --- .../benchmarks/EscapePathBenchmark-jdk21-results.txt | 12 ++++++++++++ .../benchmarks/EscapePathBenchmark-results.txt | 12 ++++++++++++ .../sql/catalyst/catalog/ExternalCatalogUtils.scala | 2 +- 3 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 sql/catalyst/benchmarks/EscapePathBenchmark-jdk21-results.txt create mode 100644 sql/catalyst/benchmarks/EscapePathBenchmark-results.txt diff --git a/sql/catalyst/benchmarks/EscapePathBenchmark-jdk21-results.txt b/sql/catalyst/benchmarks/EscapePathBenchmark-jdk21-results.txt new file mode 100644 index 000000000000..4fffb9bfd49a --- /dev/null +++ b/sql/catalyst/benchmarks/EscapePathBenchmark-jdk21-results.txt @@ -0,0 +1,12 @@ +================================================================================================ +Escape +================================================================================================ + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1021-azure +AMD EPYC 7763 64-Core Processor +Escape Tests: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Legacy 7128 7146 8 0.1 7127.9 1.0X +New 790 795 5 1.3 789.7 9.0X + + diff --git a/sql/catalyst/benchmarks/EscapePathBenchmark-results.txt b/sql/catalyst/benchmarks/EscapePathBenchmark-results.txt new file mode 100644 index 000000000000..32e44f6e19ef --- /dev/null +++ b/sql/catalyst/benchmarks/EscapePathBenchmark-results.txt @@ -0,0 +1,12 @@ +================================================================================================ +Escape +================================================================================================ + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1021-azure +AMD EPYC 7763 64-Core Processor +Escape Tests: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Legacy 6719 6726 6 0.1 6719.3 1.0X +New 735 744 21 1.4 735.3 9.1X + + diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala index 0f3f353cb011..cab5bb42ce68 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala @@ -89,7 +89,7 @@ object ExternalCatalogUtils { } firstIndex += 1 } - sb.toString() + sb.toString } } From c6a0cf7eb7021470774f09b04b72949da74a19bf Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 7 Jun 2024 22:11:02 +0800 Subject: [PATCH 09/12] corner case --- .../spark/sql/catalyst/catalog/ExternalCatalogUtils.scala | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala index cab5bb42ce68..00280a695131 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala @@ -70,6 +70,9 @@ object ExternalCatalogUtils { } def escapePathName(path: String): String = { + if (path == null || path.isEmpty) { + return path + } val length = path.length var firstIndex = 0 while (firstIndex < length && !needsEscaping(path.charAt(firstIndex))) { From 74dee2bcc48a3ed61f7ec0f99047f1909c207d6e Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Tue, 11 Jun 2024 12:09:20 +0800 Subject: [PATCH 10/12] address comments --- .../spark/sql/catalyst/catalog/ExternalCatalogUtils.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala index 00280a695131..60c064a61a0f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala @@ -78,7 +78,7 @@ object ExternalCatalogUtils { while (firstIndex < length && !needsEscaping(path.charAt(firstIndex))) { firstIndex += 1 } - if (firstIndex == 0 && !needsEscaping(path.charAt(0))) { + if (firstIndex == length) { path } else { val sb = new java.lang.StringBuilder(length + 16) From bd38e8bd8bdbd337bbc5fd9dd0a7566303ef5a95 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Tue, 11 Jun 2024 13:40:53 +0800 Subject: [PATCH 11/12] address comments --- .../spark/sql/catalyst/EscapePathBenchmark.scala | 12 ++++++++++++ .../catalyst/catalog/ExternalCatalogUtilsSuite.scala | 5 +++++ 2 files changed, 17 insertions(+) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/EscapePathBenchmark.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/EscapePathBenchmark.scala index 5ec93592ec1e..ce277456e0be 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/EscapePathBenchmark.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/EscapePathBenchmark.scala @@ -19,6 +19,18 @@ package org.apache.spark.sql.catalyst import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils +/** + * Benchmark for path escaping + * To run this benchmark: + * {{{ + * 1. without sbt: + * bin/spark-submit --class --jars + * 2. build/sbt "catalyst/Test/runMain " + * 3. generate result: + * SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "catalyst/Test/runMain " + * Results will be written to "benchmarks/EscapePathBenchmark-results.txt". + * }}} + */ object EscapePathBenchmark extends BenchmarkBase { override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { val N = 1000000 diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtilsSuite.scala index e03c81eaa9e7..843ffc037d6c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtilsSuite.scala @@ -28,8 +28,13 @@ class ExternalCatalogUtilsSuite extends SparkFunSuite { assert(escapePathName(c.toString) === "%" + f"$c%02X", s"wrong escaping for $c") } + assert(escapePathName("") === "") + assert(escapePathName(" ") === " ") + assert(escapePathName("\n") === "%0A") assert(escapePathName("a b") === "a b") assert(escapePathName("a:b") === "a%3Ab") + assert(escapePathName(":ab") === "%3Aab") + assert(escapePathName("ab:") === "ab%3A") assert(escapePathName("a%b") === "a%25b") assert(escapePathName("a,b") === "a,b") assert(escapePathName("a/b") === "a%2Fb") From b7c1e0cb915fdb9d38f74f83a79c9c27dfd57aff Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Tue, 11 Jun 2024 13:49:44 +0800 Subject: [PATCH 12/12] address comments --- .../spark/sql/catalyst/catalog/ExternalCatalogUtils.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala index 60c064a61a0f..1dc2a760a8af 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala @@ -82,7 +82,7 @@ object ExternalCatalogUtils { path } else { val sb = new java.lang.StringBuilder(length + 16) - sb.append(path, 0, firstIndex) + if (firstIndex != 0) sb.append(path, 0, firstIndex) while(firstIndex < length) { val c = path.charAt(firstIndex) if (needsEscaping(c)) {