Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,12 @@ package org.apache.spark.sql.catalyst.expressions

import java.net.{URI, URISyntaxException}
import java.text.{BreakIterator, DecimalFormat, DecimalFormatSymbols}
import java.util.{Base64 => JBase64}
import java.util.{HashMap, Locale, Map => JMap}
import java.util.regex.Pattern

import scala.collection.mutable.ArrayBuffer

import org.apache.commons.codec.binary.{Base64 => CommonsBase64}

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.{ExpressionBuilder, FunctionRegistry, TypeCheckResult}
import org.apache.spark.sql.catalyst.expressions.codegen._
Expand Down Expand Up @@ -2345,13 +2344,13 @@ case class Base64(child: Expression)
override def inputTypes: Seq[DataType] = Seq(BinaryType)

protected override def nullSafeEval(bytes: Any): Any = {
UTF8String.fromBytes(CommonsBase64.encodeBase64(bytes.asInstanceOf[Array[Byte]]))
UTF8String.fromBytes(JBase64.getMimeEncoder.encode(bytes.asInstanceOf[Array[Byte]]))
}

override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
nullSafeCodeGen(ctx, ev, (child) => {
s"""${ev.value} = UTF8String.fromBytes(
${classOf[CommonsBase64].getName}.encodeBase64($child));
${classOf[JBase64].getName}.getMimeEncoder().encode($child));
"""})
}

Expand All @@ -2377,12 +2376,12 @@ case class UnBase64(child: Expression)
override def inputTypes: Seq[DataType] = Seq(StringType)

protected override def nullSafeEval(string: Any): Any =
CommonsBase64.decodeBase64(string.asInstanceOf[UTF8String].toString)
JBase64.getMimeDecoder.decode(string.asInstanceOf[UTF8String].toString)

override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
nullSafeCodeGen(ctx, ev, (child) => {
s"""
${ev.value} = ${classOf[CommonsBase64].getName}.decodeBase64($child.toString());
${ev.value} = ${classOf[JBase64].getName}.getMimeDecoder().decode($child.toString());
"""})
}

Expand Down
56 changes: 56 additions & 0 deletions sql/core/benchmarks/Base64Benchmark-jdk11-results.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
OpenJDK 64-Bit Server VM 11.0.12+0 on Mac OS X 12.0.1
Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
encode for 1: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
java 2848 2930 132 7.0 142.4 1.0X
apache 12281 14421 1142 1.6 614.1 0.2X

OpenJDK 64-Bit Server VM 11.0.12+0 on Mac OS X 12.0.1
Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
encode for 3: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
java 3895 4151 236 5.1 194.8 1.0X
apache 13238 14325 1127 1.5 661.9 0.3X

OpenJDK 64-Bit Server VM 11.0.12+0 on Mac OS X 12.0.1
Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
encode for 5: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
java 4572 4638 113 4.4 228.6 1.0X
apache 15585 15649 59 1.3 779.3 0.3X

OpenJDK 64-Bit Server VM 11.0.12+0 on Mac OS X 12.0.1
Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
encode for 7: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
java 5339 5391 46 3.7 267.0 1.0X
apache 16755 16899 153 1.2 837.8 0.3X

OpenJDK 64-Bit Server VM 11.0.12+0 on Mac OS X 12.0.1
Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
decode for 1: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
java 3550 3552 3 5.6 177.5 1.0X
apache 13286 13347 103 1.5 664.3 0.3X

OpenJDK 64-Bit Server VM 11.0.12+0 on Mac OS X 12.0.1
Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
decode for 3: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
java 4575 4623 74 4.4 228.7 1.0X
apache 14173 14283 103 1.4 708.6 0.3X

OpenJDK 64-Bit Server VM 11.0.12+0 on Mac OS X 12.0.1
Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
decode for 5: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
java 6044 6087 41 3.3 302.2 1.0X
apache 17261 17412 164 1.2 863.0 0.4X

OpenJDK 64-Bit Server VM 11.0.12+0 on Mac OS X 12.0.1
Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
decode for 7: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
java 7952 8190 236 2.5 397.6 1.0X
apache 20086 20416 340 1.0 1004.3 0.4X

56 changes: 56 additions & 0 deletions sql/core/benchmarks/Base64Benchmark-results.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
Java HotSpot(TM) 64-Bit Server VM 1.8.0_251-b08 on Mac OS X 10.16
Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
encode for 1: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
java 4479 4607 178 4.5 224.0 1.0X
apache 13219 15017 NaN 1.5 661.0 0.3X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_251-b08 on Mac OS X 10.16
Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
encode for 3: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
java 5592 5920 349 3.6 279.6 1.0X
apache 14732 14797 62 1.4 736.6 0.4X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_251-b08 on Mac OS X 10.16
Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
encode for 5: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
java 5818 6043 210 3.4 290.9 1.0X
apache 16450 16875 550 1.2 822.5 0.4X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_251-b08 on Mac OS X 10.16
Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
encode for 7: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
java 7175 7382 231 2.8 358.8 1.0X
apache 18694 18821 138 1.1 934.7 0.4X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_251-b08 on Mac OS X 10.16
Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
decode for 1: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
java 5112 5218 135 3.9 255.6 1.0X
apache 14546 14742 239 1.4 727.3 0.4X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_251-b08 on Mac OS X 10.16
Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
decode for 3: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
java 7071 7294 195 2.8 353.6 1.0X
apache 16702 16795 98 1.2 835.1 0.4X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_251-b08 on Mac OS X 10.16
Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
decode for 5: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
java 8968 9204 396 2.2 448.4 1.0X
apache 19000 19045 43 1.1 950.0 0.5X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_251-b08 on Mac OS X 10.16
Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
decode for 7: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
java 10085 10292 179 2.0 504.3 1.0X
apache 20670 20783 109 1.0 1033.5 0.5X

Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.execution.benchmark

import org.apache.spark.benchmark.Benchmark

/**
* Benchmark for measuring the performance of different Base64 implementations.
* To run this benchmark:
* {{{
* 1. without sbt:
* bin/spark-submit --class <this class> --jars <spark core test jar> <sql core test jar>
* 2. build/sbt "sql/test:runMain <this class>"
* 3. generate result:
* SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain <this class>"
* Results will be written to "benchmarks/Base64Benchmark-results.txt".
* }}}
*/
object Base64Benchmark extends SqlBasedBenchmark {
  import spark.implicits._

  // Row count used for every case; also passed to Benchmark as the values-per-iteration figure.
  private val N = 20L * 1000 * 1000

  /**
   * Builds a Dataset over `spark.range(N)` in which every row is the string "Spark"
   * repeated `len` times, then applies `f` to the bytes of each row and discards the result.
   *
   * @param len number of times "Spark" is repeated in each row
   * @param f   the encode function under measurement
   */
  private def doEncode(len: Int, f: Array[Byte] => Array[Byte]): Unit = {
    val inputs = spark.range(N).map(_ => "Spark" * len)
    inputs.foreach { text =>
      // String.getBytes without an argument uses the platform default charset.
      f(text.getBytes)
      ()
    }
  }

  /**
   * Builds the same rows as [[doEncode]], encodes each one with the JDK MIME encoder,
   * then applies `f` to every encoded byte array and discards the result.
   *
   * The encoding step is part of the same Dataset pipeline, so it runs as part of
   * every decode case.
   *
   * @param len number of times "Spark" is repeated in each row before encoding
   * @param f   the decode function under measurement
   */
  private def doDecode(len: Int, f: Array[Byte] => Array[Byte]): Unit = {
    val encoded = spark.range(N)
      .map(_ => "Spark" * len)
      .map { text =>
        // Every decode case is fed by this one encoder, whichever decoder `f` wraps.
        java.util.Base64.getMimeEncoder.encode(text.getBytes)
      }
    encoded.foreach { bytes =>
      f(bytes)
      ()
    }
  }

  /**
   * Registers and runs the encode benchmarks, then the decode benchmarks, for payload
   * multipliers 1, 3, 5 and 7. Each benchmark has a "java" case (`java.util.Base64`
   * MIME codec) and an "apache" case (commons-codec `Base64`), with 3 iterations each.
   *
   * Within each group all benchmarks are constructed first and run afterwards.
   */
  override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
    val encodeBenchmarks = for (len <- Seq(1, 3, 5, 7)) yield {
      val encodeBench = new Benchmark(s"encode for $len", N, output = output)
      encodeBench.addCase("java", 3) { _ =>
        doEncode(len, bytes => java.util.Base64.getMimeEncoder().encode(bytes))
      }
      encodeBench.addCase("apache", 3) { _ =>
        doEncode(len, org.apache.commons.codec.binary.Base64.encodeBase64)
      }
      encodeBench
    }
    encodeBenchmarks.foreach(_.run())

    val decodeBenchmarks = for (len <- Seq(1, 3, 5, 7)) yield {
      val decodeBench = new Benchmark(s"decode for $len", N, output = output)
      decodeBench.addCase("java", 3) { _ =>
        doDecode(len, bytes => java.util.Base64.getMimeDecoder.decode(bytes))
      }
      decodeBench.addCase("apache", 3) { _ =>
        doDecode(len, org.apache.commons.codec.binary.Base64.decodeBase64)
      }
      decodeBench
    }
    decodeBenchmarks.foreach(_.run())
  }

}