Changes from 6 commits
10 changes: 10 additions & 0 deletions external/avro/benchmarks/AvroWriteBenchmark-results.txt
@@ -0,0 +1,10 @@
Java HotSpot(TM) 64-Bit Server VM 1.8.0_162-b12 on Mac OS X 10.13.6
Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
Avro writer benchmark: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
Output Single Int Column 2287 / 2295 6.9 145.4 1.0X
Output Single Double Column 2494 / 2538 6.3 158.6 0.9X
Output Int and String Column 5555 / 5587 2.8 353.2 0.4X
Output Partitions 3928 / 4157 4.0 249.7 0.6X
Output Buckets 5374 / 5441 2.9 341.7 0.4X

@@ -19,22 +19,17 @@ package org.apache.spark.sql.execution.benchmark

/**
* Benchmark to measure Avro data sources write performance.
* Usage:
* 1. with spark-submit: bin/spark-submit --class <this class> <spark sql test jar>
* 2. with sbt: build/sbt "avro/test:runMain <this class>"
* To run this benchmark:
* {{{
* 1. without sbt: bin/spark-submit --class <this class>
@wangyum (Member) commented on Oct 29, 2018:


Please add avro:

bin/spark-submit --class <this class> --jars <spark core test jar>,<spark catalyst test jar>,<spark sql test jar> <spark avro test jar>

Follow-up comment (Member):

I hit an exception when running:

bin/spark-submit --class org.apache.spark.sql.execution.benchmark.AvroWriteBenchmark --jars ./core/target/spark-core_2.11-3.0.0-SNAPSHOT-tests.jar,./sql/catalyst/target/spark-catalyst_2.11-3.0.0-SNAPSHOT-tests.jar,./sql/core/target/spark-sql_2.11-3.0.0-SNAPSHOT-tests.jar ./external/avro/target/spark-avro_2.11-3.0.0-SNAPSHOT-tests.jar
Exception in thread "main" org.apache.spark.sql.AnalysisException: Failed to find data source: Avro. Avro is built-in but external data source module since Spark 2.4. Please deploy the application as per the deployment section of "Apache Avro Data Source Guide".;
	at org.apache.spark.sql.execution.datasources.DataSource$.lookupDataSource(DataSource.scala:647)
	at org.apache.spark.sql.execution.datasources.DataSource.providingClass$lzycompute(DataSource.scala:94)
	at org.apache.spark.sql.execution.datasources.DataSource.providingClass(DataSource.scala:93)
	at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:313)
	at org.apache.spark.sql.execution.command.CreateDataSourceTableCommand.run(createDataSourceTables.scala:78)
	at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
	at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
	at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79)
	at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:195)
	at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:195)
......

@yucai (Contributor, Author) commented on Oct 30, 2018:


@wangyum Good catch! I think it needs <spark avro jar>, added.
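
For reference, the corrected invocation would be the failing command above with the spark-avro (non-test) jar added to --jars; the exact path below is an assumption by analogy with the other target paths in this thread:

bin/spark-submit --class org.apache.spark.sql.execution.benchmark.AvroWriteBenchmark --jars ./core/target/spark-core_2.11-3.0.0-SNAPSHOT-tests.jar,./sql/catalyst/target/spark-catalyst_2.11-3.0.0-SNAPSHOT-tests.jar,./sql/core/target/spark-sql_2.11-3.0.0-SNAPSHOT-tests.jar,./external/avro/target/spark-avro_2.11-3.0.0-SNAPSHOT.jar ./external/avro/target/spark-avro_2.11-3.0.0-SNAPSHOT-tests.jar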

* --jars <spark core test jar>,<spark catalyst test jar>,<spark avro jar> <spark sql test jar>
* 2. build/sbt "avro/test:runMain <this class>"
* 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "avro/test:runMain <this class>"
* Results will be written to "benchmarks/AvroWriteBenchmark-results.txt".
* }}}
*/
object AvroWriteBenchmark extends DataSourceWriteBenchmark {
def main(args: Array[String]): Unit = {
/*
Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz
Avro writer benchmark: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
Output Single Int Column 2481 / 2499 6.3 157.8 1.0X
Output Single Double Column 2705 / 2710 5.8 172.0 0.9X
Output Int and String Column 5539 / 5639 2.8 352.2 0.4X
Output Partitions 4613 / 5004 3.4 293.3 0.5X
Output Buckets 5554 / 5561 2.8 353.1 0.4X
*/
runBenchmark("Avro")
override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
runDataSourceBenchmark("Avro")
}
}
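
The refactoring removes each benchmark's own main method in favor of a shared harness that calls runBenchmarkSuite. A minimal sketch of the shape such a base might have, assuming (inferred from this diff and the generated results files, not taken from the PR itself) that it owns the output stream and the SPARK_GENERATE_BENCHMARK_FILES switch:

import java.io.{File, FileOutputStream, OutputStream}

abstract class BenchmarkBase {
  // Where result tables are written; None means stdout.
  var output: Option[OutputStream] = None

  // Each concrete benchmark implements its measurements here.
  def runBenchmarkSuite(mainArgs: Array[String]): Unit

  def main(args: Array[String]): Unit = {
    // SPARK_GENERATE_BENCHMARK_FILES=1 redirects results into
    // benchmarks/<ClassName>-results.txt, matching the files added in this PR.
    if (sys.env.contains("SPARK_GENERATE_BENCHMARK_FILES")) {
      val file = new File(s"benchmarks/${getClass.getSimpleName.stripSuffix("$")}-results.txt")
      output = Some(new FileOutputStream(file))
    }
    runBenchmarkSuite(args)
    output.foreach(_.close())
  }
}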
60 changes: 60 additions & 0 deletions sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-results.txt
@@ -0,0 +1,60 @@
================================================================================================
Parquet writer benchmark
================================================================================================

Java HotSpot(TM) 64-Bit Server VM 1.8.0_162-b12 on Mac OS X 10.13.6
Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
Parquet writer benchmark: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
Output Single Int Column 2049 / 2120 7.7 130.2 1.0X
Output Single Double Column 2194 / 2203 7.2 139.5 0.9X
Output Int and String Column 5704 / 5715 2.8 362.7 0.4X
Output Partitions 3727 / 3856 4.2 237.0 0.5X
Output Buckets 5119 / 5361 3.1 325.4 0.4X


================================================================================================
ORC writer benchmark
================================================================================================

Java HotSpot(TM) 64-Bit Server VM 1.8.0_162-b12 on Mac OS X 10.13.6
Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
ORC writer benchmark: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
Output Single Int Column 1109 / 1116 14.2 70.5 1.0X
Output Single Double Column 1366 / 1378 11.5 86.8 0.8X
Output Int and String Column 5303 / 5318 3.0 337.2 0.2X
Output Partitions 3078 / 3472 5.1 195.7 0.4X
Output Buckets 4374 / 4398 3.6 278.1 0.3X


================================================================================================
JSON writer benchmark
================================================================================================

Java HotSpot(TM) 64-Bit Server VM 1.8.0_162-b12 on Mac OS X 10.13.6
Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
JSON writer benchmark: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
Output Single Int Column 1806 / 1813 8.7 114.8 1.0X
Output Single Double Column 2530 / 2538 6.2 160.8 0.7X
Output Int and String Column 5311 / 5344 3.0 337.6 0.3X
Output Partitions 3524 / 3580 4.5 224.1 0.5X
Output Buckets 4661 / 4723 3.4 296.3 0.4X


================================================================================================
CSV writer benchmark
================================================================================================

Java HotSpot(TM) 64-Bit Server VM 1.8.0_162-b12 on Mac OS X 10.13.6
Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
CSV writer benchmark: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
Output Single Int Column 2863 / 2896 5.5 182.0 1.0X
Output Single Double Column 3233 / 3238 4.9 205.6 0.9X
Output Int and String Column 6805 / 6822 2.3 432.6 0.4X
Output Partitions 4873 / 4886 3.2 309.8 0.6X
Output Buckets 6733 / 6757 2.3 428.1 0.4X


@@ -18,62 +18,40 @@ package org.apache.spark.sql.execution.benchmark

/**
* Benchmark to measure built-in data sources write performance.
* By default it measures 4 data source format: Parquet, ORC, JSON, CSV. Run it with spark-submit:
* spark-submit --class <this class> <spark sql test jar>
* Or with sbt:
* build/sbt "sql/test:runMain <this class>"
* To run this benchmark:
* {{{
* By default it measures 4 data source formats: Parquet, ORC, JSON, CSV.
* 1. without sbt: bin/spark-submit --class <this class>
* --jars <spark core test jar>,<spark catalyst test jar> <spark sql test jar>
* 2. build/sbt "sql/test:runMain <this class>"
* 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain <this class>"
* Results will be written to "benchmarks/BuiltInDataSourceWriteBenchmark-results.txt".
*
* To measure specified formats, run it with arguments.
* 1. without sbt:
* bin/spark-submit --class <this class> <spark sql test jar> format1 [format2] [...]
* 2. build/sbt "sql/test:runMain <this class> format1 [format2] [...]"
* 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt
* "sql/test:runMain <this class> format1 [format2] [...]"
* Results will be written to "benchmarks/BuiltInDataSourceWriteBenchmark-results.txt".
* }}}
*
* To measure specified formats, run it with arguments:
* spark-submit --class <this class> <spark sql test jar> format1 [format2] [...]
* Or with sbt:
* build/sbt "sql/test:runMain <this class> format1 [format2] [...]"
*/
object BuiltInDataSourceWriteBenchmark extends DataSourceWriteBenchmark {
def main(args: Array[String]): Unit = {
val formats: Seq[String] = if (args.isEmpty) {
override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
val formats: Seq[String] = if (mainArgs.isEmpty) {
Seq("Parquet", "ORC", "JSON", "CSV")
} else {
args
mainArgs
}

spark.conf.set("spark.sql.parquet.compression.codec", "snappy")
spark.conf.set("spark.sql.orc.compression.codec", "snappy")
/*
Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz
Parquet writer benchmark: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
Output Single Int Column 1815 / 1932 8.7 115.4 1.0X
Output Single Double Column 1877 / 1878 8.4 119.3 1.0X
Output Int and String Column 6265 / 6543 2.5 398.3 0.3X
Output Partitions 4067 / 4457 3.9 258.6 0.4X
Output Buckets 5608 / 5820 2.8 356.6 0.3X

ORC writer benchmark: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
Output Single Int Column 1201 / 1239 13.1 76.3 1.0X
Output Single Double Column 1542 / 1600 10.2 98.0 0.8X
Output Int and String Column 6495 / 6580 2.4 412.9 0.2X
Output Partitions 3648 / 3842 4.3 231.9 0.3X
Output Buckets 5022 / 5145 3.1 319.3 0.2X

JSON writer benchmark: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
Output Single Int Column 1988 / 2093 7.9 126.4 1.0X
Output Single Double Column 2854 / 2911 5.5 181.4 0.7X
Output Int and String Column 6467 / 6653 2.4 411.1 0.3X
Output Partitions 4548 / 5055 3.5 289.1 0.4X
Output Buckets 5664 / 5765 2.8 360.1 0.4X

CSV writer benchmark: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------
Output Single Int Column 3025 / 3190 5.2 192.3 1.0X
Output Single Double Column 3575 / 3634 4.4 227.3 0.8X
Output Int and String Column 7313 / 7399 2.2 464.9 0.4X
Output Partitions 5105 / 5190 3.1 324.6 0.6X
Output Buckets 6986 / 6992 2.3 444.1 0.4X
*/
formats.foreach { format =>
runBenchmark(format)
runBenchmark(s"$format writer benchmark") {
runDataSourceBenchmark(format)
}
}
}
}
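
Each format now runs inside runBenchmark(s"$format writer benchmark") { ... }, which is what emits the ====-delimited section headers seen in BuiltInDataSourceWriteBenchmark-results.txt above. A sketch of that grouping helper, assuming it sits next to `output` in the base harness (an inference, not this PR's code):

def runBenchmark(benchmarkName: String)(func: => Any): Unit = {
  val separator = "=" * 96  // banner width guessed from the results file
  val banner = s"\n$separator\n$benchmarkName\n$separator\n"
  output match {
    case Some(out) => out.write(banner.getBytes)
    case None      => println(banner)
  }
  func  // run the measurement body under the banner
}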
@@ -16,18 +16,9 @@
*/
package org.apache.spark.sql.execution.benchmark

import org.apache.spark.SparkConf
import org.apache.spark.benchmark.Benchmark
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.internal.SQLConf

trait DataSourceWriteBenchmark {
val conf = new SparkConf()
.setAppName("DataSourceWriteBenchmark")
.setIfMissing("spark.master", "local[1]")
.set(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key, "true")

val spark = SparkSession.builder.config(conf).getOrCreate()
trait DataSourceWriteBenchmark extends SqlBasedBenchmark {

val tempTable = "temp"
val numRows = 1024 * 1024 * 15
@@ -75,7 +66,7 @@ trait DataSourceWriteBenchmark {
}
}

def runBenchmark(format: String): Unit = {
def runDataSourceBenchmark(format: String): Unit = {
val tableInt = "tableInt"
val tableDouble = "tableDouble"
val tableIntString = "tableIntString"
@@ -84,7 +75,7 @@ trait DataSourceWriteBenchmark {
withTempTable(tempTable) {
spark.range(numRows).createOrReplaceTempView(tempTable)
withTable(tableInt, tableDouble, tableIntString, tablePartition, tableBucket) {
val benchmark = new Benchmark(s"$format writer benchmark", numRows)
val benchmark = new Benchmark(s"$format writer benchmark", numRows, output = output)
writeNumeric(tableInt, format, benchmark, "Int")
writeNumeric(tableDouble, format, benchmark, "Double")
writeIntString(tableIntString, format, benchmark)
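
With the hand-rolled SparkConf and SparkSession deleted, the trait now leans on SqlBasedBenchmark for its session and for the output stream handed to each Benchmark. A sketch of the assumed shape, where the session settings merely mirror the deleted defaults:

import org.apache.spark.sql.SparkSession

trait SqlBasedBenchmark extends BenchmarkBase {
  // Single-threaded local session, mirroring the removed
  // setIfMissing("spark.master", "local[1]") default.
  val spark: SparkSession = SparkSession.builder()
    .master("local[1]")
    .appName(getClass.getCanonicalName)
    .getOrCreate()
}

Passing output = output into new Benchmark(...) then routes each result table into the generated results file when SPARK_GENERATE_BENCHMARK_FILES is set, and to stdout otherwise.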