1616 */
1717package org .apache .spark .sql .execution .datasources .json
1818
19- import java .io .File
20-
21- import org .apache .spark .SparkConf
2219import org .apache .spark .benchmark .Benchmark
23- import org .apache .spark .sql .{ Row , SparkSession }
24- import org .apache .spark .sql .catalyst . plans . SQLHelper
20+ import org .apache .spark .sql .Row
21+ import org .apache .spark .sql .execution . benchmark . SqlBasedBenchmark
2522import org .apache .spark .sql .functions .lit
2623import org .apache .spark .sql .types ._
2724
2825/**
2926 * The benchmark aims to measure the performance of JSON parsing both when encoding is set and when it isn't.
30- * To run this:
31- * spark-submit --class <this class> --jars <spark sql test jar>
27+ * To run this benchmark:
28+ * {{{
29+ * 1. without sbt:
30+ * bin/spark-submit --class <this class> --jars <spark core test jar>,
31+ * <spark catalyst test jar> <spark sql test jar>
32+ * 2. build/sbt "sql/test:runMain <this class>"
33+ * 3. generate result:
34+ * SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain <this class>"
35+ * Results will be written to "benchmarks/JSONBenchmark-results.txt".
36+ * }}}
3237 */
33- object JSONBenchmarks extends SQLHelper {
34- val conf = new SparkConf ()
35-
36- val spark = SparkSession .builder
37- .master(" local[1]" )
38- .appName(" benchmark-json-datasource" )
39- .config(conf)
40- .getOrCreate()
38+
39+ object JSONBenchmark extends SqlBasedBenchmark {
4140 import spark .implicits ._
4241
4342 def schemaInferring (rowsNum : Int ): Unit = {
44- val benchmark = new Benchmark (" JSON schema inferring" , rowsNum)
43+ val benchmark = new Benchmark (" JSON schema inferring" , rowsNum, output = output )
4544
4645 withTempPath { path =>
4746 // scalastyle:off println
@@ -65,21 +64,12 @@ object JSONBenchmarks extends SQLHelper {
6564 .json(path.getAbsolutePath)
6665 }
6766
68- /*
69- Java HotSpot(TM) 64-Bit Server VM 1.8.0_172-b11 on Mac OS X 10.13.5
70- Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
71-
72- JSON schema inferring: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
73- ---------------------------------------------------------------------------------------------
74- No encoding 45908 / 46480 2.2 459.1 1.0X
75- UTF-8 is set 68469 / 69762 1.5 684.7 0.7X
76- */
7767 benchmark.run()
7868 }
7969 }
8070
8171 def perlineParsing (rowsNum : Int ): Unit = {
82- val benchmark = new Benchmark (" JSON per-line parsing" , rowsNum)
72+ val benchmark = new Benchmark (" JSON per-line parsing" , rowsNum, output = output )
8373
8474 withTempPath { path =>
8575 // scalastyle:off println
@@ -107,21 +97,12 @@ object JSONBenchmarks extends SQLHelper {
10797 .count()
10898 }
10999
110- /*
111- Java HotSpot(TM) 64-Bit Server VM 1.8.0_172-b11 on Mac OS X 10.13.5
112- Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
113-
114- JSON per-line parsing: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
115- ---------------------------------------------------------------------------------------------
116- No encoding 9982 / 10237 10.0 99.8 1.0X
117- UTF-8 is set 16373 / 16806 6.1 163.7 0.6X
118- */
119100 benchmark.run()
120101 }
121102 }
122103
123104 def perlineParsingOfWideColumn (rowsNum : Int ): Unit = {
124- val benchmark = new Benchmark (" JSON parsing of wide lines" , rowsNum)
105+ val benchmark = new Benchmark (" JSON parsing of wide lines" , rowsNum, output = output )
125106
126107 withTempPath { path =>
127108 // scalastyle:off println
@@ -156,22 +137,14 @@ object JSONBenchmarks extends SQLHelper {
156137 .count()
157138 }
158139
159- /*
160- Java HotSpot(TM) 64-Bit Server VM 1.8.0_172-b11 on Mac OS X 10.13.5
161- Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
162-
163- JSON parsing of wide lines: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
164- ---------------------------------------------------------------------------------------------
165- No encoding 26038 / 26386 0.4 2603.8 1.0X
166- UTF-8 is set 28343 / 28557 0.4 2834.3 0.9X
167- */
168140 benchmark.run()
169141 }
170142 }
171143
172144 def countBenchmark (rowsNum : Int ): Unit = {
173145 val colsNum = 10
174- val benchmark = new Benchmark (s " Count a dataset with $colsNum columns " , rowsNum)
146+ val benchmark =
147+ new Benchmark (s " Count a dataset with $colsNum columns " , rowsNum, output = output)
175148
176149 withTempPath { path =>
177150 val fields = Seq .tabulate(colsNum)(i => StructField (s " col $i" , IntegerType ))
@@ -195,23 +168,16 @@ object JSONBenchmarks extends SQLHelper {
195168 ds.count()
196169 }
197170
198- /*
199- Intel(R) Core(TM) i7-7700HQ CPU @ 2.80GHz
200-
201- Count a dataset with 10 columns: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
202- ---------------------------------------------------------------------------------------------
203- Select 10 columns + count() 9961 / 10006 1.0 996.1 1.0X
204- Select 1 column + count() 8355 / 8470 1.2 835.5 1.2X
205- count() 2104 / 2156 4.8 210.4 4.7X
206- */
207171 benchmark.run()
208172 }
209173 }
210174
211- def main (args : Array [String ]): Unit = {
212- schemaInferring(100 * 1000 * 1000 )
213- perlineParsing(100 * 1000 * 1000 )
214- perlineParsingOfWideColumn(10 * 1000 * 1000 )
215- countBenchmark(10 * 1000 * 1000 )
// Entry point of the suite: overrides runBenchmarkSuite and runs the four JSON benchmarks
// defined in this object inside one runBenchmark group with the title below.
// `mainArgs` is accepted to satisfy the overridden signature but is not read here.
175+ override def runBenchmarkSuite (mainArgs : Array [String ]): Unit = {
176+ runBenchmark(" Benchmark for performance of JSON parsing" ) {
// Schema inference and per-line parsing are each invoked with 100 * 1000 * 1000 as rowsNum.
177+ schemaInferring(100 * 1000 * 1000 )
178+ perlineParsing(100 * 1000 * 1000 )
// The wide-column and count benchmarks use a 10x smaller rowsNum (10 * 1000 * 1000).
179+ perlineParsingOfWideColumn(10 * 1000 * 1000 )
180+ countBenchmark(10 * 1000 * 1000 )
181+ }
216182 }
217183}
0 commit comments