Skip to content

Commit 930ffe2

Browse files
committed
Refactor.
1 parent 0ca10ba commit 930ffe2

3 files changed

Lines changed: 63 additions & 125 deletions

File tree

sql/core/benchmarks/SubExprEliminationBenchmark-jdk11-results.txt

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,21 +5,21 @@ Benchmark for performance of subexpression elimination
55
Preparing data for benchmarking ...
66
OpenJDK 64-Bit Server VM 11.0.9+11 on Mac OS X 10.15.6
77
Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz
8-
from_json as subExpr in Project: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
9-
-------------------------------------------------------------------------------------------------------------------------
10-
subexpressionElimination off, codegen on 22605 22935 291 0.0 226047196.5 1.0X
11-
subexpressionElimination off, codegen off 21811 22151 303 0.0 218105716.6 1.0X
12-
subexpressionElimination on, codegen on 1353 1385 36 0.0 13531011.3 16.7X
13-
subexpressionElimination on, codegen off 1237 1260 20 0.0 12368657.3 18.3X
8+
from_json as subExpr in Project: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
9+
------------------------------------------------------------------------------------------------------------------------
10+
subExprElimination false, codegen: true 26447 27127 605 0.0 264467933.4 1.0X
11+
subExprElimination false, codegen: false 25673 26035 546 0.0 256732419.1 1.0X
12+
subExprElimination true, codegen: true 1384 1448 102 0.0 13842910.3 19.1X
13+
subExprElimination true, codegen: false 1244 1347 123 0.0 12442389.3 21.3X
1414

1515
Preparing data for benchmarking ...
1616
OpenJDK 64-Bit Server VM 11.0.9+11 on Mac OS X 10.15.6
1717
Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz
18-
from_json as subExpr in Filter: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
19-
-------------------------------------------------------------------------------------------------------------------------
20-
subexpressionElimination off, codegen on 32792 33101 282 0.0 327922763.5 1.0X
21-
subexpressionElimination off, codegen off 32809 33433 550 0.0 328088662.6 1.0X
22-
subexpressionElimination on, codegen on 18173 18828 869 0.0 181734709.5 1.8X
23-
subexpressionElimination on, codegen off 33695 33951 287 0.0 336950807.7 1.0X
18+
from_json as subExpr in Filter: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
19+
------------------------------------------------------------------------------------------------------------------------
20+
subExprElimination false, codegen: true 34631 35449 833 0.0 346309884.0 1.0X
21+
subExprElimination false, codegen: false 34480 34851 353 0.0 344798490.4 1.0X
22+
subExprElimination true, codegen: true 16618 16811 291 0.0 166176642.6 2.1X
23+
subExprElimination true, codegen: false 34316 34667 310 0.0 343157094.7 1.0X
2424

2525

sql/core/benchmarks/SubExprEliminationBenchmark-results.txt

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,21 +5,21 @@ Benchmark for performance of subexpression elimination
55
Preparing data for benchmarking ...
66
OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.6
77
Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz
8-
from_json as subExpr in Project: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
9-
-------------------------------------------------------------------------------------------------------------------------
10-
subexpressionElimination off, codegen on 25887 26105 326 0.0 258868246.6 1.0X
11-
subexpressionElimination off, codegen off 25131 25454 522 0.0 251309329.7 1.0X
12-
subexpressionElimination on, codegen on 2230 2340 106 0.0 22302959.3 11.6X
13-
subexpressionElimination on, codegen off 2185 2254 64 0.0 21852694.0 11.8X
8+
from_json as subExpr in Project: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
9+
------------------------------------------------------------------------------------------------------------------------
10+
subExprElimination false, codegen: true 22767 23240 424 0.0 227665316.7 1.0X
11+
subExprElimination false, codegen: false 22869 23351 465 0.0 228693464.1 1.0X
12+
subExprElimination true, codegen: true 1328 1340 10 0.0 13280056.2 17.1X
13+
subExprElimination true, codegen: false 1248 1276 31 0.0 12476135.1 18.2X
1414

1515
Preparing data for benchmarking ...
1616
OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.6
1717
Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz
18-
from_json as subExpr in Filter: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
19-
-------------------------------------------------------------------------------------------------------------------------
20-
subexpressionElimination off, codegen on 42687 42805 111 0.0 426873372.5 1.0X
21-
subexpressionElimination off, codegen off 43606 45108 1613 0.0 436055236.3 1.0X
22-
subexpressionElimination on, codegen on 29761 30563 704 0.0 297614324.4 1.4X
23-
subexpressionElimination on, codegen off 41676 42598 955 0.0 416758112.3 1.0X
18+
from_json as subExpr in Filter: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
19+
------------------------------------------------------------------------------------------------------------------------
20+
subExprElimination false, codegen: true 37691 38846 1004 0.0 376913767.9 1.0X
21+
subExprElimination false, codegen: false 37852 39124 1103 0.0 378517745.5 1.0X
22+
subExprElimination true, codegen: true 22900 23085 202 0.0 229000242.5 1.6X
23+
subExprElimination true, codegen: false 38298 38598 374 0.0 382978731.3 1.0X
2424

2525

sql/core/src/test/scala/org/apache/spark/sql/execution/SubExprEliminationBenchmark.scala

Lines changed: 39 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -52,57 +52,26 @@ object SubExprEliminationBenchmark extends SqlBasedBenchmark {
5252
from_json('value, schema).getField(s"col$idx")
5353
}
5454

55-
// We only benchmark subexpression performance under codegen/non-codegen, so disabling
56-
// json optimization.
57-
benchmark.addCase("subexpressionElimination off, codegen on", numIters) { _ =>
58-
withSQLConf(
59-
SQLConf.SUBEXPRESSION_ELIMINATION_ENABLED.key -> "false",
60-
SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true",
61-
SQLConf.CODEGEN_FACTORY_MODE.key -> "CODEGEN_ONLY",
62-
SQLConf.JSON_EXPRESSION_OPTIMIZATION.key -> "false") {
63-
val df = spark.read
64-
.text(path.getAbsolutePath)
65-
.select(cols: _*)
66-
df.write.mode("overwrite").format("noop").save()
67-
}
68-
}
69-
70-
benchmark.addCase("subexpressionElimination off, codegen off", numIters) { _ =>
71-
withSQLConf(
72-
SQLConf.SUBEXPRESSION_ELIMINATION_ENABLED.key -> "false",
73-
SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false",
74-
SQLConf.CODEGEN_FACTORY_MODE.key -> "NO_CODEGEN",
75-
SQLConf.JSON_EXPRESSION_OPTIMIZATION.key -> "false") {
76-
val df = spark.read
77-
.text(path.getAbsolutePath)
78-
.select(cols: _*)
79-
df.write.mode("overwrite").format("noop").save()
80-
}
81-
}
82-
83-
benchmark.addCase("subexpressionElimination on, codegen on", numIters) { _ =>
84-
withSQLConf(
85-
SQLConf.SUBEXPRESSION_ELIMINATION_ENABLED.key -> "true",
86-
SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true",
87-
SQLConf.CODEGEN_FACTORY_MODE.key -> "CODEGEN_ONLY",
55+
Seq(
56+
("false", "true", "CODEGEN_ONLY"),
57+
("false", "false", "NO_CODEGEN"),
58+
("true", "true", "CODEGEN_ONLY"),
59+
("true", "false", "NO_CODEGEN")
60+
).foreach { case (subExprEliminationEnabled, codegenEnabled, codegenFactory) =>
61+
// We only benchmark subexpression performance under codegen/non-codegen, so disabling
62+
// json optimization.
63+
val caseName = s"subExprElimination $subExprEliminationEnabled, codegen: $codegenEnabled"
64+
benchmark.addCase(caseName, numIters) { _ =>
65+
withSQLConf(
66+
SQLConf.SUBEXPRESSION_ELIMINATION_ENABLED.key -> subExprEliminationEnabled,
67+
SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> codegenEnabled,
68+
SQLConf.CODEGEN_FACTORY_MODE.key -> codegenFactory,
8869
SQLConf.JSON_EXPRESSION_OPTIMIZATION.key -> "false") {
89-
val df = spark.read
90-
.text(path.getAbsolutePath)
91-
.select(cols: _*)
92-
df.write.mode("overwrite").format("noop").save()
93-
}
94-
}
95-
96-
benchmark.addCase("subexpressionElimination on, codegen off", numIters) { _ =>
97-
withSQLConf(
98-
SQLConf.SUBEXPRESSION_ELIMINATION_ENABLED.key -> "true",
99-
SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false",
100-
SQLConf.CODEGEN_FACTORY_MODE.key -> "NO_CODEGEN",
101-
SQLConf.JSON_EXPRESSION_OPTIMIZATION.key -> "false") {
102-
val df = spark.read
103-
.text(path.getAbsolutePath)
104-
.select(cols: _*)
105-
df.write.mode("overwrite").format("noop").save()
70+
val df = spark.read
71+
.text(path.getAbsolutePath)
72+
.select(cols: _*)
73+
df.write.mode("overwrite").format("noop").save()
74+
}
10675
}
10776
}
10877

@@ -122,57 +91,26 @@ object SubExprEliminationBenchmark extends SqlBasedBenchmark {
12291
(from_json('value, schema).getField(s"col$idx") >= Literal(100000)).expr
12392
}.asInstanceOf[Seq[Expression]].reduce(Or)
12493

125-
// We only benchmark subexpression performance under codegen/non-codegen, so disabling
126-
// json optimization.
127-
benchmark.addCase("subexpressionElimination off, codegen on", numIters) { _ =>
128-
withSQLConf(
129-
SQLConf.SUBEXPRESSION_ELIMINATION_ENABLED.key -> "false",
130-
SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true",
131-
SQLConf.CODEGEN_FACTORY_MODE.key -> "CODEGEN_ONLY",
132-
SQLConf.JSON_EXPRESSION_OPTIMIZATION.key -> "false") {
133-
val df = spark.read
134-
.text(path.getAbsolutePath)
135-
.where(Column(predicate))
136-
df.write.mode("overwrite").format("noop").save()
137-
}
138-
}
139-
140-
benchmark.addCase("subexpressionElimination off, codegen off", numIters) { _ =>
141-
withSQLConf(
142-
SQLConf.SUBEXPRESSION_ELIMINATION_ENABLED.key -> "false",
143-
SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false",
144-
SQLConf.CODEGEN_FACTORY_MODE.key -> "NO_CODEGEN",
145-
SQLConf.JSON_EXPRESSION_OPTIMIZATION.key -> "false") {
146-
val df = spark.read
147-
.text(path.getAbsolutePath)
148-
.where(Column(predicate))
149-
df.write.mode("overwrite").format("noop").save()
150-
}
151-
}
152-
153-
benchmark.addCase("subexpressionElimination on, codegen on", numIters) { _ =>
154-
withSQLConf(
155-
SQLConf.SUBEXPRESSION_ELIMINATION_ENABLED.key -> "true",
156-
SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true",
157-
SQLConf.CODEGEN_FACTORY_MODE.key -> "CODEGEN_ONLY",
158-
SQLConf.JSON_EXPRESSION_OPTIMIZATION.key -> "false") {
159-
val df = spark.read
160-
.text(path.getAbsolutePath)
161-
.where(Column(predicate))
162-
df.write.mode("overwrite").format("noop").save()
163-
}
164-
}
165-
166-
benchmark.addCase("subexpressionElimination on, codegen off", numIters) { _ =>
167-
withSQLConf(
168-
SQLConf.SUBEXPRESSION_ELIMINATION_ENABLED.key -> "true",
169-
SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false",
170-
SQLConf.CODEGEN_FACTORY_MODE.key -> "NO_CODEGEN",
171-
SQLConf.JSON_EXPRESSION_OPTIMIZATION.key -> "false") {
172-
val df = spark.read
173-
.text(path.getAbsolutePath)
174-
.where(Column(predicate))
175-
df.write.mode("overwrite").format("noop").save()
94+
Seq(
95+
("false", "true", "CODEGEN_ONLY"),
96+
("false", "false", "NO_CODEGEN"),
97+
("true", "true", "CODEGEN_ONLY"),
98+
("true", "false", "NO_CODEGEN")
99+
).foreach { case (subExprEliminationEnabled, codegenEnabled, codegenFactory) =>
100+
// We only benchmark subexpression performance under codegen/non-codegen, so disabling
101+
// json optimization.
102+
val caseName = s"subExprElimination $subExprEliminationEnabled, codegen: $codegenEnabled"
103+
benchmark.addCase(caseName, numIters) { _ =>
104+
withSQLConf(
105+
SQLConf.SUBEXPRESSION_ELIMINATION_ENABLED.key -> subExprEliminationEnabled,
106+
SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> codegenEnabled,
107+
SQLConf.CODEGEN_FACTORY_MODE.key -> codegenFactory,
108+
SQLConf.JSON_EXPRESSION_OPTIMIZATION.key -> "false") {
109+
val df = spark.read
110+
.text(path.getAbsolutePath)
111+
.where(Column(predicate))
112+
df.write.mode("overwrite").format("noop").save()
113+
}
176114
}
177115
}
178116

0 commit comments

Comments
 (0)