Skip to content

Commit a7e5aa6

Browse files
mallman and dbtsai
authored and committed
[SPARK-25406][SQL] For ParquetSchemaPruningSuite.scala, move calls to withSQLConf inside calls to test
(Link to Jira: https://issues.apache.org/jira/browse/SPARK-25406)

## What changes were proposed in this pull request?

The current use of `withSQLConf` in `ParquetSchemaPruningSuite.scala` is incorrect. The desired configuration settings are not being set when running the test cases, because wrapping a call to `test` inside `withSQLConf` applies the settings only while the test is being registered, not later when the test body actually runs. This PR fixes that defective usage and addresses the test failures that were previously masked by that defect.

## How was this patch tested?

I added code to the relevant test cases to print the expected SQL configuration settings and found that the settings were not being set as expected. When I changed the order of the calls to `test` and `withSQLConf`, I found that the configuration settings were being set as expected.

Closes #22394 from mallman/spark-25406-fix_broken_schema_pruning_tests.

Authored-by: Michael Allman <msa@allman.ms>
Signed-off-by: DB Tsai <d_tsai@apple.com>
1 parent 45c4ebc commit a7e5aa6

1 file changed

Lines changed: 38 additions & 25 deletions

File tree

sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala

Lines changed: 38 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -218,20 +218,24 @@ class ParquetSchemaPruningSuite
218218
}
219219

220220
private def testSchemaPruning(testName: String)(testThunk: => Unit) {
221-
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true") {
222-
test(s"Spark vectorized reader - without partition data column - $testName") {
221+
test(s"Spark vectorized reader - without partition data column - $testName") {
222+
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true") {
223223
withContacts(testThunk)
224224
}
225-
test(s"Spark vectorized reader - with partition data column - $testName") {
225+
}
226+
test(s"Spark vectorized reader - with partition data column - $testName") {
227+
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true") {
226228
withContactsWithDataPartitionColumn(testThunk)
227229
}
228230
}
229231

230-
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") {
231-
test(s"Parquet-mr reader - without partition data column - $testName") {
232+
test(s"Parquet-mr reader - without partition data column - $testName") {
233+
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") {
232234
withContacts(testThunk)
233235
}
234-
test(s"Parquet-mr reader - with partition data column - $testName") {
236+
}
237+
test(s"Parquet-mr reader - with partition data column - $testName") {
238+
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") {
235239
withContactsWithDataPartitionColumn(testThunk)
236240
}
237241
}
@@ -271,7 +275,7 @@ class ParquetSchemaPruningSuite
271275
MixedCase(1, "r1c1", MixedCaseColumn("123", 2)) ::
272276
Nil
273277

274-
testMixedCasePruning("select with exact column names") {
278+
testExactCaseQueryPruning("select with exact column names") {
275279
val query = sql("select CoL1, coL2.B from mixedcase")
276280
checkScan(query, "struct<CoL1:string,coL2:struct<B:int>>")
277281
checkAnswer(query.orderBy("id"),
@@ -280,7 +284,7 @@ class ParquetSchemaPruningSuite
280284
Nil)
281285
}
282286

283-
testMixedCasePruning("select with lowercase column names") {
287+
testMixedCaseQueryPruning("select with lowercase column names") {
284288
val query = sql("select col1, col2.b from mixedcase")
285289
checkScan(query, "struct<CoL1:string,coL2:struct<B:int>>")
286290
checkAnswer(query.orderBy("id"),
@@ -289,7 +293,7 @@ class ParquetSchemaPruningSuite
289293
Nil)
290294
}
291295

292-
testMixedCasePruning("select with different-case column names") {
296+
testMixedCaseQueryPruning("select with different-case column names") {
293297
val query = sql("select cOL1, cOl2.b from mixedcase")
294298
checkScan(query, "struct<CoL1:string,coL2:struct<B:int>>")
295299
checkAnswer(query.orderBy("id"),
@@ -298,34 +302,43 @@ class ParquetSchemaPruningSuite
298302
Nil)
299303
}
300304

301-
testMixedCasePruning("filter with different-case column names") {
305+
testMixedCaseQueryPruning("filter with different-case column names") {
302306
val query = sql("select id from mixedcase where Col2.b = 2")
303307
checkScan(query, "struct<id:int,coL2:struct<B:int>>")
304308
checkAnswer(query.orderBy("id"), Row(1) :: Nil)
305309
}
306310

307-
private def testMixedCasePruning(testName: String)(testThunk: => Unit) {
308-
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true",
309-
SQLConf.CASE_SENSITIVE.key -> "true") {
310-
test(s"Spark vectorized reader - case-sensitive parser - mixed-case schema - $testName") {
311-
withMixedCaseData(testThunk)
311+
// Tests schema pruning for a query whose column and field names are exactly the same as the table
312+
// schema's column and field names. N.B. this implies that `testThunk` should pass using either a
313+
// case-sensitive or case-insensitive query parser
314+
private def testExactCaseQueryPruning(testName: String)(testThunk: => Unit) {
315+
test(s"Spark vectorized reader - case-sensitive parser - mixed-case schema - $testName") {
316+
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true",
317+
SQLConf.CASE_SENSITIVE.key -> "true") {
318+
withMixedCaseData(testThunk)
312319
}
313320
}
314-
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false",
315-
SQLConf.CASE_SENSITIVE.key -> "false") {
316-
test(s"Parquet-mr reader - case-insensitive parser - mixed-case schema - $testName") {
321+
test(s"Parquet-mr reader - case-sensitive parser - mixed-case schema - $testName") {
322+
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false",
323+
SQLConf.CASE_SENSITIVE.key -> "true") {
317324
withMixedCaseData(testThunk)
318325
}
319326
}
320-
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true",
321-
SQLConf.CASE_SENSITIVE.key -> "false") {
322-
test(s"Spark vectorized reader - case-insensitive parser - mixed-case schema - $testName") {
323-
withMixedCaseData(testThunk)
327+
testMixedCaseQueryPruning(testName)(testThunk)
328+
}
329+
330+
// Tests schema pruning for a query whose column and field names may differ in case from the table
331+
// schema's column and field names
332+
private def testMixedCaseQueryPruning(testName: String)(testThunk: => Unit) {
333+
test(s"Spark vectorized reader - case-insensitive parser - mixed-case schema - $testName") {
334+
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true",
335+
SQLConf.CASE_SENSITIVE.key -> "false") {
336+
withMixedCaseData(testThunk)
324337
}
325338
}
326-
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false",
327-
SQLConf.CASE_SENSITIVE.key -> "true") {
328-
test(s"Parquet-mr reader - case-sensitive parser - mixed-case schema - $testName") {
339+
test(s"Parquet-mr reader - case-insensitive parser - mixed-case schema - $testName") {
340+
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false",
341+
SQLConf.CASE_SENSITIVE.key -> "false") {
329342
withMixedCaseData(testThunk)
330343
}
331344
}

0 commit comments

Comments
 (0)