@@ -374,8 +374,6 @@ private[parquet] class AppendingParquetOutputFormat(offset: Int)
 private[parquet] class FilteringParquetRowInputFormat
   extends parquet.hadoop.ParquetInputFormat[Row] with Logging {
 
-  private var footers: JList[Footer] = _
-
   private var fileStatuses = Map.empty[Path, FileStatus]
 
   override def createRecordReader(
@@ -396,46 +394,15 @@ private[parquet] class FilteringParquetRowInputFormat
     }
   }
 
-  override def getFooters(jobContext: JobContext): JList[Footer] = {
-    import org.apache.spark.sql.parquet.FilteringParquetRowInputFormat.footerCache
-
-    if (footers eq null) {
-      val conf = ContextUtil.getConfiguration(jobContext)
-      val cacheMetadata = conf.getBoolean(SQLConf.PARQUET_CACHE_METADATA, true)
-      val statuses = listStatus(jobContext)
-      fileStatuses = statuses.map(file => file.getPath -> file).toMap
-      if (statuses.isEmpty) {
-        footers = Collections.emptyList[Footer]
-      } else if (!cacheMetadata) {
-        // Read the footers from HDFS
-        footers = getFooters(conf, statuses)
-      } else {
-        // Read only the footers that are not in the footerCache
-        val foundFooters = footerCache.getAllPresent(statuses)
-        val toFetch = new ArrayList[FileStatus]
-        for (s <- statuses) {
-          if (!foundFooters.containsKey(s)) {
-            toFetch.add(s)
-          }
-        }
-        val newFooters = new mutable.HashMap[FileStatus, Footer]
-        if (toFetch.size > 0) {
-          val startFetch = System.currentTimeMillis
-          val fetched = getFooters(conf, toFetch)
-          logInfo(s"Fetched $toFetch footers in ${System.currentTimeMillis - startFetch} ms")
-          for ((status, i) <- toFetch.zipWithIndex) {
-            newFooters(status) = fetched.get(i)
-          }
-          footerCache.putAll(newFooters)
-        }
-        footers = new ArrayList[Footer](statuses.size)
-        for (status <- statuses) {
-          footers.add(newFooters.getOrElse(status, foundFooters.get(status)))
-        }
-      }
-    }
+  // This is only a temporary solution since we need to use fileStatuses in
+  // both getClientSideSplits and getTaskSideSplits. It can be removed once we get rid of these
+  // two methods.
+  override def getSplits(jobContext: JobContext): JList[InputSplit] = {
+    // First set fileStatuses.
+    val statuses = listStatus(jobContext)
+    fileStatuses = statuses.map(file => file.getPath -> file).toMap
 
-    footers
+    super.getSplits(jobContext)
   }
 
   // TODO Remove this method and related code once PARQUET-16 is fixed
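
For reference, here is a minimal standalone sketch of the pattern the new `getSplits` override relies on: capturing the file listing that Hadoop already computes for split planning, so that later helpers can resolve a `Path` back to its `FileStatus` without listing the filesystem again. This is not the patched class itself; it builds on plain Hadoop's `TextInputFormat` rather than the Parquet input format, and `StatusCachingInputFormat`/`statusOf` are hypothetical names used only for illustration.

```scala
import java.util.{List => JList}

import scala.collection.JavaConverters._

import org.apache.hadoop.fs.{FileStatus, Path}
import org.apache.hadoop.mapreduce.{InputSplit, JobContext}
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat

// Hypothetical FileInputFormat subclass illustrating the same trick:
// capture the listing computed for split planning, then delegate.
class StatusCachingInputFormat extends TextInputFormat {

  // Populated as a side effect of getSplits, like fileStatuses above.
  private var fileStatuses = Map.empty[Path, FileStatus]

  override def getSplits(jobContext: JobContext): JList[InputSplit] = {
    // listStatus is the protected FileInputFormat helper the patch also
    // calls; remember its result before running the default split logic.
    val statuses = listStatus(jobContext).asScala
    fileStatuses = statuses.map(file => file.getPath -> file).toMap

    super.getSplits(jobContext)
  }

  // Later lookups (the patch needs them in getClientSideSplits and
  // getTaskSideSplits) can then resolve a Path from the cached map.
  def statusOf(path: Path): Option[FileStatus] = fileStatuses.get(path)
}
```

The trade-off is the one the added comment calls out: a single pass over the filesystem metadata replaces the removed per-footer Guava cache, at the cost of keeping mutable state on the input format instance, which is why the comment labels it a temporary solution.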