1818 */
1919package org .apache .parquet .hadoop ;
2020
21+ import static org .apache .hadoop .yarn .webapp .hamlet .HamletSpec .InputType .file ;
2122import static org .apache .parquet .Log .DEBUG ;
2223import static org .apache .parquet .bytes .BytesUtils .readIntLittleEndian ;
2324import static org .apache .parquet .filter2 .compat .RowGroupFilter .FilterLevel .DICTIONARY ;
5354import java .util .concurrent .Executors ;
5455import java .util .concurrent .Future ;
5556
57+ import org .apache .commons .math3 .analysis .function .Add ;
58+ import org .apache .hadoop .conf .Configurable ;
5659import org .apache .hadoop .conf .Configuration ;
5760import org .apache .hadoop .fs .FileStatus ;
5861import org .apache .hadoop .fs .FileSystem ;
8891import org .apache .parquet .hadoop .metadata .ColumnChunkMetaData ;
8992import org .apache .parquet .hadoop .metadata .FileMetaData ;
9093import org .apache .parquet .hadoop .metadata .ParquetMetadata ;
94+ import org .apache .parquet .hadoop .util .HadoopDataSource ;
9195import org .apache .parquet .hadoop .util .HiddenFileFilter ;
9296import org .apache .parquet .hadoop .util .HadoopStreams ;
9397import org .apache .parquet .io .SeekableInputStream ;
9498import org .apache .parquet .hadoop .util .counters .BenchmarkCounter ;
9599import org .apache .parquet .io .ParquetDecodingException ;
100+ import org .apache .parquet .io .ParquetDataSource ;
96101
97102/**
98103 * Internal implementation of the Parquet file reader as a block container
@@ -410,8 +415,7 @@ public static final ParquetMetadata readFooter(Configuration configuration, Path
410415 * @throws IOException if an error occurs while reading the file
411416 */
412417 public static ParquetMetadata readFooter (Configuration configuration , Path file , MetadataFilter filter ) throws IOException {
413- FileSystem fileSystem = file .getFileSystem (configuration );
414- return readFooter (configuration , fileSystem .getFileStatus (file ), filter );
418+ return readFooter (HadoopDataSource .fromPath (file , configuration ), filter );
415419 }
416420
417421 /**
@@ -431,12 +435,21 @@ public static final ParquetMetadata readFooter(Configuration configuration, File
431435 * @throws IOException if an error occurs while reading the file
432436 */
433437 public static final ParquetMetadata readFooter (Configuration configuration , FileStatus file , MetadataFilter filter ) throws IOException {
434- FileSystem fileSystem = file .getPath ().getFileSystem (configuration );
435- SeekableInputStream in = HadoopStreams .wrap (fileSystem .open (file .getPath ()));
436- try {
437- return readFooter (file .getLen (), file .getPath ().toString (), in , filter );
438- } finally {
439- in .close ();
438+ return readFooter (HadoopDataSource .fromStatus (file , configuration ), filter );
439+ }
440+
441+ /**
442+ * Reads the metadata block in the footer of the file, using a new stream opened from the provided data source
443+ * @param file a {@link ParquetDataSource} to read
444+ * @param filter the filter to apply to row groups
445+ * @return the metadata blocks in the footer
446+ * @throws IOException if an error occurs while reading the file
447+ */
448+ public static final ParquetMetadata readFooter (
449+ ParquetDataSource file , MetadataFilter filter ) throws IOException {
450+ try (SeekableInputStream in = file .newStream ()) {
451+ return readFooter (converter , file .getLength (), file .getLocation (),
452+ in , filter );
440453 }
441454 }
442455
@@ -449,7 +462,7 @@ public static final ParquetMetadata readFooter(Configuration configuration, File
449462 * @return the metadata blocks in the footer
450463 * @throws IOException if an error occurs while reading the file
451464 */
452- public static final ParquetMetadata readFooter (long fileLen , String filePath , SeekableInputStream f , MetadataFilter filter ) throws IOException {
465+ private static final ParquetMetadata readFooter (ParquetMetadataConverter converter , long fileLen , String filePath , SeekableInputStream f , MetadataFilter filter ) throws IOException {
453466 if (Log .DEBUG ) {
454467 LOG .debug ("File length " + fileLen );
455468 }
@@ -563,7 +576,7 @@ public ParquetFileReader(Configuration conf, Path file, MetadataFilter filter) t
563576 FileSystem fs = file .getFileSystem (conf );
564577 this .fileStatus = fs .getFileStatus (file );
565578 this .f = HadoopStreams .wrap (fs .open (file ));
566- this .footer = readFooter (fileStatus .getLen (), fileStatus .getPath ().toString (), f , filter );
579+ this .footer = readFooter (converter , fileStatus .getLen (), fileStatus .getPath ().toString (), f , filter );
567580 this .fileMetaData = footer .getFileMetaData ();
568581 this .blocks = footer .getBlocks ();
569582 for (ColumnDescriptor col : footer .getFileMetaData ().getSchema ().getColumns ()) {
@@ -602,7 +615,7 @@ public ParquetMetadata getFooter() {
602615 if (footer == null ) {
603616 try {
604617 // don't read the row groups because this.blocks is always set
605- this .footer = readFooter (fileStatus .getLen (), fileStatus .getPath ().toString (), f , SKIP_ROW_GROUPS );
618+ this .footer = readFooter (converter , fileStatus .getLen (), fileStatus .getPath ().toString (), f , SKIP_ROW_GROUPS );
606619 } catch (IOException e ) {
607620 throw new ParquetDecodingException ("Unable to read file footer" , e );
608621 }
0 commit comments