Skip to content

Commit b64c890

Browse files
author
Robert Kruszewski
committed
allow reading footers from provided file listing and streams
1 parent 60b6d5a commit b64c890

1 file changed

Lines changed: 39 additions & 1 deletion

File tree

parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,36 @@ public static List<Footer> readAllFootersInParallel(final Configuration configur
236236
return readAllFootersInParallel(configuration, partFiles, false);
237237
}
238238

239+
/**
240+
* read all the footers of the files provided with input streams
241+
* (not using summary files)
242+
* @param configuration the conf to access the File System
243+
* @param partFiles the files to read with equivalent input streams
244+
* @param skipRowGroups to skip the rowGroup info
245+
* @return the footers
246+
* @throws IOException
247+
*/
248+
public static List<Footer> readAllFootersInParallel(final Configuration configuration, Map<FileStatus, FSDataInputStream> partFiles, final boolean skipRowGroups) throws IOException {
249+
List<Callable<Footer>> footers = new ArrayList<Callable<Footer>>();
250+
for (final Map.Entry<FileStatus, FSDataInputStream> currentFile : partFiles.entrySet()) {
251+
footers.add(new Callable<Footer>() {
252+
@Override
253+
public Footer call() throws Exception {
254+
try {
255+
return new Footer(currentFile.getKey().getPath(), readFooter(currentFile.getValue(), currentFile.getKey(), filter(skipRowGroups)));
256+
} catch (IOException e) {
257+
throw new IOException("Could not read footer for file " + currentFile, e);
258+
}
259+
}
260+
});
261+
}
262+
try {
263+
return runAllInParallel(configuration.getInt(PARQUET_READ_PARALLELISM, 5), footers);
264+
} catch (ExecutionException e) {
265+
throw new IOException("Could not read footer: " + e.getMessage(), e.getCause());
266+
}
267+
}
268+
239269
/**
240270
* read all the footers of the files provided
241271
* (not using summary files)
@@ -440,7 +470,15 @@ public static final ParquetMetadata readFooter(Configuration configuration, File
440470
}
441471
}
442472

443-
private static final ParquetMetadata readFooter(FileStatus file, FSDataInputStream f, MetadataFilter filter) throws IOException {
473+
/**
474+
* Reads the metadata block in the footer of the file using the provided input stream
475+
* @param file the parquet File
476+
* @param f input stream for the file
477+
* @param filter the filter to apply to row groups
478+
* @return the metadata blocks in the footer
479+
* @throws IOException if an error occurs while reading the file
480+
*/
481+
public static final ParquetMetadata readFooter(FileStatus file, FSDataInputStream f, MetadataFilter filter) throws IOException {
444482
long l = file.getLen();
445483
if (Log.DEBUG) {
446484
LOG.debug("File length " + l);

0 commit comments

Comments
 (0)