Skip to content

Commit 1960ed2

Browse files
committed
[HUDI-4508] Fix the exception thrown by read-optimized queries on MOR tables in Hive and Presto/Trino when a file slice has log files but no base file.
1 parent e04b318 commit 1960ed2

2 files changed

Lines changed: 27 additions & 0 deletions

File tree

hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieCopyOnWriteTableInputFormat.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import org.apache.hudi.common.engine.HoodieLocalEngineContext;
3636
import org.apache.hudi.common.model.FileSlice;
3737
import org.apache.hudi.common.model.HoodieBaseFile;
38+
import org.apache.hudi.common.model.HoodieLogFile;
3839
import org.apache.hudi.common.model.HoodieTableQueryType;
3940
import org.apache.hudi.common.table.HoodieTableConfig;
4041
import org.apache.hudi.common.table.HoodieTableMetaClient;
@@ -253,6 +254,7 @@ private List<FileStatus> listStatusForSnapshotMode(JobConf job,
253254
partitionedFileSlices.values()
254255
.stream()
255256
.flatMap(Collection::stream)
257+
.filter(fileSlice -> checkIfValidFileSlice(fileSlice))
256258
.map(fileSlice -> createFileStatusUnchecked(fileSlice, fileIndex, virtualKeyInfoOpt))
257259
.collect(Collectors.toList())
258260
);
@@ -261,6 +263,20 @@ private List<FileStatus> listStatusForSnapshotMode(JobConf job,
261263
return targetFiles;
262264
}
263265

266+
private boolean checkIfValidFileSlice(FileSlice fileSlice) {
267+
Option<HoodieBaseFile> baseFileOpt = fileSlice.getBaseFile();
268+
Option<HoodieLogFile> latestLogFileOpt = fileSlice.getLatestLogFile();
269+
270+
if (baseFileOpt.isPresent()) {
271+
return true;
272+
} else if (latestLogFileOpt.isPresent()) {
273+
// It happens when reading optimized query to mor.
274+
return false;
275+
} else {
276+
throw new IllegalStateException("Invalid state: base-file has to be present");
277+
}
278+
}
279+
264280
private void validate(List<FileStatus> targetFiles, List<FileStatus> legacyFileStatuses) {
265281
List<FileStatus> diff = CollectionUtils.diff(targetFiles, legacyFileStatuses);
266282
checkState(diff.isEmpty(), "Should be empty");

hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieMergeOnReadTableInputFormat.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,17 @@ protected FileStatus createFileStatusUnchecked(FileSlice fileSlice, HiveHoodieTa
103103
}
104104
}
105105

106+
private boolean checkIfValidFileSlice(FileSlice fileSlice) {
107+
Option<HoodieBaseFile> baseFileOpt = fileSlice.getBaseFile();
108+
Option<HoodieLogFile> latestLogFileOpt = fileSlice.getLatestLogFile();
109+
110+
if (baseFileOpt.isPresent() || latestLogFileOpt.isPresent()) {
111+
return true;
112+
} else {
113+
throw new IllegalStateException("Invalid state: either base-file or log-file has to be present");
114+
}
115+
}
116+
106117
/**
107118
* Keep the logic of mor_incr_view as same as spark datasource.
108119
* Step1: Get list of commits to be fetched based on start commit and max commits(for snapshot max commits is -1).

0 commit comments

Comments
 (0)