From 6d7cd8c6199329872705337ea0cd3657f1511157 Mon Sep 17 00:00:00 2001 From: "jian.feng" Date: Thu, 4 Aug 2022 23:56:37 +0800 Subject: [PATCH] [HUDI-4538] fall back to full path scan if last commit time synced is earlier than first instant in the timeline --- .../apache/hudi/sync/common/HoodieSyncClient.java | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncClient.java b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncClient.java index 32ade18d08117..6c196e11e1329 100644 --- a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncClient.java +++ b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncClient.java @@ -107,10 +107,20 @@ public MessageType getStorageSchema() { } public List getPartitionsWrittenToSince(Option lastCommitTimeSynced) { + boolean fullPathScan = false; if (!lastCommitTimeSynced.isPresent()) { + fullPathScan = true; LOG.info("Last commit time synced is not known, listing all partitions in " - + config.getString(META_SYNC_BASE_PATH) - + ",FS :" + config.getHadoopFileSystem()); + + config.getString(META_SYNC_BASE_PATH) + + ",FS :" + config.getHadoopFileSystem()); + } else if (lastCommitTimeSynced.get() + .compareTo(metaClient.getActiveTimeline().getCommitsTimeline().firstInstant().get().getTimestamp()) < 0) { + fullPathScan = true; + LOG.info("Last commit time synced is earlier than the first instant in the timeline, listing all partitions in " + + config.getString(META_SYNC_BASE_PATH) + + ",FS :" + config.getHadoopFileSystem()); + } + if (fullPathScan) { HoodieLocalEngineContext engineContext = new HoodieLocalEngineContext(metaClient.getHadoopConf()); return FSUtils.getAllPartitionPaths(engineContext, config.getString(META_SYNC_BASE_PATH),