Skip to content

Commit c959079

Browse files
authored
[HUDI-4279] Strength the remote fs view lagging check when latest commit refresh is enabled (#5917)
Signed-off-by: LinMingQiang <[email protected]>
1 parent c7e430b commit c959079

2 files changed

Lines changed: 21 additions & 3 deletions

File tree

hudi-common/src/main/java/org/apache/hudi/common/table/view/RemoteHoodieTableFileSystemView.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
116116
public static final String FILEID_PARAM = "fileid";
117117
public static final String LAST_INSTANT_TS = "lastinstantts";
118118
public static final String TIMELINE_HASH = "timelinehash";
119+
public static final String NUM_INSTANTS = "numinstants";
119120
public static final String REFRESH_OFF = "refreshoff";
120121
public static final String INCLUDE_FILES_IN_PENDING_COMPACTION_PARAM = "includependingcompaction";
121122

@@ -162,6 +163,7 @@ private <T> T executeRequest(String requestPath, Map<String, String> queryParame
162163
// Adding mandatory parameters - Last instants affecting file-slice
163164
timeline.lastInstant().ifPresent(instant -> builder.addParameter(LAST_INSTANT_TS, instant.getTimestamp()));
164165
builder.addParameter(TIMELINE_HASH, timeline.getTimelineHash());
166+
builder.addParameter(NUM_INSTANTS, timeline.countInstants() + "");
165167

166168
String url = builder.toString();
167169
LOG.info("Sending request : (" + url + ")");

hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/RequestHandler.java

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -121,10 +121,9 @@ private boolean isLocalViewBehind(Context ctx) {
121121
String lastKnownInstantFromClient =
122122
ctx.queryParam(RemoteHoodieTableFileSystemView.LAST_INSTANT_TS, HoodieTimeline.INVALID_INSTANT_TS);
123123
String timelineHashFromClient = ctx.queryParam(RemoteHoodieTableFileSystemView.TIMELINE_HASH, "");
124+
String numInstantsFromClient = ctx.queryParam(RemoteHoodieTableFileSystemView.NUM_INSTANTS, "-1");
124125
HoodieTimeline localTimeline =
125126
viewManager.getFileSystemView(basePath).getTimeline().filterCompletedAndCompactionInstants();
126-
String localLastKnownInstant = localTimeline.lastInstant().isPresent() ? localTimeline.lastInstant().get().getTimestamp()
127-
: HoodieTimeline.INVALID_INSTANT_TS;
128127
if (LOG.isDebugEnabled()) {
129128
LOG.debug("Client [ LastTs=" + lastKnownInstantFromClient + ", TimelineHash=" + timelineHashFromClient
130129
+ "], localTimeline=" + localTimeline.getInstants().collect(Collectors.toList()));
@@ -138,14 +137,31 @@ private boolean isLocalViewBehind(Context ctx) {
138137
String localTimelineHash = localTimeline.getTimelineHash();
139138
// refresh if timeline hash mismatches and if local's last known instant < client's last known instant (if config is enabled)
140139
if (!localTimelineHash.equals(timelineHashFromClient)
141-
&& (!timelineServiceConfig.refreshTimelineBasedOnLatestCommit || HoodieTimeline.compareTimestamps(localLastKnownInstant, HoodieTimeline.LESSER_THAN, lastKnownInstantFromClient))) {
140+
&& (!timelineServiceConfig.refreshTimelineBasedOnLatestCommit
141+
|| localTimelineBehind(localTimeline, lastKnownInstantFromClient, numInstantsFromClient))) {
142142
return true;
143143
}
144144

145145
// As a safety check, even if hash is same, ensure instant is present
146146
return !localTimeline.containsOrBeforeTimelineStarts(lastKnownInstantFromClient);
147147
}
148148

149+
private static boolean localTimelineBehind(HoodieTimeline localTimeline, String lastKnownInstantFromClient, String numInstantsFromClient) {
150+
String localLastKnownInstant = localTimeline.lastInstant().isPresent() ? localTimeline.lastInstant().get().getTimestamp()
151+
: HoodieTimeline.INVALID_INSTANT_TS;
152+
// Why comparing the num commits ?
153+
// Assumes there are 4 commits on the timeline:
154+
// timestamp(action): ts_0(commit), ts_1(commit), ts_2(clean), ts_3(commit)
155+
// when ts_1 is in INFLIGHT state, ts_2 clean action is already finished,
156+
// after ts_1 triggers #sync, the local timeline is refreshed as [ts_0, ts_2],
157+
// when ts_1 switches state from INFLIGHT to COMPLETED, no #sync triggers.
158+
// at ts_3, when the fs view snapshot is requested, the ts_3 client timeline should be [ts_0, ts_1, ts_2],
159+
// if we only compare the latest commit, the local timeline is NOT behind, but the fs view is not complete
160+
// because ts_1 is lost.
161+
return HoodieTimeline.compareTimestamps(localLastKnownInstant, HoodieTimeline.LESSER_THAN, lastKnownInstantFromClient)
162+
|| localTimeline.countInstants() < Integer.parseInt(numInstantsFromClient);
163+
}
164+
149165
/**
150166
* Syncs data-set view if local view is behind.
151167
*/

0 commit comments

Comments
 (0)