Skip to content

Commit afa02f9

Browse files
author
Ray Mattingly
committed
simplify
1 parent 36f7ee1 commit afa02f9

1 file changed

Lines changed: 53 additions & 46 deletions

File tree

hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java

Lines changed: 53 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,6 @@
2222
import java.util.ArrayList;
2323
import java.util.Collections;
2424
import java.util.List;
25-
import java.util.Set;
2625
import java.util.stream.Collectors;
2726
import org.apache.hadoop.hbase.HRegionLocation;
2827
import org.apache.hadoop.hbase.TableName;
@@ -159,10 +158,12 @@ protected Flow executeFromState(MasterProcedureEnv env, ReopenTableRegionsState
159158
setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_REOPEN_REGIONS);
160159
return Flow.HAS_MORE_STATE;
161160
case REOPEN_TABLE_REGIONS_REOPEN_REGIONS:
162-
if (!regions.isEmpty()) {
161+
// if we didn't finish reopening the last batch yet, let's keep trying until we do.
162+
// at that point, the batch will be empty and we can generate a new batch
163+
if (!regions.isEmpty() && currentRegionBatch.isEmpty()) {
164+
currentRegionBatch = regions.stream().limit(reopenBatchSize).collect(Collectors.toList());
163165
batchesProcessed++;
164166
}
165-
currentRegionBatch = regions.stream().limit(reopenBatchSize).collect(Collectors.toList());
166167
for (HRegionLocation loc : currentRegionBatch) {
167168
RegionStateNode regionNode =
168169
env.getAssignmentManager().getRegionStates().getRegionStateNode(loc.getRegion());
@@ -187,59 +188,65 @@ protected Flow executeFromState(MasterProcedureEnv env, ReopenTableRegionsState
187188
setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_CONFIRM_REOPENED);
188189
return Flow.HAS_MORE_STATE;
189190
case REOPEN_TABLE_REGIONS_CONFIRM_REOPENED:
190-
regions = regions.stream().map(env.getAssignmentManager().getRegionStates()::checkReopened)
191-
.filter(l -> l != null).collect(Collectors.toList());
192-
// we need to create a set of region names because the HRegionLocation hashcode is only
193-
// based
194-
// on the server name
195-
Set<byte[]> currentRegionBatchNames = currentRegionBatch.stream()
196-
.map(r -> r.getRegion().getRegionName()).collect(Collectors.toSet());
197-
currentRegionBatch = regions.stream()
198-
.filter(r -> currentRegionBatchNames.contains(r.getRegion().getRegionName()))
199-
.collect(Collectors.toList());
200-
if (currentRegionBatch.isEmpty()) {
201-
if (regions.isEmpty()) {
202-
return Flow.NO_MORE_STATE;
203-
} else {
204-
setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_REOPEN_REGIONS);
205-
reopenBatchSize = Math.min(reopenBatchSizeMax, 2 * reopenBatchSize);
206-
if (reopenBatchBackoffMillis > 0) {
207-
setBackoffStateAndSuspend(reopenBatchBackoffMillis);
208-
} else {
209-
return Flow.HAS_MORE_STATE;
210-
}
211-
}
191+
// update region lists based on what's been reopened
192+
regions = filterReopened(env, regions);
193+
currentRegionBatch = filterReopened(env, currentRegionBatch);
194+
195+
// existing batch didn't fully reopen, so try to resolve that first.
196+
// since this is a retry, don't do the batch backoff
197+
if (!currentRegionBatch.isEmpty()) {
198+
return reopenIfSchedulable(env, currentRegionBatch, false);
212199
}
213-
if (currentRegionBatch.stream().anyMatch(loc -> canSchedule(env, loc))) {
214-
retryCounter = null;
215-
setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_REOPEN_REGIONS);
216-
if (reopenBatchBackoffMillis > 0) {
217-
setBackoffStateAndSuspend(reopenBatchBackoffMillis);
218-
} else {
219-
return Flow.HAS_MORE_STATE;
220-
}
221-
}
222-
// We can not schedule TRSP for all the regions need to reopen, wait for a while and retry
223-
// again.
224-
if (retryCounter == null) {
225-
retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
200+
201+
if (regions.isEmpty()) {
202+
return Flow.NO_MORE_STATE;
226203
}
227-
long backoffMillis = retryCounter.getBackoffTimeAndIncrementAttempts();
228-
LOG.info(
229-
"There are still {} region(s) which need to be reopened for table {}. {} are in "
230-
+ "OPENING state, suspend {}secs and try again later",
231-
regions.size(), tableName, currentRegionBatch.size(), backoffMillis / 1000);
232-
setBackoffStateAndSuspend(backoffMillis);
204+
205+
// current batch is finished, schedule more regions
206+
return reopenIfSchedulable(env, regions, true);
233207
default:
234208
throw new UnsupportedOperationException("unhandled state=" + state);
235209
}
236210
}
237211

238-
private void setBackoffStateAndSuspend(long millis) throws ProcedureSuspendedException {
212+
private List<HRegionLocation> filterReopened(MasterProcedureEnv env,
213+
List<HRegionLocation> regionsToCheck) {
214+
return regionsToCheck.stream().map(env.getAssignmentManager().getRegionStates()::checkReopened)
215+
.filter(l -> l != null).collect(Collectors.toList());
216+
}
217+
218+
private Flow reopenIfSchedulable(MasterProcedureEnv env, List<HRegionLocation> regionsToReopen,
219+
boolean shouldBatchBackoff) throws ProcedureSuspendedException {
220+
if (regionsToReopen.stream().anyMatch(loc -> canSchedule(env, loc))) {
221+
retryCounter = null;
222+
setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_REOPEN_REGIONS);
223+
reopenBatchSize = Math.min(reopenBatchSizeMax, 2 * reopenBatchSize);
224+
if (shouldBatchBackoff && reopenBatchBackoffMillis > 0) {
225+
setBackoffState(reopenBatchBackoffMillis);
226+
throw new ProcedureSuspendedException();
227+
} else {
228+
return Flow.HAS_MORE_STATE;
229+
}
230+
}
231+
232+
// We cannot schedule a TRSP for any region that still needs reopening; wait for a while and retry
233+
// again.
234+
if (retryCounter == null) {
235+
retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
236+
}
237+
long backoffMillis = retryCounter.getBackoffTimeAndIncrementAttempts();
238+
LOG.info(
239+
"There are still {} region(s) which need to be reopened for table {}. {} are in "
240+
+ "OPENING state, suspend {}secs and try again later",
241+
regions.size(), tableName, currentRegionBatch.size(), backoffMillis / 1000);
242+
setBackoffState(backoffMillis);
243+
throw new ProcedureSuspendedException();
244+
}
245+
246+
private void setBackoffState(long millis) {
239247
setTimeout(Math.toIntExact(millis));
240248
setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
241249
skipPersistence();
242-
throw new ProcedureSuspendedException();
243250
}
244251

245252
private List<HRegionLocation>

0 commit comments

Comments
 (0)