Skip to content

Commit 7d78979

Browse files
committed
HBASE-27897 ConnectionImplementation#locateRegionInMeta should pause and retry when taking user region lock failed (#5258)
Signed-off-by: Wellington Chevreuil <wchevreuil@apache.org>
1 parent e3e6f4c commit 7d78979

2 files changed

Lines changed: 18 additions & 12 deletions

File tree

hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionImplementation.java

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -963,8 +963,12 @@ private RegionLocations locateRegionInMeta(TableName tableName, byte[] row, bool
963963
// Query the meta region
964964
long pauseBase = this.pause;
965965
takeUserRegionLock();
966-
final long lockStartTime = EnvironmentEdgeManager.currentTime();
966+
long lockStartTime = 0;
967+
boolean lockedUserRegion = false;
967968
try {
969+
takeUserRegionLock();
970+
lockStartTime = EnvironmentEdgeManager.currentTime();
971+
lockedUserRegion = true;
968972
// We don't need to check if useCache is enabled or not. Even if useCache is false
969973
// we already cleared the cache for this row before acquiring userRegion lock so if this
970974
// row is present in cache that means some other thread has populated it while we were
@@ -1063,10 +1067,12 @@ rpcControllerFactory, getMetaLookupPool(), metaReplicaCallTimeoutScanInMicroSeco
10631067
relocateMeta =
10641068
!(e instanceof RegionOfflineException || e instanceof NoServerForRegionException);
10651069
} finally {
1066-
userRegionLock.unlock();
1067-
// update duration of the lock being held
1068-
if (metrics != null) {
1069-
metrics.updateUserRegionLockHeld(EnvironmentEdgeManager.currentTime() - lockStartTime);
1070+
if (lockedUserRegion) {
1071+
userRegionLock.unlock();
1072+
// update duration of the lock being held
1073+
if (metrics != null) {
1074+
metrics.updateUserRegionLockHeld(EnvironmentEdgeManager.currentTime() - lockStartTime);
1075+
}
10701076
}
10711077
}
10721078
try {

hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaCache.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -592,21 +592,21 @@ public void testUserRegionLockThrowsException() throws IOException, InterruptedE
592592
// obtain the client metrics
593593
MetricsConnection metrics = conn.getConnectionMetrics();
594594
long queueCount = metrics.userRegionLockQueueHist.getCount();
595-
assertEquals("Queue of userRegionLock should be updated twice. queueCount: " + queueCount,
596-
queueCount, 2);
595+
assertEquals("Queue of userRegionLock should be updated twice. queueCount: " + queueCount, 2,
596+
queueCount);
597597

598598
long timeoutCount = metrics.userRegionLockTimeoutCount.getCount();
599-
assertEquals("Timeout of userRegionLock should happen once. timeoutCount: " + timeoutCount,
600-
timeoutCount, 1);
599+
assertEquals("Timeout of userRegionLock should happen once. timeoutCount: " + timeoutCount, 1,
600+
timeoutCount);
601601

602602
long waitingTimerCount = metrics.userRegionLockWaitingTimer.getCount();
603603
assertEquals("userRegionLock should be grabbed successfully once. waitingTimerCount: "
604-
+ waitingTimerCount, waitingTimerCount, 1);
604+
+ waitingTimerCount, 1, waitingTimerCount);
605605

606606
long heldTimerCount = metrics.userRegionLockHeldTimer.getCount();
607607
assertEquals(
608-
"userRegionLock should be held successfully once. heldTimerCount: " + heldTimerCount,
609-
heldTimerCount, 1);
608+
"userRegionLock should be held successfully once. heldTimerCount: " + heldTimerCount, 1,
609+
heldTimerCount);
610610
double heldTime = metrics.userRegionLockHeldTimer.getSnapshot().getMax();
611611
assertTrue("Max held time should be greater than 2 seconds. heldTime: " + heldTime,
612612
heldTime >= 2E9);

0 commit comments

Comments
 (0)