From 3a795dbbeb04818f8363a50d718a132268b5ffcd Mon Sep 17 00:00:00 2001 From: Rushabh Date: Wed, 26 Aug 2020 19:05:41 -0700 Subject: [PATCH 1/7] [HBASE-24956] ConnectionManager#locateRegionInMeta waits for user region lock indefinitely. --- .../hbase/client/ConnectionConfiguration.java | 12 ++- .../client/ConnectionImplementation.java | 15 +++- .../hbase/client/LockTimeoutException.java | 30 ++++++++ .../hadoop/hbase/client/TestMetaCache.java | 77 ++++++++++++++++++- 4 files changed, 131 insertions(+), 3 deletions(-) create mode 100644 hbase-client/src/main/java/org/apache/hadoop/hbase/client/LockTimeoutException.java diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionConfiguration.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionConfiguration.java index 53859c2ac6a2..cb3889ffcb76 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionConfiguration.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionConfiguration.java @@ -12,6 +12,7 @@ package org.apache.hadoop.hbase.client; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HConstants; import org.apache.yetus.audience.InterfaceAudience; @@ -62,8 +63,9 @@ public class ConnectionConfiguration { private final int writeRpcTimeout; // toggle for async/sync prefetch private final boolean clientScannerAsyncPrefetch; + private final long scannerTimeoutPeriod; - /** + /** * Constructor * @param conf Configuration object */ @@ -117,6 +119,10 @@ public class ConnectionConfiguration { this.writeRpcTimeout = conf.getInt(HConstants.HBASE_RPC_WRITE_TIMEOUT_KEY, conf.getInt(HConstants.HBASE_RPC_TIMEOUT_KEY, HConstants.DEFAULT_HBASE_RPC_TIMEOUT)); + + this.scannerTimeoutPeriod = HBaseConfiguration.getInt(conf, HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, + HConstants.HBASE_REGIONSERVER_LEASE_PERIOD_KEY, + HConstants.DEFAULT_HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD); } /** @@ -143,6 +149,7 @@ protected ConnectionConfiguration() { this.readRpcTimeout = HConstants.DEFAULT_HBASE_RPC_TIMEOUT; this.writeRpcTimeout = HConstants.DEFAULT_HBASE_RPC_TIMEOUT; this.rpcTimeout = HConstants.DEFAULT_HBASE_RPC_TIMEOUT; + this.scannerTimeoutPeriod = HConstants.DEFAULT_HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD; } public int getReadRpcTimeout() { @@ -209,4 +216,7 @@ public int getRpcTimeout() { return rpcTimeout; } + public long getScannerTimeoutPeriod() { + return scannerTimeoutPeriod; + } } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionImplementation.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionImplementation.java index b3b7b7db486c..9153a5c5b874 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionImplementation.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionImplementation.java @@ -863,7 +863,7 @@ private RegionLocations locateRegionInMeta(TableName tableName, byte[] row, bool } // Query the meta region long pauseBase = this.pause; - userRegionLock.lock(); + takeUserRegionLock(); try { if (useCache) {// re-check cache after get lock RegionLocations locations = getCachedLocation(tableName, row); @@ -968,6 +968,19 @@ rpcControllerFactory, getMetaLookupPool(), metaReplicaCallTimeoutScanInMicroSeco } } + private void takeUserRegionLock() throws IOException { + try { + long waitTime = connectionConfig.getScannerTimeoutPeriod(); + if (!userRegionLock.tryLock(waitTime, TimeUnit.MILLISECONDS)) { + throw new LockTimeoutException("Failed to get user region lock in" + + waitTime + " ms. " + " for accessing meta region server."); + } + } catch (InterruptedException ie) { + LOG.error("Interrupted while waiting for a lock", ie); + throw ExceptionUtil.asInterrupt(ie); + } + } + /** * Put a newly discovered HRegionLocation into the cache. * @param tableName The table name. diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/LockTimeoutException.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/LockTimeoutException.java new file mode 100644 index 000000000000..a193aa322242 --- /dev/null +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/LockTimeoutException.java @@ -0,0 +1,30 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.client; + +import java.io.IOException; + +/* + Thrown whenever we are not able to get the lock within the specified wait time. + */ +public class LockTimeoutException extends IOException { + public LockTimeoutException(String message) { + super(message); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaCache.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaCache.java index 3870244f4cf1..86c0c6aee803 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaCache.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaCache.java @@ -26,6 +26,8 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.CallQueueTooBigException; import org.apache.hadoop.hbase.HBaseClassTestRule; @@ -70,8 +72,9 @@ public class TestMetaCache { private static final TableName TABLE_NAME = TableName.valueOf("test_table"); private static final byte[] FAMILY = Bytes.toBytes("fam1"); private static final byte[] QUALIFIER = Bytes.toBytes("qual"); - private static HRegionServer badRS; + private static final Log LOG = LogFactory.getLog(TestMetaCache.class); + /** * @throws java.lang.Exception @@ -369,4 +372,76 @@ public void throwOnScan(FakeRSRpcServices rpcServices, ClientProtos.ScanRequest throws ServiceException { } } + + + @Test + public void testUserRegionLockThrowsException() throws IOException, InterruptedException { + ((FakeRSRpcServices)badRS.getRSRpcServices()).setExceptionInjector(new LockSleepInjector()); + Configuration conf = new Configuration(TEST_UTIL.getConfiguration()); + conf.set(HConstants.HBASE_CLIENT_RETRIES_NUMBER, "1"); + conf.set(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, "2000"); + try (ConnectionImplementation conn = + (ConnectionImplementation) ConnectionFactory.createConnection(conf)) { + ClientThread client1 = new ClientThread(conn); + ClientThread client2 = new ClientThread(conn); + client1.start(); + client2.start(); + client1.join(); + client2.join(); + // One thread will get the lock but will sleep in LockExceptionInjector#throwOnScan and + // eventually fail since the sleep time is more than hbase client scanner timeout period. + // Other thread will wait to acquire userRegionLock. + // Have no idea which thread will be scheduled first. So need to check both threads. + + // Both the threads will throw exception. One thread will throw exception since after + // acquiring user region lock, it is sleeping for 5 seconds when the scanner time out period + // is 2 seconds. + // Other thread will throw exception since it was not able to get hold of user region lock + // within 2 seconds. + assertNotNull(client1.getException()); + assertNotNull(client2.getException()); + + assertTrue(client1.getException() instanceof LockTimeoutException + ^ client2.getException() instanceof LockTimeoutException); + } + } + + private class ClientThread extends Thread { + private Exception exception; + private ConnectionImplementation connection; + + private ClientThread(ConnectionImplementation connection) { + this.connection = connection; + } + @Override + public void run() { + byte[] currentKey = HConstants.EMPTY_START_ROW; + try { + connection.getRegionLocation(TABLE_NAME, currentKey, true); + } catch (IOException e) { + LOG.error("Thread id: " + this.getId() + " exception: ", e); + this.exception = e; + } + } + public Exception getException() { + return exception; + } + } + + public static class LockSleepInjector extends ExceptionInjector { + @Override + public void throwOnScan(FakeRSRpcServices rpcServices, ClientProtos.ScanRequest request) { + try { + Thread.sleep(5000); + } catch (InterruptedException e) { + LOG.info("Interrupted exception", e); + } + } + + @Override + public void throwOnGet(FakeRSRpcServices rpcServices, ClientProtos.GetRequest request) { } + + @Override + public void throwOnMutate(FakeRSRpcServices rpcServices, ClientProtos.MutateRequest request) { } + } } From 94f2de8e052ae71a652983f593231169b5371b6a Mon Sep 17 00:00:00 2001 From: Rushabh Date: Thu, 27 Aug 2020 08:50:28 -0700 Subject: [PATCH 2/7] [HBASE-24956] Fixing errors pointed by apache hbase bot. --- .../apache/hadoop/hbase/client/LockTimeoutException.java | 2 ++ .../java/org/apache/hadoop/hbase/client/TestMetaCache.java | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/LockTimeoutException.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/LockTimeoutException.java index a193aa322242..297712430700 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/LockTimeoutException.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/LockTimeoutException.java @@ -19,10 +19,12 @@ package org.apache.hadoop.hbase.client; import java.io.IOException; +import org.apache.yetus.audience.InterfaceAudience; /* Thrown whenever we are not able to get the lock within the specified wait time. */ +@InterfaceAudience.Public public class LockTimeoutException extends IOException { public LockTimeoutException(String message) { super(message); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaCache.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaCache.java index 86c0c6aee803..12ccd899bee3 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaCache.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaCache.java @@ -26,8 +26,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.CallQueueTooBigException; import org.apache.hadoop.hbase.HBaseClassTestRule; @@ -60,6 +58,8 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos; import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.GetResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; @Category({MediumTests.class, ClientTests.class}) public class TestMetaCache { @@ -73,7 +73,7 @@ public class TestMetaCache { private static final byte[] FAMILY = Bytes.toBytes("fam1"); private static final byte[] QUALIFIER = Bytes.toBytes("qual"); private static HRegionServer badRS; - private static final Log LOG = LogFactory.getLog(TestMetaCache.class); + private static final Logger LOG = LoggerFactory.getLogger(TestMetaCache.class); /** From 541b33b8cc30c903eb1a89a4109083e87f279d3d Mon Sep 17 00:00:00 2001 From: Rushabh Date: Thu, 27 Aug 2020 14:16:49 -0700 Subject: [PATCH 3/7] [HBASE-24956] Address CR and fix checkstyle warnings. --- .../hadoop/hbase/client/ConnectionConfiguration.java | 7 ++++--- .../apache/hadoop/hbase/client/LockTimeoutException.java | 4 ++-- .../java/org/apache/hadoop/hbase/client/TestMetaCache.java | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionConfiguration.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionConfiguration.java index cb3889ffcb76..979611e9bf88 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionConfiguration.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionConfiguration.java @@ -120,9 +120,10 @@ public class ConnectionConfiguration { this.writeRpcTimeout = conf.getInt(HConstants.HBASE_RPC_WRITE_TIMEOUT_KEY, conf.getInt(HConstants.HBASE_RPC_TIMEOUT_KEY, HConstants.DEFAULT_HBASE_RPC_TIMEOUT)); - this.scannerTimeoutPeriod = HBaseConfiguration.getInt(conf, HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, - HConstants.HBASE_REGIONSERVER_LEASE_PERIOD_KEY, - HConstants.DEFAULT_HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD); + this.scannerTimeoutPeriod = HBaseConfiguration.getInt(conf, + HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, + HConstants.HBASE_REGIONSERVER_LEASE_PERIOD_KEY, + HConstants.DEFAULT_HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD); } /** diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/LockTimeoutException.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/LockTimeoutException.java index 297712430700..b949f0e2ecb9 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/LockTimeoutException.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/LockTimeoutException.java @@ -18,14 +18,14 @@ */ package org.apache.hadoop.hbase.client; -import java.io.IOException; +import org.apache.hadoop.hbase.HBaseIOException; import org.apache.yetus.audience.InterfaceAudience; /* Thrown whenever we are not able to get the lock within the specified wait time. */ @InterfaceAudience.Public -public class LockTimeoutException extends IOException { +public class LockTimeoutException extends HBaseIOException { public LockTimeoutException(String message) { super(message); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaCache.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaCache.java index 12ccd899bee3..dbc4bf24c9d9 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaCache.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaCache.java @@ -406,7 +406,7 @@ public void testUserRegionLockThrowsException() throws IOException, InterruptedE } } - private class ClientThread extends Thread { + private final class ClientThread extends Thread { private Exception exception; private ConnectionImplementation connection; From d184c89f8d0363aac1802c6d8d4c7973d9e1d5ed Mon Sep 17 00:00:00 2001 From: Rushabh Date: Fri, 28 Aug 2020 14:21:20 -0700 Subject: [PATCH 4/7] [HBASE-24956] Missed hunks. --- .../hbase/client/ConnectionImplementation.java | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionImplementation.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionImplementation.java index 9153a5c5b874..2cf8b50eedd7 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionImplementation.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionImplementation.java @@ -865,11 +865,13 @@ private RegionLocations locateRegionInMeta(TableName tableName, byte[] row, bool long pauseBase = this.pause; takeUserRegionLock(); try { - if (useCache) {// re-check cache after get lock - RegionLocations locations = getCachedLocation(tableName, row); - if (locations != null && locations.getRegionLocation(replicaId) != null) { - return locations; - } + // We don't need to check if useCache is enabled or not. Even if useCache is false + // we already cleared the cache for this row before acquiring userRegion lock so if this + // row is present in cache that means some other thread has populated it while we were + // waiting to acquire user region lock. + RegionLocations locations = getCachedLocation(tableName, row); + if (locations != null && locations.getRegionLocation(replicaId) != null) { + return locations; } if (relocateMeta) { relocateRegion(TableName.META_TABLE_NAME, HConstants.EMPTY_START_ROW, @@ -892,7 +894,7 @@ rpcControllerFactory, getMetaLookupPool(), metaReplicaCallTimeoutScanInMicroSeco } tableNotFound = false; // convert the row result into the HRegionLocation we need! - RegionLocations locations = MetaTableAccessor.getRegionLocations(regionInfoRow); + locations = MetaTableAccessor.getRegionLocations(regionInfoRow); if (locations == null || locations.getRegionLocation(replicaId) == null) { throw new IOException("RegionInfo null in " + tableName + ", row=" + regionInfoRow); } From f3210c8908809fa154118454e6a23f57bc222db1 Mon Sep 17 00:00:00 2001 From: Rushabh Date: Sat, 29 Aug 2020 19:55:06 -0700 Subject: [PATCH 5/7] [HBASE-24956] Addressing CR comments. --- .../apache/hadoop/hbase/client/ConnectionConfiguration.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionConfiguration.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionConfiguration.java index 979611e9bf88..cebbb1ac94c7 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionConfiguration.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionConfiguration.java @@ -120,9 +120,7 @@ public class ConnectionConfiguration { this.writeRpcTimeout = conf.getInt(HConstants.HBASE_RPC_WRITE_TIMEOUT_KEY, conf.getInt(HConstants.HBASE_RPC_TIMEOUT_KEY, HConstants.DEFAULT_HBASE_RPC_TIMEOUT)); - this.scannerTimeoutPeriod = HBaseConfiguration.getInt(conf, - HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, - HConstants.HBASE_REGIONSERVER_LEASE_PERIOD_KEY, + this.scannerTimeoutPeriod = conf.getInt(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, HConstants.DEFAULT_HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD); } From 57d680a27bfc3b0b4cc5dc4729a2bc2d0529a367 Mon Sep 17 00:00:00 2001 From: Rushabh Date: Wed, 9 Sep 2020 20:47:18 -0700 Subject: [PATCH 6/7] [HBASE-24956] Addressing CR comments. --- .../client/ConnectionImplementation.java | 10 ++++-- .../hadoop/hbase/client/TestMetaCache.java | 33 +++++++++++++++++-- 2 files changed, 38 insertions(+), 5 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionImplementation.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionImplementation.java index 2cf8b50eedd7..ee991c27c75a 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionImplementation.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionImplementation.java @@ -863,8 +863,10 @@ private RegionLocations locateRegionInMeta(TableName tableName, byte[] row, bool } // Query the meta region long pauseBase = this.pause; - takeUserRegionLock(); + boolean lockAcquired = false; try { + takeUserRegionLock(); + lockAcquired = true; // We don't need to check if useCache is enabled or not. Even if useCache is false // we already cleared the cache for this row before acquiring userRegion lock so if this // row is present in cache that means some other thread has populated it while we were @@ -959,7 +961,9 @@ rpcControllerFactory, getMetaLookupPool(), metaReplicaCallTimeoutScanInMicroSeco relocateMeta = !(e instanceof RegionOfflineException || e instanceof NoServerForRegionException); } finally { - userRegionLock.unlock(); + if (lockAcquired) { + userRegionLock.unlock(); + } } try{ Thread.sleep(ConnectionUtils.getPauseTime(pauseBase, tries)); @@ -970,7 +974,7 @@ rpcControllerFactory, getMetaLookupPool(), metaReplicaCallTimeoutScanInMicroSeco } } - private void takeUserRegionLock() throws IOException { + void takeUserRegionLock() throws IOException { try { long waitTime = connectionConfig.getScannerTimeoutPeriod(); if (!userRegionLock.tryLock(waitTime, TimeUnit.MILLISECONDS)) { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaCache.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaCache.java index dbc4bf24c9d9..c8e14a29e017 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaCache.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaCache.java @@ -22,6 +22,10 @@ import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; import java.io.IOException; import java.util.ArrayList; @@ -378,8 +382,8 @@ public void throwOnScan(FakeRSRpcServices rpcServices, ClientProtos.ScanRequest public void testUserRegionLockThrowsException() throws IOException, InterruptedException { ((FakeRSRpcServices)badRS.getRSRpcServices()).setExceptionInjector(new LockSleepInjector()); Configuration conf = new Configuration(TEST_UTIL.getConfiguration()); - conf.set(HConstants.HBASE_CLIENT_RETRIES_NUMBER, "1"); - conf.set(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, "2000"); + conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 0); + conf.setLong(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, 2000); try (ConnectionImplementation conn = (ConnectionImplementation) ConnectionFactory.createConnection(conf)) { ClientThread client1 = new ClientThread(conn); @@ -444,4 +448,29 @@ public void throwOnGet(FakeRSRpcServices rpcServices, ClientProtos.GetRequest re @Override public void throwOnMutate(FakeRSRpcServices rpcServices, ClientProtos.MutateRequest request) { } } + + @Test + public void testRetriesOnLockTimeoutException() throws Exception { + Configuration conf = new Configuration(TEST_UTIL.getConfiguration()); + int numRetries = 1; + conf.setLong(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, 1000l); + conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, numRetries); + try (ConnectionImplementation connection = + (ConnectionImplementation) ConnectionFactory.createConnection(conf)) { + ConnectionImplementation spyConnection = spy(connection); + // Throw LockTimeouException whenever we try to get user region lock. + doThrow(new LockTimeoutException("lock timeout")).when(spyConnection).takeUserRegionLock(); + + byte[] row = HConstants.EMPTY_START_ROW; + try { + spyConnection.locateRegion(TABLE_NAME, row, false, true, + RegionReplicaUtil.DEFAULT_REPLICA_ID); + fail("shouldn't have reached here."); + } catch (LockTimeoutException e) { + // Ignore since it is expected. + } + // On LockTimeoutException, we should have tried to get lock (numRetries+1) times. + verify(spyConnection, times(numRetries + 1)).takeUserRegionLock(); + } + } } From be6cda65636c5b3138e63aed40c239a1a1561709 Mon Sep 17 00:00:00 2001 From: Rushabh Date: Tue, 15 Sep 2020 14:20:12 -0700 Subject: [PATCH 7/7] [HBASE-24956] Addressing CR comments. --- .../hbase/client/ConnectionConfiguration.java | 10 ------ .../client/ConnectionImplementation.java | 10 ++---- .../hadoop/hbase/client/TestMetaCache.java | 35 ++----------------- 3 files changed, 6 insertions(+), 49 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionConfiguration.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionConfiguration.java index cebbb1ac94c7..e809c4c5eb25 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionConfiguration.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionConfiguration.java @@ -12,7 +12,6 @@ package org.apache.hadoop.hbase.client; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HConstants; import org.apache.yetus.audience.InterfaceAudience; @@ -63,7 +62,6 @@ public class ConnectionConfiguration { private final int writeRpcTimeout; // toggle for async/sync prefetch private final boolean clientScannerAsyncPrefetch; - private final long scannerTimeoutPeriod; /** * Constructor @@ -119,9 +117,6 @@ public class ConnectionConfiguration { this.writeRpcTimeout = conf.getInt(HConstants.HBASE_RPC_WRITE_TIMEOUT_KEY, conf.getInt(HConstants.HBASE_RPC_TIMEOUT_KEY, HConstants.DEFAULT_HBASE_RPC_TIMEOUT)); - - this.scannerTimeoutPeriod = conf.getInt(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, - HConstants.DEFAULT_HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD); } /** @@ -148,7 +143,6 @@ protected ConnectionConfiguration() { this.readRpcTimeout = HConstants.DEFAULT_HBASE_RPC_TIMEOUT; this.writeRpcTimeout = HConstants.DEFAULT_HBASE_RPC_TIMEOUT; this.rpcTimeout = HConstants.DEFAULT_HBASE_RPC_TIMEOUT; - this.scannerTimeoutPeriod = HConstants.DEFAULT_HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD; } public int getReadRpcTimeout() { @@ -214,8 +208,4 @@ public boolean isClientScannerAsyncPrefetch() { public int getRpcTimeout() { return rpcTimeout; } - - public long getScannerTimeoutPeriod() { - return scannerTimeoutPeriod; - } } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionImplementation.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionImplementation.java index ee991c27c75a..95c75f7f86c9 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionImplementation.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionImplementation.java @@ -863,10 +863,8 @@ private RegionLocations locateRegionInMeta(TableName tableName, byte[] row, bool } // Query the meta region long pauseBase = this.pause; - boolean lockAcquired = false; + takeUserRegionLock(); try { - takeUserRegionLock(); - lockAcquired = true; // We don't need to check if useCache is enabled or not. Even if useCache is false // we already cleared the cache for this row before acquiring userRegion lock so if this // row is present in cache that means some other thread has populated it while we were @@ -961,9 +959,7 @@ rpcControllerFactory, getMetaLookupPool(), metaReplicaCallTimeoutScanInMicroSeco relocateMeta = !(e instanceof RegionOfflineException || e instanceof NoServerForRegionException); } finally { - if (lockAcquired) { - userRegionLock.unlock(); - } + userRegionLock.unlock(); } try{ Thread.sleep(ConnectionUtils.getPauseTime(pauseBase, tries)); @@ -976,7 +972,7 @@ rpcControllerFactory, getMetaLookupPool(), metaReplicaCallTimeoutScanInMicroSeco void takeUserRegionLock() throws IOException { try { - long waitTime = connectionConfig.getScannerTimeoutPeriod(); + long waitTime = connectionConfig.getMetaOperationTimeout(); if (!userRegionLock.tryLock(waitTime, TimeUnit.MILLISECONDS)) { throw new LockTimeoutException("Failed to get user region lock in" + waitTime + " ms. " + " for accessing meta region server."); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaCache.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaCache.java index c8e14a29e017..9dc896832493 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaCache.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaCache.java @@ -22,10 +22,6 @@ import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; -import static org.mockito.Mockito.doThrow; -import static org.mockito.Mockito.spy; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; import java.io.IOException; import java.util.ArrayList; @@ -79,7 +75,6 @@ public class TestMetaCache { private static HRegionServer badRS; private static final Logger LOG = LoggerFactory.getLogger(TestMetaCache.class); - /** * @throws java.lang.Exception */ @@ -377,13 +372,14 @@ public void throwOnScan(FakeRSRpcServices rpcServices, ClientProtos.ScanRequest } } - @Test public void testUserRegionLockThrowsException() throws IOException, InterruptedException { ((FakeRSRpcServices)badRS.getRSRpcServices()).setExceptionInjector(new LockSleepInjector()); Configuration conf = new Configuration(TEST_UTIL.getConfiguration()); conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 0); + conf.setLong(HConstants.HBASE_CLIENT_META_OPERATION_TIMEOUT, 2000); conf.setLong(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, 2000); + try (ConnectionImplementation conn = (ConnectionImplementation) ConnectionFactory.createConnection(conf)) { ClientThread client1 = new ClientThread(conn); @@ -401,7 +397,7 @@ public void testUserRegionLockThrowsException() throws IOException, InterruptedE // acquiring user region lock, it is sleeping for 5 seconds when the scanner time out period // is 2 seconds. // Other thread will throw exception since it was not able to get hold of user region lock - // within 2 seconds. + // within meta operation timeout period. assertNotNull(client1.getException()); assertNotNull(client2.getException()); @@ -448,29 +444,4 @@ public void throwOnGet(FakeRSRpcServices rpcServices, ClientProtos.GetRequest re @Override public void throwOnMutate(FakeRSRpcServices rpcServices, ClientProtos.MutateRequest request) { } } - - @Test - public void testRetriesOnLockTimeoutException() throws Exception { - Configuration conf = new Configuration(TEST_UTIL.getConfiguration()); - int numRetries = 1; - conf.setLong(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, 1000l); - conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, numRetries); - try (ConnectionImplementation connection = - (ConnectionImplementation) ConnectionFactory.createConnection(conf)) { - ConnectionImplementation spyConnection = spy(connection); - // Throw LockTimeouException whenever we try to get user region lock. - doThrow(new LockTimeoutException("lock timeout")).when(spyConnection).takeUserRegionLock(); - - byte[] row = HConstants.EMPTY_START_ROW; - try { - spyConnection.locateRegion(TABLE_NAME, row, false, true, - RegionReplicaUtil.DEFAULT_REPLICA_ID); - fail("shouldn't have reached here."); - } catch (LockTimeoutException e) { - // Ignore since it is expected. - } - // On LockTimeoutException, we should have tried to get lock (numRetries+1) times. - verify(spyConnection, times(numRetries + 1)).takeUserRegionLock(); - } - } }