Skip to content

Commit ea331a6

Browse files
committed
HBASE-25292 Improve InetSocketAddress usage discipline (#2669)
Network identities should be bound late. Remote addresses should be resolved at the last possible moment, just before connect(). Network identity mappings can change, so our code should not inappropriately cache them. Otherwise we might miss a change and fail to operate normally. Revert "HBASE-14544 Allow HConnectionImpl to not refresh the dns on errors" Removes hbase.resolve.hostnames.on.failure and related code. We always resolve hostnames, as late as possible. Preserve InetSocketAddress caching per RPC connection. Avoids potential lookups per Call. Replace InetSocketAddress with Address where used as a map key. If we want to key by hostname and/or resolved address we should be explicit about it. Using Address chooses mapping by hostname and port only. Add metrics for potential nameservice resolution attempts, whenever an InetSocketAddress is instantiated for connect; and metrics for failed resolution, whenever InetSocketAddress#isUnresolved on the new instance is true. * Use ServerName directly to build a stub key * Resolve and cache ISA on a RpcChannel as late as possible, at first call * Remove now invalid unit test TestCIBadHostname We resolve DNS at the latest possible time, at first call, and do not resolve hostnames for creating stubs at all, so this unit test cannot work now. Reviewed-by: Mingliang Liu <liuml07@apache.org> Signed-off-by: Duo Zhang <zhangduo@apache.org>
1 parent a95c0c6 commit ea331a6

25 files changed

Lines changed: 266 additions & 265 deletions

File tree

hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncConnectionImpl.java

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
import static org.apache.hadoop.hbase.util.FutureUtils.addListener;
2929

3030
import java.io.IOException;
31+
import java.net.InetAddress;
32+
import java.net.InetSocketAddress;
3133
import java.util.Optional;
3234
import java.util.concurrent.CompletableFuture;
3335
import java.util.concurrent.ConcurrentHashMap;
@@ -74,8 +76,6 @@ class AsyncConnectionImpl implements AsyncConnection {
7476
.setUncaughtExceptionHandler(Threads.LOGGING_EXCEPTION_HANDLER).build(),
7577
10, TimeUnit.MILLISECONDS);
7678

77-
private static final String RESOLVE_HOSTNAME_ON_FAIL_KEY = "hbase.resolve.hostnames.on.failure";
78-
7979
private final Configuration conf;
8080

8181
final AsyncConnectionConfiguration connConf;
@@ -90,8 +90,6 @@ class AsyncConnectionImpl implements AsyncConnection {
9090

9191
final RpcControllerFactory rpcControllerFactory;
9292

93-
private final boolean hostnameCanChange;
94-
9593
private final AsyncRegionLocator locator;
9694

9795
final AsyncRpcRetryingCallerFactory callerFactory;
@@ -134,7 +132,6 @@ public AsyncConnectionImpl(Configuration conf, ConnectionRegistry registry, Stri
134132
}
135133
this.rpcClient = RpcClientFactory.createClient(conf, clusterId, metrics.orElse(null));
136134
this.rpcControllerFactory = RpcControllerFactory.instantiate(conf);
137-
this.hostnameCanChange = conf.getBoolean(RESOLVE_HOSTNAME_ON_FAIL_KEY, true);
138135
this.rpcTimeout =
139136
(int) Math.min(Integer.MAX_VALUE, TimeUnit.NANOSECONDS.toMillis(connConf.getRpcTimeoutNs()));
140137
this.locator = new AsyncRegionLocator(this, RETRY_TIMER);
@@ -250,7 +247,7 @@ private ClientService.Interface createRegionServerStub(ServerName serverName) th
250247

251248
ClientService.Interface getRegionServerStub(ServerName serverName) throws IOException {
252249
return ConcurrentMapUtils.computeIfAbsentEx(rsStubs,
253-
getStubKey(ClientService.getDescriptor().getName(), serverName, hostnameCanChange),
250+
getStubKey(ClientService.getDescriptor().getName(), serverName),
254251
() -> createRegionServerStub(serverName));
255252
}
256253

@@ -264,7 +261,7 @@ private AdminService.Interface createAdminServerStub(ServerName serverName) thro
264261

265262
AdminService.Interface getAdminStub(ServerName serverName) throws IOException {
266263
return ConcurrentMapUtils.computeIfAbsentEx(adminSubs,
267-
getStubKey(AdminService.getDescriptor().getName(), serverName, hostnameCanChange),
264+
getStubKey(AdminService.getDescriptor().getName(), serverName),
268265
() -> createAdminServerStub(serverName));
269266
}
270267

hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionImplementation.java

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@
3434
import java.io.IOException;
3535
import java.io.InterruptedIOException;
3636
import java.lang.reflect.UndeclaredThrowableException;
37+
import java.net.InetAddress;
38+
import java.net.InetSocketAddress;
3739
import java.util.ArrayList;
3840
import java.util.Collections;
3941
import java.util.Date;
@@ -157,9 +159,6 @@ class ConnectionImplementation implements ClusterConnection, Closeable {
157159
public static final String RETRIES_BY_SERVER_KEY = "hbase.client.retries.by.server";
158160
private static final Logger LOG = LoggerFactory.getLogger(ConnectionImplementation.class);
159161

160-
private static final String RESOLVE_HOSTNAME_ON_FAIL_KEY = "hbase.resolve.hostnames.on.failure";
161-
162-
private final boolean hostnamesCanChange;
163162
private final long pause;
164163
private final long pauseForCQTBE;// pause for CallQueueTooBigException, if specified
165164
// The mode tells if HedgedRead, LoadBalance mode is supported.
@@ -297,7 +296,6 @@ class ConnectionImplementation implements ClusterConnection, Closeable {
297296

298297
boolean shouldListen = conf.getBoolean(HConstants.STATUS_PUBLISHED,
299298
HConstants.STATUS_PUBLISHED_DEFAULT);
300-
this.hostnamesCanChange = conf.getBoolean(RESOLVE_HOSTNAME_ON_FAIL_KEY, true);
301299
Class<? extends ClusterStatusListener.Listener> listenerClass =
302300
conf.getClass(ClusterStatusListener.STATUS_LISTENER_CLASS,
303301
ClusterStatusListener.DEFAULT_STATUS_LISTENER_CLASS,
@@ -476,7 +474,7 @@ public Hbck getHbck(ServerName masterServer) throws IOException {
476474
throw new RegionServerStoppedException(masterServer + " is dead.");
477475
}
478476
String key = getStubKey(MasterProtos.HbckService.BlockingInterface.class.getName(),
479-
masterServer, this.hostnamesCanChange);
477+
masterServer);
480478

481479
return new HBaseHbck(
482480
(MasterProtos.HbckService.BlockingInterface) computeIfAbsentEx(stubs, key, () -> {
@@ -1242,7 +1240,7 @@ private MasterProtos.MasterService.BlockingInterface makeStubNoRetries()
12421240
}
12431241
// Use the security info interface name as our stub key
12441242
String key =
1245-
getStubKey(MasterProtos.MasterService.getDescriptor().getName(), sn, hostnamesCanChange);
1243+
getStubKey(MasterProtos.MasterService.getDescriptor().getName(), sn);
12461244
MasterProtos.MasterService.BlockingInterface stub =
12471245
(MasterProtos.MasterService.BlockingInterface) computeIfAbsentEx(stubs, key, () -> {
12481246
BlockingRpcChannel channel = rpcClient.createBlockingRpcChannel(sn, user, rpcTimeout);
@@ -1290,8 +1288,7 @@ public AdminProtos.AdminService.BlockingInterface getAdmin(ServerName serverName
12901288
if (isDeadServer(serverName)) {
12911289
throw new RegionServerStoppedException(serverName + " is dead.");
12921290
}
1293-
String key = getStubKey(AdminProtos.AdminService.BlockingInterface.class.getName(), serverName,
1294-
this.hostnamesCanChange);
1291+
String key = getStubKey(AdminProtos.AdminService.BlockingInterface.class.getName(), serverName);
12951292
return (AdminProtos.AdminService.BlockingInterface) computeIfAbsentEx(stubs, key, () -> {
12961293
BlockingRpcChannel channel =
12971294
this.rpcClient.createBlockingRpcChannel(serverName, user, rpcTimeout);
@@ -1306,7 +1303,7 @@ public BlockingInterface getClient(ServerName serverName) throws IOException {
13061303
throw new RegionServerStoppedException(serverName + " is dead.");
13071304
}
13081305
String key = getStubKey(ClientProtos.ClientService.BlockingInterface.class.getName(),
1309-
serverName, this.hostnamesCanChange);
1306+
serverName);
13101307
return (ClientProtos.ClientService.BlockingInterface) computeIfAbsentEx(stubs, key, () -> {
13111308
BlockingRpcChannel channel =
13121309
this.rpcClient.createBlockingRpcChannel(serverName, user, rpcTimeout);

hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionUtils.java

Lines changed: 7 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import java.io.IOException;
2626
import java.lang.reflect.UndeclaredThrowableException;
2727
import java.net.InetAddress;
28+
import java.net.InetSocketAddress;
2829
import java.net.UnknownHostException;
2930
import java.util.Arrays;
3031
import java.util.List;
@@ -216,32 +217,17 @@ public boolean isTableDisabled(TableName tableName) throws IOException {
216217
}
217218

218219
/**
219-
* Return retires + 1. The returned value will be in range [1, Integer.MAX_VALUE].
220+
* Get a unique key for the rpc stub to the given server.
220221
*/
221-
static int retries2Attempts(int retries) {
222-
return Math.max(1, retries == Integer.MAX_VALUE ? Integer.MAX_VALUE : retries + 1);
222+
static String getStubKey(String serviceName, ServerName serverName) {
223+
return String.format("%s@%s", serviceName, serverName);
223224
}
224225

225226
/**
226-
* Get a unique key for the rpc stub to the given server.
227+
* Return retires + 1. The returned value will be in range [1, Integer.MAX_VALUE].
227228
*/
228-
static String getStubKey(String serviceName, ServerName serverName, boolean hostnameCanChange) {
229-
// Sometimes, servers go down and they come back up with the same hostname but a different
230-
// IP address. Force a resolution of the rsHostname by trying to instantiate an
231-
// InetSocketAddress, and this way we will rightfully get a new stubKey.
232-
// Also, include the hostname in the key so as to take care of those cases where the
233-
// DNS name is different but IP address remains the same.
234-
String hostname = serverName.getHostname();
235-
int port = serverName.getPort();
236-
if (hostnameCanChange) {
237-
try {
238-
InetAddress ip = InetAddress.getByName(hostname);
239-
return serviceName + "@" + hostname + "-" + ip.getHostAddress() + ":" + port;
240-
} catch (UnknownHostException e) {
241-
LOG.warn("Can not resolve " + hostname + ", please check your network", e);
242-
}
243-
}
244-
return serviceName + "@" + hostname + ":" + port;
229+
static int retries2Attempts(int retries) {
230+
return Math.max(1, retries == Integer.MAX_VALUE ? Integer.MAX_VALUE : retries + 1);
245231
}
246232

247233
static void checkHasFamilies(Mutation mutation) {

hbase-client/src/main/java/org/apache/hadoop/hbase/client/MetricsConnection.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ public class MetricsConnection implements StatisticTrackable {
6666
private static final String HEAP_BASE = "heapOccupancy_";
6767
private static final String CACHE_BASE = "cacheDroppingExceptions_";
6868
private static final String UNKNOWN_EXCEPTION = "UnknownException";
69+
private static final String NS_LOOKUPS = "nsLookups";
70+
private static final String NS_LOOKUPS_FAILED = "nsLookupsFailed";
6971
private static final String CLIENT_SVC = ClientService.getDescriptor().getName();
7072

7173
/** A container class for collecting details about the RPC call as it percolates. */
@@ -288,6 +290,8 @@ private static interface NewMetric<T> {
288290
protected final Counter hedgedReadWin;
289291
protected final Histogram concurrentCallsPerServerHist;
290292
protected final Histogram numActionsPerServerHist;
293+
protected final Counter nsLookups;
294+
protected final Counter nsLookupsFailed;
291295

292296
// dynamic metrics
293297

@@ -350,6 +354,8 @@ protected Ratio getRatio() {
350354
"concurrentCallsPerServer", scope));
351355
this.numActionsPerServerHist = registry.histogram(name(MetricsConnection.class,
352356
"numActionsPerServer", scope));
357+
this.nsLookups = registry.counter(name(this.getClass(), NS_LOOKUPS, scope));
358+
this.nsLookupsFailed = registry.counter(name(this.getClass(), NS_LOOKUPS_FAILED, scope));
353359

354360
this.reporter = JmxReporter.forRegistry(this.registry).build();
355361
this.reporter.start();
@@ -518,4 +524,12 @@ public void incrCacheDroppingExceptions(Object exception) {
518524
(exception == null? UNKNOWN_EXCEPTION : exception.getClass().getSimpleName()),
519525
cacheDroppingExceptions, counterFactory).inc();
520526
}
527+
528+
public void incrNsLookups() {
529+
this.nsLookups.inc();
530+
}
531+
532+
public void incrNsLookupsFailed() {
533+
this.nsLookupsFailed.inc();
534+
}
521535
}

0 commit comments

Comments
 (0)