Skip to content

Commit 36ea9e2

Browse files
committed
HBASE-23275: Track active master's address in ActiveMasterManager (apache#812)
Currently we only track whether an active master exists. It is also useful for every master to track the address of the active master, so that any master can answer client RPC requests asking which master is currently active. Signed-off-by: Nick Dimiduk <[email protected]> Signed-off-by: Andrew Purtell <[email protected]> (cherry picked from commit efebb84) (cherry picked from commit 7429491)
1 parent b228e17 commit 36ea9e2

File tree

3 files changed

+60
-6
lines changed

3 files changed

+60
-6
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/master/ActiveMasterManager.java

Lines changed: 46 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/**
1+
/*
22
*
33
* Licensed to the Apache Software Foundation (ASF) under one
44
* or more contributor license agreements. See the NOTICE file
@@ -17,7 +17,6 @@
1717
* limitations under the License.
1818
*/
1919
package org.apache.hadoop.hbase.master;
20-
2120
import java.io.IOException;
2221
import java.util.concurrent.atomic.AtomicBoolean;
2322

@@ -57,12 +56,18 @@ public class ActiveMasterManager extends ZooKeeperListener {
5756
final AtomicBoolean clusterHasActiveMaster = new AtomicBoolean(false);
5857
final AtomicBoolean clusterShutDown = new AtomicBoolean(false);
5958

59+
// This server's information.
6060
private final ServerName sn;
6161
private int infoPort;
6262
private final Server master;
6363

64+
// Active master's server name. Invalidated anytime active master changes (based on ZK
65+
// notifications) and lazily fetched on-demand.
66+
// ServerName is immutable, so we don't need heavy synchronization around it.
67+
private volatile ServerName activeMasterServerName;
68+
6469
/**
65-
* @param watcher
70+
* @param watcher ZK watcher
6671
* @param sn ServerName
6772
* @param master an instance of a Master.
6873
*/
@@ -106,6 +111,33 @@ void handle(final String path) {
106111
}
107112
}
108113

114+
/**
115+
* Fetches the active master's ServerName from zookeeper.
116+
*/
117+
private void fetchAndSetActiveMasterServerName() {
118+
LOG.debug("Attempting to fetch active master sn from zk");
119+
try {
120+
activeMasterServerName = MasterAddressTracker.getMasterAddress(watcher);
121+
} catch (IOException | KeeperException e) {
122+
// Log and ignore for now and re-fetch later if needed.
123+
LOG.error("Error fetching active master information", e);
124+
}
125+
}
126+
127+
/**
128+
* @return the currently active master as seen by us or null if one does not exist.
129+
*/
130+
public ServerName getActiveMasterServerName() {
131+
if (!clusterHasActiveMaster.get()) {
132+
return null;
133+
}
134+
if (activeMasterServerName == null) {
135+
fetchAndSetActiveMasterServerName();
136+
}
137+
// It could still be null, but return whatever we have.
138+
return activeMasterServerName;
139+
}
140+
109141
/**
110142
* Handle a change in the master node. Doesn't matter whether this was called
111143
* from a nodeCreated or nodeDeleted event because there are no guarantees
@@ -134,6 +166,9 @@ private void handleMasterNodeChange() {
134166
// Notify any thread waiting to become the active master
135167
clusterHasActiveMaster.notifyAll();
136168
}
169+
// Reset the active master sn. Will be re-fetched later if needed.
170+
// We don't want to make a synchronous RPC under a monitor.
171+
activeMasterServerName = null;
137172
}
138173
} catch (KeeperException ke) {
139174
master.abort("Received an unexpected KeeperException, aborting", ke);
@@ -151,8 +186,8 @@ private void handleMasterNodeChange() {
151186
* @param checkInterval the interval to check if the master is stopped
152187
* @param startupStatus the monitor status to track the progress
153188
* @return True if no issue becoming active master else false if another
154-
* master was running or if some other problem (zookeeper, stop flag has been
155-
* set on this Master)
189+
* master was running or if some other problem (zookeeper, stop flag has been
190+
* set on this Master)
156191
*/
157192
boolean blockUntilBecomingActiveMaster(
158193
int checkInterval, MonitoredTask startupStatus) {
@@ -179,9 +214,13 @@ boolean blockUntilBecomingActiveMaster(
179214
startupStatus.setStatus("Successfully registered as active master.");
180215
this.clusterHasActiveMaster.set(true);
181216
LOG.info("Registered Active Master=" + this.sn);
217+
activeMasterServerName = sn;
182218
return true;
183219
}
184220

221+
// Invalidate the active master name so that subsequent requests do not get any stale
222+
// master information. Will be re-fetched if needed.
223+
activeMasterServerName = null;
185224
// There is another active master running elsewhere or this is a restart
186225
// and the master ephemeral node has not expired yet.
187226
this.clusterHasActiveMaster.set(true);
@@ -208,7 +247,8 @@ boolean blockUntilBecomingActiveMaster(
208247
ZKUtil.deleteNode(this.watcher, this.watcher.getMasterAddressZNode());
209248

210249
// We may have failed to delete the znode at the previous step, but
211-
// we delete the file anyway: a second attempt to delete the znode is likely to fail again.
250+
// we delete the file anyway: a second attempt to delete the znode is likely to fail
251+
// again.
212252
ZNodeClearer.deleteMyEphemeralNodeOnDisk();
213253
} else {
214254
msg = "Another master is the active master, " + currentMaster +

hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3434,6 +3434,10 @@ public LoadBalancer getLoadBalancer() {
34343434
return replicationLoadSourceMap;
34353435
}
34363436

3437+
public ServerName getActiveMaster() {
3438+
return activeMasterManager.getActiveMasterServerName();
3439+
}
3440+
34373441
public String getClusterId() {
34383442
if (activeMaster) {
34393443
return super.getClusterId();

hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestActiveMasterManager.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
*/
1919
package org.apache.hadoop.hbase.master;
2020

21+
import static org.junit.Assert.assertEquals;
2122
import static org.junit.Assert.assertFalse;
2223
import static org.junit.Assert.assertNotNull;
2324
import static org.junit.Assert.assertTrue;
@@ -84,6 +85,7 @@ public static void tearDownAfterClass() throws Exception {
8485
ActiveMasterManager activeMasterManager =
8586
dummyMaster.getActiveMasterManager();
8687
assertFalse(activeMasterManager.clusterHasActiveMaster.get());
88+
assertFalse(activeMasterManager.getActiveMasterServerName().isPresent());
8789

8890
// First test becoming the active master uninterrupted
8991
MonitoredTask status = Mockito.mock(MonitoredTask.class);
@@ -92,6 +94,7 @@ public static void tearDownAfterClass() throws Exception {
9294
activeMasterManager.blockUntilBecomingActiveMaster(100, status);
9395
assertTrue(activeMasterManager.clusterHasActiveMaster.get());
9496
assertMaster(zk, master);
97+
assertMaster(zk, activeMasterManager.getActiveMasterServerName().get());
9598

9699
// Now pretend master restart
97100
DummyMaster secondDummyMaster = new DummyMaster(zk,master);
@@ -101,6 +104,8 @@ public static void tearDownAfterClass() throws Exception {
101104
activeMasterManager.blockUntilBecomingActiveMaster(100, status);
102105
assertTrue(activeMasterManager.clusterHasActiveMaster.get());
103106
assertMaster(zk, master);
107+
assertMaster(zk, activeMasterManager.getActiveMasterServerName().get());
108+
assertMaster(zk, secondActiveMasterManager.getActiveMasterServerName().get());
104109
}
105110

106111
/**
@@ -128,6 +133,7 @@ public void testActiveMasterManagerFromZK() throws Exception {
128133
ActiveMasterManager activeMasterManager =
129134
ms1.getActiveMasterManager();
130135
assertFalse(activeMasterManager.clusterHasActiveMaster.get());
136+
assertFalse(activeMasterManager.getActiveMasterServerName().isPresent());
131137

132138
// First test becoming the active master uninterrupted
133139
ClusterStatusTracker clusterStatusTracker =
@@ -137,6 +143,7 @@ public void testActiveMasterManagerFromZK() throws Exception {
137143
Mockito.mock(MonitoredTask.class));
138144
assertTrue(activeMasterManager.clusterHasActiveMaster.get());
139145
assertMaster(zk, firstMasterAddress);
146+
assertMaster(zk, activeMasterManager.getActiveMasterServerName().get());
140147

141148
// New manager will now try to become the active master in another thread
142149
WaitToBeMasterThread t = new WaitToBeMasterThread(zk, secondMasterAddress);
@@ -154,6 +161,8 @@ public void testActiveMasterManagerFromZK() throws Exception {
154161
assertTrue(t.manager.clusterHasActiveMaster.get());
155162
// But secondary one should not be the active master
156163
assertFalse(t.isActiveMaster);
164+
// Verify the active master ServerName is populated in standby master.
165+
assertEquals(firstMasterAddress, t.manager.getActiveMasterServerName().get());
157166

158167
// Close the first server and delete its master node
159168
ms1.stop("stopping first server");
@@ -181,6 +190,7 @@ public void testActiveMasterManagerFromZK() throws Exception {
181190

182191
assertTrue(t.manager.clusterHasActiveMaster.get());
183192
assertTrue(t.isActiveMaster);
193+
assertEquals(secondMasterAddress, t.manager.getActiveMasterServerName().get());
184194

185195
LOG.info("Deleting master node");
186196

0 commit comments

Comments
 (0)