1- /**
1+ /*
22 *
33 * Licensed to the Apache Software Foundation (ASF) under one
44 * or more contributor license agreements. See the NOTICE file
1717 * limitations under the License.
1818 */
1919package org .apache .hadoop .hbase .master ;
20-
2120import java .io .IOException ;
2221import java .util .concurrent .atomic .AtomicBoolean ;
2322
@@ -57,12 +56,18 @@ public class ActiveMasterManager extends ZooKeeperListener {
5756 final AtomicBoolean clusterHasActiveMaster = new AtomicBoolean (false );
5857 final AtomicBoolean clusterShutDown = new AtomicBoolean (false );
5958
59+ // This server's information.
6060 private final ServerName sn ;
6161 private int infoPort ;
6262 private final Server master ;
6363
64+ // Active master's server name. Invalidated anytime active master changes (based on ZK
65+ // notifications) and lazily fetched on-demand.
66+ // ServerName is immutable, so we don't need heavy synchronization around it.
67+ private volatile ServerName activeMasterServerName ;
68+
6469 /**
65- * @param watcher
70+ * @param watcher ZK watcher
6671 * @param sn ServerName
6772 * @param master An instance of a Master.
6873 */
@@ -106,6 +111,33 @@ void handle(final String path) {
106111 }
107112 }
108113
114+ /**
115+ * Fetches the active master's ServerName from zookeeper.
116+ */
117+ private void fetchAndSetActiveMasterServerName () {
118+ LOG .debug ("Attempting to fetch active master sn from zk" );
119+ try {
120+ activeMasterServerName = MasterAddressTracker .getMasterAddress (watcher );
121+ } catch (IOException | KeeperException e ) {
122+ // Log and ignore for now and re-fetch later if needed.
123+ LOG .error ("Error fetching active master information" , e );
124+ }
125+ }
126+
127+ /**
128+ * @return the currently active master as seen by us or null if one does not exist.
129+ */
130+ public ServerName getActiveMasterServerName () {
131+ if (!clusterHasActiveMaster .get ()) {
132+ return null ;
133+ }
134+ if (activeMasterServerName == null ) {
135+ fetchAndSetActiveMasterServerName ();
136+ }
137+ // It could still be null, but return whatever we have.
138+ return activeMasterServerName ;
139+ }
140+
109141 /**
110142 * Handle a change in the master node. Doesn't matter whether this was called
111143 * from a nodeCreated or nodeDeleted event because there are no guarantees
@@ -134,6 +166,9 @@ private void handleMasterNodeChange() {
134166 // Notify any thread waiting to become the active master
135167 clusterHasActiveMaster .notifyAll ();
136168 }
169+ // Reset the active master sn. Will be re-fetched later if needed.
170+ // We don't want to make a synchronous RPC under a monitor.
171+ activeMasterServerName = null ;
137172 }
138173 } catch (KeeperException ke ) {
139174 master .abort ("Received an unexpected KeeperException, aborting" , ke );
@@ -151,8 +186,8 @@ private void handleMasterNodeChange() {
151186 * @param checkInterval the interval to check if the master is stopped
152187 * @param startupStatus the monitor status to track the progress
153188 * @return True if no issue becoming active master else false if another
154- * master was running or if some other problem (zookeeper, stop flag has been
155- * set on this Master)
189+ * master was running or if some other problem (zookeeper, stop flag has been
190+ * set on this Master)
156191 */
157192 boolean blockUntilBecomingActiveMaster (
158193 int checkInterval , MonitoredTask startupStatus ) {
@@ -179,9 +214,13 @@ boolean blockUntilBecomingActiveMaster(
179214 startupStatus .setStatus ("Successfully registered as active master." );
180215 this .clusterHasActiveMaster .set (true );
181216 LOG .info ("Registered Active Master=" + this .sn );
217+ activeMasterServerName = sn ;
182218 return true ;
183219 }
184220
221+ // Invalidate the active master name so that subsequent requests do not get any stale
222+ // master information. Will be re-fetched if needed.
223+ activeMasterServerName = null ;
185224 // There is another active master running elsewhere or this is a restart
186225 // and the master ephemeral node has not expired yet.
187226 this .clusterHasActiveMaster .set (true );
@@ -208,7 +247,8 @@ boolean blockUntilBecomingActiveMaster(
208247 ZKUtil .deleteNode (this .watcher , this .watcher .getMasterAddressZNode ());
209248
210249 // We may have failed to delete the znode at the previous step, but
211- // we delete the file anyway: a second attempt to delete the znode is likely to fail again.
250+ // we delete the file anyway: a second attempt to delete the znode is likely to fail
251+ // again.
212252 ZNodeClearer .deleteMyEphemeralNodeOnDisk ();
213253 } else {
214254 msg = "Another master is the active master, " + currentMaster +
0 commit comments