2727import java .util .List ;
2828import java .util .Map ;
2929import java .util .Map .Entry ;
30+ import java .util .Objects ;
3031import java .util .Set ;
3132import java .util .concurrent .ConcurrentNavigableMap ;
3233import java .util .concurrent .ConcurrentSkipListMap ;
4445import org .apache .hadoop .hbase .YouAreDeadException ;
4546import org .apache .hadoop .hbase .client .ClusterConnection ;
4647import org .apache .hadoop .hbase .client .RegionInfo ;
48+ import org .apache .hadoop .hbase .conf .ConfigurationObserver ;
4749import org .apache .hadoop .hbase .ipc .HBaseRpcController ;
4850import org .apache .hadoop .hbase .ipc .RemoteWithExtrasException ;
4951import org .apache .hadoop .hbase .ipc .RpcControllerFactory ;
8890 * only after the handler is fully enabled and has completed the handling.
8991 */
9092@ InterfaceAudience .Private
91- public class ServerManager {
93+ public class ServerManager implements ConfigurationObserver {
9294 public static final String WAIT_ON_REGIONSERVERS_MAXTOSTART =
9395 "hbase.master.wait.on.regionservers.maxtostart" ;
9496
@@ -140,6 +142,9 @@ public class ServerManager {
140142 /** Listeners that are called on server events. */
141143 private List <ServerListener > listeners = new CopyOnWriteArrayList <>();
142144
145+ /** Configured value of HConstants.REJECT_DECOMMISSIONED_HOSTS_KEY */
146+ private volatile boolean rejectDecommissionedHostsConfig ;
147+
143148 /**
144149 * Constructor.
145150 */
@@ -152,6 +157,35 @@ public ServerManager(final MasterServices master, RegionServerList storage) {
152157 this .connection = master .getClusterConnection ();
153158 this .rpcControllerFactory =
154159 this .connection == null ? null : connection .getRpcControllerFactory ();
160+ rejectDecommissionedHostsConfig = getRejectDecommissionedHostsConfig (c );
161+ }
162+
163+ /**
164+ * Implementation of the ConfigurationObserver interface. We are interested in live-loading the
165+ * configuration value of HConstants.REJECT_DECOMMISSIONED_HOSTS_KEY
166+ * @param conf Server configuration instance
167+ */
168+ @ Override
169+ public void onConfigurationChange (Configuration conf ) {
170+ final boolean newValue = getRejectDecommissionedHostsConfig (conf );
171+ if (rejectDecommissionedHostsConfig == newValue ) {
172+ // no-op
173+ return ;
174+ }
175+
176+ LOG .info ("Config Reload for RejectDecommissionedHosts. previous value: {}, new value: {}" ,
177+ rejectDecommissionedHostsConfig , newValue );
178+
179+ rejectDecommissionedHostsConfig = newValue ;
180+ }
181+
182+ /**
183+ * Reads the value of HConstants.REJECT_DECOMMISSIONED_HOSTS_KEY from the config and returns it
184+ * @param conf Configuration instance of the Master
185+ */
186+ public boolean getRejectDecommissionedHostsConfig (Configuration conf ) {
187+ return conf .getBoolean (HConstants .REJECT_DECOMMISSIONED_HOSTS_KEY ,
188+ HConstants .REJECT_DECOMMISSIONED_HOSTS_DEFAULT );
155189 }
156190
157191 /**
@@ -191,11 +225,14 @@ ServerName regionServerStartup(RegionServerStartupRequest request, int versionNu
191225 final String hostname =
192226 request .hasUseThisHostnameInstead () ? request .getUseThisHostnameInstead () : ia .getHostName ();
193227 ServerName sn = ServerName .valueOf (hostname , request .getPort (), request .getServerStartCode ());
228+
229+ // Check if the host should be rejected based on its decommissioned status
230+ checkRejectableDecommissionedStatus (sn );
231+
194232 checkClockSkew (sn , request .getServerCurrentTime ());
195233 checkIsDead (sn , "STARTUP" );
196234 if (!checkAndRecordNewServer (sn , ServerMetricsBuilder .of (sn , versionNumber , version ))) {
197- LOG .warn (
198- "THIS SHOULD NOT HAPPEN, RegionServerStartup" + " could not record the server: " + sn );
235+ LOG .warn ("THIS SHOULD NOT HAPPEN, RegionServerStartup could not record the server: {}" , sn );
199236 }
200237 storage .started (sn );
201238 return sn ;
@@ -257,6 +294,42 @@ public void regionServerReport(ServerName sn, ServerMetrics sl) throws YouAreDea
257294 updateLastFlushedSequenceIds (sn , sl );
258295 }
259296
297+ /**
298+ * Checks if the Master is configured to reject decommissioned hosts or not. When it's configured
299+ * to do so, any RegionServer trying to join the cluster will have it's host checked against the
300+ * list of hosts of currently decommissioned servers and potentially get prevented from reporting
301+ * for duty; otherwise, we do nothing and we let them pass to the next check. See HBASE-28342 for
302+ * details.
303+ * @param sn The ServerName to check for
304+ * @throws DecommissionedHostRejectedException if the Master is configured to reject
305+ * decommissioned hosts and this host exists in the
306+ * list of the decommissioned servers
307+ */
308+ private void checkRejectableDecommissionedStatus (ServerName sn )
309+ throws DecommissionedHostRejectedException {
310+ LOG .info ("Checking decommissioned status of RegionServer {}" , sn .getServerName ());
311+
312+ // If the Master is not configured to reject decommissioned hosts, return early.
313+ if (!rejectDecommissionedHostsConfig ) {
314+ return ;
315+ }
316+
317+ // Look for a match for the hostname in the list of decommissioned servers
318+ for (ServerName server : getDrainingServersList ()) {
319+ if (Objects .equals (server .getHostname (), sn .getHostname ())) {
320+ // Found a match and master is configured to reject decommissioned hosts, throw exception!
321+ LOG .warn (
322+ "Rejecting RegionServer {} from reporting for duty because Master is configured "
323+ + "to reject decommissioned hosts and this host was marked as such in the past." ,
324+ sn .getServerName ());
325+ throw new DecommissionedHostRejectedException (String .format (
326+ "Host %s exists in the list of decommissioned servers and Master is configured to "
327+ + "reject decommissioned hosts" ,
328+ sn .getHostname ()));
329+ }
330+ }
331+ }
332+
260333 /**
261334 * Check is a server of same host and port already exists, if not, or the existed one got a
262335 * smaller start code, record it.
@@ -597,13 +670,8 @@ public synchronized void moveFromOnlineToDeadServers(final ServerName sn) {
597670 * Remove the server from the drain list.
598671 */
599672 public synchronized boolean removeServerFromDrainList (final ServerName sn ) {
600- // Warn if the server (sn) is not online. ServerName is of the form:
601- // <hostname> , <port> , <startcode>
673+ LOG .info ("Removing server {} from the draining list." , sn );
602674
603- if (!this .isServerOnline (sn )) {
604- LOG .warn ("Server " + sn + " is not currently online. "
605- + "Removing from draining list anyway, as requested." );
606- }
607675 // Remove the server from the draining servers lists.
608676 return this .drainingServers .remove (sn );
609677 }
@@ -613,22 +681,23 @@ public synchronized boolean removeServerFromDrainList(final ServerName sn) {
613681 * @return True if the server is added or the server is already on the drain list.
614682 */
615683 public synchronized boolean addServerToDrainList (final ServerName sn ) {
616- // Warn if the server (sn) is not online. ServerName is of the form:
617- // <hostname> , <port> , <startcode>
618-
619- if (!this .isServerOnline (sn )) {
620- LOG .warn ("Server " + sn + " is not currently online. "
621- + "Ignoring request to add it to draining list." );
684+ // If master is not rejecting decommissioned hosts, warn if the server (sn) is not online.
685+ // However, we want to add servers even if they're not online if the master is configured
686+ // to reject decommissioned hosts
687+ if (!rejectDecommissionedHostsConfig && ! this .isServerOnline (sn )) {
688+ LOG .warn ("Server {} is not currently online. Ignoring request to add it to draining list." ,
689+ sn );
622690 return false ;
623691 }
624- // Add the server to the draining servers lists, if it's not already in
625- // it.
692+
693+ // Add the server to the draining servers lists, if it's not already in it.
626694 if (this .drainingServers .contains (sn )) {
627- LOG .warn ("Server " + sn + " is already in the draining server list."
628- + " Ignoring request to add it again." );
695+ LOG .warn (
696+ "Server {} is already in the draining server list. Ignoring request to add it again.", sn );
629697 return true ;
630698 }
631- LOG .info ("Server " + sn + " added to draining server list." );
699+
700+ LOG .info ("Server {} added to draining server list." , sn );
632701 return this .drainingServers .add (sn );
633702 }
634703
0 commit comments