NettyRpcConnection.java
@@ -30,6 +30,7 @@
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.io.crypto.tls.X509Util;
import org.apache.hadoop.hbase.ipc.BufferCallBeforeInitHandler.BufferCallEvent;
import org.apache.hadoop.hbase.ipc.HBaseRpcController.CancellationCallback;
@@ -347,7 +348,7 @@ public void operationComplete(ChannelFuture future) throws Exception {
  private void sendRequest0(Call call, HBaseRpcController hrc) throws IOException {
    assert eventLoop.inEventLoop();
    if (reloginInProgress) {
-      throw new IOException("Can not send request because relogin is in progress.");
+      throw new IOException(HConstants.RELOGIN_IS_IN_PROGRESS);
Contributor:
This is weird, please don't put these kinds of constants in HConstants. There are too many unrelated concerns there already.

Public static string constant in some other file, even this one, is preferred.

Contributor Author (@virajjasani, Sep 12, 2023):
NettyRpcConnection is package private, hence can't be accessed from hbase-server

Contributor Author:
not sure what is the best place to keep this, anywhere else in hbase-common would also work

Contributor:
Please do not put it in HConstants, it is IA.Public.

Contributor Author:
I understand, but I am not sure what the best place to keep this is.

Contributor Author:
ok, this is now taken care of
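
For context, a later comment in this review references RpcConnectionConstants.RELOGIN_IS_IN_PROGRESS, so a rough sketch of the resolution might look like the following: a small, IA.Private constants holder in hbase-common instead of HConstants. The class name comes from that later reference; the package and annotation are assumptions based on neighbouring classes, not the committed code.

    // Hypothetical sketch only: a dedicated constants class outside HConstants,
    // as requested above. Package and annotation are assumed.
    package org.apache.hadoop.hbase.ipc;

    import org.apache.yetus.audience.InterfaceAudience;

    @InterfaceAudience.Private
    public final class RpcConnectionConstants {

      private RpcConnectionConstants() {
      }

      public static final String RELOGIN_IS_IN_PROGRESS =
        "Can not send request because relogin is in progress.";
    }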

}
hrc.notifyOnCancel(new RpcCallback<Object>() {

HConstants.java
@@ -91,6 +91,9 @@ public final class HConstants {
  /** Just an array of bytes of the right size. */
  public static final byte[] HFILEBLOCK_DUMMY_HEADER = new byte[HFILEBLOCK_HEADER_SIZE];

  public static final String RELOGIN_IS_IN_PROGRESS =
    "Can not send request because relogin is in progress.";

  // End HFileBlockConstants.

  /**
RSProcedureDispatcher.java
@@ -22,8 +22,10 @@
import java.util.List;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import javax.security.sasl.SaslException;
import org.apache.hadoop.hbase.CallQueueTooBigException;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.AsyncRegionServerAdmin;
import org.apache.hadoop.hbase.client.RegionInfo;
@@ -306,6 +308,10 @@ private boolean scheduleForRetry(IOException e) {
          serverName, e.toString(), numberOfAttemptsSoFar);
        return false;
      }
      if (isSaslError(e) && numberOfAttemptsSoFar == 0) {
Contributor:
Does it matter how many attempts we have had so far if now we are getting a SASL error?

Contributor Author:
Duo's point is that, if some attempts were already successful, it might have triggered region transition already and we might be in the middle of another sub-procedure when we encounter this.

I also think num of attempts should not matter as we won't be able to make any progress anyways, but then it takes our discussion back to the parent Jira HBASE-28048

Contributor:
Yes, exactly as @virajjasani said, we need to make sure that the remote procedure has not been sent to the rs; then we are safe to quit and choose another rs. Otherwise the only safe way is to rely on SCP to tell us the rs is dead, so we are safe to quit here.

So if we hit another error the first time, like connection timed out, then we are not sure whether we have already sent the procedure to the rs, and then no matter what the exceptions are in the following retries, we are not safe to quit.

In the real world, if authentication is not configured correctly, it is likely we will get a sasl error on the first try, so the code is enough to cover the problem here.

Contributor:
@virajjasani Better to introduce a method, maybe called

boolean hasNotReachedRegionServerYet(IOException e)

In this method we could test for both the sasl error and call queue too big, and also other exception types in the future. Then we do not need to add an extra condition every time we want to add a new exception type test in the scheduleForRetry method.

Contributor Author:
done
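
The suggested helper is not part of this revision of the diff; a minimal sketch of what it might look like is given below. The method name is taken from the comment above, and the exact set of exception tests is illustrative only.

    // Illustrative sketch of the reviewer's suggestion: gather the
    // "safe to give up, the procedure never reached the region server"
    // tests in one place instead of adding conditions to scheduleForRetry.
    private boolean hasNotReachedRegionServerYet(IOException e) {
      // SASL/authentication failures happen while setting up the connection,
      // i.e. before the remote procedure could have been delivered; a
      // CallQueueTooBigException means the server rejected the call outright.
      return isSaslError(e) || e instanceof CallQueueTooBigException;
    }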

        LOG.warn("{} is not reachable; give up after first attempt", serverName, e);
        return false;
      }
      if (e instanceof RegionServerAbortedException || e instanceof RegionServerStoppedException) {
        // A better way is to return true here to let the upper layer quit, and then schedule a
        // background task to check whether the region server is dead. And if it is dead, call
@@ -330,6 +336,52 @@ private boolean scheduleForRetry(IOException e) {
      return true;
    }

    private boolean isSaslError(IOException e) {
      if (
        e instanceof SaslException
          || (e.getMessage() != null && e.getMessage().contains(HConstants.RELOGIN_IS_IN_PROGRESS))
      ) {
        return true;
      }
      // check 4 levels of cause
Contributor:
Use a for loop here? And why only test 4 levels?

Contributor Author:
it's based on the examples we have seen so far, e.g.
procedure.RSProcedureDispatcher - request to rs1,61020,1692930044498 failed due to java.io.IOException: Call to address=rs1:61020 failed on local exception: java.io.IOException: org.apache.hbase.thirdparty.io.netty.handler.codec.DecoderException: org.apache.hadoop.ipc.RemoteException(javax.security.sasl.SaslException): GSS initiate failed, try=0, retrying...

Contributor:
I think we could just use a loop to get the cause until the cause is null, to check all the exceptions on the chain. And we also need to handle RemoteException specially, to unwrap it instead of just calling getCause?

Contributor Author:

> handle RemoteException specially, to unwrap it instead of just calling getCause

yes, that is taken care of:

    private boolean isThrowableOfTypeSasl(Throwable cause) {
      if (cause instanceof IOException) {
        IOException unwrappedException = unwrapException((IOException) cause);
        return unwrappedException instanceof SaslException
          || (unwrappedException.getMessage() != null && unwrappedException.getMessage()
            .contains(RpcConnectionConstants.RELOGIN_IS_IN_PROGRESS));
      }
      return false;
    }

Contributor:
I mean after unwrapping, you still need to go back to the getCause loop, not only test one time...

Contributor Author:
Yes, it is in the loop

      while (true) {
        cause = cause.getCause();
        if (cause == null) {
          return false;
        }
        if (isThrowableOfTypeSasl(cause)) {
          return true;
        }
      }

isThrowableOfTypeSasl does the unwrap and checks for type of exception.

      Throwable cause = e.getCause();
      if (cause == null) {
        return false;
      }
      if (isSaslError(cause)) {
        return true;
      }
      cause = cause.getCause();
      if (cause == null) {
        return false;
      }
      if (isSaslError(cause)) {
        return true;
      }
      cause = cause.getCause();
      if (cause == null) {
        return false;
      }
      if (isSaslError(cause)) {
        return true;
      }
      cause = cause.getCause();
      if (cause == null) {
        return false;
      }
      return isSaslError(cause);
    }

    private boolean isSaslError(Throwable cause) {
Contributor:
Please do not use the same method name here. IOException is also a Throwable, and although this is valid in Java, it will confuse developers.

      if (cause instanceof IOException) {
        IOException unwrappedException = unwrapException((IOException) cause);
        return unwrappedException instanceof SaslException
          || (unwrappedException.getMessage() != null
            && unwrappedException.getMessage().contains(HConstants.RELOGIN_IS_IN_PROGRESS));
      }
      return false;
    }
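
Putting the review feedback together (walk the whole cause chain in a loop, unwrap RemoteException, and avoid overloading the isSaslError name), the revised helpers quoted in the comments above would look roughly like the sketch below. This is assembled from the quoted snippets, not the exact committed code: unwrapException is assumed to be an existing helper in this class that unwraps a RemoteException into the underlying IOException, and RpcConnectionConstants is the constants holder referenced earlier in the thread.

    // Sketch: check the exception itself and every cause on the chain,
    // unwrapping RemoteException before the type/message tests.
    private boolean isSaslError(IOException e) {
      Throwable cause = e;
      while (true) {
        if (isThrowableOfTypeSasl(cause)) {
          return true;
        }
        cause = cause.getCause();
        if (cause == null) {
          return false;
        }
      }
    }

    private boolean isThrowableOfTypeSasl(Throwable cause) {
      if (cause instanceof IOException) {
        // unwrapException is assumed to unwrap RemoteException here.
        IOException unwrapped = unwrapException((IOException) cause);
        return unwrapped instanceof SaslException
          || (unwrapped.getMessage() != null
            && unwrapped.getMessage().contains(RpcConnectionConstants.RELOGIN_IS_IN_PROGRESS));
      }
      return false;
    }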

    private long getMaxWaitTime() {
      if (this.maxWaitTime < 0) {
        // This is the max attempts, not retries, so it should be at least 1.