HBASE-23205 Correctly update the position of WALs currently being replicated #980
```diff
@@ -18,6 +18,7 @@
  */
 package org.apache.hadoop.hbase.replication.regionserver;
 
+import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.Lists;
 import com.google.common.util.concurrent.ListenableFuture;
 import com.google.common.util.concurrent.Service;
```
```diff
@@ -436,14 +437,30 @@ public String getPeerClusterId() {
   }
 
   @Override
   @VisibleForTesting
   public Path getCurrentPath() {
     // only for testing
     for (ReplicationSourceShipperThread worker : workerThreads.values()) {
       if (worker.getCurrentPath() != null) return worker.getCurrentPath();
     }
     return null;
   }
 
+  @VisibleForTesting
+  public Path getLastLoggedPath() {
+    for (ReplicationSourceShipperThread worker : workerThreads.values()) {
+      return worker.getLastLoggedPath();
+    }
+    return null;
+  }
+
+  @VisibleForTesting
+  public long getLastLoggedPosition() {
+    for (ReplicationSourceShipperThread worker : workerThreads.values()) {
+      return worker.getLastLoggedPosition();
+    }
+    return 0;
+  }
+
   private boolean isSourceActive() {
     return !this.stopper.isStopped() && this.sourceRunning;
   }
```

```diff
@@ -478,8 +495,8 @@ public String getStats() {
     for (Map.Entry<String, ReplicationSourceShipperThread> entry : workerThreads.entrySet()) {
       String walGroupId = entry.getKey();
       ReplicationSourceShipperThread worker = entry.getValue();
-      long position = worker.getCurrentPosition();
-      Path currentPath = worker.getCurrentPath();
+      long position = worker.getLastLoggedPosition();
+      Path currentPath = worker.getLastLoggedPath();
       sb.append("walGroup [").append(walGroupId).append("]: ");
       if (currentPath != null) {
         sb.append("currently replicating from: ").append(currentPath).append(" at position: ")
```

Contributor: This is not for test, right? We iterate the workers twice -- once to get the path and then again to get the position. If we don't want to return a Pair with Path and offset, could we just iterate once?

Contributor: Hmm... reading later in the patch, I see the need to have them distinct. Can we be sure we'll get the right offset for the selected path?

Author: Yes, this is not for test, and they are not the same one. Each worker thread has its own path and position, so there is no need to iterate twice; both values are read from the same worker.
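To make the pairing guarantee from this thread concrete, here is a minimal model in plain Java. It is illustrative only, not the HBase source: the field names mirror the patch, but the class is a sketch. Both fields live on the same worker object and are written only by that worker's shipper thread, so reading them through one worker reference keeps the offset attached to its path.

```java
// Illustrative model of a shipper worker; not the actual HBase class.
class WorkerModel {
  private volatile String lastLoggedPath;        // WAL path last persisted
  private volatile long lastLoggedPosition = -1; // offset within that same path

  // Called only by this worker's own shipper thread after a successful ship,
  // so path and position can never come from two different WAL groups.
  void recordProgress(String path, long position) {
    lastLoggedPath = path;
    lastLoggedPosition = position;
  }

  String getLastLoggedPath() { return lastLoggedPath; }
  long getLastLoggedPosition() { return lastLoggedPosition; }
}
```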
|
|
```diff
@@ -513,7 +530,7 @@ public class ReplicationSourceShipperThread extends Thread {
     // Last position in the log that we sent to ZooKeeper
     private long lastLoggedPosition = -1;
     // Path of the current log
-    private volatile Path currentPath;
+    private volatile Path lastLoggedPath;
     // Current state of the worker thread
     private WorkerState state;
     ReplicationSourceWALReaderThread entryReader;
```
|
|
```diff
@@ -553,21 +570,19 @@ public void run() {
       try {
         WALEntryBatch entryBatch = entryReader.take();
         shipEdits(entryBatch);
-        if (replicationQueueInfo.isQueueRecovered() && entryBatch.getWalEntries().isEmpty()
-            && entryBatch.getLastSeqIds().isEmpty()) {
-          LOG.debug("Finished recovering queue for group " + walGroupId + " of peer "
-              + peerClusterZnode);
+        if (!entryBatch.hasMoreEntries()) {
+          LOG.debug("Finished recovering queue for group "
+              + walGroupId + " of peer " + peerClusterZnode);
           metrics.incrCompletedRecoveryQueue();
           setWorkerState(WorkerState.FINISHED);
           continue;
         }
       } catch (InterruptedException e) {
         LOG.trace("Interrupted while waiting for next replication entry batch", e);
         Thread.currentThread().interrupt();
       }
     }
 
-    if (replicationQueueInfo.isQueueRecovered() && getWorkerState() == WorkerState.FINISHED) {
+    if (getWorkerState() == WorkerState.FINISHED) {
       // use synchronize to make sure one last thread will clean the queue
       synchronized (this) {
         Threads.sleep(100);// wait a short while for other worker thread to fully exit
```
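A rough sketch of the loop shape this change produces, as a self-contained model (not the HBase source; the patch's WALEntryBatch exposes hasMoreEntries(), here modeled as a plain field): the reader flags the last batch of a recovered queue, and the shipper finishes on that flag instead of inferring completion from empty entry and sequence-id lists.

```java
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;

// Illustrative model of the shipper's termination check after the change.
class ShipperLoopModel {
  static final class Batch {
    final boolean hasMoreEntries; // set to false by the reader on the final batch
    Batch(boolean hasMoreEntries) { this.hasMoreEntries = hasMoreEntries; }
  }

  private final BlockingQueue<Batch> queue = new LinkedBlockingQueue<>();

  void run() throws InterruptedException {
    while (true) {
      Batch batch = queue.take(); // blocks until the reader enqueues a batch
      // shipEdits(batch) would go here; the position is persisted even for an
      // empty batch, so a drained recovered queue still records its offset.
      if (!batch.hasMoreEntries) {
        break; // worker transitions to WorkerState.FINISHED
      }
    }
  }
}
```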
|
|
```diff
@@ -635,15 +650,13 @@ private void checkBandwidthChangeAndResetThrottler() {
   protected void shipEdits(WALEntryBatch entryBatch) {
     List<Entry> entries = entryBatch.getWalEntries();
     long lastReadPosition = entryBatch.getLastWalPosition();
-    currentPath = entryBatch.getLastWalPath();
+    lastLoggedPath = entryBatch.getLastWalPath();
     int sleepMultiplier = 0;
     if (entries.isEmpty()) {
-      if (lastLoggedPosition != lastReadPosition) {
-        updateLogPosition(lastReadPosition);
-        // if there was nothing to ship and it's not an error
-        // set "ageOfLastShippedOp" to <now> to indicate that we're current
-        metrics.setAgeOfLastShippedOp(EnvironmentEdgeManager.currentTime(), walGroupId);
-      }
+      updateLogPosition(lastReadPosition);
+      // if there was nothing to ship and it's not an error
+      // set "ageOfLastShippedOp" to <now> to indicate that we're current
+      metrics.setAgeOfLastShippedOp(EnvironmentEdgeManager.currentTime(), walGroupId);
       return;
     }
     int currentSize = (int) entryBatch.getHeapSize();
```

Contributor: Will we be doing a bunch of spinning, updating the same value over and over, without these checks in place that look to see whether the position has changed? Will we be burning CPU to no end?

Author: It won't be updated with the same value repeatedly. The entryReader puts a batch on the queue only when the read position has changed.
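Roughly what the author describes, as a self-contained model (illustrative names; not the HBase reader class): the reader enqueues a batch only when its read position has advanced, so the shipper's now-unconditional updateLogPosition() runs once per actual change rather than spinning on the same value.

```java
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;

// Illustrative model of the reader-side guard: a batch reaches the shipper
// only when the read position moved, so persisting the position on every
// batch cannot repeat the same value.
class ReaderModel {
  private final BlockingQueue<Long> batches = new LinkedBlockingQueue<>();
  private long lastQueuedPosition = -1;

  void onRead(long newPosition) throws InterruptedException {
    if (newPosition != lastQueuedPosition) { // skip if nothing advanced
      batches.put(newPosition);              // hand work to the shipper
      lastQueuedPosition = newPosition;
    }
  }
}
```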
|
|
```diff
@@ -727,8 +740,7 @@ protected void shipEdits(WALEntryBatch entryBatch) {
   private void updateLogPosition(long lastReadPosition) {
-    manager.setPendingShipment(false);
-    manager.logPositionAndCleanOldLogs(currentPath, peerClusterZnode, lastReadPosition,
+    manager.logPositionAndCleanOldLogs(lastLoggedPath, peerClusterZnode, lastReadPosition,
       this.replicationQueueInfo.isQueueRecovered(), false);
     lastLoggedPosition = lastReadPosition;
   }
```

Contributor: Is it OK to remove this setting?

Contributor: We don't do the pending flag anymore?

Author: Yes, we don't need the flag anymore. It was required because not only the shipper but also the reader thread could update the log position, and that had issues with concurrency and a high update frequency. With this PR, when a WAL is rolled or the log position changes, the reader puts a batch on the queue for the shipper, and the shipper updates the log position.
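As a rough sketch of the ownership change the author describes (a model only; setPendingShipment, manager, and the thread roles come from the patch context, everything else is illustrative): the position now has a single writer, the shipper thread, so no handshake flag is needed between reader and shipper.

```java
// Illustrative model, not the HBase source. Before the patch, reader and
// shipper both wrote the position, so a "pendingShipment" flag had to stop
// the reader from overwriting an offset the shipper had not persisted yet.
// After the patch the shipper is the single writer.
class PositionOwnerModel {
  private long lastLoggedPosition = -1;

  // Invoked only from the shipper thread after it drains a batch from the
  // reader; the real code persists via manager.logPositionAndCleanOldLogs().
  void updateLogPosition(long lastReadPosition) {
    lastLoggedPosition = lastReadPosition;
  }
}
```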
|
|
```diff
@@ -740,7 +752,7 @@ public void startup() {
       public void uncaughtException(final Thread t, final Throwable e) {
         RSRpcServices.exitIfOOME(e);
         LOG.error("Unexpected exception in ReplicationSourceWorkerThread," + " currentPath="
-            + getCurrentPath(), e);
+            + getLastLoggedPath(), e);
         stopper.stop("Unexpected exception in ReplicationSourceWorkerThread");
       }
     };
```
|
|
```diff
@@ -881,8 +893,12 @@ public Path getCurrentPath() {
       return this.entryReader.getCurrentPath();
     }
 
-    public long getCurrentPosition() {
-      return this.lastLoggedPosition;
+    public Path getLastLoggedPath() {
+      return lastLoggedPath;
+    }
+
+    public long getLastLoggedPosition() {
+      return lastLoggedPosition;
     }
 
     private boolean isWorkerActive() {
```
|
|
Contributor: Are the workerThreads sorted so that we get the last logged one first?

Author: They are not sorted. This is only for testing in a non-multi-WAL environment, the same as ReplicationSource.getCurrentPath().
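To make that answer concrete, a small model (illustrative types, not the HBase classes): with multi-WAL disabled there is exactly one WAL group, so the map holds at most one worker and returning the first iterated value is deterministic regardless of iteration order.

```java
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

// Illustrative model of why first-entry iteration suffices in these tests:
// without multi-WAL, workerThreads has at most one entry.
class SingleGroupModel {
  private final Map<String, Long> workerThreads = new ConcurrentHashMap<>();

  long getLastLoggedPosition() {
    for (long position : workerThreads.values()) {
      return position; // with one WAL group this is the only worker
    }
    return 0; // no workers started yet
  }
}
```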