28 | 28 | import java.util.HashMap; |
29 | 29 | import java.util.List; |
30 | 30 | import java.util.Map; |
| 31 | +import java.util.NavigableSet; |
31 | 32 | import java.util.TreeMap; |
| 33 | +import java.util.TreeSet; |
32 | 34 | import java.util.UUID; |
33 | 35 | import java.util.concurrent.ConcurrentHashMap; |
34 | 36 | import java.util.concurrent.PriorityBlockingQueue; |
|
37 | 39 | import java.util.concurrent.atomic.AtomicBoolean; |
38 | 40 | import java.util.concurrent.atomic.AtomicLong; |
39 | 41 | import java.util.function.Predicate; |
| 42 | +import java.util.stream.Collectors; |
40 | 43 |
41 | 44 | import org.apache.commons.lang3.StringUtils; |
42 | 45 | import org.apache.commons.lang3.mutable.MutableBoolean; |
|
54 | 57 | import org.apache.hadoop.hbase.replication.ChainWALEntryFilter; |
55 | 58 | import org.apache.hadoop.hbase.replication.ClusterMarkingEntryFilter; |
56 | 59 | import org.apache.hadoop.hbase.replication.ReplicationEndpoint; |
| 60 | +import org.apache.hadoop.hbase.replication.ReplicationException; |
57 | 61 | import org.apache.hadoop.hbase.replication.ReplicationPeer; |
58 | 62 | import org.apache.hadoop.hbase.replication.ReplicationQueueInfo; |
59 | 63 | import org.apache.hadoop.hbase.replication.ReplicationQueueStorage; |
| 64 | +import org.apache.hadoop.hbase.replication.ReplicationUtils; |
60 | 65 | import org.apache.hadoop.hbase.replication.SystemTableWALEntryFilter; |
61 | 66 | import org.apache.hadoop.hbase.replication.WALEntryFilter; |
62 | 67 | import org.apache.hadoop.hbase.util.Threads; |
63 | 68 | import org.apache.hadoop.hbase.wal.AbstractFSWALProvider; |
| 69 | +import org.apache.hadoop.hbase.wal.SyncReplicationWALProvider; |
64 | 70 | import org.apache.hadoop.hbase.wal.WAL.Entry; |
65 | 71 | import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; |
66 | 72 | import org.apache.yetus.audience.InterfaceAudience; |
| 73 | +import org.apache.zookeeper.KeeperException; |
67 | 74 | import org.slf4j.Logger; |
68 | 75 | import org.slf4j.LoggerFactory; |
69 | 76 | import org.apache.hbase.thirdparty.com.google.common.collect.Lists; |
@@ -95,7 +102,7 @@ public class ReplicationSource implements ReplicationSourceInterface { |
95 | 102 | protected ReplicationQueueInfo replicationQueueInfo; |
96 | 103 |
97 | 104 | // The manager of all sources to which we ping back our progress |
98 | | - protected ReplicationSourceManager manager; |
| 105 | + ReplicationSourceManager manager; |
99 | 106 | // Should we stop everything? |
100 | 107 | protected Server server; |
101 | 108 | // How long should we sleep for each retry |
@@ -140,8 +147,6 @@ public class ReplicationSource implements ReplicationSourceInterface { |
140 | 147 | protected final ConcurrentHashMap<String, ReplicationSourceShipper> workerThreads = |
141 | 148 | new ConcurrentHashMap<>(); |
142 | 149 |
143 | | - private AtomicLong totalBufferUsed; |
144 | | - |
145 | 150 | public static final String WAIT_ON_ENDPOINT_SECONDS = |
146 | 151 | "hbase.replication.wait.on.endpoint.seconds"; |
147 | 152 | public static final int DEFAULT_WAIT_ON_ENDPOINT_SECONDS = 30; |
@@ -193,7 +198,7 @@ public class ReplicationSource implements ReplicationSourceInterface { |
193 | 198 | * @param metrics metrics for replication source |
194 | 199 | */ |
195 | 200 | @Override |
196 | | - public void init(Configuration conf, FileSystem fs, ReplicationSourceManager manager, |
| 201 | + public void init(Configuration conf, FileSystem fs, Path walDir, ReplicationSourceManager manager, |
197 | 202 | ReplicationQueueStorage queueStorage, ReplicationPeer replicationPeer, Server server, |
198 | 203 | String queueId, UUID clusterId, WALFileLengthProvider walFileLengthProvider, |
199 | 204 | MetricsSource metrics) throws IOException { |
@@ -221,7 +226,6 @@ public void init(Configuration conf, FileSystem fs, ReplicationSourceManager man |
221 | 226 | defaultBandwidth = this.conf.getLong("replication.source.per.peer.node.bandwidth", 0); |
222 | 227 | currentBandwidth = getCurrentBandwidth(); |
223 | 228 | this.throttler = new ReplicationThrottler((double) currentBandwidth / 10.0); |
224 | | - this.totalBufferUsed = manager.getTotalBufferUsed(); |
225 | 229 | this.walFileLengthProvider = walFileLengthProvider; |
226 | 230 |
227 | 231 | this.abortOnError = this.conf.getBoolean("replication.source.regionserver.abort", |
@@ -408,9 +412,9 @@ protected ReplicationSourceShipper createNewShipper(String walGroupId, |
408 | 412 |
409 | 413 | private ReplicationSourceWALReader createNewWALReader(String walGroupId, |
410 | 414 | PriorityBlockingQueue<Path> queue, long startPosition) { |
411 | | - return replicationPeer.getPeerConfig().isSerial() |
412 | | - ? new SerialReplicationSourceWALReader(fs, conf, queue, startPosition, walEntryFilter, this) |
413 | | - : new ReplicationSourceWALReader(fs, conf, queue, startPosition, walEntryFilter, this); |
| 415 | + return replicationPeer.getPeerConfig().isSerial() ? |
| 416 | + new SerialReplicationSourceWALReader(fs, conf, queue, startPosition, walEntryFilter, this) : |
| 417 | + new ReplicationSourceWALReader(fs, conf, queue, startPosition, walEntryFilter, this); |
414 | 418 | } |
415 | 419 |
416 | 420 | /** |
@@ -451,11 +455,6 @@ public ReplicationEndpoint getReplicationEndpoint() { |
451 | 455 | return this.replicationEndpoint; |
452 | 456 | } |
453 | 457 |
454 | | - @Override |
455 | | - public ReplicationSourceManager getSourceManager() { |
456 | | - return this.manager; |
457 | | - } |
458 | | - |
459 | 458 | @Override |
460 | 459 | public void tryThrottle(int batchSize) throws InterruptedException { |
461 | 460 | checkBandwidthChangeAndResetThrottler(); |
@@ -783,7 +782,7 @@ public void postShipEdits(List<Entry> entries, int batchSize) { |
783 | 782 | throttler.addPushSize(batchSize); |
784 | 783 | } |
785 | 784 | totalReplicatedEdits.addAndGet(entries.size()); |
786 | | - long newBufferUsed = totalBufferUsed.addAndGet(-batchSize); |
| 785 | + long newBufferUsed = manager.getTotalBufferUsed().addAndGet(-batchSize); |
787 | 786 | // Record the new buffer usage |
788 | 787 | this.manager.getGlobalMetrics().setWALReaderEditsBufferBytes(newBufferUsed); |
789 | 788 | } |
@@ -818,4 +817,137 @@ void removeWorker(ReplicationSourceShipper worker) { |
818 | 817 | private String logPeerId(){ |
819 | 818 | return "peerId=" + this.getPeerId() + ","; |
820 | 819 | } |
| 820 | + |
| 821 | + @VisibleForTesting |
| 822 | + public void setWALPosition(WALEntryBatch entryBatch) { |
| 823 | + String fileName = entryBatch.getLastWalPath().getName(); |
| 824 | + interruptOrAbortWhenFail(() -> this.queueStorage |
| 825 | + .setWALPosition(server.getServerName(), getQueueId(), fileName, |
| 826 | + entryBatch.getLastWalPosition(), entryBatch.getLastSeqIds())); |
| 827 | + } |
| 828 | + |
| 829 | + @VisibleForTesting |
| 830 | + public void cleanOldWALs(String log, boolean inclusive) { |
| 831 | + NavigableSet<String> walsToRemove = getWalsToRemove(log, inclusive); |
| 832 | + if (walsToRemove.isEmpty()) { |
| 833 | + return; |
| 834 | + } |
| 835 | + // cleanOldWALs may spend some time, especially for sync replication where we may want to |
| 836 | + // remove remote wals as the remote cluster may have already been down, so we do it outside |
 | 837 | + // the lock to avoid blocking preLogRoll |
| 838 | + cleanOldWALs(walsToRemove); |
| 839 | + } |
| 840 | + |
| 841 | + private NavigableSet<String> getWalsToRemove(String log, boolean inclusive) { |
| 842 | + NavigableSet<String> walsToRemove = new TreeSet<>(); |
| 843 | + String logPrefix = AbstractFSWALProvider.getWALPrefixFromWALName(log); |
| 844 | + try { |
| 845 | + this.queueStorage.getWALsInQueue(this.server.getServerName(), getQueueId()).forEach(wal -> { |
| 846 | + LOG.debug("getWalsToRemove wal {}", wal); |
| 847 | + String walPrefix = AbstractFSWALProvider.getWALPrefixFromWALName(wal); |
| 848 | + if (walPrefix.equals(logPrefix)) { |
| 849 | + walsToRemove.add(wal); |
| 850 | + } |
| 851 | + }); |
| 852 | + } catch (ReplicationException e) { |
 | 853 | + // Just log the exception here, as the recovered replication source will try to clean up again. |
| 854 | + LOG.warn("Failed to read wals in queue {}", getQueueId(), e); |
| 855 | + } |
| 856 | + return walsToRemove.headSet(log, inclusive); |
| 857 | + } |
| 858 | + |
| 859 | + private void removeRemoteWALs(String peerId, String remoteWALDir, Collection<String> wals) |
| 860 | + throws IOException { |
| 861 | + Path remoteWALDirForPeer = ReplicationUtils.getPeerRemoteWALDir(remoteWALDir, peerId); |
| 862 | + FileSystem fs = ReplicationUtils.getRemoteWALFileSystem(conf, remoteWALDir); |
| 863 | + for (String wal : wals) { |
| 864 | + Path walFile = new Path(remoteWALDirForPeer, wal); |
| 865 | + try { |
| 866 | + if (!fs.delete(walFile, false) && fs.exists(walFile)) { |
| 867 | + throw new IOException("Can not delete " + walFile); |
| 868 | + } |
| 869 | + } catch (FileNotFoundException e) { |
| 870 | + // Just ignore since this means the file has already been deleted. |
| 871 | + // The javadoc of the FileSystem.delete methods does not specify the behavior of deleting an |
 | 872 | + // nonexistent file, so here we deal with both, i.e., check the return value of the |
| 873 | + // FileSystem.delete, and also catch FNFE. |
| 874 | + LOG.debug("The remote wal {} has already been deleted?", walFile, e); |
| 875 | + } |
| 876 | + } |
| 877 | + } |
| 878 | + |
| 879 | + private void cleanOldWALs(NavigableSet<String> wals) { |
| 880 | + LOG.debug("Removing {} logs in the list: {}", wals.size(), wals); |
 | 881 | + // The intention here is that we want to delete the remote wal files ASAP as it may affect the |
 | 882 | + // failover time if you want to transit the remote cluster from S to A. And the infinite retry |
 | 883 | + // is not a problem, as if we cannot contact the remote HDFS cluster, then usually we cannot |
 | 884 | + // contact the HBase cluster either, so replication will be blocked anyway. |
| 885 | + if (isSyncReplication()) { |
| 886 | + String peerId = getPeerId(); |
| 887 | + String remoteWALDir = replicationPeer.getPeerConfig().getRemoteWALDir(); |
 | 888 | + // Filter out the wals that need to be removed from the remote directory. Their names should be |
 | 889 | + // in the special format, and also, the peer id in the name should match the peer id for the |
 | 890 | + // replication source. |
| 891 | + List<String> remoteWals = wals.stream().filter(w -> SyncReplicationWALProvider |
| 892 | + .getSyncReplicationPeerIdFromWALName(w).map(peerId::equals).orElse(false)) |
| 893 | + .collect(Collectors.toList()); |
| 894 | + LOG.debug("Removing {} logs from remote dir {} in the list: {}", remoteWals.size(), |
| 895 | + remoteWALDir, remoteWals); |
| 896 | + if (!remoteWals.isEmpty()) { |
| 897 | + for (int sleepMultiplier = 0;;) { |
| 898 | + try { |
| 899 | + removeRemoteWALs(peerId, remoteWALDir, remoteWals); |
| 900 | + break; |
| 901 | + } catch (IOException e) { |
| 902 | + LOG.warn("Failed to delete remote wals from remote dir {} for peer {}", remoteWALDir, |
| 903 | + peerId); |
| 904 | + } |
| 905 | + if (!isSourceActive()) { |
| 906 | + // skip the following operations |
| 907 | + return; |
| 908 | + } |
| 909 | + if (ReplicationUtils.sleepForRetries("Failed to delete remote wals", sleepForRetries, |
| 910 | + sleepMultiplier, maxRetriesMultiplier)) { |
| 911 | + sleepMultiplier++; |
| 912 | + } |
| 913 | + } |
| 914 | + } |
| 915 | + } |
| 916 | + for (String wal : wals) { |
| 917 | + interruptOrAbortWhenFail( |
| 918 | + () -> this.queueStorage.removeWAL(server.getServerName(), getQueueId(), wal)); |
| 919 | + } |
| 920 | + } |
| 921 | + |
| 922 | + public void cleanUpHFileRefs(List<String> files) { |
| 923 | + interruptOrAbortWhenFail(() -> this.queueStorage.removeHFileRefs(getPeerId(), files)); |
| 924 | + } |
| 925 | + |
| 926 | + @FunctionalInterface |
| 927 | + private interface ReplicationQueueOperation { |
| 928 | + void exec() throws ReplicationException; |
| 929 | + } |
| 930 | + |
| 931 | + /** |
 | 932 | + * Refreshing the replication source will terminate the old source first, then the source thread |
 | 933 | + * will be interrupted. We need to handle this instead of aborting the region server. |
| 934 | + */ |
| 935 | + private void interruptOrAbortWhenFail(ReplicationQueueOperation op) { |
| 936 | + try { |
| 937 | + op.exec(); |
| 938 | + } catch (ReplicationException e) { |
| 939 | + if (e.getCause() != null && e.getCause() instanceof KeeperException.SystemErrorException |
| 940 | + && e.getCause().getCause() != null && e.getCause() |
| 941 | + .getCause() instanceof InterruptedException) { |
 | 942 | + // A ReplicationRuntimeException (a RuntimeException) is thrown here. The reason is |
 | 943 | + // that the thread was interrupted deep down in the stack; it should skip the following |
 | 944 | + // processing logic and propagate to the topmost layer, which can handle this exception |
 | 945 | + // properly. In this specific case, the top layer is ReplicationSourceShipper#run(). |
| 946 | + throw new ReplicationRuntimeException( |
| 947 | + "Thread is interrupted, the replication source may be terminated", |
| 948 | + e.getCause().getCause()); |
| 949 | + } |
| 950 | + server.abort("Failed to operate on replication queue", e); |
| 951 | + } |
| 952 | + } |
821 | 953 | } |
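
Side note on the new cleanOldWALs flow: getWalsToRemove gathers the WALs of the same wal group from the replication queue into a TreeSet and then narrows them with NavigableSet.headSet(log, inclusive), so only WALs at or before the named log are handed to cleanOldWALs. A minimal standalone sketch of that headSet behavior follows; it is not HBase code, and the WAL names are invented purely for illustration:

import java.util.NavigableSet;
import java.util.TreeSet;

public class WalHeadSetSketch {
  public static void main(String[] args) {
    // WAL names within one wal group sort lexicographically by their timestamp
    // suffix, so a TreeSet keeps them oldest-first (names are made up here).
    NavigableSet<String> walsInQueue = new TreeSet<>();
    walsInQueue.add("rs1%2C16020%2C1.1577000000001");
    walsInQueue.add("rs1%2C16020%2C1.1577000000002");
    walsInQueue.add("rs1%2C16020%2C1.1577000000003");

    String rolledWal = "rs1%2C16020%2C1.1577000000002";
    // headSet(rolledWal, true) keeps the named WAL and everything older.
    System.out.println(walsInQueue.headSet(rolledWal, true));
    // -> [rs1%2C16020%2C1.1577000000001, rs1%2C16020%2C1.1577000000002]
    // headSet(rolledWal, false) keeps only strictly older WALs.
    System.out.println(walsInQueue.headSet(rolledWal, false));
    // -> [rs1%2C16020%2C1.1577000000001]
  }
}

The inclusive flag therefore lets callers decide whether the named WAL itself is eligible for removal or only the WALs that precede it.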