Skip to content

Commit c74fadd

Browse files
committed
HBASE-27871 Meta replication gets stuck forever if the WAL it is still reading gets rolled and deleted (apache#5271)
Signed-off-by: Peter Somogyi <[email protected]> (cherry picked from commit 91627ce)
1 parent 315df73 commit c74fadd

3 files changed

Lines changed: 57 additions & 1 deletion

File tree

hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceWALReader.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import java.util.concurrent.LinkedBlockingQueue;
2525
import java.util.concurrent.PriorityBlockingQueue;
2626
import java.util.concurrent.TimeUnit;
27+
import java.util.concurrent.atomic.AtomicBoolean;
2728
import java.util.concurrent.atomic.AtomicLong;
2829
import org.apache.hadoop.conf.Configuration;
2930
import org.apache.hadoop.fs.FileSystem;
@@ -79,6 +80,8 @@ class ReplicationSourceWALReader extends Thread {
7980
private long totalBufferQuota;
8081
private final String walGroupId;
8182

83+
AtomicBoolean waitingPeerEnabled = new AtomicBoolean(false);
84+
8285
/**
8386
* Creates a reader worker for a given WAL queue. Reads WAL entries off a given queue, batches the
8487
* entries, and puts them on a batch queue.
@@ -130,8 +133,11 @@ public void run() {
130133
while (isReaderRunning()) { // loop here to keep reusing stream while we can
131134
batch = null;
132135
if (!source.isPeerEnabled()) {
136+
waitingPeerEnabled.set(true);
133137
Threads.sleep(sleepForRetries);
134138
continue;
139+
} else {
140+
waitingPeerEnabled.set(false);
135141
}
136142
if (!checkQuota()) {
137143
continue;

hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/WALEntryStream.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import java.io.Closeable;
2121
import java.io.FileNotFoundException;
2222
import java.io.IOException;
23+
import java.io.UncheckedIOException;
2324
import java.util.OptionalLong;
2425
import java.util.concurrent.PriorityBlockingQueue;
2526
import org.apache.hadoop.conf.Configuration;
@@ -386,7 +387,10 @@ private void resetReader() throws IOException {
386387
if (archivedLog != null) {
387388
openReader(archivedLog);
388389
} else {
389-
throw fnfe;
390+
// For now, this could happen only when reading meta wal for meta replicas.
391+
// In this case, raising UncheckedIOException will let the endpoint deal with resetting
392+
// the replication source. See HBASE-27871.
393+
throw new UncheckedIOException(fnfe);
390394
}
391395
} catch (NullPointerException npe) {
392396
throw new IOException("NPE resetting reader, likely HDFS-4380", npe);

hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestMetaRegionReplicaReplicationEndpoint.java

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import java.util.Arrays;
3030
import java.util.List;
3131
import java.util.Objects;
32+
import org.apache.commons.lang3.mutable.MutableObject;
3233
import org.apache.hadoop.conf.Configuration;
3334
import org.apache.hadoop.hbase.Cell;
3435
import org.apache.hadoop.hbase.CellScanner;
@@ -53,6 +54,7 @@
5354
import org.apache.hadoop.hbase.regionserver.HRegionServer;
5455
import org.apache.hadoop.hbase.regionserver.Region;
5556
import org.apache.hadoop.hbase.regionserver.RegionScanner;
57+
import org.apache.hadoop.hbase.replication.ReplicationPeerImpl;
5658
import org.apache.hadoop.hbase.testclassification.LargeTests;
5759
import org.apache.hadoop.hbase.util.Bytes;
5860
import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
@@ -225,6 +227,50 @@ public void testCatalogReplicaReplicationWithFlushAndCompaction() throws Excepti
225227
}
226228
}
227229

230+
/**
 * Exercises catalog (hbase:meta) replica replication across a pause in which the master's log
 * cleaner is run, as the test name describes (WAL rolled and deleted).
 * <p>
 * Sequence: turn the balancer switch off, disable the catalog replication peer, wait until
 * every queued WAL group's reader reports {@code waitingPeerEnabled}, write five batches into
 * hbase:meta (flushing after each one and compacting after all but the last), run the log
 * cleaner, re-enable the peer, write five more batches without flushing, and finally call
 * {@code verifyReplication} for the 0 to 10000 range on every meta replica.
 * @throws Exception if any cluster operation, the wait, or the verification fails
 */
@Test
231+
public void testCatalogReplicaReplicationWALRolledAndDeleted() throws Exception {
232+
TableName tableName = TableName.valueOf("hbase:meta");
233+
try (Connection connection = ConnectionFactory.createConnection(HTU.getConfiguration());
234+
Table table = connection.getTable(tableName)) {
235+
MiniHBaseCluster cluster = HTU.getHBaseCluster();
236+
cluster.getMaster().balanceSwitch(false);
237+
HRegionServer hrs = cluster.getRegionServer(cluster.getServerHoldingMeta());
238+
ReplicationSource source = (ReplicationSource) hrs.getReplicationSourceService()
239+
.getReplicationManager().catalogReplicationSource.get();
240+
((ReplicationPeerImpl) source.replicationPeer).setPeerState(false);
241+
// There's a small chance the source reader has passed the peer state check but not yet read the
242+
// WAL, which could allow it to read some added entries before the WAL gets deleted,
243+
// so we make sure here that we only proceed once the reader loop has managed to
244+
// detect that the peer is disabled.
245+
HTU.waitFor(2000, 100, true, () -> {
246+
MutableObject<Boolean> readerWaiting = new MutableObject<>(true);
247+
source.logQueue.getQueues().keySet()
248+
.forEach(w -> readerWaiting.setValue(readerWaiting.getValue()
249+
&& source.workerThreads.get(w).entryReader.waitingPeerEnabled.get()));
250+
return readerWaiting.getValue();
251+
});
252+
// load the data to the table
253+
for (int i = 0; i < 5; i++) {
254+
LOG.info("Writing data from " + i * 1000 + " to " + (i * 1000 + 1000));
255+
HTU.loadNumericRows(table, HConstants.CATALOG_FAMILY, i * 1000, i * 1000 + 1000);
256+
LOG.info("flushing table");
257+
HTU.flush(tableName);
258+
LOG.info("compacting table");
259+
if (i < 4) {
260+
HTU.compact(tableName, false);
261+
}
262+
}
263+
// Run the log cleaner while the peer is still disabled; presumably this is what removes the
// WAL the paused reader was positioned on (the HBASE-27871 scenario) - confirm against the
// cleaner's behaviour.
HTU.getHBaseCluster().getMaster().getLogCleaner().runCleaner();
264+
((ReplicationPeerImpl) source.replicationPeer).setPeerState(true);
265+
// now load more data without flushing or compacting
266+
for (int i = 5; i < 10; i++) {
267+
LOG.info("Writing data from " + i * 1000 + " to " + (i * 1000 + 1000));
268+
HTU.loadNumericRows(table, HConstants.CATALOG_FAMILY, i * 1000, i * 1000 + 1000);
269+
}
270+
verifyReplication(tableName, numOfMetaReplica, 0, 10000, HConstants.CATALOG_FAMILY);
271+
}
272+
}
273+
228274
@Test
229275
public void testCatalogReplicaReplicationWithReplicaMoved() throws Exception {
230276
MiniHBaseCluster cluster = HTU.getMiniHBaseCluster();

0 commit comments

Comments
 (0)