Skip to content

Commit f701d7b

Browse files
brfrn169 authored and S O'Donnell committed
HDFS-15386. ReplicaNotFoundException keeps happening in DN after removing multiple DN's data directories (#2052)
Contributed by Toshihiro Suzuki. (cherry picked from commit 545a0a1) Conflicts: hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java (cherry picked from commit e5a02a7)
1 parent f30abb3 commit f701d7b

2 files changed

Lines changed: 96 additions & 10 deletions

File tree

  • hadoop-hdfs-project/hadoop-hdfs/src

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -557,7 +557,8 @@ public void removeVolumes(
557557
// Unlike updating the volumeMap in addVolume(), this operation does
558558
// not scan disks.
559559
for (String bpid : volumeMap.getBlockPoolList()) {
560-
List<ReplicaInfo> blocks = new ArrayList<>();
560+
List<ReplicaInfo> blocks = blkToInvalidate
561+
.computeIfAbsent(bpid, (k) -> new ArrayList<>());
561562
for (Iterator<ReplicaInfo> it =
562563
volumeMap.replicas(bpid).iterator(); it.hasNext();) {
563564
ReplicaInfo block = it.next();
@@ -570,9 +571,7 @@ public void removeVolumes(
570571
it.remove();
571572
}
572573
}
573-
blkToInvalidate.put(bpid, blocks);
574574
}
575-
576575
storageToRemove.add(sd.getStorageUuid());
577576
storageLocationsToRemove.remove(sdLocation);
578577
}

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java

Lines changed: 94 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020
import com.google.common.base.Supplier;
2121
import com.google.common.collect.Lists;
2222

23+
import java.io.OutputStream;
2324
import java.nio.file.Files;
2425
import java.nio.file.Paths;
2526
import org.apache.commons.io.FileUtils;
@@ -105,6 +106,8 @@
105106
import static org.mockito.Mockito.doThrow;
106107
import static org.mockito.Mockito.mock;
107108
import static org.mockito.Mockito.spy;
109+
import static org.mockito.Mockito.times;
110+
import static org.mockito.Mockito.verify;
108111
import static org.mockito.Mockito.when;
109112

110113
import org.slf4j.Logger;
@@ -264,16 +267,24 @@ public void testAddVolumeWithSameStorageUuid() throws IOException {
264267
}
265268

266269
@Test(timeout = 30000)
267-
public void testRemoveVolumes() throws IOException {
270+
public void testRemoveOneVolume() throws IOException {
268271
// Feed FsDataset with block metadata.
269-
final int NUM_BLOCKS = 100;
270-
for (int i = 0; i < NUM_BLOCKS; i++) {
271-
String bpid = BLOCK_POOL_IDS[NUM_BLOCKS % BLOCK_POOL_IDS.length];
272+
final int numBlocks = 100;
273+
for (int i = 0; i < numBlocks; i++) {
274+
String bpid = BLOCK_POOL_IDS[numBlocks % BLOCK_POOL_IDS.length];
272275
ExtendedBlock eb = new ExtendedBlock(bpid, i);
273-
try (ReplicaHandler replica =
274-
dataset.createRbw(StorageType.DEFAULT, null, eb, false)) {
276+
ReplicaHandler replica = null;
277+
try {
278+
replica = dataset.createRbw(StorageType.DEFAULT, null, eb,
279+
false);
280+
} finally {
281+
if (replica != null) {
282+
replica.close();
283+
}
275284
}
276285
}
286+
287+
// Remove one volume
277288
final String[] dataDirs =
278289
conf.get(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY).split(",");
279290
final String volumePathToRemove = dataDirs[0];
@@ -296,6 +307,11 @@ public void testRemoveVolumes() throws IOException {
296307
assertEquals("The volume has been removed from the storageMap.",
297308
expectedNumVolumes, dataset.storageMap.size());
298309

310+
// DataNode.notifyNamenodeDeletedBlock() should be called 50 times
311+
// as we deleted one volume that has 50 blocks
312+
verify(datanode, times(50))
313+
.notifyNamenodeDeletedBlock(any(), any());
314+
299315
try {
300316
dataset.asyncDiskService.execute(volumeToRemove,
301317
new Runnable() {
@@ -313,10 +329,81 @@ public void run() {}
313329
totalNumReplicas += dataset.volumeMap.size(bpid);
314330
}
315331
assertEquals("The replica infos on this volume has been removed from the "
316-
+ "volumeMap.", NUM_BLOCKS / NUM_INIT_VOLUMES,
332+
+ "volumeMap.", numBlocks / NUM_INIT_VOLUMES,
317333
totalNumReplicas);
318334
}
319335

336+
@Test(timeout = 30000)
337+
public void testRemoveTwoVolumes() throws IOException {
338+
// Feed FsDataset with block metadata.
339+
final int numBlocks = 100;
340+
for (int i = 0; i < numBlocks; i++) {
341+
String bpid = BLOCK_POOL_IDS[numBlocks % BLOCK_POOL_IDS.length];
342+
ExtendedBlock eb = new ExtendedBlock(bpid, i);
343+
ReplicaHandler replica = null;
344+
try {
345+
replica = dataset.createRbw(StorageType.DEFAULT, null, eb,
346+
false);
347+
} finally {
348+
if (replica != null) {
349+
replica.close();
350+
}
351+
}
352+
}
353+
354+
// Remove two volumes
355+
final String[] dataDirs =
356+
conf.get(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY).split(",");
357+
Set<StorageLocation> volumesToRemove = new HashSet<>();
358+
volumesToRemove.add(StorageLocation.parse(dataDirs[0]));
359+
volumesToRemove.add(StorageLocation.parse(dataDirs[1]));
360+
361+
FsVolumeReferences volReferences = dataset.getFsVolumeReferences();
362+
Set<FsVolumeImpl> volumes = new HashSet<>();
363+
for (FsVolumeSpi vol: volReferences) {
364+
for (StorageLocation volume : volumesToRemove) {
365+
if (vol.getStorageLocation().equals(volume)) {
366+
volumes.add((FsVolumeImpl) vol);
367+
}
368+
}
369+
}
370+
assertEquals(2, volumes.size());
371+
volReferences.close();
372+
373+
dataset.removeVolumes(volumesToRemove, true);
374+
int expectedNumVolumes = dataDirs.length - 2;
375+
assertEquals("The volume has been removed from the volumeList.",
376+
expectedNumVolumes, getNumVolumes());
377+
assertEquals("The volume has been removed from the storageMap.",
378+
expectedNumVolumes, dataset.storageMap.size());
379+
380+
// DataNode.notifyNamenodeDeletedBlock() should be called 100 times
381+
// as we deleted 2 volumes that have 100 blocks totally
382+
verify(datanode, times(100))
383+
.notifyNamenodeDeletedBlock(any(), any());
384+
385+
for (FsVolumeImpl volume : volumes) {
386+
try {
387+
dataset.asyncDiskService.execute(volume,
388+
new Runnable() {
389+
@Override
390+
public void run() {}
391+
});
392+
fail("Expect RuntimeException: the volume has been removed from the "
393+
+ "AsyncDiskService.");
394+
} catch (RuntimeException e) {
395+
GenericTestUtils.assertExceptionContains("Cannot find volume", e);
396+
}
397+
}
398+
399+
int totalNumReplicas = 0;
400+
for (String bpid : dataset.volumeMap.getBlockPoolList()) {
401+
totalNumReplicas += dataset.volumeMap.size(bpid);
402+
}
403+
assertEquals("The replica infos on this volume has been removed from the "
404+
+ "volumeMap.", 0, totalNumReplicas);
405+
}
406+
320407
@Test(timeout = 5000)
321408
public void testRemoveNewlyAddedVolume() throws IOException {
322409
final int numExistingVolumes = getNumVolumes();

0 commit comments

Comments
 (0)