HDFS-17401. EC: Excess internal block may not be able to be deleted correctly when it's stored in fallback storage #6597
Changes from 4 commits
@@ -34,6 +34,7 @@
 import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
 import org.apache.hadoop.hdfs.protocol.LocatedStripedBlock;
 import org.apache.hadoop.hdfs.protocol.SystemErasureCodingPolicies;
+import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoStriped;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;

@@ -575,5 +576,78 @@ public void testReconstructionWithStorageTypeNotEnough() throws Exception {
       cluster.shutdown();
     }
   }
+  @Test
+  public void testDeleteOverReplicatedStripedBlock() throws Exception {
+    final HdfsConfiguration conf = new HdfsConfiguration();
+    conf.setInt(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY, 1);
+    conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_KEY,
+        false);
+    StorageType[][] st = new StorageType[groupSize + 2][1];
+    for (int i = 0; i < st.length - 1; i++) {
+      st[i] = new StorageType[]{StorageType.SSD};
+    }
+    st[st.length - 1] = new StorageType[]{StorageType.DISK};
+
+    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(groupSize + 2)
+        .storagesPerDatanode(1)
+        .storageTypes(st)
+        .build();
+    cluster.waitActive();
+    DistributedFileSystem fs = cluster.getFileSystem();
+    fs.enableErasureCodingPolicy(
+        StripedFileTestUtil.getDefaultECPolicy().getName());
+    try {
+      fs.mkdirs(dirPath);
+      fs.setErasureCodingPolicy(dirPath,
+          StripedFileTestUtil.getDefaultECPolicy().getName());
+      fs.setStoragePolicy(dirPath, HdfsConstants.ALLSSD_STORAGE_POLICY_NAME);
+      DFSTestUtil.createFile(fs, filePath,
+          cellSize * dataBlocks * 2, (short) 1, 0L);
+      // Stop a dn
+      LocatedBlocks blks = fs.getClient().getLocatedBlocks(filePath.toString(), 0);
+      LocatedStripedBlock block = (LocatedStripedBlock) blks.getLastLocatedBlock();
+      DatanodeInfo dnToStop = block.getLocations()[0];
+
+      MiniDFSCluster.DataNodeProperties dnProp =
+          cluster.stopDataNode(dnToStop.getXferAddr());
+      cluster.setDataNodeDead(dnToStop);
+
+      // Wait for reconstruction to happen
+      DFSTestUtil.waitForReplication(fs, filePath, groupSize, 15 * 1000);
+
+      DatanodeInfo dnToStop2 = block.getLocations()[1];
Contributor: here may be add `cluster.stopDataNode(dnToStop2.getXferAddr())`?
+      cluster.setDataNodeDead(dnToStop2);
+      DFSTestUtil.waitForReplication(fs, filePath, groupSize, 15 * 1000);
+
+      // Bring the dn back: 10 internal blocks now
+      cluster.restartDataNode(dnProp);
+      cluster.waitActive();
+      DFSTestUtil.verifyClientStats(conf, cluster);
+
+      // Currently namenode is able to track the missing block. And restart NN

Contributor: Why does the NameNode need to be restarted here? Maybe update this logic to avoid using `restartNameNode`.
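One possible direction for that suggestion (a sketch only, not from this PR): force fresh full block reports so the NameNode re-learns all ten internal blocks, then poll until the excess replica is gone. The snippet reuses the test's `cluster`, `fs`, `filePath`, and `groupSize` fields and assumes `GenericTestUtils.waitFor` plus MiniDFSCluster's `triggerBlockReports()`/`triggerHeartbeats()` helpers; whether they are sufficient without a NameNode restart has not been verified here.

```java
// Hypothetical alternative (untested) to cluster.restartNameNode(true).
cluster.triggerBlockReports(); // every DN sends a full block report to the NN
cluster.triggerHeartbeats();   // heartbeats let the NN dispatch delete commands

// Poll until the block group shrinks back to groupSize locations,
// i.e. the redundant internal block has been deleted.
GenericTestUtils.waitFor(() -> {
  try {
    LocatedBlocks lbs = fs.getClient().getLocatedBlocks(filePath.toString(), 0);
    return lbs.getLastLocatedBlock().getLocations().length == groupSize;
  } catch (IOException e) {
    return false;
  }
}, 500, 30_000);
```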
+      cluster.restartNameNode(true);
+
+      for (DataNode dn : cluster.getDataNodes()) {
+        DataNodeTestUtils.triggerBlockReport(dn);
+      }
+
+      Thread.sleep(3000); // Wait 3 running cycles of redundancy monitor
+      for (DataNode dn : cluster.getDataNodes()) {
+        DataNodeTestUtils.triggerHeartbeat(dn);
+      }
+      boolean isDeletedRedundantBlock = true;
+      blks = fs.getClient().getLocatedBlocks(filePath.toString(), 0);
+      block = (LocatedStripedBlock) blks.getLastLocatedBlock();
+      BitSet bitSet = new BitSet(groupSize);
+      for (byte index : block.getBlockIndices()) {
+        if (bitSet.get(index)) {
+          isDeletedRedundantBlock = false;
+        }
+        bitSet.set(index);
+      }
+      assertTrue(isDeletedRedundantBlock);
+    } finally {
+      cluster.shutdown();
+    }
+  }
 }
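The closing assertion relies on every internal block of a striped group having a distinct block index, so any repeated index means the redundant replica survived. A self-contained sketch of that check, with hypothetical index arrays for an RS-6-3 group (groupSize = 9):

```java
import java.util.BitSet;

public class DuplicateIndexCheck {
  // Returns true when every internal block index appears at most once,
  // i.e. the excess redundant replica was deleted (same logic as the test).
  static boolean redundantBlockDeleted(byte[] blockIndices, int groupSize) {
    BitSet seen = new BitSet(groupSize);
    for (byte index : blockIndices) {
      if (seen.get(index)) {
        return false; // index reported twice: an excess replica remains
      }
      seen.set(index);
    }
    return true;
  }

  public static void main(String[] args) {
    // Nine distinct indices -> the excess block was deleted.
    System.out.println(redundantBlockDeleted(
        new byte[]{0, 1, 2, 3, 4, 5, 6, 7, 8}, 9)); // true
    // Index 0 appears twice -> a redundant internal block is still there.
    System.out.println(redundantBlockDeleted(
        new byte[]{0, 0, 1, 2, 3, 4, 5, 6, 7, 8}, 9)); // false
  }
}
```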
Contributor: About `internalExcessTypes`, could you explain in which case it will be empty? Thanks~
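One way to explore that question (a probe sketch, not an answer from this PR): `internalExcessTypes` is presumably derived from the storage policy, so `BlockStoragePolicy#chooseExcess` can be called directly to see which chosen-type combinations yield an empty list. Treating each internal block index as replication 1 is an assumption here:

```java
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.fs.StorageType;
import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockStoragePolicySuite;

public class ChooseExcessProbe {
  public static void main(String[] args) {
    BlockStoragePolicy allSsd = BlockStoragePolicySuite.createDefaultSuite()
        .getPolicy(HdfsConstants.ALLSSD_STORAGE_POLICY_NAME);

    // Replica only on SSD: nothing exceeds the ALL_SSD policy.
    List<StorageType> noExcess =
        allSsd.chooseExcess((short) 1, Arrays.asList(StorageType.SSD));

    // Replica on SSD plus one on DISK fallback storage.
    List<StorageType> diskExcess = allSsd.chooseExcess((short) 1,
        Arrays.asList(StorageType.SSD, StorageType.DISK));

    System.out.println(noExcess);   // expected under this assumption: []
    System.out.println(diskExcess); // expected under this assumption: [DISK]
  }
}
```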