Skip to content

Commit 50caba1

Browse files
committed
HDFS-15207. VolumeScanner skip to scan blocks accessed during recent scan period. Contributed by Yang Yun.
1 parent a3f44da commit 50caba1

5 files changed

Lines changed: 80 additions & 0 deletions

File tree

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -846,6 +846,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
846846
public static final int DFS_DATANODE_SCAN_PERIOD_HOURS_DEFAULT = 21 * 24; // 3 weeks.
847847
public static final String DFS_BLOCK_SCANNER_VOLUME_BYTES_PER_SECOND = "dfs.block.scanner.volume.bytes.per.second";
848848
public static final long DFS_BLOCK_SCANNER_VOLUME_BYTES_PER_SECOND_DEFAULT = 1048576L;
849+
public static final String DFS_BLOCK_SCANNER_SKIP_RECENT_ACCESSED =
850+
"dfs.block.scanner.skip.recent.accessed";
851+
public static final boolean DFS_BLOCK_SCANNER_SKIP_RECENT_ACCESSED_DEFAULT =
852+
false;
849853
public static final String DFS_DATANODE_TRANSFERTO_ALLOWED_KEY = "dfs.datanode.transferTo.allowed";
850854
public static final boolean DFS_DATANODE_TRANSFERTO_ALLOWED_DEFAULT = true;
851855
public static final String DFS_HEARTBEAT_INTERVAL_KEY = "dfs.heartbeat.interval";

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockScanner.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818

1919
package org.apache.hadoop.hdfs.server.datanode;
2020

21+
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SCANNER_SKIP_RECENT_ACCESSED;
22+
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SCANNER_SKIP_RECENT_ACCESSED_DEFAULT;
2123
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SCANNER_VOLUME_BYTES_PER_SECOND;
2224
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SCANNER_VOLUME_BYTES_PER_SECOND_DEFAULT;
2325
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_KEY;
@@ -112,6 +114,7 @@ static class Conf {
112114
final long maxStalenessMs;
113115
final long scanPeriodMs;
114116
final long cursorSaveMs;
117+
final boolean skipRecentAccessed;
115118
final Class<? extends ScanResultHandler> resultHandler;
116119

117120
private static long getUnitTestLong(Configuration conf, String key,
@@ -163,6 +166,9 @@ private static long getConfiguredScanPeriodMs(Configuration conf) {
163166
this.cursorSaveMs = Math.max(0L, getUnitTestLong(conf,
164167
INTERNAL_DFS_BLOCK_SCANNER_CURSOR_SAVE_INTERVAL_MS,
165168
INTERNAL_DFS_BLOCK_SCANNER_CURSOR_SAVE_INTERVAL_MS_DEFAULT));
169+
this.skipRecentAccessed = conf.getBoolean(
170+
DFS_BLOCK_SCANNER_SKIP_RECENT_ACCESSED,
171+
DFS_BLOCK_SCANNER_SKIP_RECENT_ACCESSED_DEFAULT);
166172
if (allowUnitTestSettings) {
167173
this.resultHandler = (Class<? extends ScanResultHandler>)
168174
conf.getClass(INTERNAL_VOLUME_SCANNER_SCAN_RESULT_HANDLER,

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/VolumeScanner.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,11 @@
1919
package org.apache.hadoop.hdfs.server.datanode;
2020

2121
import java.io.DataOutputStream;
22+
import java.io.File;
2223
import java.io.FileNotFoundException;
2324
import java.io.IOException;
25+
import java.nio.file.Files;
26+
import java.nio.file.attribute.BasicFileAttributes;
2427
import java.util.ArrayList;
2528
import java.util.Iterator;
2629
import java.util.LinkedHashSet;
@@ -32,6 +35,7 @@
3235
import com.google.common.cache.Cache;
3336
import com.google.common.cache.CacheBuilder;
3437
import org.apache.hadoop.hdfs.protocol.Block;
38+
import org.apache.hadoop.hdfs.protocol.BlockLocalPathInfo;
3539
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
3640
import org.apache.hadoop.hdfs.server.datanode.BlockScanner.Conf;
3741
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeReference;
@@ -540,6 +544,24 @@ private long runLoop(ExtendedBlock suspectBlock) {
540544
this, curBlockIter.getBlockPoolId());
541545
saveBlockIterator(curBlockIter);
542546
return 0;
547+
} else if (conf.skipRecentAccessed) {
548+
// Check the access time of block file to avoid scanning recently
549+
// changed blocks, reducing disk IO.
550+
try {
551+
BlockLocalPathInfo blockLocalPathInfo =
552+
volume.getDataset().getBlockLocalPathInfo(block);
553+
BasicFileAttributes attr = Files.readAttributes(
554+
new File(blockLocalPathInfo.getBlockPath()).toPath(),
555+
BasicFileAttributes.class);
556+
if (System.currentTimeMillis() - attr.lastAccessTime().
557+
to(TimeUnit.MILLISECONDS) < conf.scanPeriodMs) {
558+
return 0;
559+
}
560+
561+
} catch (IOException ioe) {
562+
LOG.debug("Failed to get access time of block {}",
563+
block, ioe);
564+
}
543565
}
544566
}
545567
if (curBlockIter != null) {

hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1586,6 +1586,16 @@
15861586
</description>
15871587
</property>
15881588

1589+
<property>
1590+
<name>dfs.block.scanner.skip.recent.accessed</name>
1591+
<value>false</value>
1592+
<description>
1593+
If this is true, scanner will check the access time of block file to avoid
1594+
scanning blocks accessed during the recent scan period, reducing disk IO.
1595+
This feature will not work if the DataNode volume has noatime mount option.
1596+
</description>
1597+
</property>
1598+
15891599
<property>
15901600
<name>dfs.datanode.readahead.bytes</name>
15911601
<value>4194304</value>

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockScanner.java

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
*/
1818
package org.apache.hadoop.hdfs.server.datanode;
1919

20+
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SCANNER_SKIP_RECENT_ACCESSED;
2021
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_KEY;
2122
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SCANNER_VOLUME_BYTES_PER_SECOND;
2223
import static org.apache.hadoop.hdfs.server.datanode.BlockScanner.Conf.INTERNAL_DFS_DATANODE_SCAN_PERIOD_MS;
@@ -25,6 +26,7 @@
2526
import static org.junit.Assert.assertEquals;
2627
import static org.junit.Assert.assertTrue;
2728
import static org.junit.Assert.assertFalse;
29+
import static org.junit.Assert.fail;
2830

2931
import java.io.Closeable;
3032
import java.io.File;
@@ -974,4 +976,40 @@ public Boolean get() {
974976
info.blocksScanned = 0;
975977
}
976978
}
979+
980+
@Test
981+
public void testSkipRecentAccessFile() throws Exception {
982+
Configuration conf = new Configuration();
983+
conf.setBoolean(DFS_BLOCK_SCANNER_SKIP_RECENT_ACCESSED, true);
984+
conf.setLong(INTERNAL_DFS_DATANODE_SCAN_PERIOD_MS, 2000L);
985+
conf.set(INTERNAL_VOLUME_SCANNER_SCAN_RESULT_HANDLER,
986+
TestScanResultHandler.class.getName());
987+
final TestContext ctx = new TestContext(conf, 1);
988+
final int totalBlocks = 5;
989+
ctx.createFiles(0, totalBlocks, 4096);
990+
991+
final TestScanResultHandler.Info info =
992+
TestScanResultHandler.getInfo(ctx.volumes.get(0));
993+
synchronized (info) {
994+
info.shouldRun = true;
995+
info.notify();
996+
}
997+
try {
998+
GenericTestUtils.waitFor(() -> {
999+
synchronized (info) {
1000+
return info.blocksScanned > 0;
1001+
}
1002+
}, 10, 500);
1003+
fail("Scan nothing for all files are accessed in last period.");
1004+
} catch (TimeoutException e) {
1005+
LOG.debug("Timeout for all files are accessed in last period.");
1006+
}
1007+
synchronized (info) {
1008+
info.shouldRun = false;
1009+
info.notify();
1010+
}
1011+
assertEquals("Should not scan block accessed in last period",
1012+
0, info.blocksScanned);
1013+
ctx.close();
1014+
}
9771015
}

0 commit comments

Comments
 (0)