Skip to content

Commit de1cfdb

Browse files
committed
HADOOP-17628. DistCp contract test speedup.
* Address review comments. * Log IOStatistics after each test case. Important: as the cached FS retains statistics, the numbers get bigger over time. * The HDFS test is now reinstated, as we've identified that most of its long execution time comes from the large file upload/download suites. Disabling them drops its execution time from 4m to 30s, which means it can then be used to make sure the contract suite is consistent between HDFS and the object stores. Change-Id: I6d1cf5bf42916035a806fa9e78c003074f8f12b9
1 parent 8c9d528 commit de1cfdb

3 files changed

Lines changed: 41 additions & 15 deletions

File tree

hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDistCp.java

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818

1919
package org.apache.hadoop.fs.contract.s3a;
2020

21-
import static org.apache.hadoop.fs.contract.ContractTestUtils.skip;
2221
import static org.apache.hadoop.fs.s3a.Constants.*;
2322
import static org.apache.hadoop.fs.s3a.S3ATestConstants.SCALE_TEST_TIMEOUT_MILLIS;
2423
import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard;
@@ -74,14 +73,8 @@ public void testDistCpWithIterator() throws Exception {
7473
renames);
7574
}
7675

77-
@Override
78-
public void testDirectWrite() throws Exception {
79-
skip("Not needed as all tests are direct by default");
80-
}
81-
8276
@Override
8377
public void testNonDirectWrite() throws Exception {
84-
// ContractTestUtils.skip("disabled for peformance reasons");
8578
final long renames = getRenameOperationCount();
8679
super.testNonDirectWrite();
8780
assertEquals("Expected 2 renames for a non-direct write distcp", 2L,

hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@
1818

1919
package org.apache.hadoop.tools.contract;
2020

21+
import static org.apache.hadoop.fs.CommonConfigurationKeys.IOSTATISTICS_LOGGING_LEVEL_INFO;
2122
import static org.apache.hadoop.fs.contract.ContractTestUtils.*;
23+
import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.logIOStatisticsAtLevel;
2224
import static org.apache.hadoop.tools.DistCpConstants.CONF_LABEL_DISTCP_JOB_ID;
2325

2426
import java.io.IOException;
@@ -170,12 +172,20 @@ public void setup() throws Exception {
170172
localDir =
171173
localFS.makeQualified(new Path(new Path(
172174
GenericTestUtils.getTestDir().toURI()), testSubDir + "/local"));
175+
localFS.delete(localDir, true);
173176
mkdirs(localFS, localDir);
174-
remoteDir = path(testSubDir + "/remote");
177+
Path testSubPath = path(testSubDir);
178+
remoteDir = new Path(testSubPath, "remote");
175179
// test teardown does this, but IDE-based test debugging can skip
176180
// that teardown; this guarantees the initial state is clean
177181
remoteFS.delete(remoteDir, true);
178-
localFS.delete(localDir, true);
182+
}
183+
184+
@Override
185+
public void teardown() throws Exception {
186+
// if remote FS supports IOStatistics log it.
187+
logIOStatisticsAtLevel(LOG, IOSTATISTICS_LOGGING_LEVEL_INFO, getRemoteFS());
188+
super.teardown();
179189
}
180190

181191
/**
@@ -550,9 +560,9 @@ private Path distCpDeepDirectoryStructure(FileSystem srcFS,
550560
private void largeFiles(FileSystem srcFS, Path srcDir, FileSystem dstFS,
551561
Path dstDir) throws Exception {
552562
int fileSizeKb = conf.getInt(SCALE_TEST_DISTCP_FILE_SIZE_KB,
553-
DEFAULT_DISTCP_SIZE_KB);
563+
getDefaultDistCPSizeKb());
554564
if (fileSizeKb < 1) {
555-
skip("File size in " + SCALE_TEST_DISTCP_FILE_SIZE_KB + " too small");
565+
skip("File size in " + SCALE_TEST_DISTCP_FILE_SIZE_KB + " is zero");
556566
}
557567
initPathFields(srcDir, dstDir);
558568
Path largeFile1 = new Path(inputDir, "file1");
@@ -573,6 +583,18 @@ private void largeFiles(FileSystem srcFS, Path srcDir, FileSystem dstFS,
573583
verifyFileContents(dstFS, new Path(target, "inputDir/file3"), data3);
574584
}
575585

586+
/**
587+
* Override point: the default distcp file size, in KB,
588+
* for large files if not overridden by
589+
* {@link #SCALE_TEST_DISTCP_FILE_SIZE_KB}.
590+
* If 0 then, unless overridden in the configuration,
591+
* the large file tests will not run.
592+
* @return file size.
593+
*/
594+
protected int getDefaultDistCPSizeKb() {
595+
return DEFAULT_DISTCP_SIZE_KB;
596+
}
597+
576598
/**
577599
* Executes DistCp and asserts that the job finished successfully.
578600
* The choice of direct/indirect is based on the value of
@@ -629,14 +651,14 @@ private static void mkdirs(FileSystem fs, Path dir) throws Exception {
629651
@Test
630652
public void testDirectWrite() throws Exception {
631653
describe("copy file from local to remote using direct write option");
654+
if (directWriteAlways()) {
655+
skip("not needed as all other tests use the -direct option.");
656+
}
632657
directWrite(localFS, localDir, remoteFS, remoteDir, true);
633658
}
634659

635660
@Test
636661
public void testNonDirectWrite() throws Exception {
637-
if (directWriteAlways()) {
638-
skip("not needed");
639-
}
640662
describe("copy file from local to remote without using direct write " +
641663
"option");
642664
directWrite(localFS, localDir, remoteFS, remoteDir, false);

hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/OptionalTestHDFSContractDistCp.java renamed to hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/TestHDFSContractDistCp.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,9 @@
3030
* Verifies that the HDFS passes all the tests in
3131
* {@link AbstractContractDistCpTest}.
3232
* As such, it acts as an in-module validation of this contract test itself.
33+
* It skips the large file test cases for speed.
3334
*/
34-
public class OptionalTestHDFSContractDistCp extends AbstractContractDistCpTest {
35+
public class TestHDFSContractDistCp extends AbstractContractDistCpTest {
3536

3637
@BeforeClass
3738
public static void createCluster() throws IOException {
@@ -47,4 +48,14 @@ public static void teardownCluster() throws IOException {
4748
protected AbstractFSContract createContract(Configuration conf) {
4849
return new HDFSContract(conf);
4950
}
51+
52+
/**
53+
* Turn off the large file tests as they are very slow and there
54+
* are many other distcp to HDFS tests which verify such things.
55+
* @return 0
56+
*/
57+
@Override
58+
protected int getDefaultDistCPSizeKb() {
59+
return 0;
60+
}
5061
}

0 commit comments

Comments
 (0)