From 6f839a1c248655eb6515751deefea57b29d74d61 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Tue, 27 Jul 2021 15:13:05 +0100 Subject: [PATCH 1/4] HADOOP-17628. Distcp contract test is really slow with ABFS and S3A; timing out. Changes * subclass can declare whether or not -direct should be default * all tests then switch to that * major cut back on default depth/width of directories * if you set the file size of "scale.test.distcp.file.size.kb" to 0 the large file test case is skipped. * aggressive cutback on all needless mkdir() calls. S3A suite * declares -direct always ABFS suites * deletes superfluous ITestAbfsFileSystemContractSecureDistCp * uses abfs scale test timeout * only runs with -Dscale All these changes bring execution time down from 4 min to 2 min against each store. Change-Id: Ide8c369fb7b97b84c2099e36a8925e183901d9e1 --- .../apache/hadoop/test/GenericTestUtils.java | 4 +- .../contract/s3a/ITestS3AContractDistCp.java | 51 ++++---- .../ITestAbfsFileSystemContractDistCp.java | 9 ++ ...estAbfsFileSystemContractSecureDistCp.java | 49 -------- .../contract/AbstractContractDistCpTest.java | 114 ++++++++++++++---- 5 files changed, 124 insertions(+), 103 deletions(-) delete mode 100644 hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractSecureDistCp.java diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java index 4f6a6259556c3..2edfce53896ed 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java @@ -943,8 +943,8 @@ public static List createFiles(final FileSystem fs, final int fileCount, final int dirCount) throws IOException { return createDirsAndFiles(fs, destDir, depth, fileCount, dirCount, - new ArrayList(fileCount), - new ArrayList(dirCount)); + new ArrayList<>(fileCount), + new ArrayList<>(dirCount)); } /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDistCp.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDistCp.java index ae54dfee0004e..bbbc323827262 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDistCp.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDistCp.java @@ -18,19 +18,18 @@ package org.apache.hadoop.fs.contract.s3a; -import java.io.FileNotFoundException; -import java.io.IOException; - -import static org.apache.hadoop.fs.s3a.Constants.*; -import static org.apache.hadoop.fs.s3a.S3ATestConstants.SCALE_TEST_TIMEOUT_MILLIS; -import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard; - import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.StorageStatistics; -import org.apache.hadoop.fs.s3a.FailureInjectionPolicy; import org.apache.hadoop.tools.contract.AbstractContractDistCpTest; +import static org.apache.hadoop.fs.contract.ContractTestUtils.skip; +import static org.apache.hadoop.fs.s3a.Constants.FAST_UPLOAD_BUFFER; +import static org.apache.hadoop.fs.s3a.Constants.FAST_UPLOAD_BUFFER_DISK; +import static org.apache.hadoop.fs.s3a.Constants.MULTIPART_MIN_SIZE; +import static org.apache.hadoop.fs.s3a.Constants.MULTIPART_SIZE; +import static org.apache.hadoop.fs.s3a.S3ATestConstants.SCALE_TEST_TIMEOUT_MILLIS; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard; + /** * Contract test suite covering S3A integration with DistCp. * Uses the block output stream, buffered to disk. This is the @@ -60,41 +59,33 @@ protected Configuration createConfiguration() { } @Override - protected S3AContract createContract(Configuration conf) { - return new S3AContract(conf); + protected boolean directWriteAlways() { + return true; } - /** - * Always inject the delay path in, so if the destination is inconsistent, - * and uses this key, inconsistency triggered. - * @param filepath path string in - * @return path on the remote FS for distcp - * @throws IOException IO failure - */ @Override - protected Path path(final String filepath) throws IOException { - Path path = super.path(filepath); - return new Path(path, FailureInjectionPolicy.DEFAULT_DELAY_KEY_SUBSTRING); + protected S3AContract createContract(Configuration conf) { + return new S3AContract(conf); } @Override - public void testDirectWrite() throws Exception { + public void testDistCpWithIterator() throws Exception { final long renames = getRenameOperationCount(); - super.testDirectWrite(); + super.testDistCpWithIterator(); assertEquals("Expected no renames for a direct write distcp", 0L, getRenameOperationCount() - renames); } + @Override + public void testDirectWrite() throws Exception { + skip("Not needed as all tests are direct by default"); + } + @Override public void testNonDirectWrite() throws Exception { +// ContractTestUtils.skip("disabled for peformance reasons"); final long renames = getRenameOperationCount(); - try { - super.testNonDirectWrite(); - } catch (FileNotFoundException e) { - // We may get this exception when data is written to a DELAY_LISTING_ME - // directory causing verification of the distcp success to fail if - // S3Guard is not enabled - } + super.testNonDirectWrite(); assertEquals("Expected 2 renames for a non-direct write distcp", 2L, getRenameOperationCount() - renames); } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractDistCp.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractDistCp.java index 0c7db73cf79eb..3f06509241f74 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractDistCp.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractDistCp.java @@ -19,16 +19,24 @@ package org.apache.hadoop.fs.azurebfs.contract; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.azure.integration.AzureTestConstants; import org.apache.hadoop.fs.azurebfs.services.AuthType; import org.apache.hadoop.tools.contract.AbstractContractDistCpTest; import org.junit.Assume; +import static org.apache.hadoop.fs.azure.integration.AzureTestUtils.assumeScaleTestsEnabled; + /** * Contract test for distCp operation. */ public class ITestAbfsFileSystemContractDistCp extends AbstractContractDistCpTest { private final ABFSContractTestBinding binding; + @Override + protected int getTestTimeoutMillis() { + return AzureTestConstants.SCALE_TEST_TIMEOUT_MILLIS; + } + public ITestAbfsFileSystemContractDistCp() throws Exception { binding = new ABFSContractTestBinding(); Assume.assumeTrue(binding.getAuthType() != AuthType.OAuth); @@ -38,6 +46,7 @@ public ITestAbfsFileSystemContractDistCp() throws Exception { public void setup() throws Exception { binding.setup(); super.setup(); + assumeScaleTestsEnabled(binding.getRawConfiguration()); } @Override diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractSecureDistCp.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractSecureDistCp.java deleted file mode 100644 index fa77c2e649ce1..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractSecureDistCp.java +++ /dev/null @@ -1,49 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azurebfs.contract; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.tools.contract.AbstractContractDistCpTest; - -/** - * Contract test for secure distCP operation. - */ -public class ITestAbfsFileSystemContractSecureDistCp extends AbstractContractDistCpTest { - private final ABFSContractTestBinding binding; - - public ITestAbfsFileSystemContractSecureDistCp() throws Exception { - binding = new ABFSContractTestBinding(); - } - - @Override - public void setup() throws Exception { - binding.setup(); - super.setup(); - } - - @Override - protected Configuration createConfiguration() { - return binding.getRawConfiguration(); - } - - @Override - protected AbfsFileSystemContract createContract(Configuration conf) { - return new AbfsFileSystemContract(conf, true); - } -} diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java index 159338f0d441f..5df4e449e397e 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java @@ -77,6 +77,22 @@ public abstract class AbstractContractDistCpTest protected static final int MB = 1024 * 1024; + /** + * Default depth for a directory tree: {@value}. + */ + protected static final int DEFAULT_DEPTH = 3; + + /** + * Default width for a directory tree: {@value}. + * Total dir size is + *
+   *   DEFAULT_WITH^DEFAULT_DEPTH
+   * 
+ * So the duration of a test run grows rapidly with this value. + * This has very significant consequences for object storage runs. + */ + protected static final int DEFAULT_WIDTH = 2; + @Rule public TestName testName = new TestName(); @@ -156,7 +172,6 @@ public void setup() throws Exception { GenericTestUtils.getTestDir().toURI()), testSubDir + "/local")); mkdirs(localFS, localDir); remoteDir = path(testSubDir + "/remote"); - mkdirs(remoteFS, remoteDir); // test teardown does this, but IDE-based test debugging can skip // that teardown; this guarantees the initial state is clean remoteFS.delete(remoteDir, true); @@ -325,6 +340,7 @@ private Job distCpUpdate(final Path srcDir, final Path destDir) .withDeleteMissing(true) .withSyncFolder(true) .withCRC(true) + .withDirectWrite(directWriteAlways()) .withOverwrite(false))); } @@ -378,6 +394,7 @@ public void testTrackDeepDirectoryStructureToRemote() throws Exception { inputDirUnderOutputDir) .withTrackMissing(trackDir) .withSyncFolder(true) + .withDirectWrite(directWriteAlways()) .withOverwrite(false))); lsR("tracked udpate", remoteFS, destDir); @@ -476,7 +493,7 @@ public void testSetJobId() throws Exception { remoteFS.create(new Path(remoteDir, "file1")).close(); DistCpTestUtils .assertRunDistCp(DistCpConstants.SUCCESS, remoteDir.toString(), - localDir.toString(), null, conf); + localDir.toString(), getDefaultCLIOptionsOrNull(), conf); assertNotNull("DistCp job id isn't set", conf.get(CONF_LABEL_DISTCP_JOB_ID)); } @@ -532,13 +549,15 @@ private Path distCpDeepDirectoryStructure(FileSystem srcFS, */ private void largeFiles(FileSystem srcFS, Path srcDir, FileSystem dstFS, Path dstDir) throws Exception { + int fileSizeKb = conf.getInt(SCALE_TEST_DISTCP_FILE_SIZE_KB, + DEFAULT_DISTCP_SIZE_KB); + if (fileSizeKb < 1) { + skip("File size in " + SCALE_TEST_DISTCP_FILE_SIZE_KB + " too small"); + } initPathFields(srcDir, dstDir); Path largeFile1 = new Path(inputDir, "file1"); Path largeFile2 = new Path(inputDir, "file2"); Path largeFile3 = new Path(inputDir, "file3"); - mkdirs(srcFS, inputDir); - int fileSizeKb = conf.getInt(SCALE_TEST_DISTCP_FILE_SIZE_KB, - DEFAULT_DISTCP_SIZE_KB); int fileSizeMb = fileSizeKb / 1024; getLogger().info("{} with file size {}", testName.getMethodName(), fileSizeMb); byte[] data1 = dataset((fileSizeMb + 1) * MB, 33, 43); @@ -549,7 +568,6 @@ private void largeFiles(FileSystem srcFS, Path srcDir, FileSystem dstFS, createFile(srcFS, largeFile3, true, data3); Path target = new Path(dstDir, "outputDir"); runDistCp(inputDir, target); - ContractTestUtils.assertIsDirectory(dstFS, target); verifyFileContents(dstFS, new Path(target, "inputDir/file1"), data1); verifyFileContents(dstFS, new Path(target, "inputDir/file2"), data2); verifyFileContents(dstFS, new Path(target, "inputDir/file3"), data3); @@ -557,14 +575,18 @@ private void largeFiles(FileSystem srcFS, Path srcDir, FileSystem dstFS, /** * Executes DistCp and asserts that the job finished successfully. - * + * The choice of direct/indirect is based on the value of + * {@link #directWriteAlways()}. * @param src source path * @param dst destination path * @throws Exception if there is a failure */ private void runDistCp(Path src, Path dst) throws Exception { - runDistCp(buildWithStandardOptions( - new DistCpOptions.Builder(Collections.singletonList(src), dst))); + if (directWriteAlways()) { + runDistCpDirectWrite(src, dst); + } else { + runDistCpWithRename(src, dst); + } } /** @@ -612,6 +634,9 @@ public void testDirectWrite() throws Exception { @Test public void testNonDirectWrite() throws Exception { + if (directWriteAlways()) { + skip("not needed"); + } describe("copy file from local to remote without using direct write " + "option"); directWrite(localFS, localDir, remoteFS, remoteDir, false); @@ -623,8 +648,6 @@ public void testDistCpWithIterator() throws Exception { Path source = new Path(remoteDir, "src"); Path dest = new Path(localDir, "dest"); dest = localFS.makeQualified(dest); - mkdirs(remoteFS, source); - verifyPathExists(remoteFS, "", source); GenericTestUtils .createFiles(remoteFS, source, getDepth(), getWidth(), getWidth()); @@ -632,8 +655,9 @@ public void testDistCpWithIterator() throws Exception { GenericTestUtils.LogCapturer log = GenericTestUtils.LogCapturer.captureLogs(SimpleCopyListing.LOG); + String options = "-useiterator -update -delete" + getDefaultCLIOptions(); DistCpTestUtils.assertRunDistCp(DistCpConstants.SUCCESS, source.toString(), - dest.toString(), "-useiterator -update -delete", conf); + dest.toString(), options, conf); // Check the target listing was also done using iterator. Assertions.assertThat(log.getOutput()).contains( @@ -644,11 +668,11 @@ public void testDistCpWithIterator() throws Exception { } public int getDepth() { - return 3; + return DEFAULT_DEPTH; } public int getWidth() { - return 10; + return DEFAULT_WIDTH; } private int getTotalFiles() { @@ -659,6 +683,41 @@ private int getTotalFiles() { return totalFiles; } + /** + * Override point: should direct write always be used? + * false by default; enable for stores where rename is slow. + * @return true if direct write should be used in all tests. + */ + protected boolean directWriteAlways() { + return false; + } + + /** + * Return the default options for distcp, including, + * if {@link #directWriteAlways()} is true, + * the -direct option. + * Append or prepend this to string CLIs. + * @return default options. + */ + protected String getDefaultCLIOptions() { + return directWriteAlways() + ? " -direct " + : ""; + } + + /** + * Return the default options for distcp, including, + * if {@link #directWriteAlways()} is true, + * the -direct option, null if there are no + * defaults. + * @return default options. + */ + protected String getDefaultCLIOptionsOrNull() { + return directWriteAlways() + ? " -direct " + : null; + } + /** * Executes a test with support for using direct write option. * @@ -683,7 +742,7 @@ private void directWrite(FileSystem srcFS, Path srcDir, FileSystem dstFS, if (directWrite) { runDistCpDirectWrite(inputDir, target); } else { - runDistCp(inputDir, target); + runDistCpWithRename(inputDir, target); } ContractTestUtils.assertIsDirectory(dstFS, target); lsR("Destination tree after distcp", dstFS, target); @@ -709,6 +768,21 @@ private Job runDistCpDirectWrite(final Path srcDir, final Path destDir) Collections.singletonList(srcDir), destDir) .withDirectWrite(true))); } + /** + * Run distcp srcDir destDir. + * @param srcDir local source directory + * @param destDir remote destination directory + * @return the completed job + * @throws Exception any failure. + */ + private Job runDistCpWithRename(Path srcDir, final Path destDir) + throws Exception { + describe("\nDistcp from " + srcDir + " to " + destDir); + return runDistCp(buildWithStandardOptions( + new DistCpOptions.Builder( + Collections.singletonList(srcDir), destDir) + .withDirectWrite(false))); + } @Test public void testDistCpWithFile() throws Exception { @@ -718,7 +792,6 @@ public void testDistCpWithFile() throws Exception { Path dest = new Path(localDir, "file"); dest = localFS.makeQualified(dest); - mkdirs(remoteFS, remoteDir); mkdirs(localFS, localDir); int len = 4; @@ -729,7 +802,7 @@ public void testDistCpWithFile() throws Exception { verifyPathExists(localFS, "", localDir); DistCpTestUtils.assertRunDistCp(DistCpConstants.SUCCESS, source.toString(), - dest.toString(), null, conf); + dest.toString(), getDefaultCLIOptionsOrNull(), conf); Assertions .assertThat(RemoteIterators.toList(localFS.listFiles(dest, true))) @@ -739,15 +812,12 @@ public void testDistCpWithFile() throws Exception { @Test public void testDistCpWithUpdateExistFile() throws Exception { - describe("Now update an exist file."); + describe("Now update an existing file."); Path source = new Path(remoteDir, "file"); Path dest = new Path(localDir, "file"); dest = localFS.makeQualified(dest); - mkdirs(remoteFS, remoteDir); - mkdirs(localFS, localDir); - int len = 4; int base = 0x40; byte[] block = dataset(len, base, base + len); @@ -758,7 +828,7 @@ public void testDistCpWithUpdateExistFile() throws Exception { verifyPathExists(remoteFS, "", source); verifyPathExists(localFS, "", dest); DistCpTestUtils.assertRunDistCp(DistCpConstants.SUCCESS, source.toString(), - dest.toString(), "-delete -update", conf); + dest.toString(), "-delete -update" + getDefaultCLIOptions(), conf); Assertions.assertThat(RemoteIterators.toList(localFS.listFiles(dest, true))) .hasSize(1); From 8c9d528468f03424dfa16650c0b67b71651c4bf6 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Tue, 27 Jul 2021 18:47:37 +0100 Subject: [PATCH 2/4] HADOOP-17628. ITestS3AContractDistCp tweaks Change-Id: I464f2071db2a0b17ac7589ea0ccb9b671d065c65 --- .../contract/s3a/ITestS3AContractDistCp.java | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDistCp.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDistCp.java index bbbc323827262..aa7e239143b51 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDistCp.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDistCp.java @@ -18,18 +18,15 @@ package org.apache.hadoop.fs.contract.s3a; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.StorageStatistics; -import org.apache.hadoop.tools.contract.AbstractContractDistCpTest; - import static org.apache.hadoop.fs.contract.ContractTestUtils.skip; -import static org.apache.hadoop.fs.s3a.Constants.FAST_UPLOAD_BUFFER; -import static org.apache.hadoop.fs.s3a.Constants.FAST_UPLOAD_BUFFER_DISK; -import static org.apache.hadoop.fs.s3a.Constants.MULTIPART_MIN_SIZE; -import static org.apache.hadoop.fs.s3a.Constants.MULTIPART_SIZE; +import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.S3ATestConstants.SCALE_TEST_TIMEOUT_MILLIS; import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.StorageStatistics; +import org.apache.hadoop.tools.contract.AbstractContractDistCpTest; + /** * Contract test suite covering S3A integration with DistCp. * Uses the block output stream, buffered to disk. This is the @@ -72,8 +69,9 @@ protected S3AContract createContract(Configuration conf) { public void testDistCpWithIterator() throws Exception { final long renames = getRenameOperationCount(); super.testDistCpWithIterator(); - assertEquals("Expected no renames for a direct write distcp", 0L, - getRenameOperationCount() - renames); + assertEquals("Expected no renames for a direct write distcp", + getRenameOperationCount(), + renames); } @Override From de1cfdb43770696929466725e839ffbe7c14883d Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Wed, 28 Jul 2021 12:35:26 +0100 Subject: [PATCH 3/4] HADOOP-17628. DistCp contract test speedup. * Address review comments * log IOStats after each test case. Important: as the cached FS retains statistics, the numbers get bigger over time. * HDFS test is now reinstated, as we've identified that most of its long execution time is from the large file upload/download suites. Disable them and its execution time drops from 4m to 30s, which means it can then be used to make sure the contract suite is consistent between HDFS and the object stores. Change-Id: I6d1cf5bf42916035a806fa9e78c003074f8f12b9 --- .../contract/s3a/ITestS3AContractDistCp.java | 7 ---- .../contract/AbstractContractDistCpTest.java | 36 +++++++++++++++---- ...istCp.java => TestHDFSContractDistCp.java} | 13 ++++++- 3 files changed, 41 insertions(+), 15 deletions(-) rename hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/{OptionalTestHDFSContractDistCp.java => TestHDFSContractDistCp.java} (81%) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDistCp.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDistCp.java index aa7e239143b51..04fa94bf8623b 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDistCp.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDistCp.java @@ -18,7 +18,6 @@ package org.apache.hadoop.fs.contract.s3a; -import static org.apache.hadoop.fs.contract.ContractTestUtils.skip; import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.S3ATestConstants.SCALE_TEST_TIMEOUT_MILLIS; import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard; @@ -74,14 +73,8 @@ public void testDistCpWithIterator() throws Exception { renames); } - @Override - public void testDirectWrite() throws Exception { - skip("Not needed as all tests are direct by default"); - } - @Override public void testNonDirectWrite() throws Exception { -// ContractTestUtils.skip("disabled for peformance reasons"); final long renames = getRenameOperationCount(); super.testNonDirectWrite(); assertEquals("Expected 2 renames for a non-direct write distcp", 2L, diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java index 5df4e449e397e..736dfd592638c 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java @@ -18,7 +18,9 @@ package org.apache.hadoop.tools.contract; +import static org.apache.hadoop.fs.CommonConfigurationKeys.IOSTATISTICS_LOGGING_LEVEL_INFO; import static org.apache.hadoop.fs.contract.ContractTestUtils.*; +import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.logIOStatisticsAtLevel; import static org.apache.hadoop.tools.DistCpConstants.CONF_LABEL_DISTCP_JOB_ID; import java.io.IOException; @@ -170,12 +172,20 @@ public void setup() throws Exception { localDir = localFS.makeQualified(new Path(new Path( GenericTestUtils.getTestDir().toURI()), testSubDir + "/local")); + localFS.delete(localDir, true); mkdirs(localFS, localDir); - remoteDir = path(testSubDir + "/remote"); + Path testSubPath = path(testSubDir); + remoteDir = new Path(testSubPath, "remote"); // test teardown does this, but IDE-based test debugging can skip // that teardown; this guarantees the initial state is clean remoteFS.delete(remoteDir, true); - localFS.delete(localDir, true); + } + + @Override + public void teardown() throws Exception { + // if remote FS supports IOStatistics log it. + logIOStatisticsAtLevel(LOG, IOSTATISTICS_LOGGING_LEVEL_INFO, getRemoteFS()); + super.teardown(); } /** @@ -550,9 +560,9 @@ private Path distCpDeepDirectoryStructure(FileSystem srcFS, private void largeFiles(FileSystem srcFS, Path srcDir, FileSystem dstFS, Path dstDir) throws Exception { int fileSizeKb = conf.getInt(SCALE_TEST_DISTCP_FILE_SIZE_KB, - DEFAULT_DISTCP_SIZE_KB); + getDefaultDistCPSizeKb()); if (fileSizeKb < 1) { - skip("File size in " + SCALE_TEST_DISTCP_FILE_SIZE_KB + " too small"); + skip("File size in " + SCALE_TEST_DISTCP_FILE_SIZE_KB + " is zero"); } initPathFields(srcDir, dstDir); Path largeFile1 = new Path(inputDir, "file1"); @@ -573,6 +583,18 @@ private void largeFiles(FileSystem srcFS, Path srcDir, FileSystem dstFS, verifyFileContents(dstFS, new Path(target, "inputDir/file3"), data3); } + /** + * Override point. What is the default distcp size + * for large files if not overridden by + * {@link #SCALE_TEST_DISTCP_FILE_SIZE_KB}. + * If 0 then, unless overridden in the configuration, + * the large file tests will not run. + * @return file size. + */ + protected int getDefaultDistCPSizeKb() { + return DEFAULT_DISTCP_SIZE_KB; + } + /** * Executes DistCp and asserts that the job finished successfully. * The choice of direct/indirect is based on the value of @@ -629,14 +651,14 @@ private static void mkdirs(FileSystem fs, Path dir) throws Exception { @Test public void testDirectWrite() throws Exception { describe("copy file from local to remote using direct write option"); + if (directWriteAlways()) { + skip("not needed as all other tests use the -direct option."); + } directWrite(localFS, localDir, remoteFS, remoteDir, true); } @Test public void testNonDirectWrite() throws Exception { - if (directWriteAlways()) { - skip("not needed"); - } describe("copy file from local to remote without using direct write " + "option"); directWrite(localFS, localDir, remoteFS, remoteDir, false); diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/OptionalTestHDFSContractDistCp.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/TestHDFSContractDistCp.java similarity index 81% rename from hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/OptionalTestHDFSContractDistCp.java rename to hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/TestHDFSContractDistCp.java index d8c74240794be..61a16b1e816fd 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/OptionalTestHDFSContractDistCp.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/TestHDFSContractDistCp.java @@ -30,8 +30,9 @@ * Verifies that the HDFS passes all the tests in * {@link AbstractContractDistCpTest}. * As such, it acts as an in-module validation of this contract test itself. + * It does skip the large file test cases for speed. */ -public class OptionalTestHDFSContractDistCp extends AbstractContractDistCpTest { +public class TestHDFSContractDistCp extends AbstractContractDistCpTest { @BeforeClass public static void createCluster() throws IOException { @@ -47,4 +48,14 @@ public static void teardownCluster() throws IOException { protected AbstractFSContract createContract(Configuration conf) { return new HDFSContract(conf); } + + /** + * Turn off the large file tests as they are very slow and there + * are many other distcp to HDFS tests which verify such things. + * @return 0 + */ + @Override + protected int getDefaultDistCPSizeKb() { + return 0; + } } From 6e0e4d90e9bc3864c73df95376f3af5056eb044b Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Fri, 30 Jul 2021 19:34:25 +0100 Subject: [PATCH 4/4] HADOOP-17628 change directWriteAlways() method to shouldUseDirectWrite() Change-Id: Ie64af6e109935243cd4c65454cf8ee66ec6fbf58 --- .../contract/s3a/ITestS3AContractDistCp.java | 2 +- .../contract/AbstractContractDistCpTest.java | 20 +++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDistCp.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDistCp.java index 04fa94bf8623b..f4f5c176b37f8 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDistCp.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDistCp.java @@ -55,7 +55,7 @@ protected Configuration createConfiguration() { } @Override - protected boolean directWriteAlways() { + protected boolean shouldUseDirectWrite() { return true; } diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java index 736dfd592638c..04aeea665c244 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java @@ -350,7 +350,7 @@ private Job distCpUpdate(final Path srcDir, final Path destDir) .withDeleteMissing(true) .withSyncFolder(true) .withCRC(true) - .withDirectWrite(directWriteAlways()) + .withDirectWrite(shouldUseDirectWrite()) .withOverwrite(false))); } @@ -404,7 +404,7 @@ public void testTrackDeepDirectoryStructureToRemote() throws Exception { inputDirUnderOutputDir) .withTrackMissing(trackDir) .withSyncFolder(true) - .withDirectWrite(directWriteAlways()) + .withDirectWrite(shouldUseDirectWrite()) .withOverwrite(false))); lsR("tracked udpate", remoteFS, destDir); @@ -598,13 +598,13 @@ protected int getDefaultDistCPSizeKb() { /** * Executes DistCp and asserts that the job finished successfully. * The choice of direct/indirect is based on the value of - * {@link #directWriteAlways()}. + * {@link #shouldUseDirectWrite()}. * @param src source path * @param dst destination path * @throws Exception if there is a failure */ private void runDistCp(Path src, Path dst) throws Exception { - if (directWriteAlways()) { + if (shouldUseDirectWrite()) { runDistCpDirectWrite(src, dst); } else { runDistCpWithRename(src, dst); @@ -651,7 +651,7 @@ private static void mkdirs(FileSystem fs, Path dir) throws Exception { @Test public void testDirectWrite() throws Exception { describe("copy file from local to remote using direct write option"); - if (directWriteAlways()) { + if (shouldUseDirectWrite()) { skip("not needed as all other tests use the -direct option."); } directWrite(localFS, localDir, remoteFS, remoteDir, true); @@ -710,32 +710,32 @@ private int getTotalFiles() { * false by default; enable for stores where rename is slow. * @return true if direct write should be used in all tests. */ - protected boolean directWriteAlways() { + protected boolean shouldUseDirectWrite() { return false; } /** * Return the default options for distcp, including, - * if {@link #directWriteAlways()} is true, + * if {@link #shouldUseDirectWrite()} is true, * the -direct option. * Append or prepend this to string CLIs. * @return default options. */ protected String getDefaultCLIOptions() { - return directWriteAlways() + return shouldUseDirectWrite() ? " -direct " : ""; } /** * Return the default options for distcp, including, - * if {@link #directWriteAlways()} is true, + * if {@link #shouldUseDirectWrite()} is true, * the -direct option, null if there are no * defaults. * @return default options. */ protected String getDefaultCLIOptionsOrNull() { - return directWriteAlways() + return shouldUseDirectWrite() ? " -direct " : null; }