diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java index 4f6a6259556c3..2edfce53896ed 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java @@ -943,8 +943,8 @@ public static List createFiles(final FileSystem fs, final int fileCount, final int dirCount) throws IOException { return createDirsAndFiles(fs, destDir, depth, fileCount, dirCount, - new ArrayList(fileCount), - new ArrayList(dirCount)); + new ArrayList<>(fileCount), + new ArrayList<>(dirCount)); } /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDistCp.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDistCp.java index ae54dfee0004e..f4f5c176b37f8 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDistCp.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDistCp.java @@ -18,17 +18,12 @@ package org.apache.hadoop.fs.contract.s3a; -import java.io.FileNotFoundException; -import java.io.IOException; - import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.S3ATestConstants.SCALE_TEST_TIMEOUT_MILLIS; import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.StorageStatistics; -import org.apache.hadoop.fs.s3a.FailureInjectionPolicy; import org.apache.hadoop.tools.contract.AbstractContractDistCpTest; /** @@ -60,41 +55,28 @@ protected Configuration createConfiguration() { } @Override - protected S3AContract createContract(Configuration conf) { - return new S3AContract(conf); + protected boolean shouldUseDirectWrite() { + return true; } - /** - * Always inject the delay path in, so if the destination is inconsistent, - * and uses this key, inconsistency triggered. - * @param filepath path string in - * @return path on the remote FS for distcp - * @throws IOException IO failure - */ @Override - protected Path path(final String filepath) throws IOException { - Path path = super.path(filepath); - return new Path(path, FailureInjectionPolicy.DEFAULT_DELAY_KEY_SUBSTRING); + protected S3AContract createContract(Configuration conf) { + return new S3AContract(conf); } @Override - public void testDirectWrite() throws Exception { + public void testDistCpWithIterator() throws Exception { final long renames = getRenameOperationCount(); - super.testDirectWrite(); - assertEquals("Expected no renames for a direct write distcp", 0L, - getRenameOperationCount() - renames); + super.testDistCpWithIterator(); + assertEquals("Expected no renames for a direct write distcp", + getRenameOperationCount(), + renames); } @Override public void testNonDirectWrite() throws Exception { final long renames = getRenameOperationCount(); - try { - super.testNonDirectWrite(); - } catch (FileNotFoundException e) { - // We may get this exception when data is written to a DELAY_LISTING_ME - // directory causing verification of the distcp success to fail if - // S3Guard is not enabled - } + super.testNonDirectWrite(); assertEquals("Expected 2 renames for a non-direct write distcp", 2L, getRenameOperationCount() - renames); } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractDistCp.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractDistCp.java index 0c7db73cf79eb..3f06509241f74 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractDistCp.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractDistCp.java @@ -19,16 +19,24 @@ package org.apache.hadoop.fs.azurebfs.contract; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.azure.integration.AzureTestConstants; import org.apache.hadoop.fs.azurebfs.services.AuthType; import org.apache.hadoop.tools.contract.AbstractContractDistCpTest; import org.junit.Assume; +import static org.apache.hadoop.fs.azure.integration.AzureTestUtils.assumeScaleTestsEnabled; + /** * Contract test for distCp operation. */ public class ITestAbfsFileSystemContractDistCp extends AbstractContractDistCpTest { private final ABFSContractTestBinding binding; + @Override + protected int getTestTimeoutMillis() { + return AzureTestConstants.SCALE_TEST_TIMEOUT_MILLIS; + } + public ITestAbfsFileSystemContractDistCp() throws Exception { binding = new ABFSContractTestBinding(); Assume.assumeTrue(binding.getAuthType() != AuthType.OAuth); @@ -38,6 +46,7 @@ public ITestAbfsFileSystemContractDistCp() throws Exception { public void setup() throws Exception { binding.setup(); super.setup(); + assumeScaleTestsEnabled(binding.getRawConfiguration()); } @Override diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractSecureDistCp.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractSecureDistCp.java deleted file mode 100644 index fa77c2e649ce1..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractSecureDistCp.java +++ /dev/null @@ -1,49 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azurebfs.contract; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.tools.contract.AbstractContractDistCpTest; - -/** - * Contract test for secure distCP operation. - */ -public class ITestAbfsFileSystemContractSecureDistCp extends AbstractContractDistCpTest { - private final ABFSContractTestBinding binding; - - public ITestAbfsFileSystemContractSecureDistCp() throws Exception { - binding = new ABFSContractTestBinding(); - } - - @Override - public void setup() throws Exception { - binding.setup(); - super.setup(); - } - - @Override - protected Configuration createConfiguration() { - return binding.getRawConfiguration(); - } - - @Override - protected AbfsFileSystemContract createContract(Configuration conf) { - return new AbfsFileSystemContract(conf, true); - } -} diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java index 159338f0d441f..04aeea665c244 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java @@ -18,7 +18,9 @@ package org.apache.hadoop.tools.contract; +import static org.apache.hadoop.fs.CommonConfigurationKeys.IOSTATISTICS_LOGGING_LEVEL_INFO; import static org.apache.hadoop.fs.contract.ContractTestUtils.*; +import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.logIOStatisticsAtLevel; import static org.apache.hadoop.tools.DistCpConstants.CONF_LABEL_DISTCP_JOB_ID; import java.io.IOException; @@ -77,6 +79,22 @@ public abstract class AbstractContractDistCpTest protected static final int MB = 1024 * 1024; + /** + * Default depth for a directory tree: {@value}. + */ + protected static final int DEFAULT_DEPTH = 3; + + /** + * Default width for a directory tree: {@value}. + * Total dir size is + *
+   *   DEFAULT_WITH^DEFAULT_DEPTH
+   * 
+ * So the duration of a test run grows rapidly with this value. + * This has very significant consequences for object storage runs. + */ + protected static final int DEFAULT_WIDTH = 2; + @Rule public TestName testName = new TestName(); @@ -154,13 +172,20 @@ public void setup() throws Exception { localDir = localFS.makeQualified(new Path(new Path( GenericTestUtils.getTestDir().toURI()), testSubDir + "/local")); + localFS.delete(localDir, true); mkdirs(localFS, localDir); - remoteDir = path(testSubDir + "/remote"); - mkdirs(remoteFS, remoteDir); + Path testSubPath = path(testSubDir); + remoteDir = new Path(testSubPath, "remote"); // test teardown does this, but IDE-based test debugging can skip // that teardown; this guarantees the initial state is clean remoteFS.delete(remoteDir, true); - localFS.delete(localDir, true); + } + + @Override + public void teardown() throws Exception { + // if remote FS supports IOStatistics log it. + logIOStatisticsAtLevel(LOG, IOSTATISTICS_LOGGING_LEVEL_INFO, getRemoteFS()); + super.teardown(); } /** @@ -325,6 +350,7 @@ private Job distCpUpdate(final Path srcDir, final Path destDir) .withDeleteMissing(true) .withSyncFolder(true) .withCRC(true) + .withDirectWrite(shouldUseDirectWrite()) .withOverwrite(false))); } @@ -378,6 +404,7 @@ public void testTrackDeepDirectoryStructureToRemote() throws Exception { inputDirUnderOutputDir) .withTrackMissing(trackDir) .withSyncFolder(true) + .withDirectWrite(shouldUseDirectWrite()) .withOverwrite(false))); lsR("tracked udpate", remoteFS, destDir); @@ -476,7 +503,7 @@ public void testSetJobId() throws Exception { remoteFS.create(new Path(remoteDir, "file1")).close(); DistCpTestUtils .assertRunDistCp(DistCpConstants.SUCCESS, remoteDir.toString(), - localDir.toString(), null, conf); + localDir.toString(), getDefaultCLIOptionsOrNull(), conf); assertNotNull("DistCp job id isn't set", conf.get(CONF_LABEL_DISTCP_JOB_ID)); } @@ -532,13 +559,15 @@ private Path distCpDeepDirectoryStructure(FileSystem srcFS, */ private void largeFiles(FileSystem srcFS, Path srcDir, FileSystem dstFS, Path dstDir) throws Exception { + int fileSizeKb = conf.getInt(SCALE_TEST_DISTCP_FILE_SIZE_KB, + getDefaultDistCPSizeKb()); + if (fileSizeKb < 1) { + skip("File size in " + SCALE_TEST_DISTCP_FILE_SIZE_KB + " is zero"); + } initPathFields(srcDir, dstDir); Path largeFile1 = new Path(inputDir, "file1"); Path largeFile2 = new Path(inputDir, "file2"); Path largeFile3 = new Path(inputDir, "file3"); - mkdirs(srcFS, inputDir); - int fileSizeKb = conf.getInt(SCALE_TEST_DISTCP_FILE_SIZE_KB, - DEFAULT_DISTCP_SIZE_KB); int fileSizeMb = fileSizeKb / 1024; getLogger().info("{} with file size {}", testName.getMethodName(), fileSizeMb); byte[] data1 = dataset((fileSizeMb + 1) * MB, 33, 43); @@ -549,22 +578,37 @@ private void largeFiles(FileSystem srcFS, Path srcDir, FileSystem dstFS, createFile(srcFS, largeFile3, true, data3); Path target = new Path(dstDir, "outputDir"); runDistCp(inputDir, target); - ContractTestUtils.assertIsDirectory(dstFS, target); verifyFileContents(dstFS, new Path(target, "inputDir/file1"), data1); verifyFileContents(dstFS, new Path(target, "inputDir/file2"), data2); verifyFileContents(dstFS, new Path(target, "inputDir/file3"), data3); } + /** + * Override point. What is the default distcp size + * for large files if not overridden by + * {@link #SCALE_TEST_DISTCP_FILE_SIZE_KB}. + * If 0 then, unless overridden in the configuration, + * the large file tests will not run. + * @return file size. + */ + protected int getDefaultDistCPSizeKb() { + return DEFAULT_DISTCP_SIZE_KB; + } + /** * Executes DistCp and asserts that the job finished successfully. - * + * The choice of direct/indirect is based on the value of + * {@link #shouldUseDirectWrite()}. * @param src source path * @param dst destination path * @throws Exception if there is a failure */ private void runDistCp(Path src, Path dst) throws Exception { - runDistCp(buildWithStandardOptions( - new DistCpOptions.Builder(Collections.singletonList(src), dst))); + if (shouldUseDirectWrite()) { + runDistCpDirectWrite(src, dst); + } else { + runDistCpWithRename(src, dst); + } } /** @@ -607,6 +651,9 @@ private static void mkdirs(FileSystem fs, Path dir) throws Exception { @Test public void testDirectWrite() throws Exception { describe("copy file from local to remote using direct write option"); + if (shouldUseDirectWrite()) { + skip("not needed as all other tests use the -direct option."); + } directWrite(localFS, localDir, remoteFS, remoteDir, true); } @@ -623,8 +670,6 @@ public void testDistCpWithIterator() throws Exception { Path source = new Path(remoteDir, "src"); Path dest = new Path(localDir, "dest"); dest = localFS.makeQualified(dest); - mkdirs(remoteFS, source); - verifyPathExists(remoteFS, "", source); GenericTestUtils .createFiles(remoteFS, source, getDepth(), getWidth(), getWidth()); @@ -632,8 +677,9 @@ public void testDistCpWithIterator() throws Exception { GenericTestUtils.LogCapturer log = GenericTestUtils.LogCapturer.captureLogs(SimpleCopyListing.LOG); + String options = "-useiterator -update -delete" + getDefaultCLIOptions(); DistCpTestUtils.assertRunDistCp(DistCpConstants.SUCCESS, source.toString(), - dest.toString(), "-useiterator -update -delete", conf); + dest.toString(), options, conf); // Check the target listing was also done using iterator. Assertions.assertThat(log.getOutput()).contains( @@ -644,11 +690,11 @@ public void testDistCpWithIterator() throws Exception { } public int getDepth() { - return 3; + return DEFAULT_DEPTH; } public int getWidth() { - return 10; + return DEFAULT_WIDTH; } private int getTotalFiles() { @@ -659,6 +705,41 @@ private int getTotalFiles() { return totalFiles; } + /** + * Override point: should direct write always be used? + * false by default; enable for stores where rename is slow. + * @return true if direct write should be used in all tests. + */ + protected boolean shouldUseDirectWrite() { + return false; + } + + /** + * Return the default options for distcp, including, + * if {@link #shouldUseDirectWrite()} is true, + * the -direct option. + * Append or prepend this to string CLIs. + * @return default options. + */ + protected String getDefaultCLIOptions() { + return shouldUseDirectWrite() + ? " -direct " + : ""; + } + + /** + * Return the default options for distcp, including, + * if {@link #shouldUseDirectWrite()} is true, + * the -direct option, null if there are no + * defaults. + * @return default options. + */ + protected String getDefaultCLIOptionsOrNull() { + return shouldUseDirectWrite() + ? " -direct " + : null; + } + /** * Executes a test with support for using direct write option. * @@ -683,7 +764,7 @@ private void directWrite(FileSystem srcFS, Path srcDir, FileSystem dstFS, if (directWrite) { runDistCpDirectWrite(inputDir, target); } else { - runDistCp(inputDir, target); + runDistCpWithRename(inputDir, target); } ContractTestUtils.assertIsDirectory(dstFS, target); lsR("Destination tree after distcp", dstFS, target); @@ -709,6 +790,21 @@ private Job runDistCpDirectWrite(final Path srcDir, final Path destDir) Collections.singletonList(srcDir), destDir) .withDirectWrite(true))); } + /** + * Run distcp srcDir destDir. + * @param srcDir local source directory + * @param destDir remote destination directory + * @return the completed job + * @throws Exception any failure. + */ + private Job runDistCpWithRename(Path srcDir, final Path destDir) + throws Exception { + describe("\nDistcp from " + srcDir + " to " + destDir); + return runDistCp(buildWithStandardOptions( + new DistCpOptions.Builder( + Collections.singletonList(srcDir), destDir) + .withDirectWrite(false))); + } @Test public void testDistCpWithFile() throws Exception { @@ -718,7 +814,6 @@ public void testDistCpWithFile() throws Exception { Path dest = new Path(localDir, "file"); dest = localFS.makeQualified(dest); - mkdirs(remoteFS, remoteDir); mkdirs(localFS, localDir); int len = 4; @@ -729,7 +824,7 @@ public void testDistCpWithFile() throws Exception { verifyPathExists(localFS, "", localDir); DistCpTestUtils.assertRunDistCp(DistCpConstants.SUCCESS, source.toString(), - dest.toString(), null, conf); + dest.toString(), getDefaultCLIOptionsOrNull(), conf); Assertions .assertThat(RemoteIterators.toList(localFS.listFiles(dest, true))) @@ -739,15 +834,12 @@ public void testDistCpWithFile() throws Exception { @Test public void testDistCpWithUpdateExistFile() throws Exception { - describe("Now update an exist file."); + describe("Now update an existing file."); Path source = new Path(remoteDir, "file"); Path dest = new Path(localDir, "file"); dest = localFS.makeQualified(dest); - mkdirs(remoteFS, remoteDir); - mkdirs(localFS, localDir); - int len = 4; int base = 0x40; byte[] block = dataset(len, base, base + len); @@ -758,7 +850,7 @@ public void testDistCpWithUpdateExistFile() throws Exception { verifyPathExists(remoteFS, "", source); verifyPathExists(localFS, "", dest); DistCpTestUtils.assertRunDistCp(DistCpConstants.SUCCESS, source.toString(), - dest.toString(), "-delete -update", conf); + dest.toString(), "-delete -update" + getDefaultCLIOptions(), conf); Assertions.assertThat(RemoteIterators.toList(localFS.listFiles(dest, true))) .hasSize(1); diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/OptionalTestHDFSContractDistCp.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/TestHDFSContractDistCp.java similarity index 81% rename from hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/OptionalTestHDFSContractDistCp.java rename to hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/TestHDFSContractDistCp.java index d8c74240794be..61a16b1e816fd 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/OptionalTestHDFSContractDistCp.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/TestHDFSContractDistCp.java @@ -30,8 +30,9 @@ * Verifies that the HDFS passes all the tests in * {@link AbstractContractDistCpTest}. * As such, it acts as an in-module validation of this contract test itself. + * It does skip the large file test cases for speed. */ -public class OptionalTestHDFSContractDistCp extends AbstractContractDistCpTest { +public class TestHDFSContractDistCp extends AbstractContractDistCpTest { @BeforeClass public static void createCluster() throws IOException { @@ -47,4 +48,14 @@ public static void teardownCluster() throws IOException { protected AbstractFSContract createContract(Configuration conf) { return new HDFSContract(conf); } + + /** + * Turn off the large file tests as they are very slow and there + * are many other distcp to HDFS tests which verify such things. + * @return 0 + */ + @Override + protected int getDefaultDistCPSizeKb() { + return 0; + } }