Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ private DistCpConstants() {
public static final String CONF_LABEL_SPLIT_RATIO =
"distcp.dynamic.split.ratio";
public static final String CONF_LABEL_DIRECT_WRITE = "distcp.direct.write";
public static final String CONF_LABEL_UPDATE_ROOT_DIRECTORY_ATTRIBUTES =
"distcp.update.root.directory.attributes";

/* Total bytes to be copied. Updated by copylisting. Unfiltered count */
public static final String CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED = "mapred.total.bytes.expected";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,10 @@ public boolean shouldUseIterator() {
return options.shouldUseIterator();
}

public boolean shouldUpdateRootDirectoryAttributes() {
return options.shouldUpdateRootDirectoryAttributes();
}

public final boolean splitLargeFile() {
return options.getBlocksPerChunk() > 0;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -244,8 +244,11 @@ public enum DistCpOptionSwitch {
USE_ITERATOR(DistCpConstants.CONF_LABEL_USE_ITERATOR,
new Option("useiterator", false,
"Use single threaded list status iterator to build "
+ "the listing to save the memory utilisation at the client"));
+ "the listing to save the memory utilisation at the client")),

UPDATE_ROOT_DIRECTORY_ATTRIBUTES(DistCpConstants.CONF_LABEL_UPDATE_ROOT_DIRECTORY_ATTRIBUTES,
new Option("updateRootDirectoryAttributes", false,
"Update root directory attributes (eg permissions, ownership ...)"));

public static final String PRESERVE_STATUS_DEFAULT = "-prbugpct";
private final String confLabel;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,8 @@ public final class DistCpOptions {

private final boolean useIterator;

private final boolean updateRootDirectoryAttributes;

/**
* File attributes for preserve.
*
Expand Down Expand Up @@ -228,6 +230,8 @@ private DistCpOptions(Builder builder) {
this.directWrite = builder.directWrite;

this.useIterator = builder.useIterator;

this.updateRootDirectoryAttributes = builder.updateRootDirectoryAttributes;
}

public Path getSourceFileListing() {
Expand Down Expand Up @@ -374,6 +378,10 @@ public boolean shouldUseIterator() {
return useIterator;
}

public boolean shouldUpdateRootDirectoryAttributes() {
return updateRootDirectoryAttributes;
}

/**
* Add options to configuration. These will be used in the Mapper/committer
*
Expand Down Expand Up @@ -427,6 +435,9 @@ public void appendToConf(Configuration conf) {

DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.USE_ITERATOR,
String.valueOf(useIterator));

DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.UPDATE_ROOT_DIRECTORY_ATTRIBUTES,
String.valueOf(updateRootDirectoryAttributes));
}

/**
Expand Down Expand Up @@ -465,6 +476,7 @@ public String toString() {
", verboseLog=" + verboseLog +
", directWrite=" + directWrite +
", useiterator=" + useIterator +
", updateRootDirectoryAttributes=" + updateRootDirectoryAttributes +
'}';
}

Expand Down Expand Up @@ -518,6 +530,8 @@ public static class Builder {

private boolean useIterator = false;

private boolean updateRootDirectoryAttributes = false;

public Builder(List<Path> sourcePaths, Path targetPath) {
Preconditions.checkArgument(sourcePaths != null && !sourcePaths.isEmpty(),
"Source paths should not be null or empty!");
Expand Down Expand Up @@ -780,6 +794,11 @@ public Builder withUseIterator(boolean useItr) {
this.useIterator = useItr;
return this;
}

public Builder withUpdateRootDirectoryAttributes(boolean updateRootDirectoryAttrs) {
this.updateRootDirectoryAttributes = updateRootDirectoryAttrs;
return this;
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,9 @@ public static DistCpOptions parse(String[] args)
.withDirectWrite(
command.hasOption(DistCpOptionSwitch.DIRECT_WRITE.getSwitch()))
.withUseIterator(
command.hasOption(DistCpOptionSwitch.USE_ITERATOR.getSwitch()));
command.hasOption(DistCpOptionSwitch.USE_ITERATOR.getSwitch()))
.withUpdateRootDirectoryAttributes(
command.hasOption(DistCpOptionSwitch.UPDATE_ROOT_DIRECTORY_ATTRIBUTES.getSwitch()));

if (command.hasOption(DistCpOptionSwitch.DIFF.getSwitch())) {
String[] snapshots = getVals(command,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -616,10 +616,13 @@ private void writeToFileListingRoot(SequenceFile.Writer fileListWriter,
DistCpContext context) throws IOException {
boolean syncOrOverwrite = context.shouldSyncFolder() ||
context.shouldOverwrite();
boolean skipRootPath = syncOrOverwrite &&
!context.shouldUpdateRootDirectoryAttributes();
for (CopyListingFileStatus fs : fileStatus) {
if (fs.getPath().equals(sourcePathRoot) &&
fs.isDirectory() && syncOrOverwrite) {
// Skip the root-paths when syncOrOverwrite
fs.isDirectory() && skipRootPath) {
// Skip the root-paths when skipRootPath (syncOrOverwrite and
// update root directory is not a must).
LOG.debug("Skip {}", fs.getPath());
return;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ public class CopyCommitter extends FileOutputCommitter {
private boolean ignoreFailures = false;
private boolean skipCrc = false;
private int blocksPerChunk = 0;
private boolean updateRootDirectoryAttributes = false;

/**
* Create a output committer
Expand All @@ -100,6 +101,8 @@ public void commitJob(JobContext jobContext) throws IOException {
Configuration conf = jobContext.getConfiguration();
syncFolder = conf.getBoolean(DistCpConstants.CONF_LABEL_SYNC_FOLDERS, false);
overwrite = conf.getBoolean(DistCpConstants.CONF_LABEL_OVERWRITE, false);
updateRootDirectoryAttributes =
conf.getBoolean(CONF_LABEL_UPDATE_ROOT_DIRECTORY_ATTRIBUTES, false);
targetPathExists = conf.getBoolean(
DistCpConstants.CONF_LABEL_TARGET_PATH_EXISTS, true);
ignoreFailures = conf.getBoolean(
Expand Down Expand Up @@ -336,9 +339,13 @@ private void preserveFileAttributesForDirectories(Configuration conf)

Path targetFile = new Path(targetRoot.toString() + "/" + srcRelPath);
//
// Skip the root folder when syncOrOverwrite is true.
// Skip the root folder when skipRootDirectoryAttributes is true.
//
if (targetRoot.equals(targetFile) && syncOrOverwrite) continue;
boolean skipRootDirectoryAttributes =
syncOrOverwrite && !updateRootDirectoryAttributes;
if (targetRoot.equals(targetFile) && skipRootDirectoryAttributes) {
continue;
}

FileSystem targetFS = targetFile.getFileSystem(conf);
DistCpUtils.preserve(targetFS, targetFile, srcFileStatus, attributes,
Expand Down
1 change: 1 addition & 0 deletions hadoop-tools/hadoop-distcp/src/site/markdown/DistCp.md.vm
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,7 @@ Command Line Options
| `-xtrack <path>` | Save information about missing source files to the specified path. | This option is only valid with `-update` option. This is an experimental property and it cannot be used with `-atomic` option. |
| `-direct` | Write directly to destination paths | Useful for avoiding potentially very expensive temporary file rename operations when the destination is an object store |
| `-useiterator` | Uses single threaded listStatusIterator to build listing | Useful for saving memory at the client side. Using this option will ignore the numListstatusThreads option |
| `-updateRootDirectoryAttributes` | Update root directory attributes (eg permissions, ownership ...) | Useful if you need to enforce root directory attributes update when using distcp |
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you replace updateRoot instead of updateRootDirectoryAttributes or give another short name?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure, since we have no other suggestions I will go with updateRoot.

Copy link
Contributor Author

@mohan3d mohan3d Feb 14, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ferhui should I re-work the internal code so everything is updateRoot instead of updateRootDirectoryAttributes or only the enduser distcp tool?

Better to be consistent and use updateRoot everywhere.


Architecture of DistCp
----------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ public void testToString() {
"atomicWorkPath=null, logPath=null, sourceFileListing=abc, " +
"sourcePaths=null, targetPath=xyz, filtersFile='null', " +
"blocksPerChunk=0, copyBufferSize=8192, verboseLog=false, " +
"directWrite=false, useiterator=false}";
"directWrite=false, useiterator=false, updateRootDirectoryAttributes=false}";
String optionString = option.toString();
Assert.assertEquals(val, optionString);
Assert.assertNotSame(DistCpOptionSwitch.ATOMIC_COMMIT.toString(),
Expand Down Expand Up @@ -563,4 +563,15 @@ public void testAppendToConf() {
"otherwise it may not be fetched properly",
expectedValForEmptyConfigKey, config.get(""));
}

@Test
public void testUpdateRootDirectoryAttributes() {
final DistCpOptions options = new DistCpOptions.Builder(
Collections.singletonList(
new Path("hdfs://localhost:8020/source")),
new Path("hdfs://localhost:8020/target/"))
.withUpdateRootDirectoryAttributes(true)
.build();
Assert.assertTrue(options.shouldUpdateRootDirectoryAttributes());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import static org.apache.hadoop.test.GenericTestUtils.getMethodName;
import static org.hamcrest.core.Is.is;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertTrue;

import java.io.ByteArrayOutputStream;
Expand All @@ -44,6 +45,8 @@
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.tools.util.DistCpTestUtils;
import org.apache.hadoop.util.ToolRunner;
import org.junit.AfterClass;
import org.junit.Assert;
Expand Down Expand Up @@ -551,4 +554,41 @@ public void testSourceRoot() throws Exception {
String[] args2 = new String[]{rootStr, tgtStr2};
Assert.assertThat(ToolRunner.run(conf, new DistCp(), args2), is(0));
}

@Test
public void testUpdateRootDirectoryAttributes() throws Exception {
FileSystem fs = cluster.getFileSystem();

Path source = new Path("/src");
Path dest1 = new Path("/dest1");
Path dest2 = new Path("/dest2");

fs.delete(source, true);
fs.delete(dest1, true);
fs.delete(dest2, true);

// Create a source dir
fs.mkdirs(source);
fs.setOwner(source, "userA", "groupA");
fs.setTimes(source, new Random().nextLong(), new Random().nextLong());

GenericTestUtils.createFiles(fs, source, 3, 5, 5);

// should not preserve attrs
DistCpTestUtils.assertRunDistCp(DistCpConstants.SUCCESS, source.toString(),
dest1.toString(), "-p -update", conf);

FileStatus srcStatus = fs.getFileStatus(source);
FileStatus destStatus1 = fs.getFileStatus(dest1);
assertNotEquals(destStatus1.getOwner(), srcStatus.getOwner());
assertNotEquals(destStatus1.getModificationTime(), srcStatus.getModificationTime());

// should preserve attrs
DistCpTestUtils.assertRunDistCp(DistCpConstants.SUCCESS, source.toString(),
dest2.toString(), "-p -update -updateRootDirectoryAttributes", conf);

FileStatus destStatus2 = fs.getFileStatus(dest2);
assertEquals(destStatus2.getOwner(), srcStatus.getOwner());
assertEquals(destStatus2.getModificationTime(), srcStatus.getModificationTime());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -804,4 +804,18 @@ public void testExclusionsOption() {
"hdfs://localhost:8020/target/"});
assertThat(options.getFiltersFile()).isEqualTo("/tmp/filters.txt");
}

@Test
public void testParseUpdateRootDirectoryAttributes() {
DistCpOptions options = OptionsParser.parse(new String[] {
"hdfs://localhost:8020/source/first",
"hdfs://localhost:8020/target/"});
Assert.assertFalse(options.shouldUpdateRootDirectoryAttributes());

options = OptionsParser.parse(new String[] {
"-updateRootDirectoryAttributes",
"hdfs://localhost:8020/source/first",
"hdfs://localhost:8020/target/"});
Assert.assertTrue(options.shouldUpdateRootDirectoryAttributes());
}
}