Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
import java.io.IOException;
import java.util.Objects;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
Expand All @@ -32,6 +35,7 @@
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.output.committer.manifest.impl.InternalConstants;
import org.apache.hadoop.mapreduce.lib.output.committer.manifest.impl.ManifestCommitterSupport;
import org.apache.hadoop.mapreduce.lib.output.committer.manifest.stages.AbstractJobOrTaskStage;
import org.apache.hadoop.mapreduce.lib.output.committer.manifest.stages.StageConfig;
import org.apache.hadoop.mapreduce.lib.output.committer.manifest.stages.StageEventCallbacks;
import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder;
Expand All @@ -51,6 +55,9 @@
*/
public final class ManifestCommitterConfig implements IOStatisticsSource {

private static final Logger LOG = LoggerFactory.getLogger(
ManifestCommitterConfig.class);

/**
* Final destination of work.
* This is <i>unqualified</i>.
Expand Down Expand Up @@ -153,6 +160,12 @@ public final class ManifestCommitterConfig implements IOStatisticsSource {
*/
private final int writerQueueCapacity;

/**
* How many attempts to save a task manifest by save and rename
* before giving up.
*/
private final int saveManifestAttempts;

/**
* Constructor.
* @param outputPath destination path of the job.
Expand Down Expand Up @@ -198,6 +211,14 @@ public final class ManifestCommitterConfig implements IOStatisticsSource {
this.writerQueueCapacity = conf.getInt(
OPT_WRITER_QUEUE_CAPACITY,
DEFAULT_WRITER_QUEUE_CAPACITY);
int attempts = conf.getInt(OPT_MANIFEST_SAVE_ATTEMPTS,
OPT_MANIFEST_SAVE_ATTEMPTS_DEFAULT);
if (attempts < 1) {
LOG.warn("Invalid value for {}: {}",
OPT_MANIFEST_SAVE_ATTEMPTS, attempts);
attempts = 1;
}
this.saveManifestAttempts = attempts;

// if constructed with a task attempt, build the task ID and path.
if (context instanceof TaskAttemptContext) {
Expand Down Expand Up @@ -332,6 +353,10 @@ public String getName() {
return name;
}

public int getSaveManifestAttempts() {
return saveManifestAttempts;
}

/**
* Get writer queue capacity.
* @return the queue capacity
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,9 @@ public final class ManifestCommitterConstants {
* Should dir cleanup do parallel deletion of task attempt dirs
* before trying to delete the toplevel dirs.
* For GCS this may deliver speedup, while on ABFS it may avoid
* timeouts in certain deployments.
* timeouts in certain deployments, something
* {@link #OPT_CLEANUP_PARALLEL_DELETE_BASE_FIRST}
* can alleviate.
* Value: {@value}.
*/
public static final String OPT_CLEANUP_PARALLEL_DELETE =
Expand All @@ -143,6 +145,20 @@ public final class ManifestCommitterConstants {
*/
public static final boolean OPT_CLEANUP_PARALLEL_DELETE_DIRS_DEFAULT = true;

/**
* Should parallel cleanup try to delete teh base first?
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

typo: the

* Best for azure as it skips the task attempt deletions unless
* the toplevel delete fails.
* Value: {@value}.
*/
public static final String OPT_CLEANUP_PARALLEL_DELETE_BASE_FIRST =
OPT_PREFIX + "cleanup.parallel.delete.base.first";

/**
* Default value of option {@link #OPT_CLEANUP_PARALLEL_DELETE_BASE_FIRST}: {@value}.
*/
public static final boolean OPT_CLEANUP_PARALLEL_DELETE_BASE_FIRST_DEFAULT = true;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As it is bad for GCS, shouldn't the default be false?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

really don't know here. In the docs I try to cover this


/**
* Threads to use for IO.
*/
Expand Down Expand Up @@ -260,6 +276,19 @@ public final class ManifestCommitterConstants {
*/
public static final int DEFAULT_WRITER_QUEUE_CAPACITY = OPT_IO_PROCESSORS_DEFAULT;

/**
* How many attempts to save a task manifest by save and rename
* before giving up.
* Value: {@value}.
*/
public static final String OPT_MANIFEST_SAVE_ATTEMPTS =
OPT_PREFIX + "manifest.save.attempts";

/**
* Default value of {@link #OPT_MANIFEST_SAVE_ATTEMPTS}: {@value}.
*/
public static final int OPT_MANIFEST_SAVE_ATTEMPTS_DEFAULT = 5;

private ManifestCommitterConstants() {
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,9 @@ public final class ManifestCommitterStatisticNames {
public static final String OP_STAGE_TASK_SCAN_DIRECTORY
= "task_stage_scan_directory";

/** Delete a directory: {@value}. */
public static final String OP_DELETE_DIR = "op_delete_dir";

private ManifestCommitterStatisticNames() {
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ private InternalConstants() {
OP_CREATE_ONE_DIRECTORY,
OP_DIRECTORY_SCAN,
OP_DELETE,
OP_DELETE_DIR,
OP_DELETE_FILE_UNDER_DESTINATION,
OP_GET_FILE_STATUS,
OP_IS_DIRECTORY,
Expand Down Expand Up @@ -127,4 +128,11 @@ private InternalConstants() {
/** Schemas of filesystems we know to not work with this committer. */
public static final Set<String> UNSUPPORTED_FS_SCHEMAS =
ImmutableSet.of("s3a", "wasb");

/**
* Interval in milliseconds between save retries.
* Value {@value} milliseconds.
*/
public static final int SAVE_SLEEP_INTERVAL = 500;

}
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,36 @@ public boolean isFile(Path path) throws IOException {
public abstract boolean delete(Path path, boolean recursive)
throws IOException;

/**
* Forward to {@code delete(Path, true)}
* unless overridden.
* <p>
* If it returns without an error: there is no file at
* the end of the path.
* @param path path
* @return outcome
* @throws IOException failure.
*/
public boolean deleteFile(Path path)
throws IOException {
return delete(path, false);
}

/**
* Acquire the delete capacity then call {@code FileSystem#delete(Path, true)}
* or equivalent.
* <p>
* If it returns without an error: there is nothing at
* the end of the path.
* @param path path
* @return outcome
* @throws IOException failure.
*/
public boolean rmdir(Path path)
throws IOException {
return delete(path, true);
}

/**
* Forward to {@link FileSystem#mkdirs(Path)}.
* Usual "what does 'false' mean" ambiguity.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,11 @@ public boolean delete(Path path, boolean recursive)
return fileSystem.delete(path, recursive);
}

@Override
public boolean rmdir(final Path path) throws IOException {
return fileSystem.delete(path, true);
}

@Override
public boolean mkdirs(Path path)
throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

import org.apache.hadoop.fs.Path;

import static org.apache.hadoop.mapreduce.lib.output.committer.manifest.ManifestCommitterStatisticNames.OP_DELETE_DIR;
import static org.apache.hadoop.mapreduce.lib.output.committer.manifest.ManifestCommitterStatisticNames.OP_STAGE_TASK_ABORT_TASK;

/**
Expand Down Expand Up @@ -55,7 +56,11 @@ protected Path executeStage(final Boolean suppressExceptions)
final Path dir = getTaskAttemptDir();
if (dir != null) {
LOG.info("{}: Deleting task attempt directory {}", getName(), dir);
deleteDir(dir, suppressExceptions);
if (suppressExceptions) {
deleteDirSuppressingExceptions(dir, OP_DELETE_DIR);
} else {
deleteDir(dir, OP_DELETE_DIR);
}
}
return dir;
}
Expand Down
Loading