-
Notifications
You must be signed in to change notification settings - Fork 9.2k
HADOOP-19256 Integrate PutIfNotExist functionality into S3A createFile() #7011
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
19cc916
b5637c0
1f78fdc
a36fa15
192b1f6
0eddecb
6f4af93
d66dc79
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -79,6 +79,7 @@ | |
| import static org.apache.hadoop.fs.s3a.Statistic.*; | ||
| import static org.apache.hadoop.fs.s3a.impl.HeaderProcessing.CONTENT_TYPE_OCTET_STREAM; | ||
| import static org.apache.hadoop.fs.s3a.impl.ProgressListenerEvent.*; | ||
| import static org.apache.hadoop.fs.s3a.impl.AWSHeaders.IF_NONE_MATCH; | ||
| import static org.apache.hadoop.fs.s3a.statistics.impl.EmptyS3AStatisticsContext.EMPTY_BLOCK_OUTPUT_STREAM_STATISTICS; | ||
| import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDuration; | ||
| import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDurationOfInvocation; | ||
|
|
@@ -141,7 +142,11 @@ class S3ABlockOutputStream extends OutputStream implements | |
| private static final String E_NOT_SYNCABLE = | ||
| "S3A streams are not Syncable. See HADOOP-17597."; | ||
|
|
||
| public static final String IF_NONE_MATCH_HEADER = "If-None-Match"; | ||
| /** | ||
| * How long to wait for uploads to complete after being cancelled before | ||
| * the blocks themselves are closed: 15 seconds. | ||
| */ | ||
| private static final Duration TIME_TO_AWAIT_CANCEL_COMPLETION = Duration.ofSeconds(15); | ||
|
|
||
| /** Object being uploaded. */ | ||
| private final String key; | ||
|
|
@@ -686,7 +691,7 @@ private long putObject() throws IOException { | |
| final S3ADataBlocks.DataBlock block = getActiveBlock(); | ||
| final long size = block.dataSize(); | ||
| final S3ADataBlocks.BlockUploadData uploadData = block.startUpload(); | ||
| PutObjectRequest putObjectRequest = uploadData.hasFile() ? | ||
| PutObjectRequest putObjectRequest = | ||
| writeOperationHelper.createPutObjectRequest( | ||
| key, | ||
| uploadData.getSize(), | ||
|
|
@@ -696,32 +701,16 @@ private long putObject() throws IOException { | |
| PutObjectRequest.Builder maybeModifiedPutIfAbsentRequest = putObjectRequest.toBuilder(); | ||
| Map<String, String> optionHeaders = builder.putOptions.getHeaders(); | ||
|
|
||
| if (optionHeaders != null && optionHeaders.containsKey(IF_NONE_MATCH_HEADER)) { | ||
| if (optionHeaders != null && optionHeaders.containsKey(IF_NONE_MATCH)) { | ||
| maybeModifiedPutIfAbsentRequest.overrideConfiguration( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. As mentioned below as well, I think we should upgrade the SDK and then use the new .ifNoneMatch(). Also I would recommend you move all of this logic into a new private method in this class.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Actually, I would move this logic to RequestFactoryImpl. But you will need to add in a flag to the method signature, "isConditionalPutEnabled", and only add the if-none-match header in when isConditionalPutEnabled is true. Basically, you only want to add |
||
| override -> override.putHeader(IF_NONE_MATCH_HEADER, optionHeaders.get(IF_NONE_MATCH_HEADER))); | ||
| override -> override.putHeader(IF_NONE_MATCH, optionHeaders.get(IF_NONE_MATCH))); | ||
| } | ||
|
|
||
| final PutObjectRequest finalizedRequest = maybeModifiedPutIfAbsentRequest.build(); | ||
|
|
||
| BlockUploadProgress progressCallback = | ||
| new BlockUploadProgress(block, progressListener, now()); | ||
| statistics.blockUploadQueued(size); | ||
| ListenableFuture<PutObjectResponse> putObjectResult = | ||
| executorService.submit(() -> { | ||
| try { | ||
| // the putObject call automatically closes the input | ||
| // stream afterwards. | ||
| PutObjectResponse response = | ||
| writeOperationHelper.putObject(finalizedRequest, builder.putOptions, uploadData, | ||
| uploadData.hasFile(), statistics); | ||
| progressCallback.progressChanged(REQUEST_BYTE_TRANSFER_EVENT); | ||
| return response; | ||
| } finally { | ||
| cleanupWithLogger(LOG, uploadData, block); | ||
| } | ||
| }); | ||
| clearActiveBlock(); | ||
| //wait for completion | ||
| try { | ||
| progressCallback.progressChanged(PUT_STARTED_EVENT); | ||
| // the putObject call automatically closes the upload data | ||
|
|
@@ -1411,6 +1400,11 @@ public static final class BlockOutputStreamBuilder { | |
| */ | ||
| private boolean isMultipartUploadEnabled; | ||
|
|
||
| /** | ||
| * Is conditional create enabled. | ||
| */ | ||
| private boolean isConditionalEnabled; | ||
diljotgrewal marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| private BlockOutputStreamBuilder() { | ||
| } | ||
|
|
||
|
|
@@ -1572,5 +1566,11 @@ public BlockOutputStreamBuilder withMultipartEnabled( | |
| isMultipartUploadEnabled = value; | ||
| return this; | ||
| } | ||
|
|
||
| public BlockOutputStreamBuilder withConditionalEnabled( | ||
| final boolean value){ | ||
| isConditionalEnabled = value; | ||
| return this; | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,15 +1,34 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.hadoop.fs.s3a.impl; | ||
|
|
||
| import org.apache.hadoop.conf.Configuration; | ||
| import org.apache.hadoop.fs.FSDataOutputStream; | ||
| import org.apache.hadoop.fs.FSDataOutputStreamBuilder; | ||
| import org.apache.hadoop.fs.FileSystem; | ||
| import org.apache.hadoop.fs.Path; | ||
| import org.apache.hadoop.fs.s3a.AbstractS3ATestBase; | ||
| import org.apache.hadoop.fs.s3a.performance.AbstractS3ACostTest; | ||
| import org.apache.hadoop.fs.s3a.RemoteFileChangedException; | ||
| import org.apache.hadoop.fs.s3a.S3ATestUtils; | ||
| import org.apache.hadoop.io.IOUtils; | ||
|
|
||
| import org.assertj.core.api.Assertions; | ||
| import org.junit.Assert; | ||
| import org.junit.Test; | ||
| import software.amazon.awssdk.services.s3.model.S3Exception; | ||
|
|
@@ -18,20 +37,23 @@ | |
| import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset; | ||
| import static org.apache.hadoop.fs.s3a.Constants.FAST_UPLOAD_BUFFER; | ||
| import static org.apache.hadoop.fs.s3a.Constants.FAST_UPLOAD_BUFFER_ARRAY; | ||
| import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_IF_NONE_MATCH; | ||
| import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CONDITIONAL_FILE_CREATE; | ||
| import static org.apache.hadoop.fs.s3a.Constants.MIN_MULTIPART_THRESHOLD; | ||
| import static org.apache.hadoop.fs.s3a.Constants.MULTIPART_MIN_SIZE; | ||
| import static org.apache.hadoop.fs.s3a.Constants.MULTIPART_SIZE; | ||
| import static org.apache.hadoop.fs.s3a.S3ATestUtils.skipIfNotEnabled; | ||
| import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; | ||
| import static org.apache.hadoop.fs.s3a.impl.InternalConstants.UPLOAD_PART_COUNT_LIMIT; | ||
| import static org.apache.hadoop.fs.s3a.scale.ITestS3AMultipartUploadSizeLimits.MPU_SIZE; | ||
| import static org.apache.hadoop.fs.s3a.scale.S3AScaleTestBase._1MB; | ||
|
|
||
|
|
||
| public class ITestS3APutIfMatch extends AbstractS3ATestBase { | ||
| public class ITestS3APutIfMatch extends AbstractS3ACostTest { | ||
|
|
||
| private Configuration conf; | ||
|
|
||
| @Override | ||
| protected Configuration createConfiguration() { | ||
| public Configuration createConfiguration() { | ||
| Configuration conf = super.createConfiguration(); | ||
| S3ATestUtils.disableFilesystemCaching(conf); | ||
| removeBaseAndBucketOverrides(conf, | ||
diljotgrewal marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
@@ -45,6 +67,14 @@ protected Configuration createConfiguration() { | |
| return conf; | ||
| } | ||
|
|
||
| @Override | ||
| public void setup() throws Exception { | ||
| super.setup(); | ||
| conf = createConfiguration(); | ||
| skipIfNotEnabled(conf, FS_S3A_CONDITIONAL_FILE_CREATE, | ||
| "Skipping IfNoneMatch tests"); | ||
| } | ||
|
|
||
| protected String getBlockOutputBufferName() { | ||
| return FAST_UPLOAD_BUFFER_ARRAY; | ||
| } | ||
|
|
@@ -61,7 +91,7 @@ private static void createFileWithIfNoneMatchFlag(FileSystem fs, | |
| byte[] data, | ||
| String ifMatchTag) throws Exception { | ||
| FSDataOutputStreamBuilder builder = fs.createFile(path); | ||
| builder.must(FS_S3A_CREATE_IF_NONE_MATCH, ifMatchTag); | ||
| builder.must(FS_S3A_CONDITIONAL_FILE_CREATE, ifMatchTag); | ||
| FSDataOutputStream stream = builder.create().build(); | ||
| if (data != null && data.length > 0) { | ||
| stream.write(data); | ||
|
|
@@ -85,7 +115,7 @@ public void testPutIfAbsentConflict() throws IOException { | |
| Assert.assertEquals(RemoteFileChangedException.class, e.getClass()); | ||
diljotgrewal marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| S3Exception s3Exception = (S3Exception) e.getCause(); | ||
|
||
| Assert.assertEquals(s3Exception.statusCode(), 412); | ||
| Assertions.assertThat(s3Exception.statusCode()).isEqualTo(412); | ||
diljotgrewal marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| } | ||
| } | ||
|
|
||
|
|
@@ -95,7 +125,6 @@ public void testPutIfAbsentLargeFileConflict() throws IOException { | |
| FileSystem fs = getFileSystem(); | ||
| Path testFile = methodPath(); | ||
|
|
||
| fs.mkdirs(testFile.getParent()); | ||
| // enough bytes for Multipart Upload | ||
| byte[] fileBytes = dataset(6 * _1MB, 'a', 'z' - 'a'); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There are ways to do this with small files; this matters because a 6MB file is large enough that it'd have to become a scale test. Yes, it is a scale test when people are testing across the planet. Do not worry about this right now, but be aware before merging into trunk the large file test will have to be moved to a subset of S3AScaleTestBase, and this test replaced with something using a write to a magic path — or even better, we add another new create file option to force multipart uploads always. Designing for that makes me think that a followup to this should move to an enumset of CreateFile flags |
||
|
|
||
|
|
@@ -107,7 +136,7 @@ public void testPutIfAbsentLargeFileConflict() throws IOException { | |
|
|
||
| // Error gets caught here: | ||
| S3Exception s3Exception = (S3Exception) e.getCause(); | ||
| Assert.assertEquals(s3Exception.statusCode(), 412); | ||
| Assertions.assertThat(s3Exception.statusCode()).isEqualTo(412); | ||
| } | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.