From c3d02378fff27afc71a0ff2c660af66d19e619ab Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Mon, 28 Oct 2024 17:47:34 +0000 Subject: [PATCH 1/4] HADOOP-19317. S3A: add option to enable/disable 100 CONTINUE Option fs.s3a.connection.expect.continue controls whether or not an HTTP PUT request to the S3 store sets the Expect: 100-continue header and awaits a 100 CONTINUE response from the server before uploading any data. This allows for throttling and other problems to be detected fast. The default is "true". This has long been the implicit setting; this change simply allows callers to disable it. Change-Id: Ib21d2511db356c1b5781fc6b6531cb9282c3504c --- .../org/apache/hadoop/fs/s3a/Constants.java | 15 +++ .../hadoop/fs/s3a/impl/AWSClientConfig.java | 21 +++- .../markdown/tools/hadoop-aws/connecting.md | 101 +++++++++++++++++- .../contract/s3a/ITestS3AContractCreate.java | 19 +++- .../scale/ITestS3AHugeFilesNoMultipart.java | 6 ++ 5 files changed, 155 insertions(+), 7 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 67854e65720fd..6a4b3585d3016 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -444,6 +444,21 @@ private Constants() { public static final Duration DEFAULT_CONNECTION_IDLE_TIME_DURATION = Duration.ofSeconds(60); + /** + * Should PUT requests await a 100 CONTINUE response before uploading + * data? + *

+ * Value: {@value}. + */ + public static final String CONNECTION_EXPECT_CONTINUE = + "fs.s3a.connection.expect.continue"; + + /** + * Default value for {@link #CONNECTION_EXPECT_CONTINUE}. + */ + public static final boolean CONNECTION_EXPECT_CONTINUE_DEFAULT = true; + + // socket send buffer to be used in Amazon client public static final String SOCKET_SEND_BUFFER = "fs.s3a.socket.send.buffer"; public static final int DEFAULT_SOCKET_SEND_BUFFER = 8 * 1024; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java index afd3ed7ff3315..4ea43e7a66eef 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java @@ -45,6 +45,8 @@ import static org.apache.hadoop.fs.s3a.Constants.CONNECTION_ACQUISITION_TIMEOUT; import static org.apache.hadoop.fs.s3a.Constants.AWS_SERVICE_IDENTIFIER_S3; import static org.apache.hadoop.fs.s3a.Constants.AWS_SERVICE_IDENTIFIER_STS; +import static org.apache.hadoop.fs.s3a.Constants.CONNECTION_EXPECT_CONTINUE; +import static org.apache.hadoop.fs.s3a.Constants.CONNECTION_EXPECT_CONTINUE_DEFAULT; import static org.apache.hadoop.fs.s3a.Constants.CONNECTION_IDLE_TIME; import static org.apache.hadoop.fs.s3a.Constants.CONNECTION_KEEPALIVE; import static org.apache.hadoop.fs.s3a.Constants.CONNECTION_TTL; @@ -149,6 +151,7 @@ public static ApacheHttpClient.Builder createHttpClientBuilder(Configuration con .connectionMaxIdleTime(conn.getMaxIdleTime()) .connectionTimeout(conn.getEstablishTimeout()) .connectionTimeToLive(conn.getConnectionTTL()) + .expectContinueEnabled(conn.isExpectContinueEnabled()) .maxConnections(conn.getMaxConnections()) .socketTimeout(conn.getSocketTimeout()) .tcpKeepAlive(conn.isKeepAlive()) @@ -491,7 +494,7 @@ public String toString() { * All the connection 
settings, wrapped as a class for use by * both the sync and async client. */ - static class ConnectionSettings { + static final class ConnectionSettings { private final int maxConnections; private final boolean keepAlive; private final Duration acquisitionTimeout; @@ -499,6 +502,7 @@ static class ConnectionSettings { private final Duration establishTimeout; private final Duration maxIdleTime; private final Duration socketTimeout; + private final boolean expectContinueEnabled; private ConnectionSettings( final int maxConnections, @@ -507,7 +511,8 @@ private ConnectionSettings( final Duration connectionTTL, final Duration establishTimeout, final Duration maxIdleTime, - final Duration socketTimeout) { + final Duration socketTimeout, + final boolean expectContinueEnabled) { this.maxConnections = maxConnections; this.keepAlive = keepAlive; this.acquisitionTimeout = acquisitionTimeout; @@ -515,6 +520,7 @@ private ConnectionSettings( this.establishTimeout = establishTimeout; this.maxIdleTime = maxIdleTime; this.socketTimeout = socketTimeout; + this.expectContinueEnabled = expectContinueEnabled; } int getMaxConnections() { @@ -545,6 +551,10 @@ Duration getSocketTimeout() { return socketTimeout; } + boolean isExpectContinueEnabled() { + return expectContinueEnabled; + } + @Override public String toString() { return "ConnectionSettings{" + @@ -555,6 +565,7 @@ public String toString() { ", establishTimeout=" + establishTimeout + ", maxIdleTime=" + maxIdleTime + ", socketTimeout=" + socketTimeout + + ", expectContinueEnabled=" + expectContinueEnabled + '}'; } } @@ -615,6 +626,9 @@ static ConnectionSettings createConnectionSettings(Configuration conf) { DEFAULT_SOCKET_TIMEOUT_DURATION, TimeUnit.MILLISECONDS, minimumOperationDuration); + final boolean expectContinueEnabled = conf.getBoolean(CONNECTION_EXPECT_CONTINUE, + CONNECTION_EXPECT_CONTINUE_DEFAULT); + return new ConnectionSettings( maxConnections, keepAlive, @@ -622,7 +636,8 @@ static ConnectionSettings 
createConnectionSettings(Configuration conf) { connectionTTL, establishTimeout, maxIdleTime, - socketTimeout); + socketTimeout, + expectContinueEnabled); } /** diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md index 6fa37750ded8c..5aa48c249eceb 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md @@ -150,7 +150,19 @@ If you are working with third party stores, please check [third party stores in See [Timeouts](performance.html#timeouts). -### Low-level Network Options +### Low-level Network/Http Options + +The S3A connector uses [Apache HttpClient](https://hc.apache.org/index.html) to connect to +S3 Stores. +The client is configured to create a pool of HTTP connections with S3, so that once +the initial set of connections have been made they can re-used for followup operations. + +Core aspects of pool settings are +* The pool size is set by `fs.s3a.connection.maximum` -if a process asks for more connections than this + threads will be blocked until they are available. +* The time blocked before an exception is raised is set in `fs.s3a.connection.acquisition.timeout`. +* The time an idle connection will be kept in the pool is set by `fs.s3a.connection.idle.time`. +* The time limit for even a non-idle connection to be kept open is set in `fs.s3a.connection.ttl`. ```xml @@ -163,6 +175,69 @@ See [Timeouts](performance.html#timeouts). + + fs.s3a.connection.acquisition.timeout + 60s + + Time to wait for an HTTP connection from the pool. + Too low: operations fail on a busy process. + When high, it isn't obvious that the connection pool is overloaded, + simply that jobs are slow. + + + + + fs.s3a.connection.request.timeout + 60s + + Total time for a single request to take from the HTTP verb to the + response from the server. 
+ 0 means "no limit" + + + + + fs.s3a.connection.part.upload.timeout + 15m + + Timeout for uploading all of a small object or a single part + of a larger one. + + + + + fs.s3a.connection.ttl + 5m + + Expiration time of an HTTP connection from the connection pool. + + + + + fs.s3a.connection.idle.time + 60s + + Time for an idle HTTP connection to be kept in the HTTP connection + pool before being closed. + Too low: overhead of creating connections. + Too high: risk of stale connections and inability to use the + adaptive load balancing of the S3 front end. + + + + + fs.s3a.connection.expect.continue + true + + Should PUT requests await a 100 CONTINUE response before uploading + data? + This should normally be left alone unless a third party store which + does not support it is encountered, or file uploads over long + distance networks time out. + (see HADOOP-19317 as an example) + + + fs.s3a.connection.ssl.enabled true @@ -485,6 +560,30 @@ If `storediag` doesn't connect to your S3 store, *nothing else will*. Based on the experience of people who field support calls, here are some of the main connectivity issues which cause problems. +### Connection pool overloaded + +If more connections are needed than the HTTP connection pool has, +then worker threads will block until one is enabled. + +If the wait exceeds the time set in `fs.s3a.connection.acquisition.timeout`, +the operation will fail with `"Timeout waiting for connection from pool`. + +This may be retried, but time has been lost, which results in slower operations. +If queries suddenly gets slower as the number of active operations increase, +This is a possible cause. + +Fixes: + +Increase the value of `fs.s3a.connection.maximum`. +This is the general fix on query engines (Hive, Spark) which do not keep files open past the duration of a single task. + +Applications which keep files on, or simply open many files without closing them, +may be leaking http connections due to keeping too many files open.
+This can only be fixed at a code level +* Applications must call `close()` on an input stream when the contents of the file are longer needed. +* If an input stream needs to be kept around for reuse, call `unbuffer()` on it. + This will free up the connection until another read operation is invoked. + ### Inconsistent configuration across a cluster All hosts in the cluster need to have the configuration secrets; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractCreate.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractCreate.java index a6590e99e6caf..000caf328837e 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractCreate.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractCreate.java @@ -29,6 +29,8 @@ import org.apache.hadoop.fs.contract.AbstractFSContract; import org.apache.hadoop.fs.s3a.S3ATestUtils; +import static org.apache.hadoop.fs.s3a.Constants.CONNECTION_EXPECT_CONTINUE; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; import static org.apache.hadoop.fs.s3a.S3ATestUtils.setPerformanceFlags; /** @@ -47,8 +49,8 @@ public class ITestS3AContractCreate extends AbstractContractCreateTest { @Parameterized.Parameters public static Collection params() { return Arrays.asList(new Object[][]{ - {false}, - {true} + {false, false}, + {true, true} }); } @@ -57,8 +59,15 @@ public static Collection params() { */ private final boolean createPerformance; - public ITestS3AContractCreate(final boolean createPerformance) { + /** + * Expect a 100-continue response? 
+ */ + private final boolean expectContinue; + + public ITestS3AContractCreate(final boolean createPerformance, + final boolean expectContinue) { this.createPerformance = createPerformance; + this.expectContinue = expectContinue; } @Override @@ -71,6 +80,10 @@ protected Configuration createConfiguration() { final Configuration conf = setPerformanceFlags( super.createConfiguration(), createPerformance ? "create" : ""); + removeBaseAndBucketOverrides( + conf, + CONNECTION_EXPECT_CONTINUE); + conf.setBoolean(CONNECTION_EXPECT_CONTINUE, expectContinue); S3ATestUtils.disableFilesystemCaching(conf); return conf; } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesNoMultipart.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesNoMultipart.java index aa702f158e369..3ee8bdbf128e7 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesNoMultipart.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesNoMultipart.java @@ -24,6 +24,7 @@ import org.apache.hadoop.fs.s3a.Constants; import static org.apache.hadoop.fs.contract.ContractTestUtils.IO_CHUNK_BUFFER_SIZE; +import static org.apache.hadoop.fs.s3a.Constants.CONNECTION_EXPECT_CONTINUE; import static org.apache.hadoop.fs.s3a.Constants.MIN_MULTIPART_THRESHOLD; import static org.apache.hadoop.fs.s3a.Constants.MULTIPART_MIN_SIZE; import static org.apache.hadoop.fs.s3a.Constants.MULTIPART_SIZE; @@ -69,18 +70,23 @@ private boolean isMultipartCopyEnabled() { * Create a configuration without multipart upload, * and a long request timeout to allow for a very slow * PUT in close. + *

+ * 100-continue is disabled so as to verify the behavior + * on a large PUT. * @return the configuration to create the test FS with. */ @Override protected Configuration createScaleConfiguration() { Configuration conf = super.createScaleConfiguration(); removeBaseAndBucketOverrides(conf, + CONNECTION_EXPECT_CONTINUE, IO_CHUNK_BUFFER_SIZE, MIN_MULTIPART_THRESHOLD, MULTIPART_UPLOADS_ENABLED, MULTIPART_SIZE, PART_UPLOAD_TIMEOUT, REQUEST_TIMEOUT); + conf.setBoolean(CONNECTION_EXPECT_CONTINUE, false); conf.setInt(IO_CHUNK_BUFFER_SIZE, 655360); conf.setInt(MIN_MULTIPART_THRESHOLD, MULTIPART_MIN_SIZE); conf.setInt(MULTIPART_SIZE, MULTIPART_MIN_SIZE); From 4cac2f561c1cabd353a32778560221fda3f7fcdd Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Fri, 8 Nov 2024 16:50:42 +0000 Subject: [PATCH 2/4] HADOOP-19317. doc update. Going to pull some of this into the other PR Change-Id: I2c54a74072b1cfaae7c8f85c031c731a0c3792ed --- .../org/apache/hadoop/fs/s3a/Constants.java | 1 - .../markdown/tools/hadoop-aws/connecting.md | 30 ++++++++++++------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 6a4b3585d3016..4d329540dde72 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -458,7 +458,6 @@ private Constants() { */ public static final boolean CONNECTION_EXPECT_CONTINUE_DEFAULT = true; - // socket send buffer to be used in Amazon client public static final String SOCKET_SEND_BUFFER = "fs.s3a.socket.send.buffer"; public static final int DEFAULT_SOCKET_SEND_BUFFER = 8 * 1024; diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md index 5aa48c249eceb..3e513439ea264 100644 --- 
a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md @@ -157,8 +157,8 @@ S3 Stores. The client is configured to create a pool of HTTP connections with S3, so that once the initial set of connections have been made they can re-used for followup operations. -Core aspects of pool settings are -* The pool size is set by `fs.s3a.connection.maximum` -if a process asks for more connections than this +Core aspects of pool settings are: +* The pool size is set by `fs.s3a.connection.maximum` -if a process asks for more connections than this then threads will be blocked until they are available. * The time blocked before an exception is raised is set in `fs.s3a.connection.acquisition.timeout`. * The time an idle connection will be kept in the pool is set by `fs.s3a.connection.idle.time`. @@ -563,26 +563,34 @@ some of the main connectivity issues which cause problems. ### Connection pool overloaded If more connections are needed than the HTTP connection pool has, -then worker threads will block until one is enabled. +then worker threads will block until one is freed. If the wait exceeds the time set in `fs.s3a.connection.acquisition.timeout`, the operation will fail with `"Timeout waiting for connection from pool`. This may be retried, but time has been lost, which results in slower operations. If queries suddenly gets slower as the number of active operations increase, -This is a possible cause. +then this is a possible cause. Fixes: Increase the value of `fs.s3a.connection.maximum`. -This is the general fix on query engines (Hive, Spark) which do not keep files open past the duration of a single task. - -Applications which keep files on, or simply open many files without closing them, -may be leaking http connections due to keeping too many files open. 
+This is the general fix on query engines such as Apache Spark, and Apache Impala +which run many workers threads simultaneously, and do not keep files open past +the duration of a single task within a larger query. + +It can also surface with applications which deliberately keep files open +for extended periods. +These should ideally call `unbuffer()` on the input streams. +This will free up the connection until another read operation is invoked -yet +still re-open faster than if `open(Path)` were invoked. + +Applications may also be "leaking" http connections by failing to +close() them. This can only be fixed at a code level -* Applications must call `close()` on an input stream when the contents of the file are longer needed. -* If an input stream needs to be kept around for reuse, call `unbuffer()` on it. - This will free up the connection until another read operation is invoked. + +Applications MUST call `close()` on an input stream when the contents of +the file are longer needed. ### Inconsistent configuration across a cluster From f56874cb62da0738537608407909f7a119d22d1c Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Fri, 8 Nov 2024 17:07:45 +0000 Subject: [PATCH 3/4] HADOOP-19317. doc changes Move connecting.md section on http leak to HADOOP-19330 PR Change-Id: Ic32817db48340578b4bd0bb926c7be8753cf2fcf --- .../markdown/tools/hadoop-aws/connecting.md | 32 ------------------- 1 file changed, 32 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md index 3e513439ea264..91d981c539081 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md @@ -560,38 +560,6 @@ If `storediag` doesn't connect to your S3 store, *nothing else will*. 
Based on the experience of people who field support calls, here are some of the main connectivity issues which cause problems. -### Connection pool overloaded - -If more connections are needed than the HTTP connection pool has, -then worker threads will block until one is freed. - -If the wait exceeds the time set in `fs.s3a.connection.acquisition.timeout`, -the operation will fail with `"Timeout waiting for connection from pool`. - -This may be retried, but time has been lost, which results in slower operations. -If queries suddenly gets slower as the number of active operations increase, -then this is a possible cause. - -Fixes: - -Increase the value of `fs.s3a.connection.maximum`. -This is the general fix on query engines such as Apache Spark, and Apache Impala -which run many workers threads simultaneously, and do not keep files open past -the duration of a single task within a larger query. - -It can also surface with applications which deliberately keep files open -for extended periods. -These should ideally call `unbuffer()` on the input streams. -This will free up the connection until another read operation is invoked -yet -still re-open faster than if `open(Path)` were invoked. - -Applications may also be "leaking" http connections by failing to -close() them. -This can only be fixed at a code level - -Applications MUST call `close()` on an input stream when the contents of -the file are longer needed. - ### Inconsistent configuration across a cluster All hosts in the cluster need to have the configuration secrets; From 79a4cbf41efa29527f780daf23a6267f3900ece3 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Tue, 19 Nov 2024 14:24:21 +0000 Subject: [PATCH 4/4] HADOOP-19317. 
review feedback Change-Id: I7e511aae105dbcd6a1f33cefb00ed531b265cb9c --- .../hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md index 91d981c539081..17563c2e94b2f 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md @@ -155,7 +155,7 @@ See [Timeouts](performance.html#timeouts). The S3A connector uses [Apache HttpClient](https://hc.apache.org/index.html) to connect to S3 Stores. The client is configured to create a pool of HTTP connections with S3, so that once -the initial set of connections have been made they can re-used for followup operations. +the initial set of connections have been made they can be re-used for followup operations. Core aspects of pool settings are: * The pool size is set by `fs.s3a.connection.maximum` -if a process asks for more connections than this then