-
Notifications
You must be signed in to change notification settings - Fork 9.2k
HADOOP-19096: [ABFS] [CST Optimization] Enhancing Client-Side Throttling Metrics Updating Logic #6276
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
HADOOP-19096: [ABFS] [CST Optimization] Enhancing Client-Side Throttling Metrics Updating Logic #6276
Changes from 2 commits
e043dd8
dc3938b
e2d43f2
ad8ce43
108d6de
fa18485
e7ba13b
b4df6fd
94e2559
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -39,6 +39,7 @@ | |
| import org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding; | ||
|
|
||
| import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_CONTINUE; | ||
| import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.*; | ||
|
|
||
| /** | ||
| * The AbfsRestOperation for Rest AbfsClient. | ||
|
|
@@ -277,6 +278,7 @@ String getClientLatency() { | |
| private boolean executeHttpOperation(final int retryCount, | ||
| TracingContext tracingContext) throws AzureBlobFileSystemException { | ||
| AbfsHttpOperation httpOperation; | ||
| boolean wasExceptionThrown = false; | ||
|
||
|
|
||
| try { | ||
| // initialize the HTTP request and open the connection | ||
|
|
@@ -314,7 +316,23 @@ private boolean executeHttpOperation(final int retryCount, | |
| } else if (httpOperation.getStatusCode() == HttpURLConnection.HTTP_UNAVAILABLE) { | ||
| incrementCounter(AbfsStatistic.SERVER_UNAVAILABLE, 1); | ||
| } | ||
|
|
||
| // If no exception occurred here it means http operation was successfully complete and | ||
| // a response from server has been received which might be failure or success. | ||
| LOG.debug("HttpRequest: {}: {}", operationType, httpOperation); | ||
|
|
||
| // If request failed at server and should be retried, we will determine failure reason and retry policy here | ||
anujmodi2021 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| if (client.getRetryPolicy().shouldRetry(retryCount, httpOperation.getStatusCode())) { | ||
| int status = httpOperation.getStatusCode(); | ||
| failureReason = RetryReason.getAbbreviation(null, status, httpOperation.getStorageErrorMessage()); | ||
| return false; | ||
| } | ||
|
|
||
| // If the request has succeeded or failed with non-retrial error, save the operation and return. | ||
| result = httpOperation; | ||
|
|
||
| } catch (UnknownHostException ex) { | ||
| wasExceptionThrown = true; | ||
|
||
| String hostname = null; | ||
| hostname = httpOperation.getHost(); | ||
| failureReason = RetryReason.getAbbreviation(ex, null, null); | ||
|
|
@@ -325,6 +343,7 @@ private boolean executeHttpOperation(final int retryCount, | |
| } | ||
| return false; | ||
| } catch (IOException ex) { | ||
| wasExceptionThrown = true; | ||
| if (LOG.isDebugEnabled()) { | ||
| LOG.debug("HttpRequestFailure: {}, {}", httpOperation, ex); | ||
| } | ||
|
|
@@ -337,32 +356,31 @@ private boolean executeHttpOperation(final int retryCount, | |
|
|
||
| return false; | ||
| } finally { | ||
| int status = httpOperation.getStatusCode(); | ||
| /* | ||
| A status less than 300 (2xx range) or greater than or equal | ||
| to 500 (5xx range) should contribute to throttling metrics being updated. | ||
| Less than 200 or greater than or equal to 500 show failed operations. 2xx | ||
| range contributes to successful operations. 3xx range is for redirects | ||
| and 4xx range is for user errors. These should not be a part of | ||
| throttling backoff computation. | ||
| Updating Client Side Throttling Metrics for relevant response status codes. | ||
| 1. Status code in 2xx range: Successful Operations should contribute | ||
| 2. Status code in 3xx range: Redirection Operations should not contribute | ||
| 3. Status code in 4xx range: User Errors should not contribute | ||
| 4. Status code in 503 range: Server Error should contribute as following: | ||
| a. 503, Ingress Over Account Limit: Should Contribute | ||
| b. 503, Egress Over Account Limit: Should Contribute | ||
| c. 503, TPS Over Account Limit: Should Contribute | ||
| d. 503, Other Server Throttling: Should not contribute | ||
| 5. Status code in 5xx range other than 503: Should not contribute | ||
| 6. IOException and UnknownHostExceptions: Should not contribute | ||
| */ | ||
| boolean updateMetricsResponseCode = (status < HttpURLConnection.HTTP_MULT_CHOICE | ||
| || status >= HttpURLConnection.HTTP_INTERNAL_ERROR); | ||
| if (updateMetricsResponseCode) { | ||
| int statusCode = httpOperation.getStatusCode(); | ||
| boolean shouldUpdateCSTMetrics = (statusCode < HttpURLConnection.HTTP_MULT_CHOICE // Case 1 | ||
anujmodi2021 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| || INGRESS_LIMIT_BREACH_ABBREVIATION.equals(failureReason) // Case 4.a | ||
| || EGRESS_LIMIT_BREACH_ABBREVIATION.equals(failureReason) // Case 4.b | ||
| || OPERATION_LIMIT_BREACH_ABBREVIATION.equals(failureReason)) // Case 4.c | ||
| && !wasExceptionThrown; // Case 6 | ||
|
||
|
|
||
| if (shouldUpdateCSTMetrics) { | ||
| intercept.updateMetrics(operationType, httpOperation); | ||
| } | ||
| } | ||
|
|
||
| LOG.debug("HttpRequest: {}: {}", operationType, httpOperation); | ||
|
|
||
| if (client.getRetryPolicy().shouldRetry(retryCount, httpOperation.getStatusCode())) { | ||
| int status = httpOperation.getStatusCode(); | ||
| failureReason = RetryReason.getAbbreviation(null, status, httpOperation.getStorageErrorMessage()); | ||
| return false; | ||
| } | ||
|
|
||
| result = httpOperation; | ||
|
|
||
| return true; | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,12 +20,8 @@ | |
|
|
||
| import static java.net.HttpURLConnection.HTTP_UNAVAILABLE; | ||
| import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_STATUS_CATEGORY_QUOTIENT; | ||
| import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.EGRESS_OVER_ACCOUNT_LIMIT; | ||
| import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.INGRESS_OVER_ACCOUNT_LIMIT; | ||
| import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.EGRESS_LIMIT_BREACH_ABBREVIATION; | ||
| import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.INGRESS_LIMIT_BREACH_ABBREVIATION; | ||
| import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.OPERATION_BREACH_MESSAGE; | ||
| import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.OPERATION_LIMIT_BREACH_ABBREVIATION; | ||
| import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.*; | ||
| import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.*; | ||
anujmodi2021 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| /** | ||
| * Category that can capture server-response errors for 5XX status-code. | ||
|
|
@@ -56,10 +52,14 @@ String getAbbreviation(final Integer statusCode, | |
| splitedServerErrorMessage)) { | ||
| return EGRESS_LIMIT_BREACH_ABBREVIATION; | ||
| } | ||
| if (OPERATION_BREACH_MESSAGE.equalsIgnoreCase( | ||
| if (TPS_OVER_ACCOUNT_LIMIT.getErrorMessage().equalsIgnoreCase( | ||
| splitedServerErrorMessage)) { | ||
| return OPERATION_LIMIT_BREACH_ABBREVIATION; | ||
|
||
| } | ||
| if (OTHER_SERVER_THROTTLING.getErrorMessage().equalsIgnoreCase( | ||
| splitedServerErrorMessage)) { | ||
| return OTHER_SERVER_THROTTLING_ABBREVIATION; | ||
| } | ||
| return HTTP_UNAVAILABLE + ""; | ||
| } | ||
| return statusCode + ""; | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.