-
Notifications
You must be signed in to change notification settings - Fork 9.2k
HADOOP-19559: S3A Analytics-Accelerator Add IoStatistics support #7763
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: trunk
Are you sure you want to change the base?
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -459,6 +459,14 @@ public enum Statistic { | |
| "Gauge of active memory in use", | ||
| TYPE_GAUGE), | ||
|
|
||
| ANALYTICS_GET_REQUESTS( | ||
| StreamStatisticNames.STREAM_READ_ANALYTICS_GET_REQUESTS, | ||
| "GET requests made by analytics streams", | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is a nit, but I believe an important one. We have to make sure we are giving the right name and description, as well as doing it in the right place. 2/ Next is the name we had for the stream read. 3/ We should make sure the messages are similar to the others. For example, we should call this |
||
| TYPE_COUNTER), | ||
| ANALYTICS_HEAD_REQUESTS( | ||
| StreamStatisticNames.STREAM_READ_ANALYTICS_HEAD_REQUESTS, | ||
| "HEAD requests made by analytics streams", | ||
| TYPE_COUNTER), | ||
| /* Stream Write statistics */ | ||
|
|
||
| STREAM_WRITE_EXCEPTIONS( | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,52 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.hadoop.fs.s3a.impl.streams; | ||
vaibhav5140 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| import org.apache.hadoop.fs.s3a.statistics.S3AInputStreamStatistics; | ||
| import org.apache.hadoop.fs.statistics.DurationTracker; | ||
| import software.amazon.s3.analyticsaccelerator.util.RequestCallback; | ||
|
Check failure on line 23 in hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/streams/AnalyticsRequestCallback.java
|
||
|
|
||
| /** | ||
| * Implementation of AAL's RequestCallback interface that tracks analytics operations. | ||
| */ | ||
| public class AnalyticsRequestCallback implements RequestCallback { | ||
|
Check failure on line 28 in hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/streams/AnalyticsRequestCallback.java
|
||
| private final S3AInputStreamStatistics statistics; | ||
|
|
||
| /** | ||
| * Create a new callback instance. | ||
| * @param statistics the statistics to update | ||
| */ | ||
| public AnalyticsRequestCallback(S3AInputStreamStatistics statistics) { | ||
| this.statistics = statistics; | ||
| } | ||
|
|
||
| @Override | ||
vaibhav5140 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| public void onGetRequest() { | ||
| statistics.incrementAnalyticsGetRequests(); | ||
| // Update ACTION_HTTP_GET_REQUEST statistic | ||
| DurationTracker tracker = statistics.initiateGetRequest(); | ||
|
||
| tracker.close(); | ||
| } | ||
|
|
||
| @Override | ||
| public void onHeadRequest() { | ||
| statistics.incrementAnalyticsHeadRequests(); | ||
| } | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -28,6 +28,7 @@ | |
| import software.amazon.s3.analyticsaccelerator.util.InputPolicy; | ||
| import software.amazon.s3.analyticsaccelerator.util.OpenStreamInformation; | ||
| import software.amazon.s3.analyticsaccelerator.util.S3URI; | ||
| import software.amazon.s3.analyticsaccelerator.util.RequestCallback; | ||
|
Check failure on line 31 in hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/streams/AnalyticsStream.java
|
||
|
|
||
| import org.slf4j.Logger; | ||
| import org.slf4j.LoggerFactory; | ||
|
|
@@ -48,13 +49,17 @@ | |
| private S3SeekableInputStream inputStream; | ||
| private long lastReadCurrentPos = 0; | ||
| private volatile boolean closed; | ||
| private final long contentLength; | ||
| private final long lengthLimit; | ||
|
|
||
| public static final Logger LOG = LoggerFactory.getLogger(AnalyticsStream.class); | ||
|
|
||
| public AnalyticsStream(final ObjectReadParameters parameters, | ||
| final S3SeekableInputStreamFactory s3SeekableInputStreamFactory) throws IOException { | ||
| super(InputStreamType.Analytics, parameters); | ||
| S3ObjectAttributes s3Attributes = parameters.getObjectAttributes(); | ||
| this.contentLength = s3Attributes.getLen(); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why do you need this?
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. getLen() is needed for length limiting; it ensures AnalyticsStream respects the declared file length from the openFile() options rather than reading the entire S3 object. |
||
| this.lengthLimit = s3Attributes.getLen(); | ||
| this.inputStream = s3SeekableInputStreamFactory.createStream(S3URI.of(s3Attributes.getBucket(), | ||
| s3Attributes.getKey()), buildOpenStreamInformation(parameters)); | ||
| getS3AStreamStatistics().streamOpened(InputStreamType.Analytics); | ||
|
|
@@ -63,13 +68,23 @@ | |
| @Override | ||
| public int read() throws IOException { | ||
| throwIfClosed(); | ||
| if (getPos() >= lengthLimit) { | ||
| return -1; // EOF reached due to length limit | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we need to close the underlying stream here before returning? We might check this against the other Stream implementations. |
||
| } | ||
| getS3AStreamStatistics().readOperationStarted(getPos(), 1); | ||
|
|
||
| int bytesRead; | ||
| try { | ||
| bytesRead = inputStream.read(); | ||
| } catch (IOException ioe) { | ||
| onReadFailure(ioe); | ||
| throw ioe; | ||
| } | ||
|
|
||
| if (bytesRead != -1) { | ||
| incrementBytesRead(1); | ||
| } | ||
|
|
||
| return bytesRead; | ||
| } | ||
|
|
||
|
|
@@ -105,26 +120,47 @@ | |
| */ | ||
| public int readTail(byte[] buf, int off, int len) throws IOException { | ||
| throwIfClosed(); | ||
| getS3AStreamStatistics().readOperationStarted(getPos(), len); | ||
|
|
||
| int bytesRead; | ||
| try { | ||
| bytesRead = inputStream.readTail(buf, off, len); | ||
| } catch (IOException ioe) { | ||
| onReadFailure(ioe); | ||
| throw ioe; | ||
| } | ||
|
|
||
| if (bytesRead > 0) { | ||
| incrementBytesRead(bytesRead); | ||
| } | ||
|
|
||
| return bytesRead; | ||
| } | ||
|
|
||
| @Override | ||
| public int read(byte[] buf, int off, int len) throws IOException { | ||
| throwIfClosed(); | ||
| long pos = getPos(); | ||
| if (pos >= lengthLimit) { | ||
| return -1; // EOF reached due to length limit | ||
| } | ||
|
|
||
|
Check failure on line 147 in hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/streams/AnalyticsStream.java
|
||
| // Limit read length to not exceed the length limit | ||
| int maxRead = (int) Math.min(len, lengthLimit - pos); | ||
| getS3AStreamStatistics().readOperationStarted(pos, maxRead); | ||
|
|
||
| int bytesRead; | ||
| try { | ||
| bytesRead = inputStream.read(buf, off, len); | ||
| bytesRead = inputStream.read(buf, off, maxRead); | ||
| } catch (IOException ioe) { | ||
| onReadFailure(ioe); | ||
| throw ioe; | ||
| } | ||
|
|
||
| if (bytesRead > 0) { | ||
| incrementBytesRead(bytesRead); | ||
| } | ||
|
|
||
| return bytesRead; | ||
| } | ||
|
|
||
|
|
@@ -194,10 +230,13 @@ | |
| } | ||
|
|
||
| private OpenStreamInformation buildOpenStreamInformation(ObjectReadParameters parameters) { | ||
|
|
||
| final RequestCallback requestCallback = new AnalyticsRequestCallback(getS3AStreamStatistics()); | ||
|
|
||
| OpenStreamInformation.OpenStreamInformationBuilder openStreamInformationBuilder = | ||
| OpenStreamInformation.builder() | ||
| .inputPolicy(mapS3AInputPolicyToAAL(parameters.getContext() | ||
| .getInputPolicy())); | ||
| .getInputPolicy())).requestCallback(requestCallback); | ||
|
|
||
| if (parameters.getObjectAttributes().getETag() != null) { | ||
| openStreamInformationBuilder.objectMetadata(ObjectMetadata.builder() | ||
|
|
@@ -235,4 +274,16 @@ | |
| throw new IOException(getKey() + ": " + FSExceptionMessages.STREAM_IS_CLOSED); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Increment the bytes read counter if there is a stats instance | ||
| * and the number of bytes read is more than zero. | ||
| * @param bytesRead number of bytes read | ||
| */ | ||
| private void incrementBytesRead(long bytesRead) { | ||
| getS3AStreamStatistics().bytesRead(bytesRead); | ||
| if (getContext().getStats() != null && bytesRead > 0) { | ||
| getContext().getStats().incrementBytesRead(bytesRead); | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: update these comments to say: GET requests made by the analytics stream.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
actually do we really need new statistics? why can't we re-use the existing ones?
ACTION_HTTP_GET_REQUEST and ACTION_HTTP_HEAD_REQUEST?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually, new analytics-specific statistics provide isolated tracking. If both S3A and Analytics streams are used simultaneously, separate metrics provide precise tracking.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why do we need isolated tracking? Can we ever use mix and match streams?