-
Notifications
You must be signed in to change notification settings - Fork 9.2k
HDFS-16949 Introduce inverse quantiles for metrics where higher numer… #5486
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
056a473
a66f26a
6bca0d1
a30980e
8888f8c
62a29be
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,44 @@ | ||
| package org.apache.hadoop.metrics2.util; | ||
|
|
||
| import org.apache.hadoop.util.Preconditions; | ||
| import java.util.ListIterator; | ||
|
|
||
| public class InverseQuantiles extends SampleQuantiles{ | ||
|
|
||
| public InverseQuantiles(Quantile[] quantiles) { | ||
| super(quantiles); | ||
| } | ||
|
|
||
|
|
||
| /** | ||
| * Get the estimated value at the inverse of the specified quantile. | ||
| * Eg: return the value at (1 - 0.99)*count position for quantile 0.99. | ||
| * When count is 100, quantile 0.99 is desired to return the value at the 1st position | ||
| * | ||
| * @param quantile Queried quantile, e.g. 0.50 or 0.99. | ||
| * @return Estimated value at the inverse position of that quantile. | ||
| */ | ||
| long query(double quantile) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. im assuming this logic looks similar to the SampleQuantile, is there any small refactor we could do to DRY, then maybe a couple lines below looks like
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. agreed, that will be much cleaner. Let me make the change. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The DRY method would be to encapsulate the for loop to a protected function "getQuantilesFromSamples".
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated to reverse the traversal order. |
||
| Preconditions.checkState(!samples.isEmpty(), "no data in estimator"); | ||
|
|
||
| int rankMin = 0; | ||
| int desired = (int) ((1 - quantile) * count); | ||
|
|
||
| ListIterator<SampleItem> it = samples.listIterator(); | ||
| SampleItem prev; | ||
| SampleItem cur = it.next(); | ||
| for (int i = 1; i < samples.size(); i++) { | ||
| prev = cur; | ||
| cur = it.next(); | ||
|
|
||
| rankMin += prev.g; | ||
|
|
||
| if (rankMin + cur.g + cur.delta > desired + (allowableError(i) / 2)) { | ||
| return prev.value; | ||
| } | ||
| } | ||
|
|
||
| // edge case of wanting max value | ||
| return samples.get(0).value; | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -118,4 +118,36 @@ public void testQuantileError() throws IOException { | |
| } | ||
| } | ||
| } | ||
|
|
||
| @Test | ||
| public void testInverseQuantiles() { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: can you just add a description of what the test is doing |
||
| SampleQuantiles estimatorWithInverseQuantiles = new InverseQuantiles(quantiles); | ||
| final int count = 100000; | ||
| Random r = new Random(0xDEADDEAD); | ||
| Long[] values = new Long[count]; | ||
| for (int i = 0; i < count; i++) { | ||
| values[i] = (long) (i + 1); | ||
| } | ||
| // Do 10 shuffle/insert/check cycles | ||
| for (int i = 0; i < 10; i++) { | ||
| System.out.println("Starting run " + i); | ||
| Collections.shuffle(Arrays.asList(values), r); | ||
| estimatorWithInverseQuantiles.clear(); | ||
| for (int j = 0; j < count; j++) { | ||
| estimatorWithInverseQuantiles.insert(values[j]); | ||
| } | ||
| Map<Quantile, Long> snapshot; | ||
| snapshot = estimatorWithInverseQuantiles.snapshot(); | ||
| for (Quantile q : quantiles) { | ||
| long actual = (long) ((1 - q.quantile) * count); | ||
| long error = (long) ((0.1 - q.error) * count); | ||
| long estimate = snapshot.get(q); | ||
| System.out | ||
| .println(String.format("For quantile %f Expected %d with error %d, estimated %d", | ||
| q.quantile, actual, error, estimate)); | ||
| assertThat(estimate <= actual + error).isTrue(); | ||
| assertThat(estimate >= actual - error).isTrue(); | ||
| } | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is estimator variable used?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes, estimator is the one which will sort the sample and populate quantiles.