Skip to content

Commit 293ed80

Browse files
xiaoxmeng authored and facebook-github-bot committed
misc: Add fine-grained stats for index connector stats tracking (#12759)
Summary: Add runtime stats to track the wall/cpu times spent in different stages during index lookup processing, and integrate them with Velox operator stats. Add metrics to monitor the index lookup and table scan batch processing time distributions to ease monitoring of an online service. Reviewed By: wenqiwooo Differential Revision: D71599185
1 parent bfb300c commit 293ed80

12 files changed

Lines changed: 241 additions & 67 deletions

velox/common/base/Counters.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -586,6 +586,42 @@ void registerVeloxMetrics() {
586586
// The number of data size exchange requests.
587587
DEFINE_METRIC(kMetricExchangeDataSizeCount, facebook::velox::StatType::COUNT);
588588

589+
/// ================== Index Lookup Counters =================
590+
// The distribution of index lookup result raw bytes in range of [0, 128MB]
591+
// with 128 buckets. It is configured to report the capacity at P50, P90, P99,
592+
// and P100 percentiles.
593+
DEFINE_HISTOGRAM_METRIC(
594+
kMetricIndexLookupResultRawBytes,
595+
1L << 20,
596+
0,
597+
128L << 20,
598+
50,
599+
90,
600+
99,
601+
100);
602+
603+
// The distribution of index lookup result bytes in range of [0, 128MB] with
604+
// 128 buckets. It is configured to report the capacity at P50, P90, P99, and
605+
// P100 percentiles.
606+
DEFINE_HISTOGRAM_METRIC(
607+
kMetricIndexLookupResultBytes, 1L << 20, 0, 128L << 20, 50, 90, 99, 100);
608+
609+
// The time distribution of index lookup time in range of [0, 16s] with 512
610+
// buckets and reports P50, P90, P99, and P100.
611+
DEFINE_HISTOGRAM_METRIC(
612+
kMetricIndexLookupTimeMs, 32, 0, 16L << 10, 50, 90, 99, 100);
613+
614+
// The time distribution of index lookup wait time in range of [0, 16s] with
615+
// 512 buckets and reports P50, P90, P99, and P100.
616+
DEFINE_HISTOGRAM_METRIC(
617+
kMetricIndexLookupWaitTimeMs, 32, 0, 16L << 10, 50, 90, 99, 100);
618+
619+
/// ================== Table Scan Counters =================
620+
// The time distribution of table scan batch processing time in range of [0,
621+
// 16s] with 512 buckets and reports P50, P90, P99, and P100.
622+
DEFINE_HISTOGRAM_METRIC(
623+
kMetricTableScanBatchProcessTimeMs, 32, 0, 16L << 10, 50, 90, 99, 100);
624+
589625
/// ================== Storage Counters =================
590626

591627
// The time distribution of storage IO throttled duration in range of [0, 30s]

velox/common/base/Counters.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,4 +360,19 @@ constexpr folly::StringPiece kMetricStorageGlobalThrottled{
360360

361361
constexpr folly::StringPiece kMetricStorageNetworkThrottled{
362362
"velox.storage_network_throttled_count"};
363+
364+
constexpr folly::StringPiece kMetricIndexLookupResultRawBytes{
365+
"velox.index_lookup_result_raw_bytes"};
366+
367+
constexpr folly::StringPiece kMetricIndexLookupResultBytes{
368+
"velox.index_lookup_result_bytes"};
369+
370+
constexpr folly::StringPiece kMetricIndexLookupTimeMs{
371+
"velox.index_lookup_time_ms"};
372+
373+
constexpr folly::StringPiece kMetricIndexLookupWaitTimeMs{
374+
"velox.index_lookup_wait_time_ms"};
375+
376+
constexpr folly::StringPiece kMetricTableScanBatchProcessTimeMs{
377+
"velox.table_scan_batch_process_time_ms"};
363378
} // namespace facebook::velox

velox/common/base/RuntimeMetrics.cpp

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -53,44 +53,53 @@ void RuntimeMetric::printMetric(std::stringstream& stream) const {
5353
case RuntimeCounter::Unit::kNanos:
5454
stream << " sum: " << succinctNanos(sum) << ", count: " << count
5555
<< ", min: " << succinctNanos(min)
56-
<< ", max: " << succinctNanos(max);
56+
<< ", max: " << succinctNanos(max)
57+
<< ", avg: " << succinctNanos(count == 0 ? 0 : sum / count);
5758
break;
5859
case RuntimeCounter::Unit::kBytes:
5960
stream << " sum: " << succinctBytes(sum) << ", count: " << count
6061
<< ", min: " << succinctBytes(min)
61-
<< ", max: " << succinctBytes(max);
62+
<< ", max: " << succinctBytes(max)
63+
<< ", avg: " << succinctBytes(count == 0 ? 0 : sum / count);
6264
break;
6365
case RuntimeCounter::Unit::kNone:
6466
[[fallthrough]];
6567
default:
6668
stream << " sum: " << sum << ", count: " << count << ", min: " << min
67-
<< ", max: " << max;
69+
<< ", max: " << max << ", avg: " << (count == 0 ? 0 : sum / count);
6870
}
6971
}
7072

7173
std::string RuntimeMetric::toString() const {
7274
switch (unit) {
7375
case RuntimeCounter::Unit::kNanos:
7476
return fmt::format(
75-
"sum:{}, count:{}, min:{}, max:{}",
77+
"sum:{}, count:{}, min:{}, max:{}, avg: {}",
7678
succinctNanos(sum),
7779
count,
7880
succinctNanos(min),
79-
succinctNanos(max));
81+
succinctNanos(max),
82+
succinctNanos(count == 0 ? 0 : sum / count));
8083
break;
8184
case RuntimeCounter::Unit::kBytes:
8285
return fmt::format(
83-
"sum:{}, count:{}, min:{}, max:{}",
86+
"sum:{}, count:{}, min:{}, max:{}, avg: {}",
8487
succinctBytes(sum),
8588
count,
8689
succinctBytes(min),
87-
succinctBytes(max));
90+
succinctBytes(max),
91+
succinctBytes(count == 0 ? 0 : sum / count));
8892
break;
8993
case RuntimeCounter::Unit::kNone:
9094
[[fallthrough]];
9195
default:
9296
return fmt::format(
93-
"sum:{}, count:{}, min:{}, max:{}", sum, count, min, max);
97+
"sum:{}, count:{}, min:{}, max:{}, avg: {}",
98+
sum,
99+
count,
100+
min,
101+
max,
102+
count == 0 ? 0 : sum / count);
94103
}
95104
}
96105

velox/common/base/tests/RuntimeMetricsTest.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,11 +49,12 @@ TEST_F(RuntimeMetricsTest, basic) {
4949

5050
ASSERT_EQ(
5151
fmt::format(
52-
"sum:{}, count:{}, min:{}, max:{}",
52+
"sum:{}, count:{}, min:{}, max:{}, avg: {}",
5353
rm1.sum,
5454
rm1.count,
5555
rm1.min,
56-
rm1.max),
56+
rm1.max,
57+
rm1.sum / rm1.count),
5758
rm1.toString());
5859

5960
RuntimeMetric rm2;
@@ -74,11 +75,13 @@ TEST_F(RuntimeMetricsTest, basic) {
7475

7576
RuntimeMetric byteRm(RuntimeCounter::Unit::kBytes);
7677
byteRm.addValue(5);
77-
ASSERT_EQ(byteRm.toString(), "sum:5B, count:1, min:5B, max:5B");
78+
ASSERT_EQ(byteRm.toString(), "sum:5B, count:1, min:5B, max:5B, avg: 5B");
7879

7980
RuntimeMetric timeRm(RuntimeCounter::Unit::kNanos);
8081
timeRm.addValue(2'000);
81-
ASSERT_EQ(timeRm.toString(), "sum:2.00us, count:1, min:2.00us, max:2.00us");
82+
ASSERT_EQ(
83+
timeRm.toString(),
84+
"sum:2.00us, count:1, min:2.00us, max:2.00us, avg: 2.00us");
8285
}
8386

8487
} // namespace facebook::velox

velox/docs/monitoring/metrics.rst

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -560,3 +560,47 @@ Hive Connector
560560
- The distribution of hive sort writer finish processing time slice in range
561561
of[0, 120s] with 60 buckets. It is configured to report latency at P50,
562562
P90, P99, and P100 percentiles.
563+
564+
Index Join
565+
----------
566+
567+
.. list-table::
568+
:widths: 40 10 50
569+
:header-rows: 1
570+
571+
* - Metric Name
572+
- Type
573+
- Description
574+
* - index_lookup_time_ms
575+
- Histogram
576+
- The time distribution of index lookup time in range of [0, 16s] with 512
577+
buckets and reports P50, P90, P99, and P100.
578+
* - index_lookup_wait_time_ms
579+
- Histogram
580+
- The time distribution of index lookup wait time in range of [0, 16s] with 512
581+
buckets and reports P50, P90, P99, and P100.
582+
* - index_lookup_result_raw_bytes
583+
- Histogram
584+
- The distribution of index lookup result raw bytes in range of [0, 128MB]
585+
with 128 buckets. It is configured to report the capacity at P50, P90, P99,
586+
and P100 percentiles.
587+
* - index_lookup_result_bytes
588+
- Histogram
589+
- The distribution of index lookup result bytes in range of [0, 128MB] with
590+
128 buckets. It is configured to report the capacity at P50, P90, P99, and
591+
P100 percentiles.
592+
593+
Table Scan
594+
----------
595+
596+
.. list-table::
597+
:widths: 40 10 50
598+
:header-rows: 1
599+
600+
* - Metric Name
601+
- Type
602+
- Description
603+
* - table_scan_batch_process_time_ms
604+
- Histogram
605+
- The time distribution of table scan batch processing time in range of [0,
606+
16s] with 512 buckets and reports P50, P90, P99, and P100.

velox/docs/monitoring/stats.rst

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -142,12 +142,32 @@ These stats are reported only by IndexLookupJoin operator
142142
* - Stats
143143
- Unit
144144
- Description
145-
* - lookupWallNanos
145+
* - connectorLookupWallNanos
146146
- nanos
147-
- The walltime in nanoseconds that the index connector do the lookup.
148-
* - lookupCpuNanos
147+
- The end-to-end walltime in nanoseconds that the index connector does the lookup.
148+
* - clientlookupWaitWallNanos
149149
- nanos
150-
- The cpu time in nanoseconds that the index connector do the lookup.
150+
- The walltime in nanoseconds that the storage client waits for the lookup from
151+
remote storage.
152+
* - connectorResultPrepareCpuNanos
153+
- nanos
154+
- The cpu time in nanoseconds that the index connector processes the response from the storage
155+
client for follow-up processing by the index join operator.
156+
* - clientRequestProcessCpuNanos
157+
- nanos
158+
- The cpu time in nanoseconds that the storage client processes a request for remote
159+
storage lookup, such as encoding the lookup input data into a remote storage request.
160+
* - clientResultProcessCpuNanos
161+
- nanos
162+
- The cpu time in nanoseconds that the storage client processes the response from remote
163+
storage lookup, such as decoding the response data into velox vectors.
164+
* - clientLookupResultRawSize
165+
- bytes
166+
- The byte size of the raw result received from the remote storage lookup.
167+
* - clientLookupResultSize
168+
- bytes
169+
- The byte size of the result data in velox vectors that are decoded from the raw data
170+
received from the remote storage lookup.
151171

152172
Spilling
153173
--------

velox/exec/IndexLookupJoin.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -779,14 +779,15 @@ void IndexLookupJoin::recordConnectorStats() {
779779
lockedStats->runtimeStats.erase(name);
780780
lockedStats->runtimeStats.emplace(name, std::move(value));
781781
}
782-
if (connectorStats.count(kConnectorLookupCpuTime) != 0) {
783-
VELOX_CHECK_EQ(
784-
connectorStats[kConnectorLookupCpuTime].count,
785-
connectorStats[kConnectorLookupWallTime].count);
782+
if (connectorStats.count(kConnectorLookupWallTime) != 0) {
786783
const CpuWallTiming backgroundTiming{
787-
static_cast<uint64_t>(connectorStats[kConnectorLookupCpuTime].count),
784+
static_cast<uint64_t>(connectorStats[kConnectorLookupWallTime].count),
788785
static_cast<uint64_t>(connectorStats[kConnectorLookupWallTime].sum),
789-
static_cast<uint64_t>(connectorStats[kConnectorLookupCpuTime].sum)};
786+
// NOTE: this might not be accurate as it doesn't include the time spent
787+
// inside the index storage client.
788+
static_cast<uint64_t>(connectorStats[kConnectorResultPrepareTime].sum) +
789+
connectorStats[kClientRequestProcessTime].sum +
790+
connectorStats[kClientResultProcessTime].sum};
790791
lockedStats->backgroundTiming.clear();
791792
lockedStats->backgroundTiming.add(backgroundTiming);
792793
}

velox/exec/IndexLookupJoin.h

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,35 @@ class IndexLookupJoin : public Operator {
4242
void close() override;
4343

4444
/// Defines lookup runtime stats.
45-
/// The walltime time that the index connector do the lookup.
46-
static inline const std::string kConnectorLookupWallTime{"lookupWallNanos"};
47-
/// The cpu time that the index connector do the lookup.
48-
static inline const std::string kConnectorLookupCpuTime{"lookupCpuNanos"};
45+
/// The end-to-end walltime in nanoseconds that the index connector does the
46+
/// lookup.
47+
static inline const std::string kConnectorLookupWallTime{
48+
"connectorLookupWallNanos"};
49+
/// The cpu time in nanoseconds that the index connector processes the response
50+
/// from the storage client for follow-up processing by the index join operator.
51+
static inline const std::string kConnectorResultPrepareTime{
52+
"connectorResultPrepareCpuNanos"};
53+
/// The cpu time in nanoseconds that the storage client processes a request for
54+
/// remote storage lookup, such as encoding the lookup input data into a remote
55+
/// storage request.
56+
static inline const std::string kClientRequestProcessTime{
57+
"clientRequestProcessCpuNanos"};
58+
/// The walltime in nanoseconds that the storage client waits for the lookup
59+
/// from remote storage.
60+
static inline const std::string kClientLookupWaitWallTime{
61+
"clientlookupWaitWallNanos"};
62+
/// The cpu time in nanoseconds that the storage client processes the response from
63+
/// remote storage lookup such as decoding the response data into velox
64+
/// vectors.
65+
static inline const std::string kClientResultProcessTime{
66+
"clientResultProcessCpuNanos"};
67+
/// The byte size of the raw result received from the remote storage lookup.
68+
static inline const std::string kClientLookupResultRawSize{
69+
"clientLookupResultRawSize"};
70+
/// The byte size of the result data in velox vectors that are decoded from
71+
/// the raw data received from the remote storage lookup.
72+
static inline const std::string kClientLookupResultSize{
73+
"clientLookupResultSize"};
4974

5075
private:
5176
using LookupResultIter = connector::IndexSource::LookupResultIterator;

velox/exec/TableScan.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,10 @@ RowVectorPtr TableScan::getOutput() {
282282
{maxFilteringRatio_,
283283
1.0 * data->size() / readBatchSize,
284284
1.0 / kMaxSelectiveBatchSizeMultiplier});
285+
if (ioTimeUs > 0) {
286+
RECORD_HISTOGRAM_METRIC_VALUE(
287+
velox::kMetricTableScanBatchProcessTimeMs, ioTimeUs / 1'000);
288+
}
285289
return data;
286290
}
287291
continue;

velox/exec/tests/IndexLookupJoinTest.cpp

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1817,13 +1817,22 @@ DEBUG_ONLY_TEST_P(IndexLookupJoinTest, runtimeStats) {
18171817
numProbeBatches);
18181818
ASSERT_GT(runtimeStats.at(IndexLookupJoin::kConnectorLookupWallTime).sum, 0);
18191819
ASSERT_EQ(
1820-
runtimeStats.at(IndexLookupJoin::kConnectorLookupCpuTime).count,
1820+
runtimeStats.at(IndexLookupJoin::kClientLookupWaitWallTime).count,
18211821
numProbeBatches);
1822-
ASSERT_GT(runtimeStats.at(IndexLookupJoin::kConnectorLookupCpuTime).sum, 0);
1822+
ASSERT_GT(runtimeStats.at(IndexLookupJoin::kClientLookupWaitWallTime).sum, 0);
1823+
ASSERT_EQ(
1824+
runtimeStats.at(IndexLookupJoin::kConnectorResultPrepareTime).count,
1825+
numProbeBatches);
1826+
ASSERT_GT(
1827+
runtimeStats.at(IndexLookupJoin::kConnectorResultPrepareTime).sum, 0);
1828+
ASSERT_EQ(runtimeStats.count(IndexLookupJoin::kClientRequestProcessTime), 0);
1829+
ASSERT_EQ(runtimeStats.count(IndexLookupJoin::kClientResultProcessTime), 0);
1830+
ASSERT_EQ(runtimeStats.count(IndexLookupJoin::kClientLookupResultSize), 0);
1831+
ASSERT_EQ(runtimeStats.count(IndexLookupJoin::kClientLookupResultRawSize), 0);
18231832
ASSERT_THAT(
18241833
operatorStats.toString(true, true),
18251834
testing::MatchesRegex(
1826-
".*Runtime stats.*lookupWallNanos.*lookupCpuNanos.*"));
1835+
".*Runtime stats.*connectorLookupWallNanos:.*clientlookupWaitWallNanos.*connectorResultPrepareCpuNanos.*"));
18271836
}
18281837

18291838
TEST_P(IndexLookupJoinTest, joinFuzzer) {

0 commit comments

Comments
 (0)