Skip to content

Commit 5a27459

Browse files
net/metrics: Add metrics for inbound/outbound traffic (#10846)
This PR adds a new metric for inbound / outbound traffic for individual request-response protocols. - The PR is motivated by #10765, which shows a significant number of bytes being downloaded (4-5 MiB/s). This is suspicious for a fully synced validator that is within 1-2 blocks of the tip of the chain. - It suggests a protocol is internally consuming too much bandwidth, leading to network inefficiencies, wasted CPU, and, in the case of the issue, OOM kills. cc @paritytech/sdk-node --------- Signed-off-by: Alexandru Vasile <[email protected]> Co-authored-by: cmd[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent 896b603 commit 5a27459

File tree

4 files changed

+68
-0
lines changed

4 files changed

+68
-0
lines changed

prdoc/pr_10846.prdoc

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
title: 'net/metrics: Add metrics for inbound/outbound traffic '
2+
doc:
3+
- audience: Node Dev
4+
description: |-
5+
This PR adds a new metric for inbound / outbound traffic for individual request-response protocols.
6+
7+
- The PR is motivated by https://github.com/paritytech/polkadot-sdk/issues/10765, which shows a significant number of bytes being downloaded (4-5 MiB/s). This is suspicious for a fully synced validator that is within 1-2 blocks of the tip of the chain.
8+
- It suggests a protocol is internally consuming too much bandwidth, leading to network inefficiencies, wasted CPU, and, in the case of the issue, OOM kills.
9+
10+
cc @paritytech/sdk-node
11+
crates:
12+
- name: sc-network
13+
bump: patch

substrate/client/network/src/litep2p/shim/request_response/metrics.rs

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,4 +75,44 @@ impl RequestResponseMetrics {
7575
.observe(duration.as_secs_f64());
7676
}
7777
}
78+
79+
/// Register inbound bytes (request payload received from peer) to Prometheus
80+
pub fn register_inbound_request_bytes(&self, bytes: usize) {
81+
if let Some(metrics) = &self.metrics {
82+
metrics
83+
.requests_response_bytes_total
84+
.with_label_values(&["in", &self.protocol])
85+
.inc_by(bytes as u64);
86+
}
87+
}
88+
89+
/// Register outbound bytes (response payload sent to peer) to Prometheus
90+
pub fn register_outbound_response_bytes(&self, bytes: usize) {
91+
if let Some(metrics) = &self.metrics {
92+
metrics
93+
.requests_response_bytes_total
94+
.with_label_values(&["out", &self.protocol])
95+
.inc_by(bytes as u64);
96+
}
97+
}
98+
99+
/// Register outbound bytes (request payload sent to peer) to Prometheus
100+
pub fn register_outbound_request_bytes(&self, bytes: usize) {
101+
if let Some(metrics) = &self.metrics {
102+
metrics
103+
.requests_response_bytes_total
104+
.with_label_values(&["out", &self.protocol])
105+
.inc_by(bytes as u64);
106+
}
107+
}
108+
109+
/// Register inbound bytes (response payload received from peer) to Prometheus
110+
pub fn register_inbound_response_bytes(&self, bytes: usize) {
111+
if let Some(metrics) = &self.metrics {
112+
metrics
113+
.requests_response_bytes_total
114+
.with_label_values(&["in", &self.protocol])
115+
.inc_by(bytes as u64);
116+
}
117+
}
78118
}

substrate/client/network/src/litep2p/shim/request_response/mod.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,8 +245,10 @@ impl RequestResponseProtocol {
245245
dial_options,
246246
);
247247

248+
let request_len = request.len();
248249
match self.handle.try_send_request(peer.into(), request, dial_options) {
249250
Ok(request_id) => {
251+
self.metrics.register_outbound_request_bytes(request_len);
250252
self.pending_inbound_responses
251253
.insert(request_id, PendingRequest::new(tx, Instant::now(), fallback_request));
252254
},
@@ -280,6 +282,8 @@ impl RequestResponseProtocol {
280282
request.len(),
281283
);
282284

285+
self.metrics.register_inbound_request_bytes(request.len());
286+
283287
let Some(inbound_queue) = &self.inbound_queue else {
284288
log::trace!(
285289
target: LOG_TARGET,
@@ -350,6 +354,7 @@ impl RequestResponseProtocol {
350354
response.len(),
351355
);
352356

357+
self.metrics.register_inbound_response_bytes(response.len());
353358
let _ = tx.send(Ok((response, self.protocol.clone())));
354359
self.metrics.register_outbound_request_success(started.elapsed());
355360
},
@@ -512,6 +517,8 @@ impl RequestResponseProtocol {
512517
response.len(),
513518
);
514519

520+
self.metrics.register_outbound_response_bytes(response.len());
521+
515522
match sent_feedback {
516523
None => self.handle.send_response(request_id, response),
517524
Some(feedback) =>

substrate/client/network/src/service/metrics.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ pub struct Metrics {
8585
pub requests_in_success_total: HistogramVec,
8686
pub requests_out_failure_total: CounterVec<U64>,
8787
pub requests_out_success_total: HistogramVec,
88+
pub requests_response_bytes_total: CounterVec<U64>,
8889
}
8990

9091
impl Metrics {
@@ -211,6 +212,13 @@ impl Metrics {
211212
},
212213
&["protocol"]
213214
)?, registry)?,
215+
requests_response_bytes_total: prometheus::register(CounterVec::new(
216+
Opts::new(
217+
"substrate_sub_libp2p_requests_response_bytes_total",
218+
"Total bytes sent and received by request-response protocols"
219+
),
220+
&["direction", "protocol"]
221+
)?, registry)?,
214222
})
215223
}
216224
}

0 commit comments

Comments
 (0)