Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions yarn-project/telemetry-client/src/event_loop_monitor.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import { promiseWithResolvers } from '@aztec/foundation/promise';
import { Timer } from '@aztec/foundation/timer';

import { EVENT_LOOP_LAG } from './metrics.js';
import { type Meter, type ObservableGauge, type ObservableResult, ValueType } from './telemetry.js';

/**
 * Reports event loop lag through an observable gauge.
 *
 * Whenever the gauge is observed while the monitor is started, the monitor times how long it
 * takes for a `setImmediate` callback to run and reports that delay as the gauge value.
 */
export class EventLoopMonitor {
  private eventLoopLag: ObservableGauge;
  /** True between a call to `start()` and the matching call to `stop()`. */
  private started = false;

  /**
   * @param meter - Meter used to create the event loop lag gauge.
   */
  constructor(meter: Meter) {
    this.eventLoopLag = meter.createObservableGauge(EVENT_LOOP_LAG, {
      unit: 'us',
      valueType: ValueType.INT,
      description: 'How busy is the event loop',
    });
  }

  /**
   * Registers the lag measurement callback on the gauge.
   * Calling this while the monitor is already started is a no-op.
   */
  start(): void {
    if (this.started) {
      return;
    }
    // Record the state change so repeated start() calls are ignored and stop() knows it has work to do.
    this.started = true;
    this.eventLoopLag.addCallback(this.measureLag);
  }

  /**
   * Unregisters the lag measurement callback from the gauge.
   * Calling this while the monitor is not started is a no-op.
   */
  stop(): void {
    if (!this.started) {
      return;
    }
    this.started = false;
    this.eventLoopLag.removeCallback(this.measureLag);
  }

  /**
   * Gauge callback: measures the delay before a `setImmediate` callback runs and reports it.
   * Declared as an arrow-function property so the same function reference is passed to both
   * `addCallback` and `removeCallback`.
   *
   * @param obs - Observable result that receives the measured lag.
   */
  private measureLag = async (obs: ObservableResult): Promise<void> => {
    const timer = new Timer();
    const { promise, resolve } = promiseWithResolvers<number>();
    // How long does it take to schedule the next macro task?
    // If this number spikes then we are either (1) blocking the event loop with long-running sync code
    // or (2) spamming the event loop with micro tasks.
    setImmediate(() => {
      resolve(timer.us());
    });

    const lag = await promise;
    // The gauge is declared with an integer value type, so drop any fractional part.
    obs.observe(Math.floor(lag));
  };
}
2 changes: 2 additions & 0 deletions yarn-project/telemetry-client/src/metrics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -124,3 +124,5 @@ export const PROOF_VERIFIER_COUNT = 'aztec.proof_verifier.count';

export const VALIDATOR_RE_EXECUTION_TIME = 'aztec.validator.re_execution_time';
export const VALIDATOR_FAILED_REEXECUTION_COUNT = 'aztec.validator.failed_reexecution_count';

export const EVENT_LOOP_LAG = 'aztec.event_loop_lag';
9 changes: 9 additions & 0 deletions yarn-project/telemetry-client/src/otel.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,15 @@ import { BatchSpanProcessor, NodeTracerProvider } from '@opentelemetry/sdk-trace
import { ATTR_SERVICE_NAME, ATTR_SERVICE_VERSION } from '@opentelemetry/semantic-conventions';

import { type TelemetryClientConfig } from './config.js';
import { EventLoopMonitor } from './event_loop_monitor.js';
import { linearBuckets } from './histogram_utils.js';
import { registerOtelLoggerProvider } from './otel_logger_provider.js';
import { getOtelResource } from './otel_resource.js';
import { type Gauge, type TelemetryClient } from './telemetry.js';

export class OpenTelemetryClient implements TelemetryClient {
hostMetrics: HostMetrics | undefined;
eventLoopMonitor: EventLoopMonitor | undefined;
targetInfo: Gauge | undefined;
private meters: Map<string, Meter> = new Map<string, Meter>();
private tracers: Map<string, Tracer> = new Map<string, Tracer>();
Expand Down Expand Up @@ -87,6 +89,10 @@ export class OpenTelemetryClient implements TelemetryClient {
meterProvider: this.meterProvider,
});

this.eventLoopMonitor = new EventLoopMonitor(
this.meterProvider.getMeter(this.resource.attributes[ATTR_SERVICE_NAME] as string),
);

// See these two links for more information on providing target information:
// https://opentelemetry.io/docs/specs/otel/compatibility/prometheus_and_openmetrics/#resource-attributes
// https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md#supporting-target-metadata-in-both-push-based-and-pull-based-systems
Expand All @@ -96,6 +102,7 @@ export class OpenTelemetryClient implements TelemetryClient {

this.targetInfo.record(1, this.resource.attributes);
this.hostMetrics.start();
this.eventLoopMonitor.start();
}

public isEnabled() {
Expand All @@ -111,6 +118,8 @@ export class OpenTelemetryClient implements TelemetryClient {
}

public async stop() {
this.eventLoopMonitor?.stop();

const flushAndShutdown = async (provider: { forceFlush: () => Promise<void>; shutdown: () => Promise<void> }) => {
await provider.forceFlush();
await provider.shutdown();
Expand Down