Skip to content

Commit 31c0800

Browse files
authored
Include river errors in server side spans (#283)
## Why

This includes errors on the spans generated by the server so we have more error info. I also changed the names of the spans to match the format that river-python is using. In order to write a test for this, I had to make it so the tracer is created per-transport rather than having a global one.

## What changed

- De-globalize the tracer. The transport will construct and own a tracer instance (obtained from the global tracer provider).
- Add river errors to the server-side spans.
- Update the server/client span names to match the format we are using for river-python.
- This may make it harder to look at historical span data; let me know if you think this is a bad idea and I can revert it.

## Versioning

- [ ] Breaking protocol change
- [ ] Breaking ts/js API change

<!-- Kind reminder to add tests and updated documentation if needed -->
1 parent 1007a55 commit 31c0800

File tree

11 files changed

+184
-44
lines changed

11 files changed

+184
-44
lines changed

router/client.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,7 @@ function handleProc(
288288
const procClosesWithInit = procType === 'rpc' || procType === 'subscription';
289289
const streamId = generateId();
290290
const { span, ctx } = createProcTelemetryInfo(
291+
transport.tracer,
291292
session,
292293
procType,
293294
serviceName,

router/server.ts

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,12 @@ import { Value, ValueError } from '@sinclair/typebox/value';
3636
import { Err, Result, Ok, ErrResult } from './result';
3737
import { EventMap } from '../transport/events';
3838
import { coerceErrorString } from '../transport/stringifyError';
39-
import { Span, SpanStatusCode } from '@opentelemetry/api';
40-
import { createHandlerSpan, PropagationContext } from '../tracing';
39+
import { Span } from '@opentelemetry/api';
40+
import {
41+
createHandlerSpan,
42+
PropagationContext,
43+
recordRiverError,
44+
} from '../tracing';
4145
import { ServerHandshakeOptions } from './handshake';
4246
import { Connection } from '../transport/connection';
4347
import { ServerTransport } from '../transport/server';
@@ -198,6 +202,7 @@ class RiverServer<Services extends AnyServiceSchemaMap>
198202

199203
// if its not a cancelled stream, validate and create a new stream
200204
createHandlerSpan(
205+
transport.tracer,
201206
newStreamProps.initialSession,
202207
newStreamProps.procedure.type,
203208
newStreamProps.serviceName,
@@ -423,6 +428,8 @@ class RiverServer<Services extends AnyServiceSchemaMap>
423428
};
424429

425430
const onServerCancel = (e: Static<typeof ReaderErrorSchema>) => {
431+
recordRiverError(span, e);
432+
426433
if (reqReadable.isClosed() && resWritable.isClosed()) {
427434
// Everything already closed, no-op.
428435
return;
@@ -476,6 +483,10 @@ class RiverServer<Services extends AnyServiceSchemaMap>
476483
Result<Static<PayloadType>, Static<BaseErrorSchemaType>>
477484
>({
478485
writeCb: (response) => {
486+
if (!response.ok) {
487+
recordRiverError(span, response.payload);
488+
}
489+
479490
sessionScopedSend({
480491
streamId,
481492
controlFlags: procClosesWithResponse
@@ -519,7 +530,6 @@ class RiverServer<Services extends AnyServiceSchemaMap>
519530
const errorMsg = coerceErrorString(err);
520531

521532
span.recordException(err instanceof Error ? err : new Error(errorMsg));
522-
span.setStatus({ code: SpanStatusCode.ERROR });
523533

524534
this.log?.error(
525535
`${serviceName}.${procedureName} handler threw an uncaught error`,

testUtil/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import { SessionStateGraph } from '../transport/sessionStateMachine/transitions'
1919
import { BaseErrorSchemaType } from '../router/errors';
2020
import { ClientTransport } from '../transport/client';
2121
import { ServerTransport } from '../transport/server';
22+
import { getTracer } from '../tracing';
2223

2324
export {
2425
createMockTransportNetwork,
@@ -186,6 +187,7 @@ export function dummySession() {
186187
},
187188
testingSessionOptions,
188189
currentProtocolVersion,
190+
getTracer(),
189191
);
190192
}
191193

tracing/index.ts

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,18 @@ import {
22
Context,
33
Span,
44
SpanKind,
5+
SpanStatusCode,
56
context,
67
propagation,
78
trace,
9+
Tracer,
810
} from '@opentelemetry/api';
9-
import { version as RIVER_VERSION } from '../package.json';
10-
import { ValidProcType } from '../router';
11+
import { BaseErrorSchemaType, RIVER_VERSION, ValidProcType } from '../router';
1112
import { Connection } from '../transport';
1213
import { MessageMetadata } from '../logging';
1314
import { ClientSession } from '../transport/sessionStateMachine/transitions';
1415
import { IdentifiedSession } from '../transport/sessionStateMachine/common';
16+
import { Static } from '@sinclair/typebox';
1517

1618
export interface PropagationContext {
1719
traceparent: string;
@@ -36,6 +38,7 @@ export function getPropagationContext(
3638
}
3739

3840
export function createSessionTelemetryInfo(
41+
tracer: Tracer,
3942
sessionId: string,
4043
to: string,
4144
from: string,
@@ -46,7 +49,7 @@ export function createSessionTelemetryInfo(
4649
: context.active();
4750

4851
const span = tracer.startSpan(
49-
`session ${sessionId}`,
52+
`river.session.${sessionId}`,
5053
{
5154
attributes: {
5255
component: 'river',
@@ -64,6 +67,7 @@ export function createSessionTelemetryInfo(
6467
}
6568

6669
export function createConnectionTelemetryInfo(
70+
tracer: Tracer,
6771
connection: Connection,
6872
info: TelemetryInfo,
6973
): TelemetryInfo {
@@ -85,6 +89,7 @@ export function createConnectionTelemetryInfo(
8589
}
8690

8791
export function createProcTelemetryInfo(
92+
tracer: Tracer,
8893
session: ClientSession<Connection>,
8994
kind: ValidProcType,
9095
serviceName: string,
@@ -93,7 +98,7 @@ export function createProcTelemetryInfo(
9398
): TelemetryInfo {
9499
const baseCtx = context.active();
95100
const span = tracer.startSpan(
96-
`procedure call ${serviceName}.${procedureName}`,
101+
`river.client.${serviceName}.${procedureName}`,
97102
{
98103
attributes: {
99104
component: 'river',
@@ -131,6 +136,7 @@ export function createProcTelemetryInfo(
131136
}
132137

133138
export function createHandlerSpan<Fn extends (span: Span) => unknown>(
139+
tracer: Tracer,
134140
session: IdentifiedSession,
135141
kind: ValidProcType,
136142
serviceName: string,
@@ -144,7 +150,7 @@ export function createHandlerSpan<Fn extends (span: Span) => unknown>(
144150
: context.active();
145151

146152
return tracer.startActiveSpan<Fn>(
147-
`procedure handler ${serviceName}.${procedureName}`,
153+
`river.server.${serviceName}.${procedureName}`,
148154
{
149155
attributes: {
150156
component: 'river',
@@ -162,5 +168,20 @@ export function createHandlerSpan<Fn extends (span: Span) => unknown>(
162168
);
163169
}
164170

165-
const tracer = trace.getTracer('river', RIVER_VERSION);
166-
export default tracer;
171+
export function recordRiverError(
172+
span: Span,
173+
error: Static<BaseErrorSchemaType>,
174+
): void {
175+
span.setStatus({
176+
code: SpanStatusCode.ERROR,
177+
message: error.message,
178+
});
179+
span.setAttributes({
180+
'river.error_code': error.code,
181+
'river.error_message': error.message,
182+
});
183+
}
184+
185+
export function getTracer(): Tracer {
186+
return trace.getTracer('river', RIVER_VERSION);
187+
}

tracing/tracing.test.ts

Lines changed: 89 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,10 @@
1-
import { trace, context, propagation, Span } from '@opentelemetry/api';
1+
import {
2+
trace,
3+
context,
4+
propagation,
5+
Span,
6+
SpanStatusCode,
7+
} from '@opentelemetry/api';
28
import { describe, test, expect, vi, assert, beforeEach } from 'vitest';
39
import { dummySession, readNextResult } from '../testUtil';
410

@@ -9,10 +15,7 @@ import {
915
} from '@opentelemetry/sdk-trace-base';
1016
import { W3CTraceContextPropagator } from '@opentelemetry/core';
1117
import { AsyncHooksContextManager } from '@opentelemetry/context-async-hooks';
12-
import tracer, {
13-
createSessionTelemetryInfo,
14-
getPropagationContext,
15-
} from './index';
18+
import { createSessionTelemetryInfo, getPropagationContext } from './index';
1619
import { testMatrix } from '../testUtil/fixtures/matrix';
1720
import {
1821
cleanupTransports,
@@ -27,26 +30,27 @@ import { createClient } from '../router/client';
2730
import { UNCAUGHT_ERROR_CODE } from '../router';
2831
import { LogFn } from '../logging';
2932

30-
describe('Basic tracing tests', () => {
31-
const provider = new BasicTracerProvider();
32-
provider.addSpanProcessor(
33-
new SimpleSpanProcessor(new InMemorySpanExporter()),
34-
);
35-
const contextManager = new AsyncHooksContextManager();
36-
contextManager.enable();
37-
trace.setGlobalTracerProvider(provider);
38-
context.setGlobalContextManager(contextManager);
39-
propagation.setGlobalPropagator(new W3CTraceContextPropagator());
33+
const provider = new BasicTracerProvider();
34+
const spanExporter = new InMemorySpanExporter();
35+
provider.addSpanProcessor(new SimpleSpanProcessor(spanExporter));
36+
const contextManager = new AsyncHooksContextManager();
37+
contextManager.enable();
38+
trace.setGlobalTracerProvider(provider);
39+
context.setGlobalContextManager(contextManager);
40+
propagation.setGlobalPropagator(new W3CTraceContextPropagator());
4041

42+
describe('Basic tracing tests', () => {
4143
test('createSessionTelemetryInfo', () => {
4244
const parentCtx = context.active();
45+
const tracer = trace.getTracer('test');
4346
const span = tracer.startSpan('empty span', {}, parentCtx);
4447
const ctx = trace.setSpan(parentCtx, span);
4548

4649
const propCtx = getPropagationContext(ctx);
4750
expect(propCtx?.traceparent).toBeTruthy();
4851
const session = dummySession();
4952
const teleInfo = createSessionTelemetryInfo(
53+
tracer,
5054
session.id,
5155
session.to,
5256
session.from,
@@ -75,6 +79,7 @@ describe.each(testMatrix())(
7579
const setup = await transport.setup({ client: opts, server: opts });
7680
getClientTransport = setup.getClientTransport;
7781
getServerTransport = setup.getServerTransport;
82+
spanExporter.reset();
7883

7984
return async () => {
8085
await postTestCleanup();
@@ -189,5 +194,74 @@ describe.each(testMatrix())(
189194
server,
190195
});
191196
});
197+
198+
test('river errors are recorded on handler spans', async () => {
199+
// setup
200+
const clientTransport = getClientTransport('client');
201+
const clientMockLogger = vi.fn<LogFn>();
202+
clientTransport.bindLogger(clientMockLogger);
203+
const serverTransport = getServerTransport();
204+
const serverMockLogger = vi.fn<LogFn>();
205+
serverTransport.bindLogger(serverMockLogger);
206+
const services = {
207+
fallible: FallibleServiceSchema,
208+
};
209+
const server = createServer(serverTransport, services);
210+
const client = createClient<typeof services>(
211+
clientTransport,
212+
serverTransport.clientId,
213+
);
214+
addPostTestCleanup(async () => {
215+
await cleanupTransports([clientTransport, serverTransport]);
216+
});
217+
218+
const { reqWritable, resReadable } = client.fallible.echo.stream({});
219+
220+
reqWritable.write({
221+
msg: 'abc',
222+
throwResult: false,
223+
throwError: false,
224+
});
225+
let result = await readNextResult(resReadable);
226+
expect(result).toStrictEqual({
227+
ok: true,
228+
payload: {
229+
response: 'abc',
230+
},
231+
});
232+
233+
// this isn't the first message so doesn't have telemetry info on the message itself
234+
reqWritable.write({
235+
msg: 'def',
236+
throwResult: false,
237+
throwError: true,
238+
});
239+
240+
result = await readNextResult(resReadable);
241+
expect(result).toStrictEqual({
242+
ok: false,
243+
payload: {
244+
code: UNCAUGHT_ERROR_CODE,
245+
message: 'some message',
246+
},
247+
});
248+
249+
const spans = spanExporter.getFinishedSpans();
250+
251+
const errSpan = spans.find(
252+
(span) =>
253+
span.name === 'river.server.fallible.echo' &&
254+
span.status.code === SpanStatusCode.ERROR,
255+
);
256+
expect(errSpan).toBeTruthy();
257+
expect(errSpan?.attributes['river.error_code']).toBe(UNCAUGHT_ERROR_CODE);
258+
expect(errSpan?.attributes['river.error_message']).toBe('some message');
259+
260+
await testFinishesCleanly({
261+
clientTransports: [clientTransport],
262+
serverTransport,
263+
server,
264+
});
265+
});
192266
},
193267
);

transport/client.ts

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ import { Transport } from './transport';
1818
import { coerceErrorString } from './stringifyError';
1919
import { ProtocolError } from './events';
2020
import { Value } from '@sinclair/typebox/value';
21-
import tracer, { getPropagationContext } from '../tracing';
21+
import { getPropagationContext } from '../tracing';
2222
import { Connection } from './connection';
2323
import { MessageMetadata } from '../logging';
2424
import { SessionConnecting } from './sessionStateMachine/SessionConnecting';
@@ -110,6 +110,7 @@ export abstract class ClientTransport<
110110
},
111111
this.options,
112112
currentProtocolVersion,
113+
this.tracer,
113114
this.log,
114115
);
115116

@@ -377,20 +378,23 @@ export abstract class ClientTransport<
377378
}
378379

379380
protected onBackoffFinished(session: SessionBackingOff) {
380-
const connPromise = tracer.startActiveSpan('connect', async (span) => {
381-
try {
382-
return await this.createNewOutgoingConnection(session.to);
383-
} catch (err) {
384-
// rethrow the error so that the promise is rejected
385-
// as it was before we wrapped it in a span
386-
const errStr = coerceErrorString(err);
387-
span.recordException(errStr);
388-
span.setStatus({ code: SpanStatusCode.ERROR });
389-
throw err;
390-
} finally {
391-
span.end();
392-
}
393-
});
381+
const connPromise = session.tracer.startActiveSpan(
382+
'connect',
383+
async (span) => {
384+
try {
385+
return await this.createNewOutgoingConnection(session.to);
386+
} catch (err) {
387+
// rethrow the error so that the promise is rejected
388+
// as it was before we wrapped it in a span
389+
const errStr = coerceErrorString(err);
390+
span.recordException(errStr);
391+
span.setStatus({ code: SpanStatusCode.ERROR });
392+
throw err;
393+
} finally {
394+
span.end();
395+
}
396+
},
397+
);
394398

395399
// transition to connecting
396400
const connectingSession =

transport/server.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@ export abstract class ServerTransport<
164164
},
165165
},
166166
this.options,
167+
this.tracer,
167168
this.log,
168169
);
169170

0 commit comments

Comments
 (0)