-
Notifications
You must be signed in to change notification settings - Fork 639
fix: OTLP logging improvements #3343
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
97ebbaf
077b508
e32c996
3d9f3b1
55a6f20
104d788
03351c2
79c6d7a
ee681f4
7b658dd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -94,8 +94,22 @@ impl LogExporter for TonicLogsClient { | |
| .interceptor | ||
| .call(Request::new(())) | ||
| .map_err(|e| { | ||
| otel_warn!( | ||
| name: "TonicLogsClient.InterceptorFailed", | ||
| grpc_code = format!("{:?}", e.code()) | ||
| ); | ||
| // grpc_message and grpc_details may contain sensitive information, | ||
| // so log them at debug level only. | ||
| otel_debug!( | ||
| name: "TonicLogsClient.InterceptorFailedDetails", | ||
| grpc_message = e.message(), | ||
| grpc_details = format!("{:?}", e.details()) | ||
| ); | ||
| // Convert interceptor errors to tonic::Status for retry classification | ||
| tonic::Status::internal(format!("interceptor error: {e:?}")) | ||
| tonic::Status::internal(&format!( | ||
| "Logs export failed in interceptor with gRPC code: {:?}", | ||
| e.code() | ||
| )) | ||
| })? | ||
| .into_parts(); | ||
| Ok((inner.client.clone(), m, e)) | ||
|
|
@@ -137,9 +151,47 @@ impl LogExporter for TonicLogsClient { | |
| .await | ||
| { | ||
| Ok(_) => Ok(()), | ||
| Err(tonic_status) => Err(OTelSdkError::InternalFailure(format!( | ||
| "export error: {tonic_status:?}" | ||
| ))), | ||
| Err(tonic_status) => { | ||
| // For connection-related errors (Unavailable, Unknown, etc.), the message | ||
| // typically contains safe, actionable information (e.g., "Connection refused"). | ||
| // For auth errors (Unauthenticated, PermissionDenied), the message may contain | ||
| // sensitive information, so we only log the code at WARN level. | ||
| let code = tonic_status.code(); | ||
| let is_connection_error = matches!( | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nit - these ~35-40 lines of code seem to be repeated for all the signals - we could have a shared helper method. |
||
| code, | ||
| tonic::Code::Unavailable | ||
| | tonic::Code::Unknown | ||
| | tonic::Code::DeadlineExceeded | ||
| | tonic::Code::ResourceExhausted | ||
| | tonic::Code::Aborted | ||
| | tonic::Code::Cancelled | ||
| ); | ||
|
|
||
| if is_connection_error { | ||
| otel_warn!( | ||
| name: "TonicLogsClient.ExportFailed", | ||
| grpc_code = format!("{:?}", code), | ||
| grpc_message = tonic_status.message() | ||
| ); | ||
| } else { | ||
| // For potentially sensitive errors (Unauthenticated, PermissionDenied, etc.), | ||
| // only log the code at WARN level. | ||
| otel_warn!( | ||
| name: "TonicLogsClient.ExportFailed", | ||
| grpc_code = format!("{:?}", code) | ||
| ); | ||
| // Log message and details at debug level for sensitive error types. | ||
| otel_debug!( | ||
| name: "TonicLogsClient.ExportFailedDetails", | ||
| grpc_message = tonic_status.message(), | ||
| grpc_details = format!("{:?}", tonic_status.details()) | ||
| ); | ||
| } | ||
| Err(OTelSdkError::InternalFailure(format!( | ||
| "Logs export failed with gRPC code: {:?}", | ||
| code | ||
| ))) | ||
| } | ||
| } | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -85,9 +85,18 @@ impl MetricsClient for TonicMetricsClient { | |
| .interceptor | ||
| .call(Request::new(())) | ||
| .map_err(|e| { | ||
| tonic::Status::internal(format!( | ||
| "unexpected status while exporting {e:?}" | ||
| )) | ||
| otel_warn!( | ||
| name: "TonicMetricsClient.InterceptorFailed", | ||
| grpc_code = format!("{:?}", e.code()) | ||
| ); | ||
| // grpc_message and grpc_details may contain sensitive information, | ||
| // so log them at debug level only. | ||
| otel_debug!( | ||
| name: "TonicMetricsClient.InterceptorFailedDetails", | ||
| grpc_message = e.message(), | ||
| grpc_details = format!("{:?}", e.details()) | ||
| ); | ||
| tonic::Status::internal("Metrics export failed in interceptor") | ||
| })? | ||
| .into_parts(); | ||
| Ok((inner.client.clone(), m, e)) | ||
|
|
@@ -127,9 +136,46 @@ impl MetricsClient for TonicMetricsClient { | |
| .await | ||
| { | ||
| Ok(_) => Ok(()), | ||
| Err(tonic_status) => Err(OTelSdkError::InternalFailure(format!( | ||
| "export error: {tonic_status:?}" | ||
| ))), | ||
| Err(tonic_status) => { | ||
| // For connection-related errors (Unavailable, Unknown, etc.), the message | ||
| // typically contains safe, actionable information (e.g., "Connection refused"). | ||
| // For auth errors (Unauthenticated, PermissionDenied), the message may contain | ||
| // sensitive information, so we only log the code at WARN level. | ||
| let code = tonic_status.code(); | ||
| let is_connection_error = matches!( | ||
| code, | ||
| tonic::Code::Unavailable | ||
| | tonic::Code::Unknown | ||
| | tonic::Code::DeadlineExceeded | ||
| | tonic::Code::ResourceExhausted | ||
| | tonic::Code::Aborted | ||
| | tonic::Code::Cancelled | ||
| ); | ||
|
|
||
| if is_connection_error { | ||
| otel_warn!( | ||
| name: "TonicMetricsClient.ExportFailed", | ||
| grpc_code = format!("{:?}", code), | ||
| grpc_message = tonic_status.message() | ||
| ); | ||
| } else { | ||
| // For potentially sensitive errors (Unauthenticated, PermissionDenied, etc.), | ||
| // only log the code at WARN level. | ||
| otel_warn!( | ||
| name: "TonicMetricsClient.ExportFailed", | ||
| grpc_code = format!("{:?}", code) | ||
| ); | ||
| // Log message and details at debug level for sensitive error types. | ||
| otel_debug!( | ||
| name: "TonicMetricsClient.ExportFailedDetails", | ||
| grpc_message = tonic_status.message(), | ||
| grpc_details = format!("{:?}", tonic_status.details()) | ||
| ); | ||
| } | ||
| Err(OTelSdkError::InternalFailure( | ||
| "Metrics export failed".into(), | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
| )) | ||
| } | ||
| } | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -96,8 +96,19 @@ impl SpanExporter for TonicTracesClient { | |
| .interceptor | ||
| .call(Request::new(())) | ||
| .map_err(|e| { | ||
| otel_warn!( | ||
| name: "TonicTracesClient.InterceptorFailed", | ||
| grpc_code = format!("{:?}", e.code()) | ||
| ); | ||
| // grpc_message and grpc_details may contain sensitive information, | ||
| // so log them at debug level only. | ||
| otel_debug!( | ||
| name: "TonicTracesClient.InterceptorFailedDetails", | ||
| grpc_message = e.message(), | ||
| grpc_details = format!("{:?}", e.details()) | ||
| ); | ||
| // Convert interceptor errors to tonic::Status for retry classification | ||
| tonic::Status::internal(format!("interceptor error: {e:?}")) | ||
| tonic::Status::internal("Traces export failed in interceptor") | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
| })? | ||
| .into_parts(); | ||
| Ok((inner.client.clone(), m, e)) | ||
|
|
@@ -140,9 +151,44 @@ impl SpanExporter for TonicTracesClient { | |
| .await | ||
| { | ||
| Ok(_) => Ok(()), | ||
| Err(tonic_status) => Err(OTelSdkError::InternalFailure(format!( | ||
| "export error: {tonic_status:?}" | ||
| ))), | ||
| Err(tonic_status) => { | ||
| // For connection-related errors (Unavailable, Unknown, etc.), the message | ||
| // typically contains safe, actionable information (e.g., "Connection refused"). | ||
| // For auth errors (Unauthenticated, PermissionDenied), the message may contain | ||
| // sensitive information, so we only log the code at WARN level. | ||
| let code = tonic_status.code(); | ||
| let is_connection_error = matches!( | ||
| code, | ||
| tonic::Code::Unavailable | ||
| | tonic::Code::Unknown | ||
| | tonic::Code::DeadlineExceeded | ||
| | tonic::Code::ResourceExhausted | ||
| | tonic::Code::Aborted | ||
| | tonic::Code::Cancelled | ||
| ); | ||
|
|
||
| if is_connection_error { | ||
| otel_warn!( | ||
| name: "TonicTracesClient.ExportFailed", | ||
| grpc_code = format!("{:?}", code), | ||
| grpc_message = tonic_status.message() | ||
| ); | ||
| } else { | ||
| // For potentially sensitive errors (Unauthenticated, PermissionDenied, etc.), | ||
| // only log the code at WARN level. | ||
| otel_warn!( | ||
| name: "TonicTracesClient.ExportFailed", | ||
| grpc_code = format!("{:?}", code) | ||
| ); | ||
| // Log message and details at debug level for sensitive error types. | ||
| otel_debug!( | ||
| name: "TonicTracesClient.ExportFailedDetails", | ||
| grpc_message = tonic_status.message(), | ||
| grpc_details = format!("{:?}", tonic_status.details()) | ||
| ); | ||
| } | ||
| Err(OTelSdkError::InternalFailure("Traces export failed".into())) | ||
| } | ||
| } | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We are using the same event name
`HttpClient.ExportFailed` for two different kinds of errors: (1) `client.send_bytes` fails, and (2) `response.status()` is not successful. Maybe we could use different event names for better diagnosability:
`HttpClient.NetworkError` and `HttpClientStatusError`. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I did consider that and ended up with the same one. I am not 100% happy with this either.
I tried to distinguish between failures where we don't get a response at all and the ones where we get a response, but it has a status code indicating failure. But then I realized that
the `request` client doesn't expose a way for us to obtain `StatusCode` from its Error. Without having firmer control over the client, this is a decent tradeoff. (The otel-http crate could offer additional logging, as it knows the internals of the client it uses, but the OTLP exporter does not know that much.)
Let me know if we should try to make this better or if this is an acceptable tradeoff.