Skip to content

Commit 5e8371a

Browse files
authored
sep 29 studio (BoundaryML#2576)
- **type fixes** - **use blake3 hash when hashing blobs** - **fix streams being null in studio** - **http metadata was serialized incorrectly** - **Fix issue with python exceptions not counting as exceptions in studio, http bodies serialized as byte arrays, add test for checking publisher spans** <!-- CURSOR_SUMMARY --> --- > [!NOTE] > Switches blob hashing to blake3, fixes HTTP body serialization and Python exception handling, adds trace function summaries/match-highlights and blob fetch API, enables optional trace event file dump, and adjusts stream/function end payloads. > > - **Tracing/Serialization**: > - Serialize `HTTPBody.raw` bytes as UTF-8 string with flexible deserializer; fix HTTP metadata typing and headers shapes. > - Treat Python exceptions as errors in function end events; propagate proper error to tracer. > - Stream end events now carry parsed value meta instead of always null. > - Optional local trace dump via `BAML_TRACE_FILE` (writes newline-delimited JSON events). > - **Blobs**: > - Use `blake3` for blob hashing; replace SHA-256 and add dependency. > - New UI API/types `ui_blobs` with `GetBlobRequest/Response` and `BlobFormat`. > - **UI/Types**: > - TS type refinements (`any` -> `unknown`, header maps stricter) and added `http_metadata_summary`. > - Traces API: add `matched_descendant_ids` field and `include_match_highlights` flag. > - New endpoint: `/v1/traces/function-summaries` with request/response and `FunctionSummary` aggregates. > - **Tests/Infra**: > - Python tests verify publisher span counts via file, adjust stream test name, and add session flush. > - Ignore trace debug files in git. > > <sup>Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 29e202f. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot).</sup> <!-- /CURSOR_SUMMARY --> --------- Signed-off-by: aaronvg <[email protected]>
1 parent 5b87fb5 commit 5e8371a

File tree

18 files changed

+514
-86
lines changed

18 files changed

+514
-86
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,5 +171,7 @@ jj-workflow.mdc
171171
third_party
172172

173173
baml_repl_history.txt
174+
trace_events_debug.json
174175

175176
result
177+

engine/Cargo.lock

Lines changed: 27 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

engine/baml-rpc/src/runtime_api/trace_event.rs

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,9 +151,78 @@ pub enum IntermediateData<'a> {
151151

152152
#[derive(Debug, Serialize, Deserialize)]
153153
pub struct HTTPBody<'a> {
154+
#[serde(
155+
serialize_with = "serialize_bytes_as_string",
156+
deserialize_with = "deserialize_string_as_bytes"
157+
)]
154158
pub raw: Cow<'a, [u8]>,
155159
}
156160

161+
fn serialize_bytes_as_string<S>(bytes: &[u8], serializer: S) -> Result<S::Ok, S::Error>
162+
where
163+
S: serde::Serializer,
164+
{
165+
// Serialize as text to avoid exploding arrays of bytes; use lossy UTF-8 if needed
166+
let s = String::from_utf8_lossy(bytes);
167+
serializer.serialize_str(&s)
168+
}
169+
170+
fn deserialize_string_as_bytes<'de, D>(deserializer: D) -> Result<Cow<'static, [u8]>, D::Error>
171+
where
172+
D: serde::Deserializer<'de>,
173+
{
174+
struct BytesVisitor;
175+
176+
impl<'de> serde::de::Visitor<'de> for BytesVisitor {
177+
type Value = Cow<'static, [u8]>;
178+
179+
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
180+
formatter.write_str("a string or byte array")
181+
}
182+
183+
fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
184+
where
185+
E: serde::de::Error,
186+
{
187+
Ok(Cow::Owned(value.as_bytes().to_vec()))
188+
}
189+
190+
fn visit_string<E>(self, value: String) -> Result<Self::Value, E>
191+
where
192+
E: serde::de::Error,
193+
{
194+
Ok(Cow::Owned(value.into_bytes()))
195+
}
196+
197+
fn visit_bytes<E>(self, value: &[u8]) -> Result<Self::Value, E>
198+
where
199+
E: serde::de::Error,
200+
{
201+
Ok(Cow::Owned(value.to_vec()))
202+
}
203+
204+
fn visit_byte_buf<E>(self, value: Vec<u8>) -> Result<Self::Value, E>
205+
where
206+
E: serde::de::Error,
207+
{
208+
Ok(Cow::Owned(value))
209+
}
210+
211+
fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
212+
where
213+
A: serde::de::SeqAccess<'de>,
214+
{
215+
let mut bytes = Vec::new();
216+
while let Some(byte) = seq.next_element::<u8>()? {
217+
bytes.push(byte);
218+
}
219+
Ok(Cow::Owned(bytes))
220+
}
221+
}
222+
223+
deserializer.deserialize_any(BytesVisitor)
224+
}
225+
157226
#[derive(Debug, Serialize, Deserialize)]
158227
pub struct Event<'a> {
159228
pub raw: Cow<'a, str>,

engine/baml-rpc/src/ui/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
pub mod ui_baml_src;
2+
pub mod ui_blobs;
23
pub mod ui_control_plane_orgs;
34
pub mod ui_control_plane_projects;
45
pub mod ui_dashboard;

engine/baml-rpc/src/ui/ui_baml_src.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ pub struct BamlSourceCode {
3232
pub struct AstNodeDefinition {
3333
pub project_id: String,
3434
pub ast_node_id: String,
35-
#[ts(type = "any")]
35+
#[ts(type = "unknown")]
3636
pub ast_node_definition: serde_json::Value,
3737
pub flattened_dependencies_ast_nodes: Vec<String>,
3838
pub baml_src_node_ids: Vec<String>,

engine/baml-rpc/src/ui/ui_blobs.rs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
use serde::{Deserialize, Serialize};
2+
use ts_rs::TS;
3+
4+
use crate::ProjectId;
5+
6+
/// Request to fetch blob content
7+
#[derive(Debug, Serialize, Deserialize, TS)]
8+
#[ts(export)]
9+
pub struct GetBlobRequest {
10+
/// The 64-character BLAKE3 hash of the blob
11+
pub blob_hash: String,
12+
/// The project ID that owns this blob
13+
#[ts(type = "string")]
14+
pub project_id: ProjectId,
15+
/// Response format: 'raw' returns binary, 'json' returns base64-encoded content
16+
#[serde(default)]
17+
pub format: BlobFormat,
18+
}
19+
20+
/// Response format for blob content
21+
#[derive(Debug, Serialize, Deserialize, TS, Default)]
22+
#[ts(export)]
23+
#[serde(rename_all = "lowercase")]
24+
pub enum BlobFormat {
25+
/// Return raw binary content with appropriate Content-Type header
26+
#[default]
27+
Raw,
28+
/// Return JSON with base64-encoded content
29+
Json,
30+
}
31+
32+
/// JSON response format for blob content
33+
#[derive(Debug, Serialize, Deserialize, TS)]
34+
#[ts(export)]
35+
pub struct GetBlobResponse {
36+
/// The blob hash
37+
pub blob_hash: String,
38+
/// The content type of the decoded blob (e.g., "image/png")
39+
pub content_type: String,
40+
/// Base64-encoded content (exact same as used in LLM request)
41+
pub base64_content: String,
42+
}

engine/baml-rpc/src/ui/ui_traces.rs

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ pub struct NodeDetails {
4545
pub children_limit: Option<u32>, // How many children were requested/returned
4646
#[ts(type = "number", optional)]
4747
pub children_offset: Option<u32>, // Pagination offset for children
48+
49+
// Match highlighting - IDs of descendants that matched filters
50+
#[ts(optional)]
51+
pub matched_descendant_ids: Option<Vec<String>>,
4852
}
4953

5054
#[derive(Debug, Serialize, Deserialize, TS)]
@@ -130,6 +134,9 @@ pub struct ListTracesRequest {
130134
/// Filter to only show LLM function calls (function_type = 'baml_llm')
131135
#[ts(optional)]
132136
pub llm_only: Option<FilterExpression<bool>>,
137+
/// Whether to include matched_descendant_ids in the response for match highlighting. Defaults to false.
138+
#[ts(optional)]
139+
pub include_match_highlights: Option<bool>,
133140
}
134141

135142
impl Default for ListTracesRequest {
@@ -160,6 +167,7 @@ impl Default for ListTracesRequest {
160167
relative_time: None,
161168
search: None,
162169
llm_only: None,
170+
include_match_highlights: Some(false),
163171
}
164172
}
165173
}
@@ -273,3 +281,117 @@ mod tests {
273281
assert_eq!(request.limit, Some(50));
274282
}
275283
}
284+
285+
// API for listing function summaries with aggregate statistics
286+
#[derive(Debug, Clone, Serialize, Deserialize, TS)]
287+
#[serde(rename_all = "camelCase", default)]
288+
#[ts(export)]
289+
pub struct ListTraceFunctionSummariesRequest {
290+
#[ts(type = "string")]
291+
pub project_id: ProjectId,
292+
/// Maximum number of functions to return. Defaults to 100 if not specified.
293+
#[ts(optional)]
294+
pub limit: Option<u32>,
295+
/// Cursor for pagination: fetch functions starting after this function_name
296+
#[ts(optional)]
297+
pub starting_after: Option<String>,
298+
299+
// Time filters
300+
#[ts(optional)]
301+
pub relative_time: Option<RelativeTime>,
302+
#[ts(type = "FilterExpression<number>", optional)]
303+
pub start_time: Option<FilterExpression<EpochMsTimestamp>>,
304+
#[ts(type = "FilterExpression<number>", optional)]
305+
pub end_time: Option<FilterExpression<EpochMsTimestamp>>,
306+
307+
// Function filters
308+
#[ts(optional)]
309+
pub function_name: Option<FilterExpression<String>>,
310+
#[ts(optional)]
311+
pub function_id: Option<FilterExpression<String>>,
312+
#[ts(optional)]
313+
pub llm_only: Option<FilterExpression<bool>>,
314+
315+
// Status and other filters
316+
#[ts(optional)]
317+
pub status: Option<FilterExpression<FunctionCallStatus>>,
318+
#[ts(optional)]
319+
pub tag_filters: Option<Vec<TagFilter>>,
320+
#[ts(optional)]
321+
pub error_filters: Option<Vec<TagFilter>>,
322+
#[ts(optional)]
323+
pub search: Option<String>,
324+
}
325+
326+
impl Default for ListTraceFunctionSummariesRequest {
327+
fn default() -> Self {
328+
Self {
329+
project_id: ProjectId::new(),
330+
limit: Some(100),
331+
starting_after: None,
332+
relative_time: None,
333+
start_time: None,
334+
end_time: None,
335+
function_name: None,
336+
function_id: None,
337+
llm_only: None,
338+
status: None,
339+
tag_filters: None,
340+
error_filters: None,
341+
search: None,
342+
}
343+
}
344+
}
345+
346+
#[derive(Debug, Serialize, Deserialize, TS)]
347+
#[ts(export)]
348+
pub struct FunctionSummary {
349+
#[ts(optional)]
350+
pub function_id: Option<ui_types::UiFunctionIdString>,
351+
pub function_name: String,
352+
pub function_type: String, // 'baml_llm' or 'native'
353+
#[ts(type = "Record<string, unknown>")]
354+
pub tags: serde_json::Map<String, serde_json::Value>,
355+
356+
// Aggregate statistics
357+
#[ts(type = "number")]
358+
pub total_traces: u64,
359+
#[ts(type = "number")]
360+
pub success_count: u64,
361+
#[ts(type = "number")]
362+
pub error_count: u64,
363+
#[ts(type = "number")]
364+
pub running_count: u64,
365+
366+
// Time range for this function's traces
367+
#[ts(type = "number")]
368+
pub first_trace_time: EpochMsTimestamp,
369+
#[ts(type = "number")]
370+
pub last_trace_time: EpochMsTimestamp,
371+
372+
// Optional cost aggregates (if available)
373+
#[ts(type = "number", optional)]
374+
pub total_cost: Option<f64>,
375+
#[ts(type = "number", optional)]
376+
pub avg_duration_ms: Option<f64>,
377+
}
378+
379+
#[derive(Debug, Serialize, Deserialize, TS)]
380+
#[ts(export)]
381+
pub struct ListTraceFunctionSummariesResponse {
382+
pub summaries: Vec<FunctionSummary>,
383+
pub function_definitions: Vec<ui_types::UiFunctionDefinition>,
384+
pub type_definitions: Vec<ui_types::UiTypeDefinition>,
385+
pub has_more: bool,
386+
#[ts(optional)]
387+
pub next_cursor: Option<String>, // function_name for pagination
388+
}
389+
390+
pub struct ListTraceFunctionSummaries;
391+
392+
impl ApiEndpoint for ListTraceFunctionSummaries {
393+
type Request<'a> = ListTraceFunctionSummariesRequest;
394+
type Response<'a> = ListTraceFunctionSummariesResponse;
395+
396+
const PATH: &'static str = "/v1/traces/function-summaries";
397+
}

0 commit comments

Comments
 (0)