
Commit 968c567

fix(openai): align reasoning capture with codex official pattern
Three changes to bring fantasy's Responses API streaming into parity with OpenAI's official codex CLI (codex-rs/codex-api/src/sse/responses.rs):

1. **Defer activeReasoning cleanup to end-of-stream.** The previous implementation deleted the entry on `response.output_item.done`, which meant any subsequent event for the same reasoning item (e.g. a late delta or a duplicate done) was silently dropped. The official codex parser keeps items addressable until the full stream completes.

2. **Capture done.Item.Summary on output_item.done.** The streaming summary delta path may already populate state.metadata.Summary via reasoning_summary_text.delta events, but the done event carries the authoritative final list. Prefer it when non-empty so partial-summary streams are corrected to the final shape.

3. **Add a response.reasoning_text.delta handler.** Some gpt-5.x reasoning variants stream reasoning via this event channel (raw reasoning text keyed by ItemID + ContentIndex) instead of, or in addition to, reasoning_summary_text.delta. The official codex parser handles both; fantasy previously handled only the summary path, dropping raw reasoning text for affected models.

Background: empirical lenos session 35dd39ec (codex 5.4 multi-turn audit) showed that turn 1 captured encrypted_content cleanly via the existing output_item.done capture (PR charmbracelet#71's Fix 2), but follow-up turns and gpt-5.5 high sessions (ab022528) showed state.metadata.EncryptedContent stuck at empty despite the API streaming reasoning text. Investigation against the official codex CLI source plus multiple reverse-engineered backend proxies (MetaFARS/codex-relay, hermes-agent issue #5732, satoriweb's protocol notes) confirmed:

- response.completed.output is unreliable on the Codex backend (it can be empty even when output_item.done events delivered the data).
- The reasoning_text.delta event is a separate channel from reasoning_summary_text.delta; both must be handled to capture all thinking text emitted by gpt-5.x reasoning variants.

This commit reverts the Fix 3 attempt (commit 7ce6466, re-emitting ReasoningEnd from response.completed.output), which was based on the incorrect assumption that completed.output is the source of truth.
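The summary-preference rule in change 2 can be sketched in isolation. This is a minimal sketch with a hypothetical `summaryPart` type and `finalSummary` helper; the real fantasy/openai types differ:

```go
package main

import "fmt"

// summaryPart mirrors the shape of a Responses API summary entry
// (hypothetical field name for illustration).
type summaryPart struct {
	Text string
}

// finalSummary applies the merge rule: the streaming
// reasoning_summary_text.delta path may have accumulated a partial
// summary, but when output_item.done carries a non-empty Summary
// list, that list is authoritative and replaces the partial one.
func finalSummary(streamed []string, done []summaryPart) []string {
	if len(done) == 0 {
		return streamed // keep whatever the delta path built up
	}
	out := make([]string, 0, len(done))
	for _, s := range done {
		out = append(out, s.Text)
	}
	return out
}

func main() {
	fmt.Println(finalSummary([]string{"partial"}, []summaryPart{{Text: "final a"}, {Text: "final b"}})) // prints: [final a final b]
	fmt.Println(finalSummary([]string{"partial"}, nil))                                                 // prints: [partial]
}
```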
1 parent 549fffa commit 968c567

1 file changed

providers/openai/responses_language_model.go

Lines changed: 58 additions & 11 deletions
```diff
@@ -1127,19 +1127,35 @@ func (o responsesLanguageModel) Stream(ctx context.Context, call fantasy.Call) (
 			case "reasoning":
 				state := activeReasoning[done.Item.ID]
 				if state != nil {
-					// The output_item.done event carries the FINAL
-					// encrypted_content blob for the reasoning item.
-					// The earlier output_item.added event for reasoning
-					// items typically does not include it (the item is
-					// still being generated). Capture it here so the
-					// blob is available for replay on subsequent turns
-					// (see also: ContentTypeReasoning case in
-					// toResponsesPrompt). Without this, encrypted_content
-					// is silently dropped and reasoning continuity is
-					// lost across requests when store=false.
+					// The output_item.done event for reasoning items is
+					// the SOURCE OF TRUTH for the final reasoning state
+					// (matches OpenAI's official codex CLI pattern: see
+					// codex-rs/codex-api/src/sse/responses.rs which
+					// extracts the full ResponseItem here and ignores
+					// response.completed.output, which is known to be
+					// empty for some Codex backend responses — refs
+					// hermes-agent issue #5732).
+					//
+					// Capture every available field so encrypted_content,
+					// summary, and any other surface the API populates is
+					// preserved for replay on subsequent turns (see
+					// ContentTypeReasoning case in toResponsesPrompt for
+					// the replay path).
 					if done.Item.EncryptedContent != "" {
 						state.metadata.EncryptedContent = &done.Item.EncryptedContent
 					}
+					// Pull final Summary from done.Item — the streaming
+					// reasoning_summary_text.delta path may have populated
+					// state.metadata.Summary already, but the done event
+					// carries the authoritative final list. Prefer it
+					// when non-empty.
+					if len(done.Item.Summary) > 0 {
+						finalSummary := make([]string, 0, len(done.Item.Summary))
+						for _, s := range done.Item.Summary {
+							finalSummary = append(finalSummary, s.Text)
+						}
+						state.metadata.Summary = finalSummary
+					}
 					if !yield(fantasy.StreamPart{
 						Type: fantasy.StreamPartTypeReasoningEnd,
 						ID:   done.Item.ID,
@@ -1149,7 +1165,11 @@ func (o responsesLanguageModel) Stream(ctx context.Context, call fantasy.Call) (
 					}) {
 						return
 					}
-					delete(activeReasoning, done.Item.ID)
+					// Don't delete activeReasoning here — keep it through
+					// response.completed so any stragglers (e.g. items
+					// for which output_item.done never fires) can still be
+					// inspected/finalised. The map is cleared at the end
+					// of the stream when the function returns.
 				}
 			}
 
@@ -1251,6 +1271,33 @@ func (o responsesLanguageModel) Stream(ctx context.Context, call fantasy.Call) (
 				}
 			}
 
+			case "response.reasoning_text.delta":
+				// Some Codex backend models (notably gpt-5.x reasoning
+				// variants under certain conditions) stream reasoning via
+				// reasoning_text.delta events instead of (or in addition to)
+				// reasoning_summary_text.delta. Per the official codex CLI
+				// protocol parser (codex-rs/codex-api/src/sse/responses.rs
+				// case "response.reasoning_text.delta"), this event carries
+				// raw reasoning content keyed by ItemID + ContentIndex.
+				//
+				// We surface the delta as a ReasoningDelta to keep consumers
+				// (lenos and similar bash-protocol agents) seeing thinking
+				// text regardless of which event channel the model uses.
+				rawDelta := event.AsResponseReasoningTextDelta()
+				state := activeReasoning[rawDelta.ItemID]
+				if state != nil {
+					if !yield(fantasy.StreamPart{
+						Type:  fantasy.StreamPartTypeReasoningDelta,
+						ID:    rawDelta.ItemID,
+						Delta: rawDelta.Delta,
+						ProviderMetadata: fantasy.ProviderMetadata{
+							Name: state.metadata,
+						},
+					}) {
+						return
+					}
+				}
+
 			case "response.completed":
 				completed := event.AsResponseCompleted()
 				responseID = completed.Response.ID
```
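The end-of-stream retention rule (change 1) and the dual-channel delta handling (change 3) can be sketched together. This is a minimal sketch with a hypothetical `sseEvent` type and `collectReasoning` helper; the real fantasy/openai event types and dispatch loop differ:

```go
package main

import "fmt"

// sseEvent is a hypothetical, simplified view of a Responses API
// stream event; only the fields this sketch needs.
type sseEvent struct {
	Type   string
	ItemID string
	Delta  string
}

// collectReasoning routes both reasoning delta channels into one
// per-item buffer, mirroring the commit's approach: entries stay in
// the map until the stream ends, so events arriving after
// output_item.done still find their item instead of being dropped.
func collectReasoning(events []sseEvent) map[string]string {
	active := map[string]string{}
	for _, ev := range events {
		switch ev.Type {
		case "response.output_item.added":
			active[ev.ItemID] = "" // item becomes addressable
		case "response.reasoning_text.delta",
			"response.reasoning_summary_text.delta":
			// Both channels accumulate into the same item.
			if _, ok := active[ev.ItemID]; ok {
				active[ev.ItemID] += ev.Delta
			}
		case "response.output_item.done":
			// Deliberately no delete: the item stays addressable
			// until the full stream completes.
		}
	}
	return active
}

func main() {
	out := collectReasoning([]sseEvent{
		{Type: "response.output_item.added", ItemID: "r1"},
		{Type: "response.reasoning_summary_text.delta", ItemID: "r1", Delta: "think "},
		{Type: "response.output_item.done", ItemID: "r1"},
		// A late delta after done would previously have been dropped:
		{Type: "response.reasoning_text.delta", ItemID: "r1", Delta: "more"},
	})
	fmt.Println(out["r1"]) // prints: think more
}
```

With the old delete-on-done behavior, the final delta above would have missed its map entry and been lost, which is exactly the failure mode seen in the multi-turn sessions described in the commit message.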