-
Notifications
You must be signed in to change notification settings - Fork 3.7k
fix(openai_compat): strip leaked Thinking/Final tags from content #1237
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -10,6 +10,7 @@ import ( | |
| "log" | ||
| "net/http" | ||
| "net/url" | ||
| "regexp" | ||
| "strings" | ||
| "time" | ||
|
|
||
|
|
@@ -40,6 +41,13 @@ type Option func(*Provider) | |
|
|
||
| const defaultRequestTimeout = 120 * time.Second | ||
|
|
||
| var ( | ||
| // Providers sometimes leak chain-of-thought in these tags in `content`. | ||
| quickReasoningTagRE = regexp.MustCompile(`(?i)<\s*/?\s*(?:think(?:ing)?|thought|reasoning|final)\b`) | ||
| finalTagRE = regexp.MustCompile(`(?i)<\s*/?\s*final\b[^>]*>`) | ||
| thinkingTagRE = regexp.MustCompile(`(?i)<\s*(/?)\s*(?:think(?:ing)?|thought|reasoning)\b[^>]*>`) | ||
| ) | ||
|
|
||
| func WithMaxTokensField(maxTokensField string) Option { | ||
| return func(p *Provider) { | ||
| p.maxTokensField = maxTokensField | ||
|
|
@@ -271,6 +279,52 @@ func responsePreview(body []byte, maxLen int) string { | |
| return string(trimmed[:maxLen]) + "..." | ||
| } | ||
|
|
||
| // stripThinkingAndFinalTags removes leaked reasoning blocks while preserving | ||
| // user-facing answer text (e.g. <final>answer</final> -> answer). | ||
| func stripThinkingAndFinalTags(content string) string { | ||
| if content == "" { | ||
| return content | ||
| } | ||
| // Some APIs double-escape angle brackets in JSON payloads. | ||
| content = strings.ReplaceAll(content, `\u003c`, "<") | ||
| content = strings.ReplaceAll(content, `\u003e`, ">") | ||
| content = strings.ReplaceAll(content, `\u003C`, "<") | ||
| content = strings.ReplaceAll(content, `\u003E`, ">") | ||
|
|
||
| if !quickReasoningTagRE.MatchString(content) { | ||
| return strings.TrimSpace(content) | ||
| } | ||
|
|
||
| cleaned := finalTagRE.ReplaceAllString(content, "") | ||
| indexes := thinkingTagRE.FindAllStringSubmatchIndex(cleaned, -1) | ||
| if len(indexes) == 0 { | ||
| return strings.TrimSpace(cleaned) | ||
| } | ||
|
|
||
| var b strings.Builder | ||
| lastIndex := 0 | ||
| inThinking := false | ||
| for _, idx := range indexes { | ||
| matchStart, matchEnd := idx[0], idx[1] | ||
| isClose := idx[2] >= 0 && cleaned[idx[2]:idx[3]] == "/" | ||
|
|
||
| if !inThinking { | ||
| b.WriteString(cleaned[lastIndex:matchStart]) | ||
| if !isClose { | ||
| inThinking = true | ||
| } | ||
| } else if isClose { | ||
| inThinking = false | ||
| } | ||
| lastIndex = matchEnd | ||
| } | ||
|
|
||
| if !inThinking { | ||
| b.WriteString(cleaned[lastIndex:]) | ||
| } | ||
| return strings.TrimSpace(b.String()) | ||
| } | ||
|
|
||
| func parseResponse(body io.Reader) (*LLMResponse, error) { | ||
| var apiResponse struct { | ||
| Choices []struct { | ||
|
|
@@ -351,7 +405,7 @@ func parseResponse(body io.Reader) (*LLMResponse, error) { | |
| } | ||
|
|
||
| return &LLMResponse{ | ||
| Content: choice.Message.Content, | ||
| Content: stripThinkingAndFinalTags(choice.Message.Content), | ||
| ReasoningContent: choice.Message.ReasoningContent, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It may be better to judge whether it is a think mode by whether these contents exist or not. If the content is judged by regular expression, it may misjudge. It is not a think model, but the original text has these keywords in the communication process. |
||
| Reasoning: choice.Message.Reasoning, | ||
| ReasoningDetails: choice.Message.ReasoningDetails, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -152,6 +152,63 @@ func TestProviderChat_ParsesReasoningContent(t *testing.T) { | |
| } | ||
| } | ||
|
|
||
| func TestParseResponse_StripsThinkingAndFinalTags(t *testing.T) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You can put all three tests in one test function, there is no need to split them into so many. A functional test function should be single-point. |
||
| body := strings.NewReader(`{ | ||
| "choices": [{ | ||
| "message": { | ||
| "content": "<think>internal reasoning</think><final>The answer is 2</final>" | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Non-keyword-starting sentences should be added for testing. |
||
| }, | ||
| "finish_reason": "stop" | ||
| }] | ||
| }`) | ||
|
|
||
| out, err := parseResponse(body) | ||
| if err != nil { | ||
| t.Fatalf("parseResponse() error = %v", err) | ||
| } | ||
| if out.Content != "The answer is 2" { | ||
| t.Fatalf("Content = %q, want %q", out.Content, "The answer is 2") | ||
| } | ||
| } | ||
|
|
||
| func TestParseResponse_StripsEscapedThinkingTags(t *testing.T) { | ||
| body := strings.NewReader(`{ | ||
| "choices": [{ | ||
| "message": { | ||
| "content": "\\u003cthink\\u003einternal\\u003c/think\\u003e\\u003cfinal\\u003eOK\\u003c/final\\u003e" | ||
| }, | ||
| "finish_reason": "stop" | ||
| }] | ||
| }`) | ||
|
|
||
| out, err := parseResponse(body) | ||
| if err != nil { | ||
| t.Fatalf("parseResponse() error = %v", err) | ||
| } | ||
| if out.Content != "OK" { | ||
| t.Fatalf("Content = %q, want %q", out.Content, "OK") | ||
| } | ||
| } | ||
|
|
||
| func TestParseResponse_DropsUnclosedThinkingBlock(t *testing.T) { | ||
| body := strings.NewReader(`{ | ||
| "choices": [{ | ||
| "message": { | ||
| "content": "Visible<think>hidden reasoning" | ||
| }, | ||
| "finish_reason": "stop" | ||
| }] | ||
| }`) | ||
|
|
||
| out, err := parseResponse(body) | ||
| if err != nil { | ||
| t.Fatalf("parseResponse() error = %v", err) | ||
| } | ||
| if out.Content != "Visible" { | ||
| t.Fatalf("Content = %q, want %q", out.Content, "Visible") | ||
| } | ||
| } | ||
|
|
||
| func TestProviderChat_PreservesReasoningContentInHistory(t *testing.T) { | ||
| var requestBody map[string]any | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is not a dependent function., it can be placed in the tool class directory as a string tool.