Skip to content

Commit 117e735

Browse files
fix: copy output exclude ansi and html (#6495)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 666fc69 commit 117e735

4 files changed

Lines changed: 236 additions & 19 deletions

File tree

frontend/src/components/editor/output/ConsoleOutput.tsx

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import type { OutputMessage } from "@/core/kernel/messages";
1414
import { useSelectAllContent } from "@/hooks/useSelectAllContent";
1515
import { cn } from "@/utils/cn";
1616
import { copyToClipboard } from "@/utils/copy";
17+
import { ansiToPlainText, parseHtmlContent } from "@/utils/dom";
1718
import { invariant } from "@/utils/invariant";
1819
import { Strings } from "@/utils/strings";
1920
import { NameCellContentEditable } from "../actions/name-cell-input";
@@ -124,7 +125,18 @@ const ConsoleOutputInternal = (props: Props): React.ReactNode => {
124125
onClick={() => {
125126
const text = reversedOutputs
126127
.filter((output) => output.channel !== "pdb")
127-
.map((output) => Strings.asString(output.data))
128+
.map((output) => {
129+
// If starts with `<`, then assume it's HTML
130+
if (
131+
typeof output.data === "string" &&
132+
output.data.startsWith("<")
133+
) {
134+
return parseHtmlContent(output.data);
135+
}
136+
137+
// Otherwise, convert the ANSI to HTML, then parse as HTML
138+
return ansiToPlainText(Strings.asString(output.data));
139+
})
128140
.join("\n");
129141
void copyToClipboard(text);
130142
}}

frontend/src/core/ai/context/providers/cell-output.ts

Lines changed: 1 addition & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import { displayCellName } from "@/core/cells/names";
99
import { isOutputEmpty } from "@/core/cells/outputs";
1010
import type { OutputMessage } from "@/core/kernel/messages";
1111
import type { JotaiStore } from "@/core/state/jotai";
12+
import { parseHtmlContent } from "@/utils/dom";
1213
import { Logger } from "@/utils/Logger";
1314
import { type AIContextItem, AIContextProvider } from "../registry";
1415
import { contextToXml } from "../utils";
@@ -64,24 +65,6 @@ function isMediaMimetype(
6465
return false;
6566
}
6667

67-
function parseHtmlContent(htmlString: string): string {
68-
try {
69-
// Create a temporary DOM element to parse HTML
70-
const tempDiv = document.createElement("div");
71-
tempDiv.innerHTML = htmlString;
72-
73-
// Extract text content, removing HTML tags
74-
const textContent = tempDiv.textContent || tempDiv.innerText || "";
75-
76-
// Clean up extra whitespace
77-
return textContent.replaceAll(/\s+/g, " ").trim();
78-
} catch (error) {
79-
Logger.error("Error parsing HTML content:", error);
80-
// If parsing fails, return the original string
81-
return htmlString;
82-
}
83-
}
84-
8568
export class CellOutputContextProvider extends AIContextProvider<CellOutputContextItem> {
8669
readonly title = "Cell Outputs";
8770
readonly mentionPrefix = "@";
Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
/* Copyright 2024 Marimo. All rights reserved. */
2+
3+
import { describe, expect, test } from "vitest";
4+
import { ansiToPlainText, parseHtmlContent } from "../dom";
5+
6+
// Tests for parseHtmlContent: each case passes an HTML (or plain) string and
// snapshots the text that remains once the markup is removed.
describe("parseHtmlContent", () => {
7+
test("strips HTML tags and returns plain text", () => {
8+
const htmlString =
9+
'<span style="color: red;">Error: Something went wrong</span>';
10+
const result = parseHtmlContent(htmlString);
11+
expect(result).toMatchInlineSnapshot(`"Error: Something went wrong"`);
12+
});
13+
14+
// Text between sibling elements (the ": " separator here) is kept.
test("handles ANSI color span tags", () => {
15+
const htmlString =
16+
'<span style="color:#d03050;">ERROR</span>: <span style="color:#8ad03a;">File not found</span>';
17+
const result = parseHtmlContent(htmlString);
18+
expect(result).toMatchInlineSnapshot(`"ERROR: File not found"`);
19+
});
20+
21+
// Despite the test name, the snapshot keeps the line breaks and the blank
// line from the input; only whitespace at the end of a line is dropped.
test("normalizes whitespace", () => {
22+
const htmlString = "<span> Multiple \n\n spaces and \t tabs </span>";
23+
const result = parseHtmlContent(htmlString);
24+
expect(result).toMatchInlineSnapshot(`
25+
" Multiple
26+
27+
spaces and tabs"
28+
`);
29+
});
30+
31+
test("handles empty HTML", () => {
32+
const htmlString = "";
33+
const result = parseHtmlContent(htmlString);
34+
expect(result).toMatchInlineSnapshot(`""`);
35+
});
36+
37+
// Input without any tags comes back unchanged.
test("handles plain text without HTML", () => {
38+
const htmlString = "Simple error message";
39+
const result = parseHtmlContent(htmlString);
40+
expect(result).toMatchInlineSnapshot(`"Simple error message"`);
41+
});
42+
43+
// Text from nested elements is concatenated in document order; the newline
// inside <pre><code> survives into the result.
test("handles nested HTML elements", () => {
44+
const htmlString =
45+
'<div><span>Traceback:</span><pre><code> File "test.py", line 1\n print("hello"</code></pre></div>';
46+
const result = parseHtmlContent(htmlString);
47+
expect(result).toMatchInlineSnapshot(`
48+
"Traceback: File "test.py", line 1
49+
print("hello""
50+
`);
51+
});
52+
53+
test("handles complex ANSI-converted HTML with styles", () => {
54+
const htmlString =
55+
'<span style="background:#fff;color:#000"> File "</span><span style="background:#fff;color:#0000ff">test.py</span><span style="background:#fff;color:#000">", line </span><span style="background:#fff;color:#008000">1</span>';
56+
const result = parseHtmlContent(htmlString);
57+
expect(result).toMatchInlineSnapshot(`" File "test.py", line 1"`);
58+
});
59+
});
60+
61+
// Tests for ansiToPlainText: each case passes a string containing ANSI escape
// sequences (`\x1b[...m`) and snapshots the text with those sequences removed.
describe("ansiToPlainText", () => {
62+
test("converts ANSI color codes to plain text", () => {
63+
const ansiString = "\x1b[31mError:\x1b[0m Something went wrong";
64+
const result = ansiToPlainText(ansiString);
65+
expect(result).toMatchInlineSnapshot(`"Error: Something went wrong"`);
66+
});
67+
68+
test("handles multiple ANSI color codes", () => {
69+
const ansiString =
70+
"\x1b[32mSUCCESS:\x1b[0m \x1b[34mOperation completed\x1b[0m successfully";
71+
const result = ansiToPlainText(ansiString);
72+
expect(result).toMatchInlineSnapshot(
73+
`"SUCCESS: Operation completed successfully"`,
74+
);
75+
});
76+
77+
test("handles ANSI bold and color combinations", () => {
78+
const ansiString =
79+
"\x1b[1;31mBOLD RED ERROR:\x1b[0m \x1b[33mWarning message\x1b[0m";
80+
const result = ansiToPlainText(ansiString);
81+
expect(result).toMatchInlineSnapshot(`"BOLD RED ERROR: Warning message"`);
82+
});
83+
84+
// The literal "<module>" in the input must appear verbatim in the output,
// i.e. angle brackets in the text are not treated as markup.
test("handles Python traceback with ANSI codes", () => {
85+
const ansiString =
86+
"\x1b[0;36m File \"\x1b[0m\x1b[0;32mtest.py\x1b[0m\x1b[0;36m\", line \x1b[0m\x1b[0;32m1\x1b[0m\x1b[0;36m, in \x1b[0m\x1b[0;35m<module>\x1b[0m\n\x1b[0;31mNameError\x1b[0m: name 'undefined_var' is not defined";
87+
const result = ansiToPlainText(ansiString);
88+
expect(result).toMatchInlineSnapshot(`
89+
" File "test.py", line 1, in <module>
90+
NameError: name 'undefined_var' is not defined"
91+
`);
92+
});
93+
94+
test("handles error messages with background colors", () => {
95+
const ansiString =
96+
"\x1b[41;37m CRITICAL ERROR \x1b[0m \x1b[31mSystem failure detected\x1b[0m";
97+
const result = ansiToPlainText(ansiString);
98+
expect(result).toMatchInlineSnapshot(
99+
`" CRITICAL ERROR System failure detected"`,
100+
);
101+
});
102+
103+
test("handles complex stack trace with mixed formatting", () => {
104+
const ansiString =
105+
'Traceback (most recent call last):\n \x1b[36mFile "\x1b[32m/path/to/file.py\x1b[36m", line \x1b[32m42\x1b[36m, in \x1b[35mfunction_name\x1b[0m\n \x1b[31mraise ValueError("Something went wrong")\x1b[0m\n\x1b[31mValueError\x1b[0m: Something went wrong';
106+
const result = ansiToPlainText(ansiString);
107+
expect(result).toMatchInlineSnapshot(`
108+
"Traceback (most recent call last):
109+
File "/path/to/file.py", line 42, in function_name
110+
raise ValueError("Something went wrong")
111+
ValueError: Something went wrong"
112+
`);
113+
});
114+
115+
test("handles empty string", () => {
116+
const ansiString = "";
117+
const result = ansiToPlainText(ansiString);
118+
expect(result).toMatchInlineSnapshot(`""`);
119+
});
120+
121+
test("handles plain text without ANSI codes", () => {
122+
const ansiString = "Plain error message without colors";
123+
const result = ansiToPlainText(ansiString);
124+
expect(result).toMatchInlineSnapshot(
125+
`"Plain error message without colors"`,
126+
);
127+
});
128+
129+
// Line breaks (including the blank line) are kept; the whitespace that
// follows "Warning" at the end of the input is absent from the snapshot.
test("handles whitespace and newlines correctly", () => {
130+
const ansiString =
131+
"\x1b[31m Error: \x1b[0m\n\n \x1b[33m Warning \x1b[0m ";
132+
const result = ansiToPlainText(ansiString);
133+
expect(result).toMatchInlineSnapshot(`
134+
" Error:
135+
136+
Warning"
137+
`);
138+
});
139+
140+
test("handles JavaScript error stack trace", () => {
141+
const ansiString =
142+
"\x1b[31mReferenceError\x1b[0m: \x1b[33mvariable\x1b[0m is not defined\n at \x1b[36mObject.<anonymous>\x1b[0m (\x1b[32m/path/to/script.js\x1b[0m:\x1b[33m5\x1b[0m:\x1b[33m1\x1b[0m)";
143+
const result = ansiToPlainText(ansiString);
144+
expect(result).toMatchInlineSnapshot(`
145+
"ReferenceError: variable is not defined
146+
at Object.<anonymous> (/path/to/script.js:5:1)"
147+
`);
148+
});
149+
150+
test("handles Rust panic with ANSI formatting", () => {
151+
const ansiString =
152+
"thread '\x1b[32mmain\x1b[0m' panicked at '\x1b[31massertion failed: `(left == right)`\x1b[0m'\n \x1b[36mleft\x1b[0m: `\x1b[33m5\x1b[0m`\n \x1b[36mright\x1b[0m: `\x1b[33m10\x1b[0m`";
153+
const result = ansiToPlainText(ansiString);
154+
expect(result).toMatchInlineSnapshot(`
155+
"thread 'main' panicked at 'assertion failed: \`(left == right)\`'
156+
left: \`5\`
157+
right: \`10\`"
158+
`);
159+
});
160+
161+
// Covers the extended `38;5;N` (256-color) foreground form.
test("handles mix of 8-bit and 256-color ANSI codes", () => {
162+
const ansiString =
163+
"\x1b[38;5;196mBright Red\x1b[0m and \x1b[38;5;46mBright Green\x1b[0m text";
164+
const result = ansiToPlainText(ansiString);
165+
expect(result).toMatchInlineSnapshot(`"Bright Red and Bright Green text"`);
166+
});
167+
});

frontend/src/utils/dom.ts

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
/* Copyright 2024 Marimo. All rights reserved. */
2+
3+
import { AnsiUp } from "ansi_up";
4+
import { Logger } from "./Logger";
5+
6+
/**
 * Extracts the plain text of an HTML string.
 *
 * All markup is dropped and only the text content is kept. Line breaks and
 * leading/interior whitespace are preserved; only whitespace at the end of
 * each line is trimmed.
 *
 * The markup is parsed inside a `<template>` element. Template content belongs
 * to an inert document, so parsing never fetches resources or runs inline
 * event handlers (e.g. `<img onerror=...>`), which assigning `innerHTML` on an
 * element created by the live document would do.
 *
 * @param htmlString The HTML string to parse
 * @returns The text content with trailing whitespace removed from every line.
 *   If parsing throws, the error is logged and `htmlString` is returned as-is.
 */
export function parseHtmlContent(htmlString: string): string {
  try {
    const template = document.createElement("template");
    template.innerHTML = htmlString;

    // Concatenated text of every node in the parsed fragment.
    const textContent = template.content.textContent ?? "";

    return textContent
      .split("\n")
      .map((line) => line.trimEnd())
      .join("\n");
  } catch (error) {
    Logger.error("Error parsing HTML content:", error);
    // If parsing fails, return the original string
    return htmlString;
  }
}

/**
 * Converts a string that may contain ANSI escape sequences to plain text.
 *
 * The input is first rendered to HTML with `ansi_up` (which also HTML-escapes
 * the text itself) and the resulting markup is then reduced to its text
 * content with {@link parseHtmlContent}.
 *
 * @param ansiString String that may contain ANSI escape sequences
 * @returns The text with ANSI sequences removed; `""` for empty input. If the
 *   conversion throws, the error is logged and `ansiString` is returned as-is.
 */
export function ansiToPlainText(ansiString: string): string {
  if (!ansiString) {
    return "";
  }

  try {
    // AnsiUp is stateful: it is designed for streaming and keeps style state
    // and any incomplete trailing escape sequence between calls. A fresh
    // instance per call keeps one conversion from leaking into the next.
    const ansiUp = new AnsiUp();
    const htmlString = ansiUp.ansi_to_html(ansiString);

    // Strip HTML tags and return clean text
    return parseHtmlContent(htmlString);
  } catch (error) {
    Logger.error("Error converting ANSI to plain text:", error);
    // If conversion fails, return the original string
    return ansiString;
  }
}

0 commit comments

Comments
 (0)