diff --git a/frontend/src/components/editor/output/ConsoleOutput.tsx b/frontend/src/components/editor/output/ConsoleOutput.tsx index b901bc97ae4..d258c838a78 100644 --- a/frontend/src/components/editor/output/ConsoleOutput.tsx +++ b/frontend/src/components/editor/output/ConsoleOutput.tsx @@ -14,6 +14,7 @@ import type { OutputMessage } from "@/core/kernel/messages"; import { useSelectAllContent } from "@/hooks/useSelectAllContent"; import { cn } from "@/utils/cn"; import { copyToClipboard } from "@/utils/copy"; +import { ansiToPlainText, parseHtmlContent } from "@/utils/dom"; import { invariant } from "@/utils/invariant"; import { Strings } from "@/utils/strings"; import { NameCellContentEditable } from "../actions/name-cell-input"; @@ -124,7 +125,18 @@ const ConsoleOutputInternal = (props: Props): React.ReactNode => { onClick={() => { const text = reversedOutputs .filter((output) => output.channel !== "pdb") - .map((output) => Strings.asString(output.data)) + .map((output) => { + // If starts with `<`, then assume it's HTML + if ( + typeof output.data === "string" && + output.data.startsWith("<") + ) { + return parseHtmlContent(output.data); + } + + // Otherwise, convert the ANSI to HTML, then parse as HTML + return ansiToPlainText(Strings.asString(output.data)); + }) .join("\n"); void copyToClipboard(text); }} diff --git a/frontend/src/core/ai/context/providers/cell-output.ts b/frontend/src/core/ai/context/providers/cell-output.ts index 728fab924c3..89956cfa306 100644 --- a/frontend/src/core/ai/context/providers/cell-output.ts +++ b/frontend/src/core/ai/context/providers/cell-output.ts @@ -9,6 +9,7 @@ import { displayCellName } from "@/core/cells/names"; import { isOutputEmpty } from "@/core/cells/outputs"; import type { OutputMessage } from "@/core/kernel/messages"; import type { JotaiStore } from "@/core/state/jotai"; +import { parseHtmlContent } from "@/utils/dom"; import { Logger } from "@/utils/Logger"; import { type AIContextItem, AIContextProvider } from 
"../registry"; import { contextToXml } from "../utils"; @@ -64,24 +65,6 @@ function isMediaMimetype( return false; } -function parseHtmlContent(htmlString: string): string { - try { - // Create a temporary DOM element to parse HTML - const tempDiv = document.createElement("div"); - tempDiv.innerHTML = htmlString; - - // Extract text content, removing HTML tags - const textContent = tempDiv.textContent || tempDiv.innerText || ""; - - // Clean up extra whitespace - return textContent.replaceAll(/\s+/g, " ").trim(); - } catch (error) { - Logger.error("Error parsing HTML content:", error); - // If parsing fails, return the original string - return htmlString; - } -} - export class CellOutputContextProvider extends AIContextProvider { readonly title = "Cell Outputs"; readonly mentionPrefix = "@"; diff --git a/frontend/src/utils/__tests__/dom.test.ts b/frontend/src/utils/__tests__/dom.test.ts new file mode 100644 index 00000000000..314f74016bb --- /dev/null +++ b/frontend/src/utils/__tests__/dom.test.ts @@ -0,0 +1,167 @@ +/* Copyright 2024 Marimo. All rights reserved. 
*/ + +import { describe, expect, test } from "vitest"; +import { ansiToPlainText, parseHtmlContent } from "../dom"; + +describe("parseHtmlContent", () => { + test("strips HTML tags and returns plain text", () => { + const htmlString = + 'Error: Something went wrong'; + const result = parseHtmlContent(htmlString); + expect(result).toMatchInlineSnapshot(`"Error: Something went wrong"`); + }); + + test("handles ANSI color span tags", () => { + const htmlString = + 'ERROR: File not found'; + const result = parseHtmlContent(htmlString); + expect(result).toMatchInlineSnapshot(`"ERROR: File not found"`); + }); + + test("normalizes whitespace", () => { + const htmlString = " Multiple \n\n spaces and \t tabs "; + const result = parseHtmlContent(htmlString); + expect(result).toMatchInlineSnapshot(` + " Multiple + + spaces and tabs" + `); + }); + + test("handles empty HTML", () => { + const htmlString = ""; + const result = parseHtmlContent(htmlString); + expect(result).toMatchInlineSnapshot(`""`); + }); + + test("handles plain text without HTML", () => { + const htmlString = "Simple error message"; + const result = parseHtmlContent(htmlString); + expect(result).toMatchInlineSnapshot(`"Simple error message"`); + }); + + test("handles nested HTML elements", () => { + const htmlString = + '
Traceback:
  File "test.py", line 1\n    print("hello"
'; + const result = parseHtmlContent(htmlString); + expect(result).toMatchInlineSnapshot(` + "Traceback: File "test.py", line 1 + print("hello"" + `); + }); + + test("handles complex ANSI-converted HTML with styles", () => { + const htmlString = + ' File "test.py", line 1'; + const result = parseHtmlContent(htmlString); + expect(result).toMatchInlineSnapshot(`" File "test.py", line 1"`); + }); +}); + +describe("ansiToPlainText", () => { + test("converts ANSI color codes to plain text", () => { + const ansiString = "\x1b[31mError:\x1b[0m Something went wrong"; + const result = ansiToPlainText(ansiString); + expect(result).toMatchInlineSnapshot(`"Error: Something went wrong"`); + }); + + test("handles multiple ANSI color codes", () => { + const ansiString = + "\x1b[32mSUCCESS:\x1b[0m \x1b[34mOperation completed\x1b[0m successfully"; + const result = ansiToPlainText(ansiString); + expect(result).toMatchInlineSnapshot( + `"SUCCESS: Operation completed successfully"`, + ); + }); + + test("handles ANSI bold and color combinations", () => { + const ansiString = + "\x1b[1;31mBOLD RED ERROR:\x1b[0m \x1b[33mWarning message\x1b[0m"; + const result = ansiToPlainText(ansiString); + expect(result).toMatchInlineSnapshot(`"BOLD RED ERROR: Warning message"`); + }); + + test("handles Python traceback with ANSI codes", () => { + const ansiString = + "\x1b[0;36m File \"\x1b[0m\x1b[0;32mtest.py\x1b[0m\x1b[0;36m\", line \x1b[0m\x1b[0;32m1\x1b[0m\x1b[0;36m, in \x1b[0m\x1b[0;35m\x1b[0m\n\x1b[0;31mNameError\x1b[0m: name 'undefined_var' is not defined"; + const result = ansiToPlainText(ansiString); + expect(result).toMatchInlineSnapshot(` + " File "test.py", line 1, in + NameError: name 'undefined_var' is not defined" + `); + }); + + test("handles error messages with background colors", () => { + const ansiString = + "\x1b[41;37m CRITICAL ERROR \x1b[0m \x1b[31mSystem failure detected\x1b[0m"; + const result = ansiToPlainText(ansiString); + expect(result).toMatchInlineSnapshot( + `" CRITICAL 
ERROR System failure detected"`, + ); + }); + + test("handles complex stack trace with mixed formatting", () => { + const ansiString = + 'Traceback (most recent call last):\n \x1b[36mFile "\x1b[32m/path/to/file.py\x1b[36m", line \x1b[32m42\x1b[36m, in \x1b[35mfunction_name\x1b[0m\n \x1b[31mraise ValueError("Something went wrong")\x1b[0m\n\x1b[31mValueError\x1b[0m: Something went wrong'; + const result = ansiToPlainText(ansiString); + expect(result).toMatchInlineSnapshot(` + "Traceback (most recent call last): + File "/path/to/file.py", line 42, in function_name + raise ValueError("Something went wrong") + ValueError: Something went wrong" + `); + }); + + test("handles empty string", () => { + const ansiString = ""; + const result = ansiToPlainText(ansiString); + expect(result).toMatchInlineSnapshot(`""`); + }); + + test("handles plain text without ANSI codes", () => { + const ansiString = "Plain error message without colors"; + const result = ansiToPlainText(ansiString); + expect(result).toMatchInlineSnapshot( + `"Plain error message without colors"`, + ); + }); + + test("handles whitespace and newlines correctly", () => { + const ansiString = + "\x1b[31m Error: \x1b[0m\n\n \x1b[33m Warning \x1b[0m "; + const result = ansiToPlainText(ansiString); + expect(result).toMatchInlineSnapshot(` + " Error: + + Warning" + `); + }); + + test("handles JavaScript error stack trace", () => { + const ansiString = + "\x1b[31mReferenceError\x1b[0m: \x1b[33mvariable\x1b[0m is not defined\n at \x1b[36mObject.\x1b[0m (\x1b[32m/path/to/script.js\x1b[0m:\x1b[33m5\x1b[0m:\x1b[33m1\x1b[0m)"; + const result = ansiToPlainText(ansiString); + expect(result).toMatchInlineSnapshot(` + "ReferenceError: variable is not defined + at Object. 
(/path/to/script.js:5:1)" + `); + }); + + test("handles Rust panic with ANSI formatting", () => { + const ansiString = + "thread '\x1b[32mmain\x1b[0m' panicked at '\x1b[31massertion failed: `(left == right)`\x1b[0m'\n \x1b[36mleft\x1b[0m: `\x1b[33m5\x1b[0m`\n \x1b[36mright\x1b[0m: `\x1b[33m10\x1b[0m`"; + const result = ansiToPlainText(ansiString); + expect(result).toMatchInlineSnapshot(` + "thread 'main' panicked at 'assertion failed: \`(left == right)\`' + left: \`5\` + right: \`10\`" + `); + }); + + test("handles mix of 8-bit and 256-color ANSI codes", () => { + const ansiString = + "\x1b[38;5;196mBright Red\x1b[0m and \x1b[38;5;46mBright Green\x1b[0m text"; + const result = ansiToPlainText(ansiString); + expect(result).toMatchInlineSnapshot(`"Bright Red and Bright Green text"`); + }); +}); diff --git a/frontend/src/utils/dom.ts b/frontend/src/utils/dom.ts new file mode 100644 index 00000000000..52da43453d7 --- /dev/null +++ b/frontend/src/utils/dom.ts @@ -0,0 +1,55 @@ +/* Copyright 2024 Marimo. All rights reserved. */ + +import { AnsiUp } from "ansi_up"; +import { Logger } from "./Logger"; + +// Create a shared AnsiUp instance +const ansiUp = new AnsiUp(); + +/** + * Extracts plain text content from HTML by removing all HTML tags and normalizing whitespace. 
/**
 * Extracts the plain text of an HTML string.
 *
 * The markup is parsed inside a `<template>` element rather than a regular
 * element of the live document. Template contents are inert: nothing in the
 * parsed markup is rendered, fetched or executed (no image loads, no inline
 * event handlers such as `<img onerror>`), so extracting text has no side
 * effects even when the HTML comes from arbitrary output.
 *
 * Whitespace handling: only trailing whitespace at the end of each line is
 * removed. Leading indentation, blank lines and runs of spaces inside a line
 * are kept, so multi-line text such as tracebacks keeps its layout.
 *
 * @param htmlString The HTML string to parse
 * @returns The text content with all tags removed and trailing whitespace
 *   trimmed from every line. If parsing throws, the error is logged and the
 *   original string is returned unchanged.
 */
export function parseHtmlContent(htmlString: string): string {
  try {
    // Parse into an inert fragment; see the note on `<template>` above.
    const template = document.createElement("template");
    template.innerHTML = htmlString;

    // textContent concatenates every text node and drops the tags.
    const textContent = template.content.textContent ?? "";

    return textContent
      .split("\n")
      .map((line) => line.trimEnd())
      .join("\n");
  } catch (error) {
    Logger.error("Error parsing HTML content:", error);
    // Best effort: hand back the input rather than failing the caller.
    return htmlString;
  }
}

/**
 * Converts a string that may contain ANSI escape sequences (colors, bold, ...)
 * to plain text.
 *
 * The string is first rendered to HTML with the shared `ansiUp` instance and
 * the resulting HTML is then reduced to text with `parseHtmlContent`, so the
 * same trailing-whitespace trimming applies.
 *
 * NOTE(review): `ansiUp` is a single module-level instance reused by every
 * call. ansi_up instances are documented as keeping parser state between
 * calls (they buffer incomplete escape sequences) — confirm that sharing one
 * instance across unrelated strings is intended.
 *
 * @param ansiString String that may contain ANSI escape sequences
 * @returns The text with ANSI codes removed; `""` for an empty input. If the
 *   conversion throws, the error is logged and the original string is
 *   returned unchanged.
 */
export function ansiToPlainText(ansiString: string): string {
  if (!ansiString) {
    return "";
  }

  try {
    // ANSI -> HTML (styling becomes markup) -> text (markup is dropped).
    const htmlString = ansiUp.ansi_to_html(ansiString);
    return parseHtmlContent(htmlString);
  } catch (error) {
    Logger.error("Error converting ANSI to plain text:", error);
    // Best effort: hand back the input rather than failing the caller.
    return ansiString;
  }
}