Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
217 changes: 216 additions & 1 deletion packages/cli/src/ui/utils/markdownParsingUtils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@

import { describe, it, expect, beforeAll, vi } from 'vitest';
import chalk from 'chalk';
import { parseMarkdownToANSI } from './markdownParsingUtils.js';
import {
parseMarkdownToANSI,
stripTrailingPunctuation,
} from './markdownParsingUtils.js';

// Mock the theme to use explicit colors instead of empty strings from the default theme.
// This ensures that ansiColorize actually applies ANSI codes that we can verify.
Expand Down Expand Up @@ -222,5 +225,217 @@ describe('parsingUtils', () => {
),
);
});

it('should strip trailing period from bare URL', () => {
  // The final period is expected in primary styling, outside the link.
  const rendered = parseMarkdownToANSI('Visit https://example.com.');
  const expected = `${primary('Visit ')}${link('https://example.com')}${primary('.')}`;
  expect(rendered).toBe(expected);
});

it('should strip trailing comma from bare URL', () => {
  // The comma and the following text are expected as two separate primary segments.
  const rendered = parseMarkdownToANSI(
    'See https://example.com, then continue',
  );
  const expected = `${primary('See ')}${link('https://example.com')}${primary(',')}${primary(' then continue')}`;
  expect(rendered).toBe(expected);
});

it('should strip multiple trailing punctuation from bare URL', () => {
  // Both "?" and "!" are expected together in one primary segment.
  const rendered = parseMarkdownToANSI('Is it https://example.com?!');
  const expected = `${primary('Is it ')}${link('https://example.com')}${primary('?!')}`;
  expect(rendered).toBe(expected);
});

it('should preserve balanced parentheses in bare URL (Wikipedia)', () => {
  // The closing paren pairs with "(bar", so it stays inside the link.
  const rendered = parseMarkdownToANSI(
    'See https://en.wikipedia.org/wiki/Foo_(bar) for details',
  );
  const expected = `${primary('See ')}${link('https://en.wikipedia.org/wiki/Foo_(bar)')}${primary(' for details')}`;
  expect(rendered).toBe(expected);
});

it('should strip trailing period after balanced parens in bare URL', () => {
  // Only the period leaves the link; the balanced paren is kept.
  const rendered = parseMarkdownToANSI(
    'See https://en.wikipedia.org/wiki/Foo_(bar).',
  );
  const expected = `${primary('See ')}${link('https://en.wikipedia.org/wiki/Foo_(bar)')}${primary('.')}`;
  expect(rendered).toBe(expected);
});

it('should not modify bare URL without trailing punctuation', () => {
  // Nothing to strip: the whole URL is expected as link text.
  const rendered = parseMarkdownToANSI('Visit https://example.com/path now');
  const expected = `${primary('Visit ')}${link('https://example.com/path')}${primary(' now')}`;
  expect(rendered).toBe(expected);
});
});

describe('stripTrailingPunctuation', () => {
  /**
   * Asserts that `raw` is split into the given URL part and stripped suffix.
   */
  const expectSplit = (
    raw: string,
    cleanUrl: string,
    trailing: string,
  ): void => {
    expect(stripTrailingPunctuation(raw)).toEqual({ cleanUrl, trailing });
  };

  // --- ASCII sentence punctuation ---

  it('should strip a trailing period', () => {
    expectSplit('https://example.com.', 'https://example.com', '.');
  });

  it('should strip a trailing comma', () => {
    expectSplit('https://example.com,', 'https://example.com', ',');
  });

  it('should strip trailing semicolon', () => {
    expectSplit('https://example.com;', 'https://example.com', ';');
  });

  it('should strip trailing colon', () => {
    expectSplit('https://example.com/path:', 'https://example.com/path', ':');
  });

  it('should strip trailing exclamation mark', () => {
    expectSplit('https://example.com!', 'https://example.com', '!');
  });

  it('should strip trailing question mark', () => {
    expectSplit('https://example.com?', 'https://example.com', '?');
  });

  it('should strip multiple trailing punctuation chars', () => {
    expectSplit('https://example.com?!', 'https://example.com', '?!');
  });

  // --- Quotes ---

  it('should strip trailing quotes', () => {
    expectSplit('https://example.com"', 'https://example.com', '"');
  });

  it('should strip trailing single quote', () => {
    expectSplit("https://example.com'", 'https://example.com', "'");
  });

  // --- Parentheses and other closing brackets ---

  it('should preserve balanced parentheses', () => {
    expectSplit(
      'https://en.wikipedia.org/wiki/Foo_(bar)',
      'https://en.wikipedia.org/wiki/Foo_(bar)',
      '',
    );
  });

  it('should strip unbalanced trailing paren', () => {
    expectSplit('https://example.com)', 'https://example.com', ')');
  });

  it('should strip period after balanced parens', () => {
    expectSplit(
      'https://en.wikipedia.org/wiki/Foo_(bar).',
      'https://en.wikipedia.org/wiki/Foo_(bar)',
      '.',
    );
  });

  it('should handle nested balanced parentheses', () => {
    expectSplit(
      'https://example.com/a_(b_(c))',
      'https://example.com/a_(b_(c))',
      '',
    );
  });

  it('should strip trailing bracket', () => {
    expectSplit('https://example.com]', 'https://example.com', ']');
  });

  it('should strip trailing angle bracket', () => {
    expectSplit('https://example.com>', 'https://example.com', '>');
  });

  it('should strip trailing curly brace', () => {
    expectSplit('https://example.com}', 'https://example.com', '}');
  });

  // --- URLs that need no change, or only a final period removed ---

  it('should return unchanged URL with no trailing punctuation', () => {
    expectSplit('https://example.com/path', 'https://example.com/path', '');
  });

  it('should handle URL with query params and trailing period', () => {
    expectSplit(
      'https://example.com/search?q=test.',
      'https://example.com/search?q=test',
      '.',
    );
  });

  // --- CJK fullwidth punctuation ---

  it('should strip CJK fullwidth period', () => {
    expectSplit('https://example.com\u3002', 'https://example.com', '\u3002');
  });

  it('should strip CJK fullwidth comma', () => {
    expectSplit('https://example.com\uFF0C', 'https://example.com', '\uFF0C');
  });

  // --- Edge cases ---

  it('should handle empty string', () => {
    expectSplit('', '', '');
  });

  it('should not strip periods that are part of the domain', () => {
    expectSplit(
      'https://www.example.com/path',
      'https://www.example.com/path',
      '',
    );
  });
});
});
74 changes: 73 additions & 1 deletion packages/cli/src/ui/utils/markdownParsingUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,74 @@ import {
import { theme } from '../semantic-colors.js';
import { debugLogger } from '@google/gemini-cli-core';

// Characters that should be stripped from the end of bare URLs.
// Includes common punctuation and CJK fullwidth equivalents.
const TRAILING_PUNCT: ReadonlySet<string> = new Set([
  '.',
  ',',
  ';',
  ':',
  '!',
  '?',
  "'",
  '"',
  ')',
  ']',
  '>',
  '}',
  // CJK fullwidth equivalents
  '\u3002', // Ideographic full stop
  '\uFF0C', // Fullwidth comma
  '\uFF1B', // Fullwidth semicolon
  '\uFF1A', // Fullwidth colon
  '\uFF01', // Fullwidth exclamation
  '\uFF1F', // Fullwidth question mark
  '\u300D', // Right corner bracket
  '\u300F', // Right white corner bracket
  '\uFF09', // Fullwidth right parenthesis
  '\u3011', // Right black lenticular bracket
  '\uFF3D', // Fullwidth right square bracket
  '\uFF1E', // Fullwidth greater-than
  '\uFF5D', // Fullwidth right curly bracket
]);

// Closing brackets (mapped to their opener) that are kept as part of the URL
// when they pair with an earlier opener inside it, e.g. `Foo_(bar)`,
// `http://[::1]` or `/users/{id}`. Every other closer in TRAILING_PUNCT is
// always treated as trailing punctuation.
const PAIRED_CLOSERS: ReadonlyMap<string, string> = new Map([
  [')', '('],
  [']', '['],
  ['}', '{'],
  ['\uFF09', '\uFF08'], // Fullwidth parentheses
]);

// Openers derived from PAIRED_CLOSERS, for O(1) membership checks.
const PAIRED_OPENERS: ReadonlySet<string> = new Set(PAIRED_CLOSERS.values());

/**
 * Splits trailing punctuation off a bare URL while keeping closing brackets
 * that are balanced by an opener earlier in the URL.
 *
 * This handles Wikipedia-style URLs such as
 * `https://en.wikipedia.org/wiki/Foo_(bar)`, IPv6 hosts such as
 * `http://[::1]`, and templates such as `/users/{id}`, where the closing
 * bracket belongs to the URL and is not sentence punctuation.
 *
 * A closer counts as balanced only if an unmatched opener of the same kind
 * precedes it (left-to-right pairing). A stray closer earlier in the URL
 * therefore does not cause a later, genuinely paired closer to be stripped.
 * Each bracket kind is tracked independently of the others.
 *
 * @param url - The raw matched URL text, possibly followed by punctuation.
 * @returns `cleanUrl` (the URL without trailing punctuation) and `trailing`
 *   (the stripped suffix, `''` when nothing was stripped). Concatenating the
 *   two always yields the original `url`.
 */
export const stripTrailingPunctuation = (
  url: string,
): { cleanUrl: string; trailing: string } => {
  // Fast path: empty input or nothing strippable at the end.
  if (url.length === 0 || !TRAILING_PUNCT.has(url[url.length - 1])) {
    return { cleanUrl: url, trailing: '' };
  }

  // Single left-to-right pass recording which closers pair with an earlier
  // opener. Pairing depends only on the text before a closer, so the result
  // stays valid as characters are trimmed from the end below.
  const pendingOpeners = new Map<string, number>();
  const pairedCloserIndexes = new Set<number>();
  for (let i = 0; i < url.length; i++) {
    const ch = url[i];
    if (PAIRED_OPENERS.has(ch)) {
      pendingOpeners.set(ch, (pendingOpeners.get(ch) ?? 0) + 1);
      continue;
    }
    const opener = PAIRED_CLOSERS.get(ch);
    if (opener === undefined) continue;
    const pending = pendingOpeners.get(opener) ?? 0;
    if (pending > 0) {
      pendingOpeners.set(opener, pending - 1);
      pairedCloserIndexes.add(i);
    }
  }

  // Trim from the end until reaching a non-punctuation character or a closer
  // that belongs to the URL.
  let end = url.length;
  while (
    end > 0 &&
    TRAILING_PUNCT.has(url[end - 1]) &&
    !pairedCloserIndexes.has(end - 1)
  ) {
    end--;
  }

  return {
    cleanUrl: url.slice(0, end),
    trailing: url.slice(end),
  };
};

// Constants for Markdown parsing
const BOLD_MARKER_LENGTH = 2; // For "**"
const ITALIC_MARKER_LENGTH = 1; // For "*" or "_"
Expand Down Expand Up @@ -197,7 +265,11 @@ export const parseMarkdownToANSI = (
),
);
} else if (fullMatch.match(/^https?:\/\//)) {
styledPart = ansiColorize(fullMatch, theme.text.link);
const { cleanUrl, trailing } = stripTrailingPunctuation(fullMatch);
styledPart = ansiColorize(cleanUrl, theme.text.link);
if (trailing) {
styledPart += ansiColorize(trailing, baseColor);
}
}
} catch (e) {
debugLogger.warn('Error parsing inline markdown part:', fullMatch, e);
Expand Down