 * Regular expression to match URLs that are not already in markdown link format
 * Matches http/https URLs that are:
 * 1. Not preceded by ]( (to avoid double-processing existing markdown links)
 * 2. Not preceded by < or ` (to avoid processing HTML links or code blocks)
 * 3. Not preceded by [ (to avoid creating nested brackets)
 * 4. Not already wrapped in markdown link syntax
 * 5. Followed by Chinese characters or other non-URL characters
 * 6. Excludes trailing punctuation that shouldn't be part of URLs
 *
 * Updated to handle more edge cases and avoid false positives
 */
// Bare http/https URL matcher; capture group 1 is the URL itself.
// - The lookbehind rejects URLs directly preceded by `](`, `<`, a backtick or `[`.
// - The URL body is matched lazily and never contains whitespace, CJK /
//   full-width characters, `)`, `]`, `<`, `>` or a backtick.
// - The URL must be followed (after optional trailing `.,;:!?`) by whitespace,
//   a CJK / full-width character, `)`, `]`, or end of input.
// - Trailing punctuation is checked with a lookahead only, so it is neither
//   captured in group 1 nor consumed by the match; a `[$1]($1)` replacement
//   therefore leaves that punctuation in the surrounding text instead of
//   silently dropping it.
const URL_REGEX =
  /(?<!\]\(|<|`|\[)\b(https?:\/\/[^\s\u4e00-\u9fff\u3000-\u303f\uff00-\uffef\)\]<>`]+?)(?=[.,;:!?]*(?:[\s\u4e00-\u9fff\u3000-\u303f\uff00-\uffef\)\]]|$))/g;
2124
2225/**
2326 * Preprocess markdown content to fix URL parsing issues
@@ -30,8 +33,33 @@ export function preprocessMarkdownLinks(content: string): string {
3033 return content ;
3134 }
3235
33- // Replace bare URLs with markdown link format [url](url)
34- return content . replace ( URL_REGEX , '[$1]($1)' ) ;
36+ // Split content by code blocks to avoid processing URLs inside them
37+ const codeBlockRegex = / ` ` ` [ \s \S ] * ?` ` ` | ` [ ^ ` ] * ` / g;
38+ const parts : Array < { text : string ; isCodeBlock : boolean } > = [ ] ;
39+ let lastIndex = 0 ;
40+ let match ;
41+
42+ while ( ( match = codeBlockRegex . exec ( content ) ) !== null ) {
43+ // Add text before code block
44+ if ( match . index > lastIndex ) {
45+ parts . push ( { text : content . slice ( lastIndex , match . index ) , isCodeBlock : false } ) ;
46+ }
47+ // Add code block
48+ parts . push ( { text : match [ 0 ] , isCodeBlock : true } ) ;
49+ lastIndex = match . index + match [ 0 ] . length ;
50+ }
51+
52+ // Add remaining text
53+ if ( lastIndex < content . length ) {
54+ parts . push ( { text : content . slice ( lastIndex ) , isCodeBlock : false } ) ;
55+ }
56+
57+ // Process only non-code-block parts
58+ return parts
59+ . map ( part =>
60+ part . isCodeBlock ? part . text : part . text . replace ( URL_REGEX , '[$1]($1)' )
61+ )
62+ . join ( '' ) ;
3563}
3664
3765/**
0 commit comments