-
Notifications
You must be signed in to change notification settings - Fork 13.3k
Expand file tree
/
Copy pathtextUtils.ts
More file actions
269 lines (233 loc) · 8.01 KB
/
textUtils.ts
File metadata and controls
269 lines (233 loc) · 8.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import stripAnsi from 'strip-ansi';
import ansiRegex from 'ansi-regex';
import { stripVTControlCharacters } from 'node:util';
import stringWidth from 'string-width';
import { LRUCache } from 'mnemonist';
import { LRU_BUFFER_PERF_CACHE_LIMIT } from '../constants.js';
/**
* Calculates the maximum width of a multi-line ASCII art string.
* @param asciiArt The ASCII art string.
* @returns The length of the longest line in the ASCII art.
*/
export const getAsciiArtWidth = (asciiArt: string): number => {
if (!asciiArt) {
return 0;
}
const lines = asciiArt.split('\n');
return Math.max(...lines.map((line) => line.length));
};
/*
* -------------------------------------------------------------------------
* Unicode‑aware helpers (work at the code‑point level rather than UTF‑16
* code units so that surrogate‑pair emoji count as one "column".)
* ---------------------------------------------------------------------- */
/**
* Checks if a string contains only ASCII characters (0-127).
*/
export function isAscii(str: string): boolean {
for (let i = 0; i < str.length; i++) {
if (str.charCodeAt(i) > 127) {
return false;
}
}
return true;
}
// Cache for code points
const MAX_STRING_LENGTH_TO_CACHE = 1000;
const codePointsCache = new LRUCache<string, string[]>(
LRU_BUFFER_PERF_CACHE_LIMIT,
);
export function toCodePoints(str: string): string[] {
// ASCII fast path
if (isAscii(str)) {
return str.split('');
}
// Cache short strings
if (str.length <= MAX_STRING_LENGTH_TO_CACHE) {
const cached = codePointsCache.get(str);
if (cached !== undefined) {
return cached;
}
}
const result = Array.from(str);
// Cache result
if (str.length <= MAX_STRING_LENGTH_TO_CACHE) {
codePointsCache.set(str, result);
}
return result;
}
export function cpLen(str: string): number {
if (isAscii(str)) {
return str.length;
}
return toCodePoints(str).length;
}
/**
* Converts a code point index to a UTF-16 code unit offset.
*/
export function cpIndexToOffset(str: string, cpIndex: number): number {
return cpSlice(str, 0, cpIndex).length;
}
export function cpSlice(str: string, start: number, end?: number): string {
if (isAscii(str)) {
return str.slice(start, end);
}
// Slice by code‑point indices and re‑join.
const arr = toCodePoints(str).slice(start, end);
return arr.join('');
}
/**
* Strip characters that can break terminal rendering.
*
* Uses Node.js built-in stripVTControlCharacters to handle VT sequences,
* then filters remaining control characters that can disrupt display.
*
* Characters stripped:
* - ANSI escape sequences (via strip-ansi)
* - VT control sequences (via Node.js util.stripVTControlCharacters)
* - C0 control chars (0x00-0x1F) except TAB(0x09), LF(0x0A), CR(0x0D)
* - C1 control chars (0x80-0x9F) that can cause display issues
* - BiDi control chars (U+200E, U+200F, U+202A-U+202E, U+2066-U+2069)
* - Zero-width chars (U+200B, U+FEFF)
*
* Characters preserved:
* - All printable Unicode including emojis
* - ZWJ (U+200D) - needed for complex emoji sequences
* - ZWNJ (U+200C) - preserve zero-width non-joiner
* - DEL (0x7F) - handled functionally by applyOperations, not a display issue
* - CR/LF (0x0D/0x0A) - needed for line breaks
* - TAB (0x09) - preserve tabs
*/
export function stripUnsafeCharacters(str: string): string {
const strippedAnsi = stripAnsi(str);
const strippedVT = stripVTControlCharacters(strippedAnsi);
// Use a regex to strip remaining unsafe control characters
// C0: 0x00-0x1F except 0x09 (TAB), 0x0A (LF), 0x0D (CR)
// C1: 0x80-0x9F
// BiDi: U+200E (LRM), U+200F (RLM), U+202A-U+202E, U+2066-U+2069
// Zero-width: U+200B (ZWSP), U+FEFF (BOM)
return strippedVT.replace(
// eslint-disable-next-line no-control-regex
/[\x00-\x08\x0B\x0C\x0E-\x1F\x80-\x9F\u200E\u200F\u202A-\u202E\u2066-\u2069\u200B\uFEFF]/g,
'',
);
}
/**
* Sanitize a string for display in inline UI components (e.g. Help, Suggestions).
* Removes ANSI codes, dangerous control characters, collapses whitespace
* characters into a single space, and optionally truncates.
*/
export function sanitizeForDisplay(str: string, maxLength?: number): string {
if (!str) {
return '';
}
let sanitized = stripUnsafeCharacters(str).replace(/\s+/g, ' ');
if (maxLength && sanitized.length > maxLength) {
sanitized = sanitized.substring(0, maxLength - 3) + '...';
}
return sanitized;
}
/**
* Normalizes escaped newline characters (e.g., "\\n") into actual newline characters.
*/
export function normalizeEscapedNewlines(value: string): string {
return value.replace(/\\r\\n/g, '\n').replace(/\\n/g, '\n');
}
const stringWidthCache = new LRUCache<string, number>(
LRU_BUFFER_PERF_CACHE_LIMIT,
);
/**
* Cached version of stringWidth function for better performance
*/
export const getCachedStringWidth = (str: string): number => {
// ASCII printable chars (32-126) have width 1.
// This is a very frequent path, so we use a fast numeric check.
if (str.length === 1) {
const code = str.charCodeAt(0);
if (code >= 0x20 && code <= 0x7e) {
return 1;
}
}
const cached = stringWidthCache.get(str);
if (cached !== undefined) {
return cached;
}
let width: number;
try {
width = stringWidth(str);
} catch {
// Fallback for characters that cause string-width to crash (e.g. U+0602)
// See: https://github.com/google-gemini/gemini-cli/issues/16418
width = toCodePoints(stripAnsi(str)).length;
}
stringWidthCache.set(str, width);
return width;
};
const regex = ansiRegex();
/* Recursively traverses a JSON-like structure (objects, arrays, primitives)
* and escapes all ANSI control characters found in any string values.
*
* This function is designed to be robust, handling deeply nested objects and
* arrays. It applies a regex-based replacement to all string values to
* safely escape control characters.
*
* To optimize performance, this function uses a "copy-on-write" strategy.
* It avoids allocating new objects or arrays if no nested string values
* required escaping, returning the original object reference in such cases.
*
* @param obj The JSON-like value (object, array, string, etc.) to traverse.
* @returns A new value with all nested string fields escaped, or the
* original `obj` reference if no changes were necessary.
*/
export function escapeAnsiCtrlCodes<T>(obj: T): T {
if (typeof obj === 'string') {
if (obj.search(regex) === -1) {
return obj; // No changes return original string
}
regex.lastIndex = 0; // needed for global regex
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
return obj.replace(regex, (match) =>
JSON.stringify(match).slice(1, -1),
) as T;
}
if (obj === null || typeof obj !== 'object') {
return obj;
}
if (Array.isArray(obj)) {
let newArr: unknown[] | null = null;
for (let i = 0; i < obj.length; i++) {
// eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
const value = obj[i];
// eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
const escapedValue = escapeAnsiCtrlCodes(value);
if (escapedValue !== value) {
if (newArr === null) {
newArr = [...obj];
}
newArr[i] = escapedValue;
}
}
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
return (newArr !== null ? newArr : obj) as T;
}
let newObj: T | null = null;
const keys = Object.keys(obj);
for (const key of keys) {
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
const value = (obj as Record<string, unknown>)[key];
const escapedValue = escapeAnsiCtrlCodes(value);
if (escapedValue !== value) {
if (newObj === null) {
newObj = { ...obj };
}
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
(newObj as Record<string, unknown>)[key] = escapedValue;
}
}
return newObj !== null ? newObj : obj;
}