Skip to content

Commit a3588d8

Browse files
authored
feat: add support for getLocFromIndex and getIndexFromLoc (#212)
1 parent 62089bc commit a3588d8

File tree

4 files changed

+2141
-4
lines changed

4 files changed

+2141
-4
lines changed

packages/plugin-kit/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,8 @@ The `TextSourceCodeBase` class is intended to be a base class that has several o
205205

206206
- `lines` - an array of text lines that is created automatically when the constructor is called.
207207
- `getLoc(node)` - gets the location of a node. Works for nodes that have the ESLint-style `loc` property and nodes that have the Unist-style [`position` property](https://github.com/syntax-tree/unist?tab=readme-ov-file#position). If you're using an AST with a different location format, you'll still need to implement this method yourself.
208+
- `getLocFromIndex(index)` - converts a source text index into a `{ line: number, column: number }` pair. For this method to work, the root node must cover the entire source text and the `getLoc()` method must be implemented correctly.
209+
- `getIndexFromLoc(loc)` - converts a `{ line: number, column: number }` pair into a source text index. For this method to work, the root node must cover the entire source text and the `getLoc()` method must be implemented correctly.
208210
- `getRange(node)` - gets the range of a node within the source text. Works for nodes that have the ESLint-style `range` property and nodes that have the Unist-style [`position` property](https://github.com/syntax-tree/unist?tab=readme-ov-file#position). If you're using an AST with a different range format, you'll still need to implement this method yourself.
209211
- `getText(nodeOrToken, charsBefore, charsAfter)` - gets the source text for the given node or token that has range information attached. Optionally, can return additional characters before and after the given node or token. As long as `getRange()` is properly implemented, this method will just work.
210212
- `getAncestors(node)` - returns the ancestry of the node. In order for this to work, you must implement the `getParent()` method yourself.

packages/plugin-kit/src/source-code.js

Lines changed: 266 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,32 @@ function hasPosStyleRange(node) {
6363
return "position" in node;
6464
}
6565

66+
/**
 * Performs a binary search to find the line containing a given target index.
 *
 * Returns the upper bound: the position of the first entry in
 * `lineStartIndices` that is strictly greater than `targetIndex`, or
 * `lineStartIndices.length` when no entry is greater. Because each entry is
 * the start index of a line, that position equals the 1-based ordinal of the
 * line containing `targetIndex` among the lines described by the array
 * (`0` means the target lies before the first line start).
 *
 * **`lineStartIndices` must be sorted in ascending order.**
 *
 * Time complexity: O(log n).
 * @param {number[]} lineStartIndices Sorted array of line start indices.
 * @param {number} targetIndex The target index to find the line for.
 * @returns {number} The number of entries in `lineStartIndices` that are less than or equal to `targetIndex`.
 */
function findLineNumberBinarySearch(lineStartIndices, targetIndex) {
	let low = 0;

	// Search the half-open range [low, high) so that `length` is a reachable result.
	let high = lineStartIndices.length;

	while (low < high) {
		// `>>> 1` halves the (non-negative) span without int32 truncation of `low + high`.
		const mid = low + ((high - low) >>> 1);

		if (targetIndex < lineStartIndices[mid]) {
			high = mid;
		} else {
			low = mid + 1;
		}
	}

	return low;
}
91+
6692
//-----------------------------------------------------------------------------
6793
// Exports
6894
//-----------------------------------------------------------------------------
@@ -216,15 +242,27 @@ export class Directive {
216242

217243
/**
218244
* Source Code Base Object
219-
* @template {SourceCodeBaseTypeOptions & {SyntaxElementWithLoc: object}} [Options=SourceCodeBaseTypeOptions & {SyntaxElementWithLoc: object}]
245+
* @template {SourceCodeBaseTypeOptions & {RootNode: object, SyntaxElementWithLoc: object}} [Options=SourceCodeBaseTypeOptions & {RootNode: object, SyntaxElementWithLoc: object}]
220246
* @implements {TextSourceCode<Options>}
221247
*/
222248
export class TextSourceCodeBase {
223249
/**
224250
* The lines of text in the source code.
225251
* @type {Array<string>}
226252
*/
227-
#lines;
253+
#lines = [];
254+
255+
/**
256+
* The indices of the start of each line in the source code.
257+
* @type {Array<number>}
258+
*/
259+
#lineStartIndices = [0];
260+
261+
/**
262+
* The pattern to match lineEndings in the source code.
263+
* @type {RegExp}
264+
*/
265+
#lineEndingPattern;
228266

229267
/**
230268
* The AST of the source code.
@@ -243,12 +281,105 @@ export class TextSourceCodeBase {
243281
* @param {Object} options The options for the instance.
244282
* @param {string} options.text The source code text.
245283
* @param {Options['RootNode']} options.ast The root AST node.
246-
* @param {RegExp} [options.lineEndingPattern] The pattern to match lineEndings in the source code.
284+
* @param {RegExp} [options.lineEndingPattern] The pattern to match lineEndings in the source code. Defaults to `/\r?\n/u`.
247285
*/
248286
constructor({ text, ast, lineEndingPattern = /\r?\n/u }) {
249287
this.ast = ast;
250288
this.text = text;
251-
this.#lines = text.split(lineEndingPattern);
289+
// Remove the global(`g`) and sticky(`y`) flags from the `lineEndingPattern` to avoid issues with lastIndex.
290+
this.#lineEndingPattern = new RegExp(
291+
lineEndingPattern.source,
292+
lineEndingPattern.flags.replace(/[gy]/gu, ""),
293+
);
294+
}
295+
296+
/**
297+
* Finds the next line in the source text and updates `#lines` and `#lineStartIndices`.
298+
* @param {string} text The text to search for the next line.
299+
* @returns {boolean} `true` if a next line was found, `false` otherwise.
300+
*/
301+
#findNextLine(text) {
302+
const match = this.#lineEndingPattern.exec(text);
303+
304+
if (!match) {
305+
return false;
306+
}
307+
308+
this.#lines.push(text.slice(0, match.index));
309+
this.#lineStartIndices.push(
310+
(this.#lineStartIndices.at(-1) ?? 0) +
311+
match.index +
312+
match[0].length,
313+
);
314+
315+
return true;
316+
}
317+
318+
/**
319+
* Ensures `#lines` is lazily calculated from the source text.
320+
* @returns {void}
321+
*/
322+
#ensureLines() {
323+
// If `#lines` has already been calculated, do nothing.
324+
if (this.#lines.length === this.#lineStartIndices.length) {
325+
return;
326+
}
327+
328+
while (
329+
this.#findNextLine(this.text.slice(this.#lineStartIndices.at(-1)))
330+
) {
331+
// Continue parsing until no more matches are found.
332+
}
333+
334+
this.#lines.push(this.text.slice(this.#lineStartIndices.at(-1)));
335+
336+
Object.freeze(this.#lines);
337+
}
338+
339+
/**
340+
* Ensures `#lineStartIndices` is lazily calculated up to the specified index.
341+
* @param {number} index The index of a character in a file.
342+
* @returns {void}
343+
*/
344+
#ensureLineStartIndicesFromIndex(index) {
345+
// If we've already parsed up to or beyond this index, do nothing.
346+
if (index <= (this.#lineStartIndices.at(-1) ?? 0)) {
347+
return;
348+
}
349+
350+
while (
351+
index > (this.#lineStartIndices.at(-1) ?? 0) &&
352+
this.#findNextLine(this.text.slice(this.#lineStartIndices.at(-1)))
353+
) {
354+
// Continue parsing until no more matches are found.
355+
}
356+
}
357+
358+
/**
359+
* Ensures `#lineStartIndices` is lazily calculated up to the specified loc.
360+
* @param {Object} loc A line/column location.
361+
* @param {number} loc.line The line number of the location. (0 or 1-indexed based on language.)
362+
* @param {number} lineStart The line number at which the parser starts counting.
363+
* @returns {void}
364+
*/
365+
#ensureLineStartIndicesFromLoc(loc, lineStart) {
366+
// Calculate line indices up to the potentially next line, as it is needed for the follow‑up calculation.
367+
const nextLocLineIndex = loc.line - lineStart + 1;
368+
const lastCalculatedLineIndex = this.#lineStartIndices.length - 1;
369+
let additionalLinesNeeded = nextLocLineIndex - lastCalculatedLineIndex;
370+
371+
// If we've already parsed up to or beyond this line, do nothing.
372+
if (additionalLinesNeeded <= 0) {
373+
return;
374+
}
375+
376+
while (
377+
additionalLinesNeeded > 0 &&
378+
this.#findNextLine(this.text.slice(this.#lineStartIndices.at(-1)))
379+
) {
380+
// Continue parsing until no more matches are found or we have enough lines.
381+
additionalLinesNeeded -= 1;
382+
}
252383
}
253384

254385
/**
@@ -271,6 +402,135 @@ export class TextSourceCodeBase {
271402
);
272403
}
273404

405+
/**
406+
* Converts a source text index into a `{ line: number, column: number }` pair.
407+
* @param {number} index The index of a character in a file.
408+
* @throws {TypeError|RangeError} If non-numeric index or index out of range.
409+
* @returns {{line: number, column: number}} A `{ line: number, column: number }` location object with 0 or 1-indexed line and 0 or 1-indexed column based on language.
410+
* @public
411+
*/
412+
getLocFromIndex(index) {
413+
if (typeof index !== "number") {
414+
throw new TypeError("Expected `index` to be a number.");
415+
}
416+
417+
if (index < 0 || index > this.text.length) {
418+
throw new RangeError(
419+
`Index out of range (requested index ${index}, but source text has length ${this.text.length}).`,
420+
);
421+
}
422+
423+
const {
424+
start: { line: lineStart, column: columnStart },
425+
end: { line: lineEnd, column: columnEnd },
426+
} = this.getLoc(this.ast);
427+
428+
// If the index is at the start, return the start location of the root node.
429+
if (index === 0) {
430+
return {
431+
line: lineStart,
432+
column: columnStart,
433+
};
434+
}
435+
436+
// If the index is `this.text.length`, return the location one "spot" past the last character of the file.
437+
if (index === this.text.length) {
438+
return {
439+
line: lineEnd,
440+
column: columnEnd,
441+
};
442+
}
443+
444+
// Ensure `#lineStartIndices` are lazily calculated.
445+
this.#ensureLineStartIndicesFromIndex(index);
446+
447+
/*
448+
* To figure out which line `index` is on, determine the last place at which index could
449+
* be inserted into `#lineStartIndices` to keep the list sorted.
450+
*/
451+
const lineNumber =
452+
(index >= (this.#lineStartIndices.at(-1) ?? 0)
453+
? this.#lineStartIndices.length
454+
: findLineNumberBinarySearch(this.#lineStartIndices, index)) -
455+
1 +
456+
lineStart;
457+
458+
return {
459+
line: lineNumber,
460+
column:
461+
index -
462+
this.#lineStartIndices[lineNumber - lineStart] +
463+
columnStart,
464+
};
465+
}
466+
467+
/**
468+
* Converts a `{ line: number, column: number }` pair into a source text index.
469+
* @param {Object} loc A line/column location.
470+
* @param {number} loc.line The line number of the location. (0 or 1-indexed based on language.)
471+
* @param {number} loc.column The column number of the location. (0 or 1-indexed based on language.)
472+
* @throws {TypeError|RangeError} If `loc` is not an object with a numeric
473+
* `line` and `column`, if the `line` is less than or equal to zero or
474+
* the `line` or `column` is out of the expected range.
475+
* @returns {number} The index of the line/column location in a file.
476+
* @public
477+
*/
478+
getIndexFromLoc(loc) {
479+
if (
480+
loc === null ||
481+
typeof loc !== "object" ||
482+
typeof loc.line !== "number" ||
483+
typeof loc.column !== "number"
484+
) {
485+
throw new TypeError(
486+
"Expected `loc` to be an object with numeric `line` and `column` properties.",
487+
);
488+
}
489+
490+
const {
491+
start: { line: lineStart, column: columnStart },
492+
end: { line: lineEnd, column: columnEnd },
493+
} = this.getLoc(this.ast);
494+
495+
if (loc.line < lineStart || lineEnd < loc.line) {
496+
throw new RangeError(
497+
`Line number out of range (line ${loc.line} requested). Valid range: ${lineStart}-${lineEnd}`,
498+
);
499+
}
500+
501+
// If the loc is at the start, return the start index of the root node.
502+
if (loc.line === lineStart && loc.column === columnStart) {
503+
return 0;
504+
}
505+
506+
// If the loc is at the end, return the index one "spot" past the last character of the file.
507+
if (loc.line === lineEnd && loc.column === columnEnd) {
508+
return this.text.length;
509+
}
510+
511+
// Ensure `#lineStartIndices` are lazily calculated.
512+
this.#ensureLineStartIndicesFromLoc(loc, lineStart);
513+
514+
const isLastLine = loc.line === lineEnd;
515+
const lineStartIndex = this.#lineStartIndices[loc.line - lineStart];
516+
const lineEndIndex = isLastLine
517+
? this.text.length
518+
: this.#lineStartIndices[loc.line - lineStart + 1];
519+
const positionIndex = lineStartIndex + loc.column - columnStart;
520+
521+
if (
522+
loc.column < columnStart ||
523+
(isLastLine && positionIndex > lineEndIndex) ||
524+
(!isLastLine && positionIndex >= lineEndIndex)
525+
) {
526+
throw new RangeError(
527+
`Column number out of range (column ${loc.column} requested). Valid range for line ${loc.line}: ${columnStart}-${lineEndIndex - lineStartIndex + columnStart + (isLastLine ? 0 : -1)}`,
528+
);
529+
}
530+
531+
return positionIndex;
532+
}
533+
274534
/**
275535
* Returns the range information for the given node or token.
276536
* @param {Options['SyntaxElementWithLoc']} nodeOrToken The node or token to get the range information for.
@@ -356,6 +616,8 @@ export class TextSourceCodeBase {
356616
* @public
357617
*/
358618
get lines() {
619+
this.#ensureLines(); // Ensure `#lines` is lazily calculated.
620+
359621
return this.#lines;
360622
}
361623

0 commit comments

Comments
 (0)