From d491e2e7502f34a7c1b4dc7b2fb9c9de44280002 Mon Sep 17 00:00:00 2001 From: Henning Dieterichs Date: Tue, 14 Feb 2023 17:02:13 +0100 Subject: [PATCH 1/3] Refactors tokenization to support custom background tokenizer. --- src/vs/editor/common/languages.ts | 73 ++-- .../bracketPairsTree/bracketPairsTree.ts | 7 +- src/vs/editor/common/model/textModelTokens.ts | 407 +++++++++++------- .../common/model/tokenizationTextModelPart.ts | 145 +++---- .../common/tokenizationTextModelPart.ts | 5 +- .../common/tokens/contiguousTokensStore.ts | 4 + src/vs/monaco.d.ts | 12 +- 7 files changed, 381 insertions(+), 272 deletions(-) diff --git a/src/vs/editor/common/languages.ts b/src/vs/editor/common/languages.ts index 178a66a4727df..40bbb50e01a63 100644 --- a/src/vs/editor/common/languages.ts +++ b/src/vs/editor/common/languages.ts @@ -19,6 +19,7 @@ import { Selection } from 'vs/editor/common/core/selection'; import { LanguageId } from 'vs/editor/common/encodedTokenAttributes'; import * as model from 'vs/editor/common/model'; import { TokenizationRegistry as TokenizationRegistryImpl } from 'vs/editor/common/tokenizationRegistry'; +import { ContiguousMultilineTokens } from 'vs/editor/common/tokens/contiguousMultilineTokens'; import { ExtensionIdentifier } from 'vs/platform/extensions/common/extensions'; import { IMarkerData } from 'vs/platform/markers/common/markers'; @@ -33,14 +34,11 @@ export interface ILanguageIdCodec { export class Token { _tokenBrand: void = undefined; - public readonly offset: number; - public readonly type: string; - public readonly language: string; - - constructor(offset: number, type: string, language: string) { - this.offset = offset; - this.type = type; - this.language = language; + constructor( + public readonly offset: number, + public readonly type: string, + public readonly language: string, + ) { } public toString(): string { @@ -54,12 +52,10 @@ export class Token { export class TokenizationResult { _tokenizationResultBrand: void = undefined; - public readonly tokens: Token[]; - public readonly endState: IState; - - constructor(tokens: Token[], endState: IState) { - this.tokens = tokens; - this.endState = endState; + constructor( + public readonly tokens: Token[], + public readonly endState: IState, + ) { } } @@ -69,21 +65,30 @@ export class TokenizationResult { export class EncodedTokenizationResult { _encodedTokenizationResultBrand: void = undefined; + constructor( + /** + * The tokens in binary format. Each token occupies two array indices. For token i: + * - at offset 2*i => startIndex + * - at offset 2*i + 1 => metadata + * + */ + public readonly tokens: Uint32Array, + public readonly endState: IState, + ) { + } +} + +export interface IBackgroundTokenizer extends IDisposable { /** - * The tokens in binary format. Each token occupies two array indices. For token i: - * - at offset 2*i => startIndex - * - at offset 2*i + 1 => metadata + * Instructs the background tokenizer to set the tokens for the given range again. * + * This might be necessary if the renderer overwrote those tokens with heuristically computed ones for some viewport, + * when the change does not even propagate to that viewport. */ - public readonly tokens: Uint32Array; - public readonly endState: IState; - - constructor(tokens: Uint32Array, endState: IState) { - this.tokens = tokens; - this.endState = endState; - } + requestTokens(startLineNumber: number, endLineNumberExclusive: number): void; } + /** * @internal */ @@ -94,6 +99,26 @@ export interface ITokenizationSupport { tokenize(line: string, hasEOL: boolean, state: IState): TokenizationResult; tokenizeEncoded(line: string, hasEOL: boolean, state: IState): EncodedTokenizationResult; + + /** + * Can be/return undefined if default background tokenization should be used. + */ + createBackgroundTokenizer?(textModel: model.ITextModel, store: IBackgroundTokenizationStore): IBackgroundTokenizer | undefined; +} + +/** + * @internal + */ +export interface IBackgroundTokenizationStore { + setTokens(tokens: ContiguousMultilineTokens[]): void; + + setEndState(lineNumber: number, state: IState): void; + + /** + * Should be called to indicate that the background tokenization has finished for now. + * (This triggers bracket pair colorization to re-parse the bracket pairs with token information) + */ + backgroundTokenizationFinished(): void; } /** diff --git a/src/vs/editor/common/model/bracketPairsTextModelPart/bracketPairsTree/bracketPairsTree.ts b/src/vs/editor/common/model/bracketPairsTextModelPart/bracketPairsTree/bracketPairsTree.ts index 0a71c74eb5616..215de67ef9941 100644 --- a/src/vs/editor/common/model/bracketPairsTextModelPart/bracketPairsTree/bracketPairsTree.ts +++ b/src/vs/editor/common/model/bracketPairsTextModelPart/bracketPairsTree/bracketPairsTree.ts @@ -56,8 +56,7 @@ export class BracketPairsTree extends Disposable { ) { super(); - if (textModel.tokenization.backgroundTokenizationState === BackgroundTokenizationState.Uninitialized) { - // There are no token information yet + if (!textModel.tokenization.hasTokens) { const brackets = this.brackets.getSingleLanguageBracketTokens(this.textModel.getLanguageId()); const tokenizer = new FastTokenizer(this.textModel.getValue(), brackets); this.initialAstWithoutTokens = parseDocument(tokenizer, [], undefined, true); @@ -67,7 +66,8 @@ export class BracketPairsTree extends Disposable { // Directly create the tree with token information. this.initialAstWithoutTokens = undefined; this.astWithTokens = this.parseDocumentFromTextBuffer([], undefined, false); - } else if (textModel.tokenization.backgroundTokenizationState === BackgroundTokenizationState.InProgress) { + } else { + // We missed some token changes already, so we cannot use the fast tokenizer + delta increments this.initialAstWithoutTokens = this.parseDocumentFromTextBuffer([], undefined, true); this.astWithTokens = this.initialAstWithoutTokens; } @@ -103,6 +103,7 @@ export class BracketPairsTree extends Disposable { } public handleContentChanged(change: IModelContentChangedEvent) { + // Must be sorted in ascending order const edits = change.changes.map(c => { const range = Range.lift(c.range); return new TextEditInfo( diff --git a/src/vs/editor/common/model/textModelTokens.ts b/src/vs/editor/common/model/textModelTokens.ts index 120da61f3f531..f801d2ef4e6a5 100644 --- a/src/vs/editor/common/model/textModelTokens.ts +++ b/src/vs/editor/common/model/textModelTokens.ts @@ -4,22 +4,23 @@ *--------------------------------------------------------------------------------------------*/ import * as arrays from 'vs/base/common/arrays'; -import { onUnexpectedError } from 'vs/base/common/errors'; -import { LineTokens } from 'vs/editor/common/tokens/lineTokens'; +import { IdleDeadline, runWhenIdle } from 'vs/base/common/async'; +import { BugIndicatingError, onUnexpectedError } from 'vs/base/common/errors'; +import { Disposable, MutableDisposable } from 'vs/base/common/lifecycle'; +import { setTimeout0 } from 'vs/base/common/platform'; +import { StopWatch } from 'vs/base/common/stopwatch'; +import { countEOL } from 'vs/editor/common/core/eolCounter'; import { Position } from 'vs/editor/common/core/position'; import { IRange } from 'vs/editor/common/core/range'; -import { EncodedTokenizationResult, ILanguageIdCodec, IState, ITokenizationSupport, TokenizationRegistry } from 'vs/editor/common/languages'; import { StandardTokenType } from 'vs/editor/common/encodedTokenAttributes'; +import { EncodedTokenizationResult, IBackgroundTokenizationStore, IBackgroundTokenizer, ILanguageIdCodec, IState, ITokenizationSupport, TokenizationRegistry } from 'vs/editor/common/languages'; import { nullTokenizeEncoded } from 'vs/editor/common/languages/nullTokenize'; +import { ITextModel } from 'vs/editor/common/model'; import { TextModel } from 'vs/editor/common/model/textModel'; -import { Disposable } from 'vs/base/common/lifecycle'; -import { StopWatch } from 'vs/base/common/stopwatch'; -import { countEOL } from 'vs/editor/common/core/eolCounter'; -import { ContiguousMultilineTokensBuilder } from 'vs/editor/common/tokens/contiguousMultilineTokensBuilder'; -import { runWhenIdle, IdleDeadline } from 'vs/base/common/async'; -import { setTimeout0 } from 'vs/base/common/platform'; -import { IModelContentChangedEvent, IModelLanguageChangedEvent } from 'vs/editor/common/textModelEvents'; import { TokenizationTextModelPart } from 'vs/editor/common/model/tokenizationTextModelPart'; +import { IModelContentChangedEvent, IModelLanguageChangedEvent } from 'vs/editor/common/textModelEvents'; +import { ContiguousMultilineTokensBuilder } from 'vs/editor/common/tokens/contiguousMultilineTokensBuilder'; +import { LineTokens } from 'vs/editor/common/tokens/lineTokens'; const enum Constants { CHEAP_TOKENIZATION_LENGTH_LIMIT = 2048 @@ -51,6 +52,7 @@ class ContiguousGrowingArray { this._store[index] = value; } + // TODO have `replace` instead of `delete` and `insert` public delete(deleteIndex: number, deleteCount: number): void { if (deleteCount === 0 || deleteIndex >= this._store.length) { return; @@ -72,8 +74,8 @@ class ContiguousGrowingArray { /** * Stores the states at the start of each line and keeps track of which lines - * must be retokenized. Also uses state equality to quickly validate lines - * that don't need to be retokenized. + * must be re-tokenized. Also uses state equality to quickly validate lines + * that don't need to be re-tokenized. * * For example, when typing on a line, the line gets marked as needing to be tokenized. * Once the line is tokenized, the end state is checked for equality against the begin @@ -93,7 +95,7 @@ export class TokenizationStateStore { private readonly _lineNeedsTokenization = new ContiguousGrowingArray(true); /** * `invalidLineStartIndex` indicates that line number `invalidLineStartIndex + 1` - * is the first one that needs to be retokenized. + * is the first one that needs to be re-tokenized. */ private _firstLineNeedsTokenization: number; @@ -118,13 +120,13 @@ export class TokenizationStateStore { return this._lineBeginState.get(lineIndex); } - public setEndState(linesLength: number, lineIndex: number, endState: IState): void { + public setEndState(linesLength: number, lineIndex: number, endState: IState): boolean { this._lineNeedsTokenization.set(lineIndex, false); this._firstLineNeedsTokenization = lineIndex + 1; // Check if this was the last line if (lineIndex === linesLength - 1) { - return; + return false; } // Check if the end state has changed @@ -132,7 +134,7 @@ export class TokenizationStateStore { if (previousEndState === null || !endState.equals(previousEndState)) { this._lineBeginState.set(lineIndex + 1, endState); this.markMustBeTokenized(lineIndex + 1); - return; + return true; } // Perhaps we can skip tokenizing some lines... @@ -144,10 +146,9 @@ export class TokenizationStateStore { i++; } this._firstLineNeedsTokenization = i; + return false; } - //#region Editing - public applyEdits(range: IRange, eolCount: number): void { this.markMustBeTokenized(range.startLineNumber - 1); @@ -158,13 +159,34 @@ export class TokenizationStateStore { this._lineNeedsTokenization.insert(range.startLineNumber, eolCount); } - //#endregion + public updateTokensUntilLine(textModel: ITextModel, languageIdCodec: ILanguageIdCodec, builder: ContiguousMultilineTokensBuilder, lineNumber: number): void { + const languageId = textModel.getLanguageId(); + const linesLength = textModel.getLineCount(); + const endLineIndex = lineNumber - 1; + + // Validate all states up to and including endLineIndex + for (let lineIndex = this.invalidLineStartIndex; lineIndex <= endLineIndex; lineIndex++) { + const text = textModel.getLineContent(lineIndex + 1); + const lineStartState = this.getBeginState(lineIndex); + + const r = safeTokenize(languageIdCodec, languageId, this.tokenizationSupport, text, true, lineStartState!); + builder.add(lineIndex + 1, r.tokens); + this.setEndState(linesLength, lineIndex, r.endState); + lineIndex = this.invalidLineStartIndex - 1; // -1 because the outer loop increments it + } + } + + isTokenizationComplete(_textModel: ITextModel): boolean { + return this.invalidLineStartIndex >= _textModel.getLineCount(); + } } export class TextModelTokenization extends Disposable { - private _tokenizationStateStore: TokenizationStateStore | null; - private _isDisposed: boolean; + private _tokenizationStateStore: TokenizationStateStore | null = null; + private _defaultBackgroundTokenizer: DefaultBackgroundTokenizer | null = null; + + private readonly backgroundTokenizer = this._register(new MutableDisposable()); constructor( private readonly _textModel: TextModel, @@ -172,8 +194,6 @@ export class TextModelTokenization extends Disposable { private readonly _languageIdCodec: ILanguageIdCodec ) { super(); - this._isDisposed = false; - this._tokenizationStateStore = null; this._register(TokenizationRegistry.onDidChange((e) => { const languageId = this._textModel.getLanguageId(); @@ -188,13 +208,6 @@ export class TextModelTokenization extends Disposable { this._resetTokenizationState(); } - public override dispose(): void { - this._isDisposed = true; - super.dispose(); - } - - //#region TextModel events - public handleDidChangeContent(e: IModelContentChangedEvent): void { if (e.isFlush) { this._resetTokenizationState(); @@ -208,11 +221,11 @@ export class TextModelTokenization extends Disposable { } } - this._beginBackgroundTokenization(); + this._defaultBackgroundTokenizer?.handleChanges(); } public handleDidChangeAttached(): void { - this._beginBackgroundTokenization(); + this._defaultBackgroundTokenizer?.handleChanges(); } public handleDidChangeLanguage(e: IModelLanguageChangedEvent): void { @@ -220,8 +233,6 @@ export class TextModelTokenization extends Disposable { this._tokenizationPart.clearTokens(); } - //#endregion - private _resetTokenizationState(): void { const [tokenizationSupport, initialState] = initializeTokenization(this._textModel, this._tokenizationPart); if (tokenizationSupport && initialState) { @@ -229,81 +240,55 @@ export class TextModelTokenization extends Disposable { } else { this._tokenizationStateStore = null; } - this._beginBackgroundTokenization(); - } - - private _isScheduled = false; - private _beginBackgroundTokenization(): void { - if (this._isScheduled || !this._textModel.isAttachedToEditor() || !this._hasLinesToTokenize()) { - return; - } - - this._isScheduled = true; - runWhenIdle((deadline) => { - this._isScheduled = false; - - this._backgroundTokenizeWithDeadline(deadline); - }); - } - - /** - * Tokenize until the deadline occurs, but try to yield every 1-2ms. - */ - private _backgroundTokenizeWithDeadline(deadline: IdleDeadline): void { - // Read the time remaining from the `deadline` immediately because it is unclear - // if the `deadline` object will be valid after execution leaves this function. - const endTime = Date.now() + deadline.timeRemaining(); - - const execute = () => { - if (this._isDisposed || !this._textModel.isAttachedToEditor() || !this._hasLinesToTokenize()) { - // disposed in the meantime or detached or finished - return; - } - this._backgroundTokenizeForAtLeast1ms(); + this.backgroundTokenizer.clear(); - if (Date.now() < endTime) { - // There is still time before reaching the deadline, so yield to the browser and then - // continue execution - setTimeout0(execute); - } else { - // The deadline has been reached, so schedule a new idle callback if necessary - this._beginBackgroundTokenization(); + this._defaultBackgroundTokenizer = null; + if (this._tokenizationStateStore) { + const b: IBackgroundTokenizationStore = { + setTokens: (tokens) => { + this._tokenizationPart.setTokens(tokens); + }, + backgroundTokenizationFinished: () => { + this._tokenizationPart.handleBackgroundTokenizationFinished(); + }, + setEndState: (lineNumber, state) => { + if (!state) { + throw new BugIndicatingError(); + } + const invalidLineStartIndex = this._tokenizationStateStore?.invalidLineStartIndex; + if (invalidLineStartIndex !== undefined && lineNumber - 1 >= invalidLineStartIndex) { + // Don't accept states for definitely valid states + this._tokenizationStateStore?.setEndState(this._textModel.getLineCount(), lineNumber - 1, state); + } + }, + }; + + this.backgroundTokenizer.clear(); + + if (tokenizationSupport && tokenizationSupport.createBackgroundTokenizer) { + this.backgroundTokenizer.value = tokenizationSupport.createBackgroundTokenizer(this._textModel, b); } - }; - execute(); - } - - /** - * Tokenize for at least 1ms. - */ - private _backgroundTokenizeForAtLeast1ms(): void { - const lineCount = this._textModel.getLineCount(); - const builder = new ContiguousMultilineTokensBuilder(); - const sw = StopWatch.create(false); - - do { - if (sw.elapsed() > 1) { - // the comparison is intentionally > 1 and not >= 1 to ensure that - // a full millisecond has elapsed, given how microseconds are rounded - // to milliseconds - break; + if (!this.backgroundTokenizer.value) { + this.backgroundTokenizer.value = this._defaultBackgroundTokenizer = + new DefaultBackgroundTokenizer( + this._textModel, + this._tokenizationStateStore!, + b, + this._languageIdCodec + ); + this._defaultBackgroundTokenizer.handleChanges(); } - - const tokenizedLineNumber = this._tokenizeOneInvalidLine(builder); - - if (tokenizedLineNumber >= lineCount) { - break; - } - } while (this._hasLinesToTokenize()); - - this._tokenizationPart.setTokens(builder.finalize(), this._isTokenizationComplete()); + } else { + this.backgroundTokenizer.clear(); + } } public tokenizeViewport(startLineNumber: number, endLineNumber: number): void { const builder = new ContiguousMultilineTokensBuilder(); - this._tokenizeViewport(builder, startLineNumber, endLineNumber); - this._tokenizationPart.setTokens(builder.finalize(), this._isTokenizationComplete()); + this._heuristicallyTokenizeViewport(builder, startLineNumber, endLineNumber); + this._tokenizationPart.setTokens(builder.finalize()); + this._defaultBackgroundTokenizer?.checkFinished(); } public reset(): void { @@ -313,8 +298,9 @@ export class TextModelTokenization extends Disposable { public forceTokenization(lineNumber: number): void { const builder = new ContiguousMultilineTokensBuilder(); - this._updateTokensUntilLine(builder, lineNumber); - this._tokenizationPart.setTokens(builder.finalize(), this._isTokenizationComplete()); + this._tokenizationStateStore?.updateTokensUntilLine(this._textModel, this._languageIdCodec, builder, lineNumber); + this._tokenizationPart.setTokens(builder.finalize()); + this._defaultBackgroundTokenizer?.checkFinished(); } public getTokenTypeIfInsertingCharacter(position: Position, character: string): StandardTokenType { @@ -401,55 +387,14 @@ export class TextModelTokenization extends Disposable { return false; } - private _hasLinesToTokenize(): boolean { - if (!this._tokenizationStateStore) { - return false; - } - return (this._tokenizationStateStore.invalidLineStartIndex < this._textModel.getLineCount()); - } - - private _isTokenizationComplete(): boolean { - if (!this._tokenizationStateStore) { - return false; - } - return (this._tokenizationStateStore.invalidLineStartIndex >= this._textModel.getLineCount()); - } - - private _tokenizeOneInvalidLine(builder: ContiguousMultilineTokensBuilder): number { - if (!this._tokenizationStateStore || !this._hasLinesToTokenize()) { - return this._textModel.getLineCount() + 1; - } - const lineNumber = this._tokenizationStateStore.invalidLineStartIndex + 1; - this._updateTokensUntilLine(builder, lineNumber); - return lineNumber; - } - - private _updateTokensUntilLine(builder: ContiguousMultilineTokensBuilder, lineNumber: number): void { - if (!this._tokenizationStateStore) { - return; - } - const languageId = this._textModel.getLanguageId(); - const linesLength = this._textModel.getLineCount(); - const endLineIndex = lineNumber - 1; - - // Validate all states up to and including endLineIndex - for (let lineIndex = this._tokenizationStateStore.invalidLineStartIndex; lineIndex <= endLineIndex; lineIndex++) { - const text = this._textModel.getLineContent(lineIndex + 1); - const lineStartState = this._tokenizationStateStore.getBeginState(lineIndex); - - const r = safeTokenize(this._languageIdCodec, languageId, this._tokenizationStateStore.tokenizationSupport, text, true, lineStartState!); - builder.add(lineIndex + 1, r.tokens); - this._tokenizationStateStore.setEndState(linesLength, lineIndex, r.endState); - lineIndex = this._tokenizationStateStore.invalidLineStartIndex - 1; // -1 because the outer loop increments it - } - } - - private _tokenizeViewport(builder: ContiguousMultilineTokensBuilder, startLineNumber: number, endLineNumber: number): void { + /** + * The result is not cached. + */ + private _heuristicallyTokenizeViewport(builder: ContiguousMultilineTokensBuilder, startLineNumber: number, endLineNumber: number): void { if (!this._tokenizationStateStore) { // nothing to do return; } - if (endLineNumber <= this._tokenizationStateStore.invalidLineStartIndex) { // nothing to do return; @@ -457,24 +402,38 @@ export class TextModelTokenization extends Disposable { if (startLineNumber <= this._tokenizationStateStore.invalidLineStartIndex) { // tokenization has reached the viewport start... - this._updateTokensUntilLine(builder, endLineNumber); + this._tokenizationStateStore.updateTokensUntilLine(this._textModel, this._languageIdCodec, builder, endLineNumber); return; } - let nonWhitespaceColumn = this._textModel.getLineFirstNonWhitespaceColumn(startLineNumber); - const fakeLines: string[] = []; + let state = this.guessStartState(startLineNumber); + const languageId = this._textModel.getLanguageId(); + + for (let lineNumber = startLineNumber; lineNumber <= endLineNumber; lineNumber++) { + const text = this._textModel.getLineContent(lineNumber); + const r = safeTokenize(this._languageIdCodec, languageId, this._tokenizationStateStore.tokenizationSupport, text, true, state); + builder.add(lineNumber, r.tokens); + state = r.endState; + } + // We overrode the tokens. Because old states might get reused (thus stopping invalidation), + // we have to explicitly request the tokens for this range again. + this.backgroundTokenizer.value?.requestTokens(startLineNumber, endLineNumber + 1); + } + + private guessStartState(lineNumber: number): IState { + let nonWhitespaceColumn = this._textModel.getLineFirstNonWhitespaceColumn(lineNumber); + const likelyRelevantLines: string[] = []; let initialState: IState | null = null; - for (let i = startLineNumber - 1; nonWhitespaceColumn > 1 && i >= 1; i--) { + for (let i = lineNumber - 1; nonWhitespaceColumn > 1 && i >= 1; i--) { const newNonWhitespaceIndex = this._textModel.getLineFirstNonWhitespaceColumn(i); - + // Ignore lines full of whitespace if (newNonWhitespaceIndex === 0) { continue; } - if (newNonWhitespaceIndex < nonWhitespaceColumn) { - fakeLines.push(this._textModel.getLineContent(i)); + likelyRelevantLines.push(this._textModel.getLineContent(i)); nonWhitespaceColumn = newNonWhitespaceIndex; - initialState = this._tokenizationStateStore.getBeginState(i - 1); + initialState = this._tokenizationStateStore!.getBeginState(i - 1); if (initialState) { break; } @@ -482,23 +441,17 @@ export class TextModelTokenization extends Disposable { } if (!initialState) { - initialState = this._tokenizationStateStore.initialState; + initialState = this._tokenizationStateStore!.initialState; } + likelyRelevantLines.reverse(); const languageId = this._textModel.getLanguageId(); let state = initialState; - for (let i = fakeLines.length - 1; i >= 0; i--) { - const r = safeTokenize(this._languageIdCodec, languageId, this._tokenizationStateStore.tokenizationSupport, fakeLines[i], false, state); - state = r.endState; - } - - for (let lineNumber = startLineNumber; lineNumber <= endLineNumber; lineNumber++) { - const text = this._textModel.getLineContent(lineNumber); - const r = safeTokenize(this._languageIdCodec, languageId, this._tokenizationStateStore.tokenizationSupport, text, true, state); - builder.add(lineNumber, r.tokens); - this._tokenizationStateStore.markMustBeTokenized(lineNumber - 1); + for (const line of likelyRelevantLines) { + const r = safeTokenize(this._languageIdCodec, languageId, this._tokenizationStateStore!.tokenizationSupport, line, false, state); state = r.endState; } + return state; } } @@ -538,3 +491,123 @@ function safeTokenize(languageIdCodec: ILanguageIdCodec, languageId: string, tok LineTokens.convertToEndOffset(r.tokens, text.length); return r; } + +class DefaultBackgroundTokenizer implements IBackgroundTokenizer { + private _isDisposed = false; + + constructor( + private readonly _textModel: ITextModel, + private readonly _stateStore: TokenizationStateStore, + private readonly _backgroundTokenStore: IBackgroundTokenizationStore, + private readonly _languageIdCodec: ILanguageIdCodec, + ) { + } + + public dispose(): void { + this._isDisposed = true; + } + + public handleChanges(): void { + this._beginBackgroundTokenization(); + } + + private _isScheduled = false; + private _beginBackgroundTokenization(): void { + if (this._isScheduled || !this._textModel.isAttachedToEditor() || !this._hasLinesToTokenize()) { + return; + } + + this._isScheduled = true; + runWhenIdle((deadline) => { + this._isScheduled = false; + + this._backgroundTokenizeWithDeadline(deadline); + }); + } + + /** + * Tokenize until the deadline occurs, but try to yield every 1-2ms. + */ + private _backgroundTokenizeWithDeadline(deadline: IdleDeadline): void { + // Read the time remaining from the `deadline` immediately because it is unclear + // if the `deadline` object will be valid after execution leaves this function. + const endTime = Date.now() + deadline.timeRemaining(); + + const execute = () => { + if (this._isDisposed || !this._textModel.isAttachedToEditor() || !this._hasLinesToTokenize()) { + // disposed in the meantime or detached or finished + return; + } + + this._backgroundTokenizeForAtLeast1ms(); + + if (Date.now() < endTime) { + // There is still time before reaching the deadline, so yield to the browser and then + // continue execution + setTimeout0(execute); + } else { + // The deadline has been reached, so schedule a new idle callback if necessary + this._beginBackgroundTokenization(); + } + }; + execute(); + } + + /** + * Tokenize for at least 1ms. + */ + private _backgroundTokenizeForAtLeast1ms(): void { + const lineCount = this._textModel.getLineCount(); + const builder = new ContiguousMultilineTokensBuilder(); + const sw = StopWatch.create(false); + + do { + if (sw.elapsed() > 1) { + // the comparison is intentionally > 1 and not >= 1 to ensure that + // a full millisecond has elapsed, given how microseconds are rounded + // to milliseconds + break; + } + + const tokenizedLineNumber = this._tokenizeOneInvalidLine(builder); + + if (tokenizedLineNumber >= lineCount) { + break; + } + } while (this._hasLinesToTokenize()); + + this._backgroundTokenStore.setTokens(builder.finalize()); + this.checkFinished(); + } + + private _hasLinesToTokenize(): boolean { + if (!this._stateStore) { + return false; + } + return this._stateStore.invalidLineStartIndex < this._textModel.getLineCount(); + } + + private _tokenizeOneInvalidLine(builder: ContiguousMultilineTokensBuilder): number { + if (!this._stateStore || !this._hasLinesToTokenize()) { + return this._textModel.getLineCount() + 1; + } + const lineNumber = this._stateStore.invalidLineStartIndex + 1; + this._stateStore.updateTokensUntilLine(this._textModel, this._languageIdCodec, builder, lineNumber); + return lineNumber; + } + + public checkFinished(): void { + if (this._isDisposed) { + return; + } + if (this._stateStore.isTokenizationComplete(this._textModel)) { + this._backgroundTokenStore.backgroundTokenizationFinished(); + } + } + + requestTokens(startLineNumber: number, endLineNumberExclusive: number): void { + for (let lineNumber = startLineNumber; lineNumber < endLineNumberExclusive; lineNumber++) { + this._stateStore.markMustBeTokenized(lineNumber - 1); + } + } +} diff --git a/src/vs/editor/common/model/tokenizationTextModelPart.ts b/src/vs/editor/common/model/tokenizationTextModelPart.ts index 378dcb5658958..04bcc3cdf741f 100644 --- a/src/vs/editor/common/model/tokenizationTextModelPart.ts +++ b/src/vs/editor/common/model/tokenizationTextModelPart.ts @@ -69,12 +69,17 @@ export class TokenizationTextModelPart extends TextModelPart implements ITokeniz ); } + public override dispose(): void { + this._languageRegistryListener.dispose(); + this._tokenization.dispose(); + super.dispose(); + } + _hasListeners(): boolean { return ( this._onDidChangeLanguage.hasListeners() || this._onDidChangeLanguageConfiguration.hasListeners() || this._onDidChangeTokens.hasListeners() - || this._onBackgroundTokenizationStateChanged.hasListeners() ); } @@ -104,35 +109,15 @@ export class TokenizationTextModelPart extends TextModelPart implements ITokeniz this._semanticTokens.flush(); } + // TODO@hediet TODO@alexdima what is the difference between this and acceptEdit? public handleDidChangeContent(change: IModelContentChangedEvent): void { this._tokenization.handleDidChangeContent(change); } - public override dispose(): void { - this._languageRegistryListener.dispose(); - this._tokenization.dispose(); - super.dispose(); - } - - private _backgroundTokenizationState = BackgroundTokenizationState.Uninitialized; + private _backgroundTokenizationState = BackgroundTokenizationState.InProgress; public get backgroundTokenizationState(): BackgroundTokenizationState { return this._backgroundTokenizationState; } - private handleTokenizationProgress(completed: boolean) { - if (this._backgroundTokenizationState === BackgroundTokenizationState.Completed) { - // We already did a full tokenization and don't go back to progressing. - return; - } - const newState = completed ? BackgroundTokenizationState.Completed : BackgroundTokenizationState.InProgress; - if (this._backgroundTokenizationState !== newState) { - this._backgroundTokenizationState = newState; - this.bracketPairsTextModelPart.handleDidChangeBackgroundTokenizationState(); - this._onBackgroundTokenizationStateChanged.fire(); - } - } - - private readonly _onBackgroundTokenizationStateChanged = this._register(new Emitter()); - public readonly onBackgroundTokenizationStateChanged: Event = this._onBackgroundTokenizationStateChanged.event; public setLineTokens( lineNumber: number, @@ -151,64 +136,76 @@ export class TokenizationTextModelPart extends TextModelPart implements ITokeniz ); } - public setTokens( - tokens: ContiguousMultilineTokens[], - backgroundTokenizationCompleted: boolean = false - ): void { - if (tokens.length !== 0) { - const ranges: { fromLineNumber: number; toLineNumber: number }[] = []; - - for (let i = 0, len = tokens.length; i < len; i++) { - const element = tokens[i]; - let minChangedLineNumber = 0; - let maxChangedLineNumber = 0; - let hasChange = false; - for ( - let lineNumber = element.startLineNumber; - lineNumber <= element.endLineNumber; - lineNumber++ - ) { - if (hasChange) { - this._tokens.setTokens( - this._languageId, - lineNumber - 1, - this._textModel.getLineLength(lineNumber), - element.getLineTokens(lineNumber), - false - ); + public handleBackgroundTokenizationFinished(): void { + if (this._backgroundTokenizationState === BackgroundTokenizationState.Completed) { + // We already did a full tokenization and don't go back to progressing. + return; + } + const newState = BackgroundTokenizationState.Completed; + this._backgroundTokenizationState = newState; + this.bracketPairsTextModelPart.handleDidChangeBackgroundTokenizationState(); + } + + public get hasTokens(): boolean { + return this._tokens.hasTokens; + } + + public setTokens(tokens: ContiguousMultilineTokens[]): void { + if (tokens.length === 0) { + return; + } + + const ranges: { fromLineNumber: number; toLineNumber: number }[] = []; + + for (let i = 0, len = tokens.length; i < len; i++) { + const element = tokens[i]; + let minChangedLineNumber = 0; + let maxChangedLineNumber = 0; + let hasChange = false; + for ( + let lineNumber = element.startLineNumber; + lineNumber <= element.endLineNumber; + lineNumber++ + ) { + if (hasChange) { + this._tokens.setTokens( + this._languageId, + lineNumber - 1, + this._textModel.getLineLength(lineNumber), + element.getLineTokens(lineNumber), + false + ); + maxChangedLineNumber = lineNumber; + } else { + const lineHasChange = this._tokens.setTokens( + this._languageId, + lineNumber - 1, + this._textModel.getLineLength(lineNumber), + element.getLineTokens(lineNumber), + true + ); + if (lineHasChange) { + hasChange = true; + minChangedLineNumber = lineNumber; maxChangedLineNumber = lineNumber; - } else { - const lineHasChange = this._tokens.setTokens( - this._languageId, - lineNumber - 1, - this._textModel.getLineLength(lineNumber), - element.getLineTokens(lineNumber), - true - ); - if (lineHasChange) { - hasChange = true; - minChangedLineNumber = lineNumber; - maxChangedLineNumber = lineNumber; - } } } - if (hasChange) { - ranges.push({ - fromLineNumber: minChangedLineNumber, - toLineNumber: maxChangedLineNumber, - }); - } } - - if (ranges.length > 0) { - this._emitModelTokensChangedEvent({ - tokenizationSupportChanged: false, - semanticTokensApplied: false, - ranges: ranges, + if (hasChange) { + ranges.push({ + fromLineNumber: minChangedLineNumber, + toLineNumber: maxChangedLineNumber, }); } } - this.handleTokenizationProgress(backgroundTokenizationCompleted); + + if (ranges.length > 0) { + this._emitModelTokensChangedEvent({ + tokenizationSupportChanged: false, + semanticTokensApplied: false, + ranges: ranges, + }); + } } public setSemanticTokens( diff --git a/src/vs/editor/common/tokenizationTextModelPart.ts b/src/vs/editor/common/tokenizationTextModelPart.ts index bade56184c8b5..3aa78f944a791 100644 --- a/src/vs/editor/common/tokenizationTextModelPart.ts +++ b/src/vs/editor/common/tokenizationTextModelPart.ts @@ -3,7 +3,6 @@ * Licensed under the MIT License. See License.txt in the project root for license information. *--------------------------------------------------------------------------------------------*/ -import { Event } from 'vs/base/common/event'; import { IPosition } from 'vs/editor/common/core/position'; import { Range } from 'vs/editor/common/core/range'; import { StandardTokenType } from 'vs/editor/common/encodedTokenAttributes'; @@ -20,6 +19,8 @@ export interface ITokenizationTextModelPart { */ setTokens(tokens: ContiguousMultilineTokens[]): void; + readonly hasTokens: boolean; + /** * Replaces all semantic tokens with the provided `tokens`. * @internal @@ -98,11 +99,9 @@ export interface ITokenizationTextModelPart { setLanguageId(languageId: string, source?: string): void; readonly backgroundTokenizationState: BackgroundTokenizationState; - readonly onBackgroundTokenizationStateChanged: Event; } export const enum BackgroundTokenizationState { - Uninitialized = 0, InProgress = 1, Completed = 2, } diff --git a/src/vs/editor/common/tokens/contiguousTokensStore.ts b/src/vs/editor/common/tokens/contiguousTokensStore.ts index aa2c14214913f..72d4afc3ccb28 100644 --- a/src/vs/editor/common/tokens/contiguousTokensStore.ts +++ b/src/vs/editor/common/tokens/contiguousTokensStore.ts @@ -30,6 +30,10 @@ export class ContiguousTokensStore { this._len = 0; } + get hasTokens(): boolean { + return this._lineTokens.length > 0; + } + public getTokens(topLevelLanguageId: string, lineIndex: number, lineText: string): LineTokens { let rawLineTokens: Uint32Array | ArrayBuffer | null = null; if (lineIndex < this._len) { diff --git a/src/vs/monaco.d.ts b/src/vs/monaco.d.ts index 1a565a2c39933..f8c008e67db86 100644 --- a/src/vs/monaco.d.ts +++ b/src/vs/monaco.d.ts @@ -875,10 +875,10 @@ declare namespace monaco { } export class Token { - _tokenBrand: void; readonly offset: number; readonly type: string; readonly language: string; + _tokenBrand: void; constructor(offset: number, type: string, language: string); toString(): string; } @@ -6409,6 +6409,16 @@ declare namespace monaco.languages { removeText?: number; } + export interface IBackgroundTokenizer extends IDisposable { + /** + * Instructs the background tokenizer to set the tokens for the given range again. + * + * This might be necessary if the renderer overwrote those tokens with heuristically computed ones for some viewport, + * when the change does not even propagate to that viewport. + */ + requestTokens(startLineNumber: number, endLineNumberExclusive: number): void; + } + /** * The state of the tokenizer between two lines. * It is useful to store flags such as in multiline comment, etc. From c63b9a6f13230cf3bce5ee5bb99233ed9d7b31e3 Mon Sep 17 00:00:00 2001 From: Henning Dieterichs Date: Wed, 15 Feb 2023 14:56:31 +0100 Subject: [PATCH 2/3] Resolves PR conversations --- src/vs/editor/common/languages.ts | 3 +++ src/vs/editor/common/model/textModelTokens.ts | 10 +++------- .../common/model/tokenizationTextModelPart.ts | 15 ++++----------- src/vs/monaco.d.ts | 10 ---------- 4 files changed, 10 insertions(+), 28 deletions(-) diff --git a/src/vs/editor/common/languages.ts b/src/vs/editor/common/languages.ts index 40bbb50e01a63..ca0bb68c6bdf8 100644 --- a/src/vs/editor/common/languages.ts +++ b/src/vs/editor/common/languages.ts @@ -78,6 +78,9 @@ export class EncodedTokenizationResult { } } +/** + * @internal + */ export interface IBackgroundTokenizer extends IDisposable { /** * Instructs the background tokenizer to set the tokens for the given range again. diff --git a/src/vs/editor/common/model/textModelTokens.ts b/src/vs/editor/common/model/textModelTokens.ts index f801d2ef4e6a5..ebee591a042ce 100644 --- a/src/vs/editor/common/model/textModelTokens.ts +++ b/src/vs/editor/common/model/textModelTokens.ts @@ -176,8 +176,8 @@ export class TokenizationStateStore { } } - isTokenizationComplete(_textModel: ITextModel): boolean { - return this.invalidLineStartIndex >= _textModel.getLineCount(); + isTokenizationComplete(textModel: ITextModel): boolean { + return this.invalidLineStartIndex >= textModel.getLineCount(); } } @@ -264,8 +264,6 @@ export class TextModelTokenization extends Disposable { }, }; - this.backgroundTokenizer.clear(); - if (tokenizationSupport && tokenizationSupport.createBackgroundTokenizer) { this.backgroundTokenizer.value = tokenizationSupport.createBackgroundTokenizer(this._textModel, b); } @@ -273,14 +271,12 @@ export class TextModelTokenization extends Disposable { this.backgroundTokenizer.value = this._defaultBackgroundTokenizer = new DefaultBackgroundTokenizer( this._textModel, - this._tokenizationStateStore!, + this._tokenizationStateStore, b, this._languageIdCodec ); this._defaultBackgroundTokenizer.handleChanges(); } - } else { - this.backgroundTokenizer.clear(); } } diff --git a/src/vs/editor/common/model/tokenizationTextModelPart.ts b/src/vs/editor/common/model/tokenizationTextModelPart.ts index 04bcc3cdf741f..efe4f6cae54ff 100644 --- a/src/vs/editor/common/model/tokenizationTextModelPart.ts +++ b/src/vs/editor/common/model/tokenizationTextModelPart.ts @@ -34,7 +34,6 @@ export class TokenizationTextModelPart extends TextModelPart implements ITokeniz private readonly _onDidChangeTokens: Emitter = this._register(new Emitter()); public readonly onDidChangeTokens: Event = this._onDidChangeTokens.event; - private readonly _languageRegistryListener: IDisposable; private readonly _tokens: ContiguousTokensStore; private readonly _semanticTokens: SparseTokensStore; private readonly _tokenization: TextModelTokenization; @@ -54,25 +53,19 @@ export class TokenizationTextModelPart extends TextModelPart implements ITokeniz this._semanticTokens = new SparseTokensStore( this._languageService.languageIdCodec ); - this._tokenization = new TextModelTokenization( + this._tokenization = this._register(new TextModelTokenization( _textModel, this, this._languageService.languageIdCodec - ); + )); - this._languageRegistryListener = this._languageConfigurationService.onDidChange( + this._languageRegistryListener = this._register(this._languageConfigurationService.onDidChange( e => { if (e.affects(this._languageId)) { this._onDidChangeLanguageConfiguration.fire({}); } } - ); - } - - public override dispose(): void { - this._languageRegistryListener.dispose(); - this._tokenization.dispose(); - super.dispose(); + )); } _hasListeners(): boolean { diff --git a/src/vs/monaco.d.ts b/src/vs/monaco.d.ts index f8c008e67db86..e8df5c79bdad5 100644 --- a/src/vs/monaco.d.ts +++ b/src/vs/monaco.d.ts @@ -6409,16 +6409,6 @@ declare namespace monaco.languages { removeText?: number; } - export interface IBackgroundTokenizer extends IDisposable { - /** - * Instructs the background tokenizer to set the tokens for the given range again. - * - * This might be necessary if the renderer overwrote those tokens with heuristically computed ones for some viewport, - * when the change does not even propagate to that viewport. - */ - requestTokens(startLineNumber: number, endLineNumberExclusive: number): void; - } - /** * The state of the tokenizer between two lines. * It is useful to store flags such as in multiline comment, etc. From 6c123a76b44824a638c0028e329b496f71536057 Mon Sep 17 00:00:00 2001 From: Henning Dieterichs Date: Wed, 15 Feb 2023 14:59:52 +0100 Subject: [PATCH 3/3] Fixes CI --- src/vs/editor/common/model/tokenizationTextModelPart.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/vs/editor/common/model/tokenizationTextModelPart.ts b/src/vs/editor/common/model/tokenizationTextModelPart.ts index efe4f6cae54ff..603c021408747 100644 --- a/src/vs/editor/common/model/tokenizationTextModelPart.ts +++ b/src/vs/editor/common/model/tokenizationTextModelPart.ts @@ -5,7 +5,6 @@ import { Emitter, Event } from 'vs/base/common/event'; import { CharCode } from 'vs/base/common/charCode'; -import { IDisposable } from 'vs/base/common/lifecycle'; import { IPosition, Position } from 'vs/editor/common/core/position'; import { IRange, Range } from 'vs/editor/common/core/range'; import { getWordAtText, IWordAtPosition } from 'vs/editor/common/core/wordHelper'; @@ -59,7 +58,7 @@ export class TokenizationTextModelPart extends TextModelPart implements ITokeniz this._languageService.languageIdCodec )); - this._languageRegistryListener = this._register(this._languageConfigurationService.onDidChange( + this._register(this._languageConfigurationService.onDidChange( e => { if (e.affects(this._languageId)) { this._onDidChangeLanguageConfiguration.fire({});