From d491e2e7502f34a7c1b4dc7b2fb9c9de44280002 Mon Sep 17 00:00:00 2001
From: Henning Dieterichs <hdieterichs@microsoft.com>
Date: Tue, 14 Feb 2023 17:02:13 +0100
Subject: [PATCH 1/3] Refactors tokenization to support custom background
 tokenizer.

---
 src/vs/editor/common/languages.ts             |  73 ++--
 .../bracketPairsTree/bracketPairsTree.ts      |   7 +-
 src/vs/editor/common/model/textModelTokens.ts | 407 +++++++++++-------
 .../common/model/tokenizationTextModelPart.ts | 145 +++----
 .../common/tokenizationTextModelPart.ts       |   5 +-
 .../common/tokens/contiguousTokensStore.ts    |   4 +
 src/vs/monaco.d.ts                            |  12 +-
 7 files changed, 381 insertions(+), 272 deletions(-)

diff --git a/src/vs/editor/common/languages.ts b/src/vs/editor/common/languages.ts
index 178a66a4727df..40bbb50e01a63 100644
--- a/src/vs/editor/common/languages.ts
+++ b/src/vs/editor/common/languages.ts
@@ -19,6 +19,7 @@ import { Selection } from 'vs/editor/common/core/selection';
 import { LanguageId } from 'vs/editor/common/encodedTokenAttributes';
 import * as model from 'vs/editor/common/model';
 import { TokenizationRegistry as TokenizationRegistryImpl } from 'vs/editor/common/tokenizationRegistry';
+import { ContiguousMultilineTokens } from 'vs/editor/common/tokens/contiguousMultilineTokens';
 import { ExtensionIdentifier } from 'vs/platform/extensions/common/extensions';
 import { IMarkerData } from 'vs/platform/markers/common/markers';
 
@@ -33,14 +34,11 @@ export interface ILanguageIdCodec {
 export class Token {
 	_tokenBrand: void = undefined;
 
-	public readonly offset: number;
-	public readonly type: string;
-	public readonly language: string;
-
-	constructor(offset: number, type: string, language: string) {
-		this.offset = offset;
-		this.type = type;
-		this.language = language;
+	constructor(
+		public readonly offset: number,
+		public readonly type: string,
+		public readonly language: string,
+	) {
 	}
 
 	public toString(): string {
@@ -54,12 +52,10 @@ export class Token {
 export class TokenizationResult {
 	_tokenizationResultBrand: void = undefined;
 
-	public readonly tokens: Token[];
-	public readonly endState: IState;
-
-	constructor(tokens: Token[], endState: IState) {
-		this.tokens = tokens;
-		this.endState = endState;
+	constructor(
+		public readonly tokens: Token[],
+		public readonly endState: IState,
+	) {
 	}
 }
 
@@ -69,21 +65,30 @@ export class TokenizationResult {
 export class EncodedTokenizationResult {
 	_encodedTokenizationResultBrand: void = undefined;
 
+	constructor(
+		/**
+		 * The tokens in binary format. Each token occupies two array indices. For token i:
+		 *  - at offset 2*i => startIndex
+		 *  - at offset 2*i + 1 => metadata
+		 *
+		 */
+		public readonly tokens: Uint32Array,
+		public readonly endState: IState,
+	) {
+	}
+}
+
+export interface IBackgroundTokenizer extends IDisposable {
 	/**
-	 * The tokens in binary format. Each token occupies two array indices. For token i:
-	 *  - at offset 2*i => startIndex
-	 *  - at offset 2*i + 1 => metadata
+	 * Instructs the background tokenizer to set the tokens for the given range again.
 	 *
+	 * This might be necessary if the renderer overwrote those tokens with heuristically computed ones for some viewport,
+	 * and the re-tokenization triggered by a change stopped before it reached that viewport.
 	 */
-	public readonly tokens: Uint32Array;
-	public readonly endState: IState;
-
-	constructor(tokens: Uint32Array, endState: IState) {
-		this.tokens = tokens;
-		this.endState = endState;
-	}
+	requestTokens(startLineNumber: number, endLineNumberExclusive: number): void;
 }
 
+
 /**
  * @internal
  */
@@ -94,6 +99,26 @@ export interface ITokenizationSupport {
 	tokenize(line: string, hasEOL: boolean, state: IState): TokenizationResult;
 
 	tokenizeEncoded(line: string, hasEOL: boolean, state: IState): EncodedTokenizationResult;
+
+	/**
+	 * May be omitted, or may return undefined, if the default background tokenization should be used.
+	 */
+	createBackgroundTokenizer?(textModel: model.ITextModel, store: IBackgroundTokenizationStore): IBackgroundTokenizer | undefined;
+}
+
+/**
+ * @internal
+ */
+export interface IBackgroundTokenizationStore {
+	setTokens(tokens: ContiguousMultilineTokens[]): void;
+
+	setEndState(lineNumber: number, state: IState): void;
+
+	/**
+	 * Should be called to indicate that the background tokenization has finished for now.
+	 * (This triggers bracket pair colorization to re-parse the bracket pairs with token information)
+	 */
+	backgroundTokenizationFinished(): void;
 }
 
 /**
diff --git a/src/vs/editor/common/model/bracketPairsTextModelPart/bracketPairsTree/bracketPairsTree.ts b/src/vs/editor/common/model/bracketPairsTextModelPart/bracketPairsTree/bracketPairsTree.ts
index 0a71c74eb5616..215de67ef9941 100644
--- a/src/vs/editor/common/model/bracketPairsTextModelPart/bracketPairsTree/bracketPairsTree.ts
+++ b/src/vs/editor/common/model/bracketPairsTextModelPart/bracketPairsTree/bracketPairsTree.ts
@@ -56,8 +56,7 @@ export class BracketPairsTree extends Disposable {
 	) {
 		super();
 
-		if (textModel.tokenization.backgroundTokenizationState === BackgroundTokenizationState.Uninitialized) {
-			// There are no token information yet
+		if (!textModel.tokenization.hasTokens) {
 			const brackets = this.brackets.getSingleLanguageBracketTokens(this.textModel.getLanguageId());
 			const tokenizer = new FastTokenizer(this.textModel.getValue(), brackets);
 			this.initialAstWithoutTokens = parseDocument(tokenizer, [], undefined, true);
@@ -67,7 +66,8 @@ export class BracketPairsTree extends Disposable {
 			// Directly create the tree with token information.
 			this.initialAstWithoutTokens = undefined;
 			this.astWithTokens = this.parseDocumentFromTextBuffer([], undefined, false);
-		} else if (textModel.tokenization.backgroundTokenizationState === BackgroundTokenizationState.InProgress) {
+		} else {
+			// We missed some token changes already, so we cannot use the fast tokenizer + delta increments
 			this.initialAstWithoutTokens = this.parseDocumentFromTextBuffer([], undefined, true);
 			this.astWithTokens = this.initialAstWithoutTokens;
 		}
@@ -103,6 +103,7 @@ export class BracketPairsTree extends Disposable {
 	}
 
 	public handleContentChanged(change: IModelContentChangedEvent) {
+		// Must be sorted in ascending order
 		const edits = change.changes.map(c => {
 			const range = Range.lift(c.range);
 			return new TextEditInfo(
diff --git a/src/vs/editor/common/model/textModelTokens.ts b/src/vs/editor/common/model/textModelTokens.ts
index 120da61f3f531..f801d2ef4e6a5 100644
--- a/src/vs/editor/common/model/textModelTokens.ts
+++ b/src/vs/editor/common/model/textModelTokens.ts
@@ -4,22 +4,23 @@
  *--------------------------------------------------------------------------------------------*/
 
 import * as arrays from 'vs/base/common/arrays';
-import { onUnexpectedError } from 'vs/base/common/errors';
-import { LineTokens } from 'vs/editor/common/tokens/lineTokens';
+import { IdleDeadline, runWhenIdle } from 'vs/base/common/async';
+import { BugIndicatingError, onUnexpectedError } from 'vs/base/common/errors';
+import { Disposable, MutableDisposable } from 'vs/base/common/lifecycle';
+import { setTimeout0 } from 'vs/base/common/platform';
+import { StopWatch } from 'vs/base/common/stopwatch';
+import { countEOL } from 'vs/editor/common/core/eolCounter';
 import { Position } from 'vs/editor/common/core/position';
 import { IRange } from 'vs/editor/common/core/range';
-import { EncodedTokenizationResult, ILanguageIdCodec, IState, ITokenizationSupport, TokenizationRegistry } from 'vs/editor/common/languages';
 import { StandardTokenType } from 'vs/editor/common/encodedTokenAttributes';
+import { EncodedTokenizationResult, IBackgroundTokenizationStore, IBackgroundTokenizer, ILanguageIdCodec, IState, ITokenizationSupport, TokenizationRegistry } from 'vs/editor/common/languages';
 import { nullTokenizeEncoded } from 'vs/editor/common/languages/nullTokenize';
+import { ITextModel } from 'vs/editor/common/model';
 import { TextModel } from 'vs/editor/common/model/textModel';
-import { Disposable } from 'vs/base/common/lifecycle';
-import { StopWatch } from 'vs/base/common/stopwatch';
-import { countEOL } from 'vs/editor/common/core/eolCounter';
-import { ContiguousMultilineTokensBuilder } from 'vs/editor/common/tokens/contiguousMultilineTokensBuilder';
-import { runWhenIdle, IdleDeadline } from 'vs/base/common/async';
-import { setTimeout0 } from 'vs/base/common/platform';
-import { IModelContentChangedEvent, IModelLanguageChangedEvent } from 'vs/editor/common/textModelEvents';
 import { TokenizationTextModelPart } from 'vs/editor/common/model/tokenizationTextModelPart';
+import { IModelContentChangedEvent, IModelLanguageChangedEvent } from 'vs/editor/common/textModelEvents';
+import { ContiguousMultilineTokensBuilder } from 'vs/editor/common/tokens/contiguousMultilineTokensBuilder';
+import { LineTokens } from 'vs/editor/common/tokens/lineTokens';
 
 const enum Constants {
 	CHEAP_TOKENIZATION_LENGTH_LIMIT = 2048
@@ -51,6 +52,7 @@ class ContiguousGrowingArray<T> {
 		this._store[index] = value;
 	}
 
+	// TODO have `replace` instead of `delete` and `insert`
 	public delete(deleteIndex: number, deleteCount: number): void {
 		if (deleteCount === 0 || deleteIndex >= this._store.length) {
 			return;
@@ -72,8 +74,8 @@ class ContiguousGrowingArray<T> {
 
 /**
  * Stores the states at the start of each line and keeps track of which lines
- * must be retokenized. Also uses state equality to quickly validate lines
- * that don't need to be retokenized.
+ * must be re-tokenized. Also uses state equality to quickly validate lines
+ * that don't need to be re-tokenized.
  *
  * For example, when typing on a line, the line gets marked as needing to be tokenized.
  * Once the line is tokenized, the end state is checked for equality against the begin
@@ -93,7 +95,7 @@ export class TokenizationStateStore {
 	private readonly _lineNeedsTokenization = new ContiguousGrowingArray<boolean>(true);
 	/**
 	 * `invalidLineStartIndex` indicates that line number `invalidLineStartIndex + 1`
-	 *  is the first one that needs to be retokenized.
+	 *  is the first one that needs to be re-tokenized.
 	 */
 	private _firstLineNeedsTokenization: number;
 
@@ -118,13 +120,13 @@ export class TokenizationStateStore {
 		return this._lineBeginState.get(lineIndex);
 	}
 
-	public setEndState(linesLength: number, lineIndex: number, endState: IState): void {
+	public setEndState(linesLength: number, lineIndex: number, endState: IState): boolean {
 		this._lineNeedsTokenization.set(lineIndex, false);
 		this._firstLineNeedsTokenization = lineIndex + 1;
 
 		// Check if this was the last line
 		if (lineIndex === linesLength - 1) {
-			return;
+			return false;
 		}
 
 		// Check if the end state has changed
@@ -132,7 +134,7 @@ export class TokenizationStateStore {
 		if (previousEndState === null || !endState.equals(previousEndState)) {
 			this._lineBeginState.set(lineIndex + 1, endState);
 			this.markMustBeTokenized(lineIndex + 1);
-			return;
+			return true;
 		}
 
 		// Perhaps we can skip tokenizing some lines...
@@ -144,10 +146,9 @@ export class TokenizationStateStore {
 			i++;
 		}
 		this._firstLineNeedsTokenization = i;
+		return false;
 	}
 
-	//#region Editing
-
 	public applyEdits(range: IRange, eolCount: number): void {
 		this.markMustBeTokenized(range.startLineNumber - 1);
 
@@ -158,13 +159,34 @@ export class TokenizationStateStore {
 		this._lineNeedsTokenization.insert(range.startLineNumber, eolCount);
 	}
 
-	//#endregion
+	public updateTokensUntilLine(textModel: ITextModel, languageIdCodec: ILanguageIdCodec, builder: ContiguousMultilineTokensBuilder, lineNumber: number): void {
+		const languageId = textModel.getLanguageId();
+		const linesLength = textModel.getLineCount();
+		const endLineIndex = lineNumber - 1;
+
+		// Validate all states up to and including endLineIndex
+		for (let lineIndex = this.invalidLineStartIndex; lineIndex <= endLineIndex; lineIndex++) {
+			const text = textModel.getLineContent(lineIndex + 1);
+			const lineStartState = this.getBeginState(lineIndex);
+
+			const r = safeTokenize(languageIdCodec, languageId, this.tokenizationSupport, text, true, lineStartState!);
+			builder.add(lineIndex + 1, r.tokens);
+			this.setEndState(linesLength, lineIndex, r.endState);
+			lineIndex = this.invalidLineStartIndex - 1; // -1 because the outer loop increments it
+		}
+	}
+
+	isTokenizationComplete(_textModel: ITextModel): boolean {
+		return this.invalidLineStartIndex >= _textModel.getLineCount();
+	}
 }
 
 export class TextModelTokenization extends Disposable {
 
-	private _tokenizationStateStore: TokenizationStateStore | null;
-	private _isDisposed: boolean;
+	private _tokenizationStateStore: TokenizationStateStore | null = null;
+	private _defaultBackgroundTokenizer: DefaultBackgroundTokenizer | null = null;
+
+	private readonly backgroundTokenizer = this._register(new MutableDisposable<IBackgroundTokenizer>());
 
 	constructor(
 		private readonly _textModel: TextModel,
@@ -172,8 +194,6 @@ export class TextModelTokenization extends Disposable {
 		private readonly _languageIdCodec: ILanguageIdCodec
 	) {
 		super();
-		this._isDisposed = false;
-		this._tokenizationStateStore = null;
 
 		this._register(TokenizationRegistry.onDidChange((e) => {
 			const languageId = this._textModel.getLanguageId();
@@ -188,13 +208,6 @@ export class TextModelTokenization extends Disposable {
 		this._resetTokenizationState();
 	}
 
-	public override dispose(): void {
-		this._isDisposed = true;
-		super.dispose();
-	}
-
-	//#region TextModel events
-
 	public handleDidChangeContent(e: IModelContentChangedEvent): void {
 		if (e.isFlush) {
 			this._resetTokenizationState();
@@ -208,11 +221,11 @@ export class TextModelTokenization extends Disposable {
 			}
 		}
 
-		this._beginBackgroundTokenization();
+		this._defaultBackgroundTokenizer?.handleChanges();
 	}
 
 	public handleDidChangeAttached(): void {
-		this._beginBackgroundTokenization();
+		this._defaultBackgroundTokenizer?.handleChanges();
 	}
 
 	public handleDidChangeLanguage(e: IModelLanguageChangedEvent): void {
@@ -220,8 +233,6 @@ export class TextModelTokenization extends Disposable {
 		this._tokenizationPart.clearTokens();
 	}
 
-	//#endregion
-
 	private _resetTokenizationState(): void {
 		const [tokenizationSupport, initialState] = initializeTokenization(this._textModel, this._tokenizationPart);
 		if (tokenizationSupport && initialState) {
@@ -229,81 +240,55 @@ export class TextModelTokenization extends Disposable {
 		} else {
 			this._tokenizationStateStore = null;
 		}
-		this._beginBackgroundTokenization();
-	}
-
-	private _isScheduled = false;
-	private _beginBackgroundTokenization(): void {
-		if (this._isScheduled || !this._textModel.isAttachedToEditor() || !this._hasLinesToTokenize()) {
-			return;
-		}
-
-		this._isScheduled = true;
-		runWhenIdle((deadline) => {
-			this._isScheduled = false;
-
-			this._backgroundTokenizeWithDeadline(deadline);
-		});
-	}
-
-	/**
-	 * Tokenize until the deadline occurs, but try to yield every 1-2ms.
-	 */
-	private _backgroundTokenizeWithDeadline(deadline: IdleDeadline): void {
-		// Read the time remaining from the `deadline` immediately because it is unclear
-		// if the `deadline` object will be valid after execution leaves this function.
-		const endTime = Date.now() + deadline.timeRemaining();
-
-		const execute = () => {
-			if (this._isDisposed || !this._textModel.isAttachedToEditor() || !this._hasLinesToTokenize()) {
-				// disposed in the meantime or detached or finished
-				return;
-			}
 
-			this._backgroundTokenizeForAtLeast1ms();
+		this.backgroundTokenizer.clear();
 
-			if (Date.now() < endTime) {
-				// There is still time before reaching the deadline, so yield to the browser and then
-				// continue execution
-				setTimeout0(execute);
-			} else {
-				// The deadline has been reached, so schedule a new idle callback if necessary
-				this._beginBackgroundTokenization();
+		this._defaultBackgroundTokenizer = null;
+		if (this._tokenizationStateStore) {
+			const b: IBackgroundTokenizationStore = {
+				setTokens: (tokens) => {
+					this._tokenizationPart.setTokens(tokens);
+				},
+				backgroundTokenizationFinished: () => {
+					this._tokenizationPart.handleBackgroundTokenizationFinished();
+				},
+				setEndState: (lineNumber, state) => {
+					if (!state) {
+						throw new BugIndicatingError();
+					}
+					const invalidLineStartIndex = this._tokenizationStateStore?.invalidLineStartIndex;
+					if (invalidLineStartIndex !== undefined && lineNumber - 1 >= invalidLineStartIndex) {
+						// Don't accept end states for lines that are definitely valid already
+						this._tokenizationStateStore?.setEndState(this._textModel.getLineCount(), lineNumber - 1, state);
+					}
+				},
+			};
+
+			this.backgroundTokenizer.clear();
+
+			if (tokenizationSupport && tokenizationSupport.createBackgroundTokenizer) {
+				this.backgroundTokenizer.value = tokenizationSupport.createBackgroundTokenizer(this._textModel, b);
 			}
-		};
-		execute();
-	}
-
-	/**
-	 * Tokenize for at least 1ms.
-	 */
-	private _backgroundTokenizeForAtLeast1ms(): void {
-		const lineCount = this._textModel.getLineCount();
-		const builder = new ContiguousMultilineTokensBuilder();
-		const sw = StopWatch.create(false);
-
-		do {
-			if (sw.elapsed() > 1) {
-				// the comparison is intentionally > 1 and not >= 1 to ensure that
-				// a full millisecond has elapsed, given how microseconds are rounded
-				// to milliseconds
-				break;
+			if (!this.backgroundTokenizer.value) {
+				this.backgroundTokenizer.value = this._defaultBackgroundTokenizer =
+					new DefaultBackgroundTokenizer(
+						this._textModel,
+						this._tokenizationStateStore!,
+						b,
+						this._languageIdCodec
+					);
+				this._defaultBackgroundTokenizer.handleChanges();
 			}
-
-			const tokenizedLineNumber = this._tokenizeOneInvalidLine(builder);
-
-			if (tokenizedLineNumber >= lineCount) {
-				break;
-			}
-		} while (this._hasLinesToTokenize());
-
-		this._tokenizationPart.setTokens(builder.finalize(), this._isTokenizationComplete());
+		} else {
+			this.backgroundTokenizer.clear();
+		}
 	}
 
 	public tokenizeViewport(startLineNumber: number, endLineNumber: number): void {
 		const builder = new ContiguousMultilineTokensBuilder();
-		this._tokenizeViewport(builder, startLineNumber, endLineNumber);
-		this._tokenizationPart.setTokens(builder.finalize(), this._isTokenizationComplete());
+		this._heuristicallyTokenizeViewport(builder, startLineNumber, endLineNumber);
+		this._tokenizationPart.setTokens(builder.finalize());
+		this._defaultBackgroundTokenizer?.checkFinished();
 	}
 
 	public reset(): void {
@@ -313,8 +298,9 @@ export class TextModelTokenization extends Disposable {
 
 	public forceTokenization(lineNumber: number): void {
 		const builder = new ContiguousMultilineTokensBuilder();
-		this._updateTokensUntilLine(builder, lineNumber);
-		this._tokenizationPart.setTokens(builder.finalize(), this._isTokenizationComplete());
+		this._tokenizationStateStore?.updateTokensUntilLine(this._textModel, this._languageIdCodec, builder, lineNumber);
+		this._tokenizationPart.setTokens(builder.finalize());
+		this._defaultBackgroundTokenizer?.checkFinished();
 	}
 
 	public getTokenTypeIfInsertingCharacter(position: Position, character: string): StandardTokenType {
@@ -401,55 +387,14 @@ export class TextModelTokenization extends Disposable {
 		return false;
 	}
 
-	private _hasLinesToTokenize(): boolean {
-		if (!this._tokenizationStateStore) {
-			return false;
-		}
-		return (this._tokenizationStateStore.invalidLineStartIndex < this._textModel.getLineCount());
-	}
-
-	private _isTokenizationComplete(): boolean {
-		if (!this._tokenizationStateStore) {
-			return false;
-		}
-		return (this._tokenizationStateStore.invalidLineStartIndex >= this._textModel.getLineCount());
-	}
-
-	private _tokenizeOneInvalidLine(builder: ContiguousMultilineTokensBuilder): number {
-		if (!this._tokenizationStateStore || !this._hasLinesToTokenize()) {
-			return this._textModel.getLineCount() + 1;
-		}
-		const lineNumber = this._tokenizationStateStore.invalidLineStartIndex + 1;
-		this._updateTokensUntilLine(builder, lineNumber);
-		return lineNumber;
-	}
-
-	private _updateTokensUntilLine(builder: ContiguousMultilineTokensBuilder, lineNumber: number): void {
-		if (!this._tokenizationStateStore) {
-			return;
-		}
-		const languageId = this._textModel.getLanguageId();
-		const linesLength = this._textModel.getLineCount();
-		const endLineIndex = lineNumber - 1;
-
-		// Validate all states up to and including endLineIndex
-		for (let lineIndex = this._tokenizationStateStore.invalidLineStartIndex; lineIndex <= endLineIndex; lineIndex++) {
-			const text = this._textModel.getLineContent(lineIndex + 1);
-			const lineStartState = this._tokenizationStateStore.getBeginState(lineIndex);
-
-			const r = safeTokenize(this._languageIdCodec, languageId, this._tokenizationStateStore.tokenizationSupport, text, true, lineStartState!);
-			builder.add(lineIndex + 1, r.tokens);
-			this._tokenizationStateStore.setEndState(linesLength, lineIndex, r.endState);
-			lineIndex = this._tokenizationStateStore.invalidLineStartIndex - 1; // -1 because the outer loop increments it
-		}
-	}
-
-	private _tokenizeViewport(builder: ContiguousMultilineTokensBuilder, startLineNumber: number, endLineNumber: number): void {
+	/**
+	 * The result is not cached.
+	 */
+	private _heuristicallyTokenizeViewport(builder: ContiguousMultilineTokensBuilder, startLineNumber: number, endLineNumber: number): void {
 		if (!this._tokenizationStateStore) {
 			// nothing to do
 			return;
 		}
-
 		if (endLineNumber <= this._tokenizationStateStore.invalidLineStartIndex) {
 			// nothing to do
 			return;
@@ -457,24 +402,38 @@ export class TextModelTokenization extends Disposable {
 
 		if (startLineNumber <= this._tokenizationStateStore.invalidLineStartIndex) {
 			// tokenization has reached the viewport start...
-			this._updateTokensUntilLine(builder, endLineNumber);
+			this._tokenizationStateStore.updateTokensUntilLine(this._textModel, this._languageIdCodec, builder, endLineNumber);
 			return;
 		}
 
-		let nonWhitespaceColumn = this._textModel.getLineFirstNonWhitespaceColumn(startLineNumber);
-		const fakeLines: string[] = [];
+		let state = this.guessStartState(startLineNumber);
+		const languageId = this._textModel.getLanguageId();
+
+		for (let lineNumber = startLineNumber; lineNumber <= endLineNumber; lineNumber++) {
+			const text = this._textModel.getLineContent(lineNumber);
+			const r = safeTokenize(this._languageIdCodec, languageId, this._tokenizationStateStore.tokenizationSupport, text, true, state);
+			builder.add(lineNumber, r.tokens);
+			state = r.endState;
+		}
+		// We overrode the tokens. Because old states might get reused (thus stopping invalidation),
+		// we have to explicitly request the tokens for this range again.
+		this.backgroundTokenizer.value?.requestTokens(startLineNumber, endLineNumber + 1);
+	}
+
+	private guessStartState(lineNumber: number): IState {
+		let nonWhitespaceColumn = this._textModel.getLineFirstNonWhitespaceColumn(lineNumber);
+		const likelyRelevantLines: string[] = [];
 		let initialState: IState | null = null;
-		for (let i = startLineNumber - 1; nonWhitespaceColumn > 1 && i >= 1; i--) {
+		for (let i = lineNumber - 1; nonWhitespaceColumn > 1 && i >= 1; i--) {
 			const newNonWhitespaceIndex = this._textModel.getLineFirstNonWhitespaceColumn(i);
-
+			// Ignore lines full of whitespace
 			if (newNonWhitespaceIndex === 0) {
 				continue;
 			}
-
 			if (newNonWhitespaceIndex < nonWhitespaceColumn) {
-				fakeLines.push(this._textModel.getLineContent(i));
+				likelyRelevantLines.push(this._textModel.getLineContent(i));
 				nonWhitespaceColumn = newNonWhitespaceIndex;
-				initialState = this._tokenizationStateStore.getBeginState(i - 1);
+				initialState = this._tokenizationStateStore!.getBeginState(i - 1);
 				if (initialState) {
 					break;
 				}
@@ -482,23 +441,17 @@ export class TextModelTokenization extends Disposable {
 		}
 
 		if (!initialState) {
-			initialState = this._tokenizationStateStore.initialState;
+			initialState = this._tokenizationStateStore!.initialState;
 		}
+		likelyRelevantLines.reverse();
 
 		const languageId = this._textModel.getLanguageId();
 		let state = initialState;
-		for (let i = fakeLines.length - 1; i >= 0; i--) {
-			const r = safeTokenize(this._languageIdCodec, languageId, this._tokenizationStateStore.tokenizationSupport, fakeLines[i], false, state);
-			state = r.endState;
-		}
-
-		for (let lineNumber = startLineNumber; lineNumber <= endLineNumber; lineNumber++) {
-			const text = this._textModel.getLineContent(lineNumber);
-			const r = safeTokenize(this._languageIdCodec, languageId, this._tokenizationStateStore.tokenizationSupport, text, true, state);
-			builder.add(lineNumber, r.tokens);
-			this._tokenizationStateStore.markMustBeTokenized(lineNumber - 1);
+		for (const line of likelyRelevantLines) {
+			const r = safeTokenize(this._languageIdCodec, languageId, this._tokenizationStateStore!.tokenizationSupport, line, false, state);
 			state = r.endState;
 		}
+		return state;
 	}
 }
 
@@ -538,3 +491,123 @@ function safeTokenize(languageIdCodec: ILanguageIdCodec, languageId: string, tok
 	LineTokens.convertToEndOffset(r.tokens, text.length);
 	return r;
 }
+
+class DefaultBackgroundTokenizer implements IBackgroundTokenizer {
+	private _isDisposed = false;
+
+	constructor(
+		private readonly _textModel: ITextModel,
+		private readonly _stateStore: TokenizationStateStore,
+		private readonly _backgroundTokenStore: IBackgroundTokenizationStore,
+		private readonly _languageIdCodec: ILanguageIdCodec,
+	) {
+	}
+
+	public dispose(): void {
+		this._isDisposed = true;
+	}
+
+	public handleChanges(): void {
+		this._beginBackgroundTokenization();
+	}
+
+	private _isScheduled = false;
+	private _beginBackgroundTokenization(): void {
+		if (this._isScheduled || !this._textModel.isAttachedToEditor() || !this._hasLinesToTokenize()) {
+			return;
+		}
+
+		this._isScheduled = true;
+		runWhenIdle((deadline) => {
+			this._isScheduled = false;
+
+			this._backgroundTokenizeWithDeadline(deadline);
+		});
+	}
+
+	/**
+	 * Tokenize until the deadline occurs, but try to yield every 1-2ms.
+	 */
+	private _backgroundTokenizeWithDeadline(deadline: IdleDeadline): void {
+		// Read the time remaining from the `deadline` immediately because it is unclear
+		// if the `deadline` object will be valid after execution leaves this function.
+		const endTime = Date.now() + deadline.timeRemaining();
+
+		const execute = () => {
+			if (this._isDisposed || !this._textModel.isAttachedToEditor() || !this._hasLinesToTokenize()) {
+				// disposed in the meantime or detached or finished
+				return;
+			}
+
+			this._backgroundTokenizeForAtLeast1ms();
+
+			if (Date.now() < endTime) {
+				// There is still time before reaching the deadline, so yield to the browser and then
+				// continue execution
+				setTimeout0(execute);
+			} else {
+				// The deadline has been reached, so schedule a new idle callback if necessary
+				this._beginBackgroundTokenization();
+			}
+		};
+		execute();
+	}
+
+	/**
+	 * Tokenize for at least 1ms.
+	 */
+	private _backgroundTokenizeForAtLeast1ms(): void {
+		const lineCount = this._textModel.getLineCount();
+		const builder = new ContiguousMultilineTokensBuilder();
+		const sw = StopWatch.create(false);
+
+		do {
+			if (sw.elapsed() > 1) {
+				// the comparison is intentionally > 1 and not >= 1 to ensure that
+				// a full millisecond has elapsed, given how microseconds are rounded
+				// to milliseconds
+				break;
+			}
+
+			const tokenizedLineNumber = this._tokenizeOneInvalidLine(builder);
+
+			if (tokenizedLineNumber >= lineCount) {
+				break;
+			}
+		} while (this._hasLinesToTokenize());
+
+		this._backgroundTokenStore.setTokens(builder.finalize());
+		this.checkFinished();
+	}
+
+	private _hasLinesToTokenize(): boolean {
+		if (!this._stateStore) {
+			return false;
+		}
+		return this._stateStore.invalidLineStartIndex < this._textModel.getLineCount();
+	}
+
+	private _tokenizeOneInvalidLine(builder: ContiguousMultilineTokensBuilder): number {
+		if (!this._stateStore || !this._hasLinesToTokenize()) {
+			return this._textModel.getLineCount() + 1;
+		}
+		const lineNumber = this._stateStore.invalidLineStartIndex + 1;
+		this._stateStore.updateTokensUntilLine(this._textModel, this._languageIdCodec, builder, lineNumber);
+		return lineNumber;
+	}
+
+	public checkFinished(): void {
+		if (this._isDisposed) {
+			return;
+		}
+		if (this._stateStore.isTokenizationComplete(this._textModel)) {
+			this._backgroundTokenStore.backgroundTokenizationFinished();
+		}
+	}
+
+	requestTokens(startLineNumber: number, endLineNumberExclusive: number): void {
+		for (let lineNumber = startLineNumber; lineNumber < endLineNumberExclusive; lineNumber++) {
+			this._stateStore.markMustBeTokenized(lineNumber - 1);
+		}
+	}
+}
diff --git a/src/vs/editor/common/model/tokenizationTextModelPart.ts b/src/vs/editor/common/model/tokenizationTextModelPart.ts
index 378dcb5658958..04bcc3cdf741f 100644
--- a/src/vs/editor/common/model/tokenizationTextModelPart.ts
+++ b/src/vs/editor/common/model/tokenizationTextModelPart.ts
@@ -69,12 +69,17 @@ export class TokenizationTextModelPart extends TextModelPart implements ITokeniz
 		);
 	}
 
+	public override dispose(): void {
+		this._languageRegistryListener.dispose();
+		this._tokenization.dispose();
+		super.dispose();
+	}
+
 	_hasListeners(): boolean {
 		return (
 			this._onDidChangeLanguage.hasListeners()
 			|| this._onDidChangeLanguageConfiguration.hasListeners()
 			|| this._onDidChangeTokens.hasListeners()
-			|| this._onBackgroundTokenizationStateChanged.hasListeners()
 		);
 	}
 
@@ -104,35 +109,15 @@ export class TokenizationTextModelPart extends TextModelPart implements ITokeniz
 		this._semanticTokens.flush();
 	}
 
+	// TODO@hediet TODO@alexdima what is the difference between this and acceptEdit?
 	public handleDidChangeContent(change: IModelContentChangedEvent): void {
 		this._tokenization.handleDidChangeContent(change);
 	}
 
-	public override dispose(): void {
-		this._languageRegistryListener.dispose();
-		this._tokenization.dispose();
-		super.dispose();
-	}
-
-	private _backgroundTokenizationState = BackgroundTokenizationState.Uninitialized;
+	private _backgroundTokenizationState = BackgroundTokenizationState.InProgress;
 	public get backgroundTokenizationState(): BackgroundTokenizationState {
 		return this._backgroundTokenizationState;
 	}
-	private handleTokenizationProgress(completed: boolean) {
-		if (this._backgroundTokenizationState === BackgroundTokenizationState.Completed) {
-			// We already did a full tokenization and don't go back to progressing.
-			return;
-		}
-		const newState = completed ? BackgroundTokenizationState.Completed : BackgroundTokenizationState.InProgress;
-		if (this._backgroundTokenizationState !== newState) {
-			this._backgroundTokenizationState = newState;
-			this.bracketPairsTextModelPart.handleDidChangeBackgroundTokenizationState();
-			this._onBackgroundTokenizationStateChanged.fire();
-		}
-	}
-
-	private readonly _onBackgroundTokenizationStateChanged = this._register(new Emitter<void>());
-	public readonly onBackgroundTokenizationStateChanged: Event<void> = this._onBackgroundTokenizationStateChanged.event;
 
 	public setLineTokens(
 		lineNumber: number,
@@ -151,64 +136,76 @@ export class TokenizationTextModelPart extends TextModelPart implements ITokeniz
 		);
 	}
 
-	public setTokens(
-		tokens: ContiguousMultilineTokens[],
-		backgroundTokenizationCompleted: boolean = false
-	): void {
-		if (tokens.length !== 0) {
-			const ranges: { fromLineNumber: number; toLineNumber: number }[] = [];
-
-			for (let i = 0, len = tokens.length; i < len; i++) {
-				const element = tokens[i];
-				let minChangedLineNumber = 0;
-				let maxChangedLineNumber = 0;
-				let hasChange = false;
-				for (
-					let lineNumber = element.startLineNumber;
-					lineNumber <= element.endLineNumber;
-					lineNumber++
-				) {
-					if (hasChange) {
-						this._tokens.setTokens(
-							this._languageId,
-							lineNumber - 1,
-							this._textModel.getLineLength(lineNumber),
-							element.getLineTokens(lineNumber),
-							false
-						);
+	public handleBackgroundTokenizationFinished(): void {
+		if (this._backgroundTokenizationState === BackgroundTokenizationState.Completed) {
+			// Already completed: return early so the bracket pairs part is not notified again.
+			return;
+		}
+		const newState = BackgroundTokenizationState.Completed;
+		this._backgroundTokenizationState = newState;
+		this.bracketPairsTextModelPart.handleDidChangeBackgroundTokenizationState();
+	}
+
+	public get hasTokens(): boolean {
+		return this._tokens.hasTokens;
+	}
+
+	public setTokens(tokens: ContiguousMultilineTokens[]): void {
+		if (tokens.length === 0) {
+			return;
+		}
+
+		const ranges: { fromLineNumber: number; toLineNumber: number }[] = [];
+
+		for (let i = 0, len = tokens.length; i < len; i++) {
+			const element = tokens[i];
+			let minChangedLineNumber = 0;
+			let maxChangedLineNumber = 0;
+			let hasChange = false;
+			for (
+				let lineNumber = element.startLineNumber;
+				lineNumber <= element.endLineNumber;
+				lineNumber++
+			) {
+				if (hasChange) {
+					this._tokens.setTokens(
+						this._languageId,
+						lineNumber - 1,
+						this._textModel.getLineLength(lineNumber),
+						element.getLineTokens(lineNumber),
+						false
+					);
+					maxChangedLineNumber = lineNumber;
+				} else {
+					const lineHasChange = this._tokens.setTokens(
+						this._languageId,
+						lineNumber - 1,
+						this._textModel.getLineLength(lineNumber),
+						element.getLineTokens(lineNumber),
+						true
+					);
+					if (lineHasChange) {
+						hasChange = true;
+						minChangedLineNumber = lineNumber;
 						maxChangedLineNumber = lineNumber;
-					} else {
-						const lineHasChange = this._tokens.setTokens(
-							this._languageId,
-							lineNumber - 1,
-							this._textModel.getLineLength(lineNumber),
-							element.getLineTokens(lineNumber),
-							true
-						);
-						if (lineHasChange) {
-							hasChange = true;
-							minChangedLineNumber = lineNumber;
-							maxChangedLineNumber = lineNumber;
-						}
 					}
 				}
-				if (hasChange) {
-					ranges.push({
-						fromLineNumber: minChangedLineNumber,
-						toLineNumber: maxChangedLineNumber,
-					});
-				}
 			}
-
-			if (ranges.length > 0) {
-				this._emitModelTokensChangedEvent({
-					tokenizationSupportChanged: false,
-					semanticTokensApplied: false,
-					ranges: ranges,
+			if (hasChange) {
+				ranges.push({
+					fromLineNumber: minChangedLineNumber,
+					toLineNumber: maxChangedLineNumber,
 				});
 			}
 		}
-		this.handleTokenizationProgress(backgroundTokenizationCompleted);
+
+		if (ranges.length > 0) {
+			this._emitModelTokensChangedEvent({
+				tokenizationSupportChanged: false,
+				semanticTokensApplied: false,
+				ranges: ranges,
+			});
+		}
 	}
 
 	public setSemanticTokens(
diff --git a/src/vs/editor/common/tokenizationTextModelPart.ts b/src/vs/editor/common/tokenizationTextModelPart.ts
index bade56184c8b5..3aa78f944a791 100644
--- a/src/vs/editor/common/tokenizationTextModelPart.ts
+++ b/src/vs/editor/common/tokenizationTextModelPart.ts
@@ -3,7 +3,6 @@
  *  Licensed under the MIT License. See License.txt in the project root for license information.
  *--------------------------------------------------------------------------------------------*/
 
-import { Event } from 'vs/base/common/event';
 import { IPosition } from 'vs/editor/common/core/position';
 import { Range } from 'vs/editor/common/core/range';
 import { StandardTokenType } from 'vs/editor/common/encodedTokenAttributes';
@@ -20,6 +19,8 @@ export interface ITokenizationTextModelPart {
 	 */
 	setTokens(tokens: ContiguousMultilineTokens[]): void;
 
+	readonly hasTokens: boolean;
+
 	/**
 	 * Replaces all semantic tokens with the provided `tokens`.
 	 * @internal
@@ -98,11 +99,9 @@ export interface ITokenizationTextModelPart {
 	setLanguageId(languageId: string, source?: string): void;
 
 	readonly backgroundTokenizationState: BackgroundTokenizationState;
-	readonly onBackgroundTokenizationStateChanged: Event<void>;
 }
 
 export const enum BackgroundTokenizationState {
-	Uninitialized = 0,
 	InProgress = 1,
 	Completed = 2,
 }
diff --git a/src/vs/editor/common/tokens/contiguousTokensStore.ts b/src/vs/editor/common/tokens/contiguousTokensStore.ts
index aa2c14214913f..72d4afc3ccb28 100644
--- a/src/vs/editor/common/tokens/contiguousTokensStore.ts
+++ b/src/vs/editor/common/tokens/contiguousTokensStore.ts
@@ -30,6 +30,10 @@ export class ContiguousTokensStore {
 		this._len = 0;
 	}
 
+	get hasTokens(): boolean {
+		return this._lineTokens.length > 0;
+	}
+
 	public getTokens(topLevelLanguageId: string, lineIndex: number, lineText: string): LineTokens {
 		let rawLineTokens: Uint32Array | ArrayBuffer | null = null;
 		if (lineIndex < this._len) {
diff --git a/src/vs/monaco.d.ts b/src/vs/monaco.d.ts
index 1a565a2c39933..f8c008e67db86 100644
--- a/src/vs/monaco.d.ts
+++ b/src/vs/monaco.d.ts
@@ -875,10 +875,10 @@ declare namespace monaco {
 	}
 
 	export class Token {
-		_tokenBrand: void;
 		readonly offset: number;
 		readonly type: string;
 		readonly language: string;
+		_tokenBrand: void;
 		constructor(offset: number, type: string, language: string);
 		toString(): string;
 	}
@@ -6409,6 +6409,16 @@ declare namespace monaco.languages {
 		removeText?: number;
 	}
 
+	export interface IBackgroundTokenizer extends IDisposable {
+		/**
+		 * Instructs the background tokenizer to set the tokens for the given range again.
+		 *
+		 * This might be necessary if the renderer overwrote those tokens with heuristically computed ones for some viewport,
+		 * when the change does not even propagate to that viewport.
+		 */
+		requestTokens(startLineNumber: number, endLineNumberExclusive: number): void;
+	}
+
 	/**
 	 * The state of the tokenizer between two lines.
 	 * It is useful to store flags such as in multiline comment, etc.

From c63b9a6f13230cf3bce5ee5bb99233ed9d7b31e3 Mon Sep 17 00:00:00 2001
From: Henning Dieterichs <hdieterichs@microsoft.com>
Date: Wed, 15 Feb 2023 14:56:31 +0100
Subject: [PATCH 2/3] Resolves PR conversations

---
 src/vs/editor/common/languages.ts                 |  3 +++
 src/vs/editor/common/model/textModelTokens.ts     | 10 +++-------
 .../common/model/tokenizationTextModelPart.ts     | 15 ++++-----------
 src/vs/monaco.d.ts                                | 10 ----------
 4 files changed, 10 insertions(+), 28 deletions(-)

diff --git a/src/vs/editor/common/languages.ts b/src/vs/editor/common/languages.ts
index 40bbb50e01a63..ca0bb68c6bdf8 100644
--- a/src/vs/editor/common/languages.ts
+++ b/src/vs/editor/common/languages.ts
@@ -78,6 +78,9 @@ export class EncodedTokenizationResult {
 	}
 }
 
+/**
+ * @internal
+ */
 export interface IBackgroundTokenizer extends IDisposable {
 	/**
 	 * Instructs the background tokenizer to set the tokens for the given range again.
diff --git a/src/vs/editor/common/model/textModelTokens.ts b/src/vs/editor/common/model/textModelTokens.ts
index f801d2ef4e6a5..ebee591a042ce 100644
--- a/src/vs/editor/common/model/textModelTokens.ts
+++ b/src/vs/editor/common/model/textModelTokens.ts
@@ -176,8 +176,8 @@ export class TokenizationStateStore {
 		}
 	}
 
-	isTokenizationComplete(_textModel: ITextModel): boolean {
-		return this.invalidLineStartIndex >= _textModel.getLineCount();
+	isTokenizationComplete(textModel: ITextModel): boolean {
+		return this.invalidLineStartIndex >= textModel.getLineCount();
 	}
 }
 
@@ -264,8 +264,6 @@ export class TextModelTokenization extends Disposable {
 				},
 			};
 
-			this.backgroundTokenizer.clear();
-
 			if (tokenizationSupport && tokenizationSupport.createBackgroundTokenizer) {
 				this.backgroundTokenizer.value = tokenizationSupport.createBackgroundTokenizer(this._textModel, b);
 			}
@@ -273,14 +271,12 @@ export class TextModelTokenization extends Disposable {
 				this.backgroundTokenizer.value = this._defaultBackgroundTokenizer =
 					new DefaultBackgroundTokenizer(
 						this._textModel,
-						this._tokenizationStateStore!,
+						this._tokenizationStateStore,
 						b,
 						this._languageIdCodec
 					);
 				this._defaultBackgroundTokenizer.handleChanges();
 			}
-		} else {
-			this.backgroundTokenizer.clear();
 		}
 	}
 
diff --git a/src/vs/editor/common/model/tokenizationTextModelPart.ts b/src/vs/editor/common/model/tokenizationTextModelPart.ts
index 04bcc3cdf741f..efe4f6cae54ff 100644
--- a/src/vs/editor/common/model/tokenizationTextModelPart.ts
+++ b/src/vs/editor/common/model/tokenizationTextModelPart.ts
@@ -34,7 +34,6 @@ export class TokenizationTextModelPart extends TextModelPart implements ITokeniz
 	private readonly _onDidChangeTokens: Emitter<IModelTokensChangedEvent> = this._register(new Emitter<IModelTokensChangedEvent>());
 	public readonly onDidChangeTokens: Event<IModelTokensChangedEvent> = this._onDidChangeTokens.event;
 
-	private readonly _languageRegistryListener: IDisposable;
 	private readonly _tokens: ContiguousTokensStore;
 	private readonly _semanticTokens: SparseTokensStore;
 	private readonly _tokenization: TextModelTokenization;
@@ -54,25 +53,19 @@ export class TokenizationTextModelPart extends TextModelPart implements ITokeniz
 		this._semanticTokens = new SparseTokensStore(
 			this._languageService.languageIdCodec
 		);
-		this._tokenization = new TextModelTokenization(
+		this._tokenization = this._register(new TextModelTokenization(
 			_textModel,
 			this,
 			this._languageService.languageIdCodec
-		);
+		));
 
-		this._languageRegistryListener = this._languageConfigurationService.onDidChange(
+		this._languageRegistryListener = this._register(this._languageConfigurationService.onDidChange(
 			e => {
 				if (e.affects(this._languageId)) {
 					this._onDidChangeLanguageConfiguration.fire({});
 				}
 			}
-		);
-	}
-
-	public override dispose(): void {
-		this._languageRegistryListener.dispose();
-		this._tokenization.dispose();
-		super.dispose();
+		));
 	}
 
 	_hasListeners(): boolean {
diff --git a/src/vs/monaco.d.ts b/src/vs/monaco.d.ts
index f8c008e67db86..e8df5c79bdad5 100644
--- a/src/vs/monaco.d.ts
+++ b/src/vs/monaco.d.ts
@@ -6409,16 +6409,6 @@ declare namespace monaco.languages {
 		removeText?: number;
 	}
 
-	export interface IBackgroundTokenizer extends IDisposable {
-		/**
-		 * Instructs the background tokenizer to set the tokens for the given range again.
-		 *
-		 * This might be necessary if the renderer overwrote those tokens with heuristically computed ones for some viewport,
-		 * when the change does not even propagate to that viewport.
-		 */
-		requestTokens(startLineNumber: number, endLineNumberExclusive: number): void;
-	}
-
 	/**
 	 * The state of the tokenizer between two lines.
 	 * It is useful to store flags such as in multiline comment, etc.

From 6c123a76b44824a638c0028e329b496f71536057 Mon Sep 17 00:00:00 2001
From: Henning Dieterichs <hdieterichs@microsoft.com>
Date: Wed, 15 Feb 2023 14:59:52 +0100
Subject: [PATCH 3/3] Fixes CI

---
 src/vs/editor/common/model/tokenizationTextModelPart.ts | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/vs/editor/common/model/tokenizationTextModelPart.ts b/src/vs/editor/common/model/tokenizationTextModelPart.ts
index efe4f6cae54ff..603c021408747 100644
--- a/src/vs/editor/common/model/tokenizationTextModelPart.ts
+++ b/src/vs/editor/common/model/tokenizationTextModelPart.ts
@@ -5,7 +5,6 @@
 
 import { Emitter, Event } from 'vs/base/common/event';
 import { CharCode } from 'vs/base/common/charCode';
-import { IDisposable } from 'vs/base/common/lifecycle';
 import { IPosition, Position } from 'vs/editor/common/core/position';
 import { IRange, Range } from 'vs/editor/common/core/range';
 import { getWordAtText, IWordAtPosition } from 'vs/editor/common/core/wordHelper';
@@ -59,7 +58,7 @@ export class TokenizationTextModelPart extends TextModelPart implements ITokeniz
 			this._languageService.languageIdCodec
 		));
 
-		this._languageRegistryListener = this._register(this._languageConfigurationService.onDidChange(
+		this._register(this._languageConfigurationService.onDidChange(
 			e => {
 				if (e.affects(this._languageId)) {
 					this._onDidChangeLanguageConfiguration.fire({});