Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 0 additions & 10 deletions desktop/pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions desktop/src-tauri/locales/en-US/common.json
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@
"max-speakers": "Max speakers",
"max-text-ctx": "Maximum context",
"max-tokens": "Max Tokens",
"max-input-chars": "Max Input Characters",
"info-max-input-chars": "Max characters sent to AI per request. Transcripts longer than this are split into chunks, each summarized separately, then merged. Default 24000 ≈ 6000 tokens (fits most local models).",
"microphone": "Microphone",
"modal-close": "Close",
"modal-error-body": "A bug happened!",
Expand Down Expand Up @@ -177,6 +179,8 @@
"stop-and-transcribe": "Stop Recording",
"success-action": "Operation succeeded.",
"summarize-loading": "Summarizing",
"summarize-chunk-progress": "Summarizing part {{current}} of {{total}}...",
"summarize-synthesis": "Merging summaries...",
"summarize-success": "Summarized",
"summary-tab": "Summary",
"support-the-project": "Support Vibe",
Expand Down
6 changes: 5 additions & 1 deletion desktop/src-tauri/locales/ru-RU/common.json
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
"info-max-speakers": "How many speakers should be in the file. Used for more precise recognition",
"info-max-text-ctx": "Max context tokens to use from the past text as prompt for the decoder",
"info-max-tokens": "Max tokens for the AI model. Each token is usually considered as a single word. This can save you from sending too much data which costs money. It's recommended to restrict it also in the website.",
"info-max-input-chars": "Максимальное количество символов, отправляемых ИИ за один запрос. Если транскрипт длиннее — он разбивается на части, каждая суммаризируется отдельно, затем результаты объединяются. По умолчанию 24000 ≈ 6000 токенов (подходит для большинства локальных моделей).",
"info-normalize-loudness": "Enable this option for better transcription accuracy. It may take up to 8 minutes per hour of audio.",
"info-prompt": "Make transcripts better by writing expected words.",
"info-recognize-speakers": "Detect speaker in each sentence and add it",
Expand All @@ -93,6 +94,7 @@
"max-speakers": "Максимальное количество говорящих",
"max-text-ctx": "Максимальный контекст",
"max-tokens": "Максимальное количество токенов",
"max-input-chars": "Максимум символов на запрос",
"microphone": "Микрофон",
"modal-close": "Закрыть",
"modal-error-body": "Произошла ошибка!",
Expand Down Expand Up @@ -144,7 +146,9 @@
"stop-and-transcribe": "Остановить запись",
"success-action": "Операция выполнена успешно.",
"summarize-loading": "Суммаризация",
"summarize-success": "Суммиризация завершена",
"summarize-chunk-progress": "Суммаризация части {{current}} из {{total}}...",
"summarize-synthesis": "Объединение частей...",
"summarize-success": "Суммаризация завершена",
"summary-tab": "Саммари",
"support-the-project": "Поддержать Vibe",
"temp-folder": "Временные файлы",
Expand Down
2 changes: 1 addition & 1 deletion desktop/src-tauri/src/cmd/audio.rs
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ fn get_output_device_and_config(host: &cpal::Host, audio_device: &AudioDevice) -
let config = device
.default_output_config()
.context("Failed to get default output config")?;
return Ok((device, config));
Ok((device, config))
}

#[cfg(not(target_os = "macos"))]
Expand Down
2 changes: 1 addition & 1 deletion desktop/src-tauri/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ async fn main() -> Result<()> {
.plugin(tauri_plugin_dialog::init())
.plugin(tauri_plugin_updater::Builder::default().build())
.plugin(tauri_plugin_process::init())
.plugin(tauri_plugin_global_shortcut::Builder::new().build())
.plugin(tauri_plugin_global_shortcut::Builder::new().build())
.plugin(tauri_plugin_notification::init());

if analytics::is_aptabase_configured() {
Expand Down
15 changes: 15 additions & 0 deletions desktop/src/components/params.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,21 @@ export default function ModelOptions({ options, setOptions }: ParamsProps) {
/>
</Field>

<Field
label={
<>
<InfoTooltip text={t('common.info-max-input-chars')} />
{t('common.max-input-chars')}
</>
}>
<Input
type="number"
placeholder={String(config.llmDefaultMaxInputChars)}
value={llmConfig?.maxInputChars ?? config.llmDefaultMaxInputChars}
onChange={(e) => setLlmConfig({ ...llmConfig, maxInputChars: Number(e.target.value) || config.llmDefaultMaxInputChars })}
/>
</Field>

<Button onClick={checkLlm} size="sm" className="w-full">
{t('common.run-llm-check')}
</Button>
Expand Down
1 change: 1 addition & 0 deletions desktop/src/lib/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ export const vadModelUrl = 'https://huggingface.co/ggml-org/whisper-vad/resolve/

export const llmApiKeyUrl = 'https://console.anthropic.com/settings/keys'
export const llmDefaultMaxTokens = 8192 // https://docs.anthropic.com/en/docs/about-claude/models
export const llmDefaultMaxInputChars = 24_000 // 24000 chars ≈ 6000 tokens — safe for small local models
export const llmLimitsUrl = 'https://console.anthropic.com/settings/limits'
export const llmCostUrl = 'https://console.anthropic.com/settings/cost'

Expand Down
102 changes: 102 additions & 0 deletions desktop/src/lib/llm/chunking.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import { llmDefaultMaxInputChars } from '~/lib/config'
import { type Llm, type LlmConfig } from '~/lib/llm'
import { asText, type Segment } from '~/lib/transcript'

export type ChunkingProgress = { phase: 'chunk'; current: number; total: number } | { phase: 'synthesis'; total: number }

/**
 * Greedily packs transcript segments into chunks whose rendered text stays
 * within `maxCharsPerChunk` characters. Segment order is preserved and no
 * segment is ever split; a segment that alone exceeds the budget is emitted
 * as its own one-segment chunk.
 *
 * NOTE(review): the per-segment length estimate mirrors the speaker-prefix
 * format (`[<label> <n>] <text>\n`) — presumably the same shape `asText`
 * produces; confirm they stay in sync if the transcript renderer changes.
 */
function splitIntoChunks(segments: Segment[], maxCharsPerChunk: number, speakerLabel: string): Segment[][] {
  const result: Segment[][] = []
  let batch: Segment[] = []
  let batchChars = 0

  // Close out the in-progress chunk, if any.
  const flush = () => {
    if (batch.length > 0) {
      result.push(batch)
      batch = []
      batchChars = 0
    }
  }

  for (const seg of segments) {
    const prefix = seg.speaker != null ? `[${speakerLabel} ${seg.speaker + 1}] ` : ''
    const renderedLen = (prefix + seg.text.trim() + '\n').length

    // A segment too large to ever fit gets a chunk of its own.
    if (renderedLen > maxCharsPerChunk) {
      flush()
      result.push([seg])
      continue
    }

    // Start a new chunk when this segment would overflow the current one.
    if (batchChars + renderedLen > maxCharsPerChunk && batch.length > 0) {
      flush()
    }
    batch.push(seg)
    batchChars += renderedLen
  }
  flush()
  return result
}

/**
 * Builds the per-chunk prompt. For the first chunk this is just the user's
 * template with the chunk text substituted for `%s`; later chunks are
 * prefixed with the previous chunk's summary as rolling context so the model
 * keeps continuity across the split transcript.
 *
 * The substitution uses a replacer function so `$`-sequences in the
 * transcript (`$&`, `$'`, `$$`, …) are inserted literally instead of being
 * expanded as regex replacement patterns by `String.prototype.replace`.
 *
 * @param promptTemplate user prompt containing a `%s` placeholder
 * @param chunkText rendered transcript text for this chunk
 * @param previousSummary summary of prior chunks, or null for the first chunk
 * @param chunkIndex zero-based index of this chunk
 * @param totalChunks total number of chunks
 */
function buildChunkPrompt(promptTemplate: string, chunkText: string, previousSummary: string | null, chunkIndex: number, totalChunks: number): string {
  const base = promptTemplate.replace('%s', () => chunkText)
  if (!previousSummary) {
    return base
  }
  return `Summary of previous sections (use as context, do not repeat verbatim):\n${previousSummary}\n\nNow summarize section ${chunkIndex + 1} of ${totalChunks}:\n${base}`
}

/**
 * Builds the final "merge" prompt that asks the model to fold the partial
 * per-chunk summaries into one coherent summary of the whole transcript.
 * Each partial is rendered under a `### Part N` markdown heading.
 */
function buildSynthesisPrompt(partials: string[]): string {
  const sections: string[] = []
  partials.forEach((summary, index) => {
    sections.push(`### Part ${index + 1}\n${summary}`)
  })
  const header = [
    `You are combining ${partials.length} partial summaries of consecutive sections of a single transcript into one coherent final summary.`,
    '',
    'Synthesize them into a unified summary that:',
    '- Preserves all key topics, decisions, and action items from every part',
    '- Eliminates repetition',
    '- Follows the same markdown format as the partial summaries',
    '- Reads as if the entire transcript were summarized in one pass',
    '',
    '',
  ].join('\n')
  return header + sections.join('\n\n')
}

/**
 * Summarizes a transcript with the configured LLM, transparently splitting it
 * into chunks when the rendered text exceeds the per-request character budget.
 *
 * Fast path: if the whole transcript fits in one request, a single `llm.ask`
 * preserves the pre-chunking behavior. Otherwise each chunk is summarized
 * sequentially (the previous partial summary is carried forward as rolling
 * context) and the partials are merged by one final synthesis request.
 *
 * @param llm configured LLM client
 * @param segments transcript segments to summarize
 * @param config LLM config; `prompt` must contain a `%s` placeholder and
 *   `maxInputChars` falls back to `llmDefaultMaxInputChars`
 * @param speakerLabel localized speaker-prefix label used when rendering text
 * @param onProgress optional callback reporting per-chunk / synthesis progress
 * @returns the final summary text
 */
export async function summarizeWithChunking(
  llm: Llm,
  segments: Segment[],
  config: LlmConfig,
  speakerLabel: string,
  onProgress?: (progress: ChunkingProgress) => void,
): Promise<string> {
  const maxInputChars = config.maxInputChars ?? llmDefaultMaxInputChars
  const promptTemplate = config.prompt
  const promptOverhead = promptTemplate.replace('%s', '').length
  // Clamp so a prompt template longer than the whole budget cannot yield a
  // zero/negative chunk size (which would degenerate into one chunk per segment).
  const maxCharsPerChunk = Math.max(1, maxInputChars - promptOverhead)

  const fullText = asText(segments, speakerLabel)

  // Replacer function so `$`-sequences in the transcript ($&, $', $$, …) are
  // inserted literally instead of being expanded as replacement patterns.
  const fillTemplate = (text: string) => promptTemplate.replace('%s', () => text)

  // Fast path — fits in one request (pre-chunking behavior)
  if (fullText.length <= maxCharsPerChunk) {
    return llm.ask(fillTemplate(fullText))
  }

  // Chunk mode
  const chunks = splitIntoChunks(segments, maxCharsPerChunk, speakerLabel)

  if (chunks.length <= 1) {
    // Edge case: can't split further, send as-is
    return llm.ask(fillTemplate(fullText))
  }

  // Summarize each chunk sequentially, passing the previous summary as rolling context
  const partials: string[] = []
  let previousSummary: string | null = null
  for (let i = 0; i < chunks.length; i++) {
    onProgress?.({ phase: 'chunk', current: i + 1, total: chunks.length })
    const chunkText = asText(chunks[i], speakerLabel)
    const prompt = buildChunkPrompt(promptTemplate, chunkText, previousSummary, i, chunks.length)
    const partial = await llm.ask(prompt)
    partials.push(partial)
    previousSummary = partial
  }

  // Synthesize all partials into a single coherent summary
  onProgress?.({ phase: 'synthesis', total: chunks.length })
  return llm.ask(buildSynthesisPrompt(partials))
}
2 changes: 2 additions & 0 deletions desktop/src/lib/llm/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ export interface LlmConfig {
// OpenAI Compatible
openaiBaseUrl?: string
openaiApiKey?: string

maxInputChars?: number // INPUT char budget per LLM request; triggers chunking when exceeded
}

export { Ollama, Claude, OpenAICompatible, defaultClaudeConfig, defaultOllamaConfig, defaultOpenAIConfig }
6 changes: 3 additions & 3 deletions desktop/src/pages/batch/view-model.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ import { usePreferenceProvider } from '~/providers/preference'
import { useFilesContext } from '~/providers/files-provider'
import { basename } from '@tauri-apps/api/path'
import { Claude, Ollama, Llm, OpenAICompatible } from '~/lib/llm'
import * as transcript from '~/lib/transcript'
import { summarizeWithChunking } from '~/lib/llm/chunking'

import { path } from '@tauri-apps/api'
import { toDocx } from '~/lib/docx'
import { toast } from 'sonner'
Expand Down Expand Up @@ -235,8 +236,7 @@ export function viewModel() {
let llmSegments: Segment[] | null = null
if (llm && preference.llmConfig?.enabled) {
try {
const question = `${preference.llmConfig.prompt.replace('%s', transcript.asText(res.segments, speakerLabel))}`
const answer = await llm.ask(question)
const answer = await summarizeWithChunking(llm!, res.segments, preference.llmConfig, speakerLabel)
if (answer) {
llmSegments = [{ start: 0, stop: res.segments?.[res.segments?.length - 1].stop ?? 0, text: answer }]
}
Expand Down
35 changes: 19 additions & 16 deletions desktop/src/pages/home/view-model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,14 @@ import { ensureSystemAudioPermission } from '~/lib/permissions'
import { analyticsEvents, trackAnalyticsEvent } from '~/lib/analytics'
import * as config from '~/lib/config'
import { Claude, Llm, Ollama, OpenAICompatible } from '~/lib/llm'
import { summarizeWithChunking, type ChunkingProgress } from '~/lib/llm/chunking'

// Maps a chunking progress event to its localized toast message:
// per-chunk events interpolate current/total; synthesis has a fixed key.
function summarizeProgressMessage(p: ChunkingProgress, t: (key: string, opts?: Record<string, unknown>) => string): string {
	return p.phase === 'chunk' ? t('common.summarize-chunk-progress', { current: p.current, total: p.total }) : t('common.summarize-synthesis')
}
import * as transcript from '~/lib/transcript'
import { isUserError } from '~/lib/sona-errors'
import { useConfirmExit } from '~/lib/use-confirm-exit'
Expand Down Expand Up @@ -571,21 +579,17 @@ export function viewModel() {
}

if (newSegments && llm && preferenceRef.current.llmConfig?.enabled) {
const toastId = hotToast.loading(t('common.summarize-loading'))
try {
const question = `${preferenceRef.current.llmConfig.prompt.replace('%s', transcript.asText(newSegments, t('common.speaker-prefix')))}`
const answerPromise = llm.ask(question)
hotToast.promise(answerPromise, {
loading: t('common.summarize-loading'),
error: (error) => {
return String(error)
},
success: t('common.summarize-success'),
const answer = await summarizeWithChunking(llm, newSegments, preferenceRef.current.llmConfig, t('common.speaker-prefix'), (p) => {
hotToast.loading(summarizeProgressMessage(p, t), { id: toastId })
})
const answer = await answerPromise
hotToast.success(t('common.summarize-success'), { id: toastId })
if (answer) {
setSummarizeSegments([{ start: 0, stop: newSegments?.[newSegments?.length - 1].stop ?? 0, text: answer }])
}
} catch (e) {
hotToast.error(String(e), { id: toastId })
console.error(e)
}
}
Expand All @@ -596,20 +600,19 @@ export function viewModel() {
async function resummarize(prompt: string) {
if (!segments || !llm) return
setSummarizing(true)
const toastId = hotToast.loading(t('common.summarize-loading'))
try {
const question = prompt.replace('%s', transcript.asText(segments, t('common.speaker-prefix')))
const answerPromise = llm.ask(question)
hotToast.promise(answerPromise, {
loading: t('common.summarize-loading'),
error: (error) => String(error),
success: t('common.summarize-success'),
const llmConfig = preferenceRef.current.llmConfig
const answer = await summarizeWithChunking(llm, segments, { ...llmConfig, prompt }, t('common.speaker-prefix'), (p) => {
hotToast.loading(summarizeProgressMessage(p, t), { id: toastId })
})
const answer = await answerPromise
hotToast.success(t('common.summarize-success'), { id: toastId })
if (answer) {
setSummarizeSegments([{ start: 0, stop: segments[segments.length - 1]?.stop ?? 0, text: answer }])
setTranscriptTab('summary')
}
} catch (e) {
hotToast.error(String(e), { id: toastId })
console.error(e)
} finally {
setSummarizing(false)
Expand Down
File renamed without changes.
Loading