diff --git a/desktop/pnpm-lock.yaml b/desktop/pnpm-lock.yaml
index 41e33b4b..a2cb48e6 100644
--- a/desktop/pnpm-lock.yaml
+++ b/desktop/pnpm-lock.yaml
@@ -89,9 +89,6 @@ importers:
'@tauri-apps/plugin-process':
specifier: ~2.3.1
version: 2.3.1
- '@tauri-apps/plugin-shell':
- specifier: ~2.3.5
- version: 2.3.5
'@tauri-apps/plugin-store':
specifier: ~2.4.2
version: 2.4.2
@@ -1422,9 +1419,6 @@ packages:
'@tauri-apps/plugin-process@2.3.1':
resolution: {integrity: sha512-nCa4fGVaDL/B9ai03VyPOjfAHRHSBz5v6F/ObsB73r/dA3MHHhZtldaDMIc0V/pnUw9ehzr2iEG+XkSEyC0JJA==}
- '@tauri-apps/plugin-shell@2.3.5':
- resolution: {integrity: sha512-jewtULhiQ7lI7+owCKAjc8tYLJr92U16bPOeAa472LHJdgaibLP83NcfAF2e+wkEcA53FxKQAZ7byDzs2eeizg==}
-
'@tauri-apps/plugin-store@2.4.2':
resolution: {integrity: sha512-0ClHS50Oq9HEvLPhNzTNFxbWVOqoAp3dRvtewQBeqfIQ0z5m3JRnOISIn2ZVPCrQC0MyGyhTS9DWhHjpigQE7A==}
@@ -3777,10 +3771,6 @@ snapshots:
dependencies:
'@tauri-apps/api': 2.10.1
- '@tauri-apps/plugin-shell@2.3.5':
- dependencies:
- '@tauri-apps/api': 2.10.1
-
'@tauri-apps/plugin-store@2.4.2':
dependencies:
'@tauri-apps/api': 2.10.1
diff --git a/desktop/src-tauri/locales/en-US/common.json b/desktop/src-tauri/locales/en-US/common.json
index 308910dd..86b8f10c 100644
--- a/desktop/src-tauri/locales/en-US/common.json
+++ b/desktop/src-tauri/locales/en-US/common.json
@@ -115,6 +115,8 @@
"max-speakers": "Max speakers",
"max-text-ctx": "Maximum context",
"max-tokens": "Max Tokens",
+ "max-input-chars": "Max Input Characters",
+ "info-max-input-chars": "Max characters sent to AI per request. Transcripts longer than this are split into chunks, each summarized separately, then merged. Default 24000 ≈ 6000 tokens (fits most local models).",
"microphone": "Microphone",
"modal-close": "Close",
"modal-error-body": "A bug happened!",
@@ -177,6 +179,8 @@
"stop-and-transcribe": "Stop Recording",
"success-action": "Operation succeeded.",
"summarize-loading": "Summarizing",
+ "summarize-chunk-progress": "Summarizing part {{current}} of {{total}}...",
+ "summarize-synthesis": "Merging summaries...",
"summarize-success": "Summarized",
"summary-tab": "Summary",
"support-the-project": "Support Vibe",
diff --git a/desktop/src-tauri/locales/ru-RU/common.json b/desktop/src-tauri/locales/ru-RU/common.json
index eac9c187..43fa7824 100644
--- a/desktop/src-tauri/locales/ru-RU/common.json
+++ b/desktop/src-tauri/locales/ru-RU/common.json
@@ -70,6 +70,7 @@
"info-max-speakers": "How many speakers should be in the file. Used for more precise recognition",
"info-max-text-ctx": "Max context tokens to use from the past text as prompt for the decoder",
"info-max-tokens": "Max tokens for the AI model. Each token is usually considered as a single word. This can save you from sending too much data which costs money. It's recommended to restrict it also in the website.",
+ "info-max-input-chars": "Максимальное количество символов, отправляемых ИИ за один запрос. Если транскрипт длиннее — он разбивается на части, каждая суммаризируется отдельно, затем результаты объединяются. По умолчанию 24000 ≈ 6000 токенов (подходит для большинства локальных моделей).",
"info-normalize-loudness": "Enable this option for better transcription accuracy. It may take up to 8 minutes per hour of audio.",
"info-prompt": "Make transcripts better by writing expected words.",
"info-recognize-speakers": "Detect speaker in each sentence and add it",
@@ -93,6 +94,7 @@
"max-speakers": "Максимальное количество говорящих",
"max-text-ctx": "Максимальный контекст",
"max-tokens": "Максимальное количество токенов",
+ "max-input-chars": "Максимум символов на запрос",
"microphone": "Микрофон",
"modal-close": "Закрыть",
"modal-error-body": "Произошла ошибка!",
@@ -144,7 +146,9 @@
"stop-and-transcribe": "Остановить запись",
"success-action": "Операция выполнена успешно.",
"summarize-loading": "Суммаризация",
- "summarize-success": "Суммиризация завершена",
+ "summarize-chunk-progress": "Суммаризация части {{current}} из {{total}}...",
+ "summarize-synthesis": "Объединение частей...",
+ "summarize-success": "Суммаризация завершена",
"summary-tab": "Саммари",
"support-the-project": "Поддержать Vibe",
"temp-folder": "Временные файлы",
diff --git a/desktop/src-tauri/src/cmd/audio.rs b/desktop/src-tauri/src/cmd/audio.rs
index 0948d6b6..6f8d837b 100644
--- a/desktop/src-tauri/src/cmd/audio.rs
+++ b/desktop/src-tauri/src/cmd/audio.rs
@@ -215,7 +215,7 @@ fn get_output_device_and_config(host: &cpal::Host, audio_device: &AudioDevice) -
let config = device
.default_output_config()
.context("Failed to get default output config")?;
- return Ok((device, config));
+ Ok((device, config))
}
#[cfg(not(target_os = "macos"))]
diff --git a/desktop/src-tauri/src/main.rs b/desktop/src-tauri/src/main.rs
index 98fa5acb..3d5d5326 100644
--- a/desktop/src-tauri/src/main.rs
+++ b/desktop/src-tauri/src/main.rs
@@ -61,7 +61,7 @@ async fn main() -> Result<()> {
.plugin(tauri_plugin_dialog::init())
.plugin(tauri_plugin_updater::Builder::default().build())
.plugin(tauri_plugin_process::init())
-.plugin(tauri_plugin_global_shortcut::Builder::new().build())
+ .plugin(tauri_plugin_global_shortcut::Builder::new().build())
.plugin(tauri_plugin_notification::init());
if analytics::is_aptabase_configured() {
diff --git a/desktop/src/components/params.tsx b/desktop/src/components/params.tsx
index f47c3b0f..2e603951 100644
--- a/desktop/src/components/params.tsx
+++ b/desktop/src/components/params.tsx
@@ -315,6 +315,21 @@ export default function ModelOptions({ options, setOptions }: ParamsProps) {
/>
+
+
+ {t('common.max-input-chars')}
+ >
+ }>
+					onChange={(e) => setLlmConfig({ ...llmConfig, maxInputChars: Number(e.target.value) || config.llmDefaultMaxInputChars })}
+ />
+
+
diff --git a/desktop/src/lib/config.ts b/desktop/src/lib/config.ts
index 5a6c3457..2028b52d 100644
--- a/desktop/src/lib/config.ts
+++ b/desktop/src/lib/config.ts
@@ -28,6 +28,7 @@ export const vadModelUrl = 'https://huggingface.co/ggml-org/whisper-vad/resolve/
export const llmApiKeyUrl = 'https://console.anthropic.com/settings/keys'
export const llmDefaultMaxTokens = 8192 // https://docs.anthropic.com/en/docs/about-claude/models
+export const llmDefaultMaxInputChars = 24_000 // 24000 chars ≈ 6000 tokens — safe for small local models
export const llmLimitsUrl = 'https://console.anthropic.com/settings/limits'
export const llmCostUrl = 'https://console.anthropic.com/settings/cost'
diff --git a/desktop/src/lib/llm/chunking.ts b/desktop/src/lib/llm/chunking.ts
new file mode 100644
index 00000000..8598a207
--- /dev/null
+++ b/desktop/src/lib/llm/chunking.ts
@@ -0,0 +1,102 @@
+import { llmDefaultMaxInputChars } from '~/lib/config'
+import { type Llm, type LlmConfig } from '~/lib/llm'
+import { asText, type Segment } from '~/lib/transcript'
+
+export type ChunkingProgress = { phase: 'chunk'; current: number; total: number } | { phase: 'synthesis'; total: number }
+
+function splitIntoChunks(segments: Segment[], maxCharsPerChunk: number, speakerLabel: string): Segment[][] {
+ const chunks: Segment[][] = []
+ let current: Segment[] = []
+ let currentLen = 0
+
+ for (const segment of segments) {
+ const segText = (segment.speaker != null ? `[${speakerLabel} ${segment.speaker + 1}] ` : '') + segment.text.trim() + '\n'
+ const segLen = segText.length
+
+ if (segLen > maxCharsPerChunk) {
+ // Oversized single segment — place it alone
+ if (current.length > 0) {
+ chunks.push(current)
+ current = []
+ currentLen = 0
+ }
+ chunks.push([segment])
+ continue
+ }
+
+ if (currentLen + segLen > maxCharsPerChunk && current.length > 0) {
+ chunks.push(current)
+ current = []
+ currentLen = 0
+ }
+ current.push(segment)
+ currentLen += segLen
+ }
+ if (current.length > 0) chunks.push(current)
+ return chunks
+}
+
+function buildChunkPrompt(promptTemplate: string, chunkText: string, previousSummary: string | null, chunkIndex: number, totalChunks: number): string {
+ const base = promptTemplate.replace('%s', chunkText)
+ if (!previousSummary) {
+ return base
+ }
+ return `Summary of previous sections (use as context, do not repeat verbatim):\n${previousSummary}\n\nNow summarize section ${chunkIndex + 1} of ${totalChunks}:\n${base}`
+}
+
+function buildSynthesisPrompt(partials: string[]): string {
+ const combined = partials.map((s, i) => `### Part ${i + 1}\n${s}`).join('\n\n')
+ return `You are combining ${partials.length} partial summaries of consecutive sections of a single transcript into one coherent final summary.
+
+Synthesize them into a unified summary that:
+- Preserves all key topics, decisions, and action items from every part
+- Eliminates repetition
+- Follows the same markdown format as the partial summaries
+- Reads as if the entire transcript were summarized in one pass
+
+${combined}`
+}
+
+export async function summarizeWithChunking(
+ llm: Llm,
+ segments: Segment[],
+ config: LlmConfig,
+ speakerLabel: string,
+ onProgress?: (progress: ChunkingProgress) => void,
+): Promise<string> {
+ const maxInputChars = config.maxInputChars ?? llmDefaultMaxInputChars
+ const promptTemplate = config.prompt
+ const promptOverhead = promptTemplate.replace('%s', '').length
+ const maxCharsPerChunk = maxInputChars - promptOverhead
+
+ const fullText = asText(segments, speakerLabel)
+
+ // Fast path — fits in one request (current behavior)
+ if (fullText.length <= maxCharsPerChunk) {
+ return llm.ask(promptTemplate.replace('%s', fullText))
+ }
+
+ // Chunk mode
+ const chunks = splitIntoChunks(segments, maxCharsPerChunk, speakerLabel)
+
+ if (chunks.length <= 1) {
+ // Edge case: can't split further, send as-is
+ return llm.ask(promptTemplate.replace('%s', fullText))
+ }
+
+ // Summarize each chunk sequentially, passing the previous summary as rolling context
+ const partials: string[] = []
+ let previousSummary: string | null = null
+ for (let i = 0; i < chunks.length; i++) {
+ onProgress?.({ phase: 'chunk', current: i + 1, total: chunks.length })
+ const chunkText = asText(chunks[i], speakerLabel)
+ const prompt = buildChunkPrompt(promptTemplate, chunkText, previousSummary, i, chunks.length)
+ const partial = await llm.ask(prompt)
+ partials.push(partial)
+ previousSummary = partial
+ }
+
+ // Synthesize all partials into a single coherent summary
+ onProgress?.({ phase: 'synthesis', total: chunks.length })
+ return llm.ask(buildSynthesisPrompt(partials))
+}
diff --git a/desktop/src/lib/llm/index.ts b/desktop/src/lib/llm/index.ts
index 0d83d4d9..f64fb552 100644
--- a/desktop/src/lib/llm/index.ts
+++ b/desktop/src/lib/llm/index.ts
@@ -22,6 +22,8 @@ export interface LlmConfig {
// OpenAI Compatible
openaiBaseUrl?: string
openaiApiKey?: string
+
+ maxInputChars?: number // INPUT char budget per LLM request; triggers chunking when exceeded
}
export { Ollama, Claude, OpenAICompatible, defaultClaudeConfig, defaultOllamaConfig, defaultOpenAIConfig }
diff --git a/desktop/src/pages/batch/view-model.tsx b/desktop/src/pages/batch/view-model.tsx
index ccc2af1e..278fd7ef 100644
--- a/desktop/src/pages/batch/view-model.tsx
+++ b/desktop/src/pages/batch/view-model.tsx
@@ -20,7 +20,8 @@ import { usePreferenceProvider } from '~/providers/preference'
import { useFilesContext } from '~/providers/files-provider'
import { basename } from '@tauri-apps/api/path'
import { Claude, Ollama, Llm, OpenAICompatible } from '~/lib/llm'
-import * as transcript from '~/lib/transcript'
+import { summarizeWithChunking } from '~/lib/llm/chunking'
+
import { path } from '@tauri-apps/api'
import { toDocx } from '~/lib/docx'
import { toast } from 'sonner'
@@ -235,8 +236,7 @@ export function viewModel() {
let llmSegments: Segment[] | null = null
if (llm && preference.llmConfig?.enabled) {
try {
- const question = `${preference.llmConfig.prompt.replace('%s', transcript.asText(res.segments, speakerLabel))}`
- const answer = await llm.ask(question)
+ const answer = await summarizeWithChunking(llm, res.segments, preference.llmConfig, speakerLabel)
if (answer) {
llmSegments = [{ start: 0, stop: res.segments?.[res.segments?.length - 1].stop ?? 0, text: answer }]
}
diff --git a/desktop/src/pages/home/view-model.ts b/desktop/src/pages/home/view-model.ts
index 6dd7a28c..64fcce2f 100644
--- a/desktop/src/pages/home/view-model.ts
+++ b/desktop/src/pages/home/view-model.ts
@@ -18,6 +18,14 @@ import { ensureSystemAudioPermission } from '~/lib/permissions'
import { analyticsEvents, trackAnalyticsEvent } from '~/lib/analytics'
import * as config from '~/lib/config'
import { Claude, Llm, Ollama, OpenAICompatible } from '~/lib/llm'
+import { summarizeWithChunking, type ChunkingProgress } from '~/lib/llm/chunking'
+
+function summarizeProgressMessage(p: ChunkingProgress, t: (key: string, opts?: Record<string, unknown>) => string): string {
+ if (p.phase === 'chunk') {
+ return t('common.summarize-chunk-progress', { current: p.current, total: p.total })
+ }
+ return t('common.summarize-synthesis')
+}
import * as transcript from '~/lib/transcript'
import { isUserError } from '~/lib/sona-errors'
import { useConfirmExit } from '~/lib/use-confirm-exit'
@@ -571,21 +579,17 @@ export function viewModel() {
}
if (newSegments && llm && preferenceRef.current.llmConfig?.enabled) {
+ const toastId = hotToast.loading(t('common.summarize-loading'))
try {
- const question = `${preferenceRef.current.llmConfig.prompt.replace('%s', transcript.asText(newSegments, t('common.speaker-prefix')))}`
- const answerPromise = llm.ask(question)
- hotToast.promise(answerPromise, {
- loading: t('common.summarize-loading'),
- error: (error) => {
- return String(error)
- },
- success: t('common.summarize-success'),
+ const answer = await summarizeWithChunking(llm, newSegments, preferenceRef.current.llmConfig, t('common.speaker-prefix'), (p) => {
+ hotToast.loading(summarizeProgressMessage(p, t), { id: toastId })
})
- const answer = await answerPromise
+ hotToast.success(t('common.summarize-success'), { id: toastId })
if (answer) {
setSummarizeSegments([{ start: 0, stop: newSegments?.[newSegments?.length - 1].stop ?? 0, text: answer }])
}
} catch (e) {
+ hotToast.error(String(e), { id: toastId })
console.error(e)
}
}
@@ -596,20 +600,19 @@ export function viewModel() {
async function resummarize(prompt: string) {
if (!segments || !llm) return
setSummarizing(true)
+ const toastId = hotToast.loading(t('common.summarize-loading'))
try {
- const question = prompt.replace('%s', transcript.asText(segments, t('common.speaker-prefix')))
- const answerPromise = llm.ask(question)
- hotToast.promise(answerPromise, {
- loading: t('common.summarize-loading'),
- error: (error) => String(error),
- success: t('common.summarize-success'),
+ const llmConfig = preferenceRef.current.llmConfig
+ const answer = await summarizeWithChunking(llm, segments, { ...llmConfig, prompt }, t('common.speaker-prefix'), (p) => {
+ hotToast.loading(summarizeProgressMessage(p, t), { id: toastId })
})
- const answer = await answerPromise
+ hotToast.success(t('common.summarize-success'), { id: toastId })
if (answer) {
setSummarizeSegments([{ start: 0, stop: segments[segments.length - 1]?.stop ?? 0, text: answer }])
setTranscriptTab('summary')
}
} catch (e) {
+ hotToast.error(String(e), { id: toastId })
console.error(e)
} finally {
setSummarizing(false)
diff --git a/desktop/src/providers/Toast.tsx b/desktop/src/providers/toast.tsx
similarity index 100%
rename from desktop/src/providers/Toast.tsx
rename to desktop/src/providers/toast.tsx