diff --git a/desktop/pnpm-lock.yaml b/desktop/pnpm-lock.yaml index 41e33b4b..a2cb48e6 100644 --- a/desktop/pnpm-lock.yaml +++ b/desktop/pnpm-lock.yaml @@ -89,9 +89,6 @@ importers: '@tauri-apps/plugin-process': specifier: ~2.3.1 version: 2.3.1 - '@tauri-apps/plugin-shell': - specifier: ~2.3.5 - version: 2.3.5 '@tauri-apps/plugin-store': specifier: ~2.4.2 version: 2.4.2 @@ -1422,9 +1419,6 @@ packages: '@tauri-apps/plugin-process@2.3.1': resolution: {integrity: sha512-nCa4fGVaDL/B9ai03VyPOjfAHRHSBz5v6F/ObsB73r/dA3MHHhZtldaDMIc0V/pnUw9ehzr2iEG+XkSEyC0JJA==} - '@tauri-apps/plugin-shell@2.3.5': - resolution: {integrity: sha512-jewtULhiQ7lI7+owCKAjc8tYLJr92U16bPOeAa472LHJdgaibLP83NcfAF2e+wkEcA53FxKQAZ7byDzs2eeizg==} - '@tauri-apps/plugin-store@2.4.2': resolution: {integrity: sha512-0ClHS50Oq9HEvLPhNzTNFxbWVOqoAp3dRvtewQBeqfIQ0z5m3JRnOISIn2ZVPCrQC0MyGyhTS9DWhHjpigQE7A==} @@ -3777,10 +3771,6 @@ snapshots: dependencies: '@tauri-apps/api': 2.10.1 - '@tauri-apps/plugin-shell@2.3.5': - dependencies: - '@tauri-apps/api': 2.10.1 - '@tauri-apps/plugin-store@2.4.2': dependencies: '@tauri-apps/api': 2.10.1 diff --git a/desktop/src-tauri/locales/en-US/common.json b/desktop/src-tauri/locales/en-US/common.json index 308910dd..86b8f10c 100644 --- a/desktop/src-tauri/locales/en-US/common.json +++ b/desktop/src-tauri/locales/en-US/common.json @@ -115,6 +115,8 @@ "max-speakers": "Max speakers", "max-text-ctx": "Maximum context", "max-tokens": "Max Tokens", + "max-input-chars": "Max Input Characters", + "info-max-input-chars": "Max characters sent to AI per request. Transcripts longer than this are split into chunks, each summarized separately, then merged. 
Default 24000 ≈ 6000 tokens (fits most local models).", "microphone": "Microphone", "modal-close": "Close", "modal-error-body": "A bug happened!", @@ -177,6 +179,8 @@ "stop-and-transcribe": "Stop Recording", "success-action": "Operation succeeded.", "summarize-loading": "Summarizing", + "summarize-chunk-progress": "Summarizing part {{current}} of {{total}}...", + "summarize-synthesis": "Merging summaries...", "summarize-success": "Summarized", "summary-tab": "Summary", "support-the-project": "Support Vibe", diff --git a/desktop/src-tauri/locales/ru-RU/common.json b/desktop/src-tauri/locales/ru-RU/common.json index eac9c187..43fa7824 100644 --- a/desktop/src-tauri/locales/ru-RU/common.json +++ b/desktop/src-tauri/locales/ru-RU/common.json @@ -70,6 +70,7 @@ "info-max-speakers": "How many speakers should be in the file. Used for more precise recognition", "info-max-text-ctx": "Max context tokens to use from the past text as prompt for the decoder", "info-max-tokens": "Max tokens for the AI model. Each token is usually considered as a single word. This can save you from sending too much data which costs money. It's recommended to restrict it also in the website.", + "info-max-input-chars": "Максимальное количество символов, отправляемых ИИ за один запрос. Если транскрипт длиннее — он разбивается на части, каждая суммаризируется отдельно, затем результаты объединяются. По умолчанию 24000 ≈ 6000 токенов (подходит для большинства локальных моделей).", "info-normalize-loudness": "Enable this option for better transcription accuracy. 
It may take up to 8 minutes per hour of audio.", "info-prompt": "Make transcripts better by writing expected words.", "info-recognize-speakers": "Detect speaker in each sentence and add it", @@ -93,6 +94,7 @@ "max-speakers": "Максимальное количество говорящих", "max-text-ctx": "Максимальный контекст", "max-tokens": "Максимальное количество токенов", + "max-input-chars": "Максимум символов на запрос", "microphone": "Микрофон", "modal-close": "Закрыть", "modal-error-body": "Произошла ошибка!", @@ -144,7 +146,9 @@ "stop-and-transcribe": "Остановить запись", "success-action": "Операция выполнена успешно.", "summarize-loading": "Суммаризация", - "summarize-success": "Суммиризация завершена", + "summarize-chunk-progress": "Суммаризация части {{current}} из {{total}}...", + "summarize-synthesis": "Объединение частей...", + "summarize-success": "Суммаризация завершена", "summary-tab": "Саммари", "support-the-project": "Поддержать Vibe", "temp-folder": "Временные файлы", diff --git a/desktop/src-tauri/src/cmd/audio.rs b/desktop/src-tauri/src/cmd/audio.rs index 0948d6b6..6f8d837b 100644 --- a/desktop/src-tauri/src/cmd/audio.rs +++ b/desktop/src-tauri/src/cmd/audio.rs @@ -215,7 +215,7 @@ fn get_output_device_and_config(host: &cpal::Host, audio_device: &AudioDevice) - let config = device .default_output_config() .context("Failed to get default output config")?; - return Ok((device, config)); + Ok((device, config)) } #[cfg(not(target_os = "macos"))] diff --git a/desktop/src-tauri/src/main.rs b/desktop/src-tauri/src/main.rs index 98fa5acb..3d5d5326 100644 --- a/desktop/src-tauri/src/main.rs +++ b/desktop/src-tauri/src/main.rs @@ -61,7 +61,7 @@ async fn main() -> Result<()> { .plugin(tauri_plugin_dialog::init()) .plugin(tauri_plugin_updater::Builder::default().build()) .plugin(tauri_plugin_process::init()) -.plugin(tauri_plugin_global_shortcut::Builder::new().build()) + .plugin(tauri_plugin_global_shortcut::Builder::new().build()) .plugin(tauri_plugin_notification::init()); if 
/*
 * NOTE: this file is a flattened git patch. The patch text that shares these physical lines
 * with desktop/src/lib/llm/chunking.ts is kept verbatim below so nothing is lost. It includes
 * the chunking.ts patch header and the module's three import lines (`~/lib/config`, `~/lib/llm`,
 * `~/lib/transcript`), which the definitions that follow depend on.
 *
 * analytics::is_aptabase_configured() { diff --git a/desktop/src/components/params.tsx b/desktop/src/components/params.tsx index f47c3b0f..2e603951 100644 --- a/desktop/src/components/params.tsx +++ b/desktop/src/components/params.tsx @@ -315,6 +315,21 @@ export default function ModelOptions({ options, setOptions }: ParamsProps) { /> + + + {t('common.max-input-chars')} + + }> + setLlmConfig({ ...llmConfig, maxInputChars: Number(e.target.value) || config.llmDefaultMaxInputChars })} + /> + + diff --git a/desktop/src/lib/config.ts b/desktop/src/lib/config.ts index 5a6c3457..2028b52d 100644 --- a/desktop/src/lib/config.ts +++ b/desktop/src/lib/config.ts @@ -28,6 +28,7 @@ export const vadModelUrl = 'https://huggingface.co/ggml-org/whisper-vad/resolve/ export const llmApiKeyUrl = 'https://console.anthropic.com/settings/keys' export const llmDefaultMaxTokens = 8192 // https://docs.anthropic.com/en/docs/about-claude/models +export const llmDefaultMaxInputChars = 24_000 // 24000 chars ≈ 6000 tokens — safe for small local models export const llmLimitsUrl = 'https://console.anthropic.com/settings/limits' export const llmCostUrl = 'https://console.anthropic.com/settings/cost' diff --git a/desktop/src/lib/llm/chunking.ts b/desktop/src/lib/llm/chunking.ts new file mode 100644 index 00000000..8598a207 --- /dev/null +++ b/desktop/src/lib/llm/chunking.ts @@ -0,0 +1,102 @@ +import { llmDefaultMaxInputChars } from '~/lib/config' +import { type Llm, type LlmConfig } from '~/lib/llm' +import { asText, type Segment } from '~/lib/transcript' +
 */

/** Progress notifications emitted by `summarizeWithChunking` while it works through a long transcript. */
export type ChunkingProgress =
  | { phase: 'chunk'; current: number; total: number }
  | { phase: 'synthesis'; total: number }

/**
 * Inserts `text` at the first `%s` placeholder of `promptTemplate`.
 *
 * A replacer function is used instead of a replacement string: with a string,
 * `String.prototype.replace` expands `$$`, `$&`, `` $` `` and `$'`, so transcript text
 * containing those sequences would be silently rewritten before reaching the model.
 */
function fillPromptTemplate(promptTemplate: string, text: string): string {
  return promptTemplate.replace('%s', () => text)
}

/**
 * Groups consecutive segments into chunks whose estimated rendered length stays within
 * `maxCharsPerChunk`.
 *
 * The length of a segment is estimated as an optional `[<speakerLabel> <n>] ` prefix, the
 * trimmed text and one newline. A segment that is larger than the budget on its own is
 * never split; it becomes a chunk by itself.
 */
function splitIntoChunks(segments: Segment[], maxCharsPerChunk: number, speakerLabel: string): Segment[][] {
  const chunks: Segment[][] = []
  let current: Segment[] = []
  let currentLen = 0

  const flush = (): void => {
    if (current.length > 0) {
      chunks.push(current)
      current = []
      currentLen = 0
    }
  }

  for (const segment of segments) {
    // NOTE(review): `speaker + 1` assumes a zero-based numeric speaker index — confirm against Segment.
    const prefix = segment.speaker != null ? `[${speakerLabel} ${segment.speaker + 1}] ` : ''
    const segLen = prefix.length + segment.text.trim().length + 1 // +1 for the trailing newline

    if (segLen > maxCharsPerChunk) {
      // Oversized single segment: close the running chunk and emit the segment alone.
      flush()
      chunks.push([segment])
      continue
    }

    if (currentLen + segLen > maxCharsPerChunk) {
      flush()
    }
    current.push(segment)
    currentLen += segLen
  }

  flush()
  return chunks
}

/**
 * Builds the prompt for one chunk.
 *
 * Without a previous summary the result is just the filled template. Otherwise the previous
 * summary is prepended as rolling context, followed by a "section i of n" instruction.
 * The previous summary is added on top of the chunk text; it is not counted against the
 * character budget used for splitting.
 */
function buildChunkPrompt(promptTemplate: string, chunkText: string, previousSummary: string | null, chunkIndex: number, totalChunks: number): string {
  const base = fillPromptTemplate(promptTemplate, chunkText)
  if (!previousSummary) {
    return base
  }
  return `Summary of previous sections (use as context, do not repeat verbatim):\n${previousSummary}\n\nNow summarize section ${chunkIndex + 1} of ${totalChunks}:\n${base}`
}

/** Builds the final prompt that asks the model to merge the per-chunk summaries into one. */
function buildSynthesisPrompt(partials: string[]): string {
  const combined = partials.map((s, i) => `### Part ${i + 1}\n${s}`).join('\n\n')
  return `You are combining ${partials.length} partial summaries of consecutive sections of a single transcript into one coherent final summary.

Synthesize them into a unified summary that:
- Preserves all key topics, decisions, and action items from every part
- Eliminates repetition
- Follows the same markdown format as the partial summaries
- Reads as if the entire transcript were summarized in one pass

${combined}`
}

/**
 * Summarizes `segments` with `llm`, splitting the transcript into several requests when it
 * does not fit the configured input budget.
 *
 * - If the whole transcript fits, a single request is sent (no progress events).
 * - Otherwise each chunk is summarized in order, each request carrying the previous chunk's
 *   summary as context, and a final request merges all partial summaries.
 *
 * @param llm client used for every request (`ask` is awaited sequentially)
 * @param segments transcript segments, in order
 * @param config supplies `prompt` (template with a `%s` placeholder) and optional `maxInputChars`
 * @param speakerLabel label passed to `asText` and used when estimating segment length
 * @param onProgress called before each chunk request and before the synthesis request
 * @returns the model's answer for the single request or for the synthesis request
 */
export async function summarizeWithChunking(
  llm: Llm,
  segments: Segment[],
  config: LlmConfig,
  speakerLabel: string,
  onProgress?: (progress: ChunkingProgress) => void,
): Promise<string> {
  // Ignore unusable budgets (NaN, Infinity, zero, negative) instead of chunking with them.
  const configured = config.maxInputChars
  const maxInputChars =
    typeof configured === 'number' && Number.isFinite(configured) && configured > 0 ? configured : llmDefaultMaxInputChars

  const promptTemplate = config.prompt
  const promptOverhead = fillPromptTemplate(promptTemplate, '').length
  const remaining = maxInputChars - promptOverhead
  // If the prompt alone uses up the budget, a non-positive limit would turn every segment
  // into its own request. Fall back to the nominal budget so segments are still grouped.
  const maxCharsPerChunk = remaining > 0 ? remaining : maxInputChars

  const fullText = asText(segments, speakerLabel)

  // Fast path: the whole transcript fits in one request.
  if (fullText.length <= maxCharsPerChunk) {
    return llm.ask(fillPromptTemplate(promptTemplate, fullText))
  }

  const chunks = splitIntoChunks(segments, maxCharsPerChunk, speakerLabel)
  if (chunks.length <= 1) {
    // Cannot be split further (e.g. one oversized segment): send as-is.
    return llm.ask(fillPromptTemplate(promptTemplate, fullText))
  }

  // Sequential on purpose: each request needs the summary produced by the previous one.
  const partials: string[] = []
  let previousSummary: string | null = null
  for (const [index, chunk] of chunks.entries()) {
    onProgress?.({ phase: 'chunk', current: index + 1, total: chunks.length })
    const chunkText = asText(chunk, speakerLabel)
    const partial = await llm.ask(buildChunkPrompt(promptTemplate, chunkText, previousSummary, index, chunks.length))
    partials.push(partial)
    previousSummary = partial
  }

  onProgress?.({ phase: 'synthesis', total: chunks.length })
  return llm.ask(buildSynthesisPrompt(partials))
}

/*
 * Remaining patch text from these physical lines, kept verbatim:
 *
 * diff --git a/desktop/src/lib/llm/index.ts b/desktop/src/lib/llm/index.ts index 0d83d4d9..f64fb552 100644 --- a/desktop/src/lib/llm/index.ts +++ b/desktop/src/lib/llm/index.ts @@ -22,6 +22,8 @@ export interface LlmConfig { // OpenAI Compatible openaiBaseUrl?: string openaiApiKey?: string + + maxInputChars?: number // INPUT char budget per LLM request; triggers chunking when exceeded } export { Ollama, Claude, OpenAICompatible, defaultClaudeConfig, defaultOllamaConfig, defaultOpenAIConfig } diff --git a/desktop/src/pages/batch/view-model.tsx b/desktop/src/pages/batch/view-model.tsx index ccc2af1e..278fd7ef 100644 ---
 */
a/desktop/src/pages/batch/view-model.tsx +++ b/desktop/src/pages/batch/view-model.tsx @@ -20,7 +20,8 @@ import { usePreferenceProvider } from '~/providers/preference' import { useFilesContext } from '~/providers/files-provider' import { basename } from '@tauri-apps/api/path' import { Claude, Ollama, Llm, OpenAICompatible } from '~/lib/llm' -import * as transcript from '~/lib/transcript' +import { summarizeWithChunking } from '~/lib/llm/chunking' + import { path } from '@tauri-apps/api' import { toDocx } from '~/lib/docx' import { toast } from 'sonner' @@ -235,8 +236,7 @@ export function viewModel() { let llmSegments: Segment[] | null = null if (llm && preference.llmConfig?.enabled) { try { - const question = `${preference.llmConfig.prompt.replace('%s', transcript.asText(res.segments, speakerLabel))}` - const answer = await llm.ask(question) + const answer = await summarizeWithChunking(llm!, res.segments, preference.llmConfig, speakerLabel) if (answer) { llmSegments = [{ start: 0, stop: res.segments?.[res.segments?.length - 1].stop ?? 
/*
 * NOTE: this file is a flattened git patch. The patch text that shares this physical line
 * with the helper below is kept verbatim so nothing is lost (it includes the import that
 * brings `ChunkingProgress` into scope in home/view-model.ts):
 *
 * 0, text: answer }] } diff --git a/desktop/src/pages/home/view-model.ts b/desktop/src/pages/home/view-model.ts index 6dd7a28c..64fcce2f 100644 --- a/desktop/src/pages/home/view-model.ts +++ b/desktop/src/pages/home/view-model.ts @@ -18,6 +18,14 @@ import { ensureSystemAudioPermission } from '~/lib/permissions' import { analyticsEvents, trackAnalyticsEvent } from '~/lib/analytics' import * as config from '~/lib/config' import { Claude, Llm, Ollama, OpenAICompatible } from '~/lib/llm' +import { summarizeWithChunking, type ChunkingProgress } from '~/lib/llm/chunking' +
 */

/**
 * Maps a chunking progress event to the toast text shown to the user.
 *
 * @param p progress event reported by `summarizeWithChunking`
 * @param t translation function; receives the message key and, for chunk events, the
 *          `current`/`total` interpolation values
 * @returns the translated progress message
 */
// NOTE(review): the type arguments of `Record` were lost in the flattened patch;
// `<string, unknown>` is a reconstruction — confirm against the real file.
function summarizeProgressMessage(p: ChunkingProgress, t: (key: string, opts?: Record<string, unknown>) => string): string {
  switch (p.phase) {
    case 'chunk':
      return t('common.summarize-chunk-progress', { current: p.current, total: p.total })
    case 'synthesis':
      return t('common.summarize-synthesis')
  }
}

/*
 * Remaining patch text from this physical line, kept verbatim:
 *
 * import * as transcript from '~/lib/transcript' import { isUserError } from '~/lib/sona-errors' import { useConfirmExit } from '~/lib/use-confirm-exit' @@ -571,21 +579,17 @@ export function viewModel() { } if (newSegments && llm && preferenceRef.current.llmConfig?.enabled) { + const toastId = hotToast.loading(t('common.summarize-loading')) try { - const question = `${preferenceRef.current.llmConfig.prompt.replace('%s', transcript.asText(newSegments, t('common.speaker-prefix')))}` - const answerPromise = llm.ask(question) - hotToast.promise(answerPromise, { - loading: t('common.summarize-loading'), - error: (error) => { - return String(error) - }, - success: t('common.summarize-success'), + const answer = await summarizeWithChunking(llm, newSegments, preferenceRef.current.llmConfig, t('common.speaker-prefix'), (p) => { + hotToast.loading(summarizeProgressMessage(p, t), { id: toastId }) }) - const answer = await answerPromise + hotToast.success(t('common.summarize-success'), { id: toastId }) if (answer) { setSummarizeSegments([{ start: 0, stop: newSegments?.[newSegments?.length - 1].stop ??
 */
0, text: answer }]) } } catch (e) { + hotToast.error(String(e), { id: toastId }) console.error(e) } } @@ -596,20 +600,19 @@ export function viewModel() { async function resummarize(prompt: string) { if (!segments || !llm) return setSummarizing(true) + const toastId = hotToast.loading(t('common.summarize-loading')) try { - const question = prompt.replace('%s', transcript.asText(segments, t('common.speaker-prefix'))) - const answerPromise = llm.ask(question) - hotToast.promise(answerPromise, { - loading: t('common.summarize-loading'), - error: (error) => String(error), - success: t('common.summarize-success'), + const llmConfig = preferenceRef.current.llmConfig + const answer = await summarizeWithChunking(llm, segments, { ...llmConfig, prompt }, t('common.speaker-prefix'), (p) => { + hotToast.loading(summarizeProgressMessage(p, t), { id: toastId }) }) - const answer = await answerPromise + hotToast.success(t('common.summarize-success'), { id: toastId }) if (answer) { setSummarizeSegments([{ start: 0, stop: segments[segments.length - 1]?.stop ?? 0, text: answer }]) setTranscriptTab('summary') } } catch (e) { + hotToast.error(String(e), { id: toastId }) console.error(e) } finally { setSummarizing(false) diff --git a/desktop/src/providers/Toast.tsx b/desktop/src/providers/toast.tsx similarity index 100% rename from desktop/src/providers/Toast.tsx rename to desktop/src/providers/toast.tsx