Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 0 additions & 10 deletions desktop/pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions desktop/src-tauri/locales/en-US/common.json
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@
"max-speakers": "Max speakers",
"max-text-ctx": "Maximum context",
"max-tokens": "Max Tokens",
"max-input-chars": "Max Input Characters",
"info-max-input-chars": "Max characters sent to AI per request. Transcripts longer than this are split into chunks, each summarized separately, then merged. Default 24000 ≈ 6000 tokens (fits most local models).",
"microphone": "Microphone",
"modal-close": "Close",
"modal-error-body": "A bug happened!",
Expand Down Expand Up @@ -177,6 +179,8 @@
"stop-and-transcribe": "Stop Recording",
"success-action": "Operation succeeded.",
"summarize-loading": "Summarizing",
"summarize-chunk-progress": "Summarizing part {{current}} of {{total}}...",
"summarize-synthesis": "Merging summaries...",
"summarize-success": "Summarized",
"summary-tab": "Summary",
"support-the-project": "Support Vibe",
Expand Down
6 changes: 5 additions & 1 deletion desktop/src-tauri/locales/ru-RU/common.json
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
"info-max-speakers": "How many speakers should be in the file. Used for more precise recognition",
"info-max-text-ctx": "Max context tokens to use from the past text as prompt for the decoder",
"info-max-tokens": "Max tokens for the AI model. Each token is usually considered as a single word. This can save you from sending too much data which costs money. It's recommended to restrict it also in the website.",
"info-max-input-chars": "Максимальное количество символов, отправляемых ИИ за один запрос. Если транскрипт длиннее — он разбивается на части, каждая суммаризируется отдельно, затем результаты объединяются. По умолчанию 24000 ≈ 6000 токенов (подходит для большинства локальных моделей).",
"info-normalize-loudness": "Enable this option for better transcription accuracy. It may take up to 8 minutes per hour of audio.",
"info-prompt": "Make transcripts better by writing expected words.",
"info-recognize-speakers": "Detect speaker in each sentence and add it",
Expand All @@ -93,6 +94,7 @@
"max-speakers": "Максимальное количество говорящих",
"max-text-ctx": "Максимальный контекст",
"max-tokens": "Максимальное количество токенов",
"max-input-chars": "Максимум символов на запрос",
"microphone": "Микрофон",
"modal-close": "Закрыть",
"modal-error-body": "Произошла ошибка!",
Expand Down Expand Up @@ -144,7 +146,9 @@
"stop-and-transcribe": "Остановить запись",
"success-action": "Операция выполнена успешно.",
"summarize-loading": "Суммаризация",
"summarize-success": "Суммиризация завершена",
"summarize-chunk-progress": "Суммаризация части {{current}} из {{total}}...",
"summarize-synthesis": "Объединение частей...",
"summarize-success": "Суммаризация завершена",
"summary-tab": "Саммари",
"support-the-project": "Поддержать Vibe",
"temp-folder": "Временные файлы",
Expand Down
2 changes: 1 addition & 1 deletion desktop/src-tauri/src/cmd/audio.rs
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ fn get_output_device_and_config(host: &cpal::Host, audio_device: &AudioDevice) -
let config = device
.default_output_config()
.context("Failed to get default output config")?;
return Ok((device, config));
Ok((device, config))
}

#[cfg(not(target_os = "macos"))]
Expand Down
2 changes: 1 addition & 1 deletion desktop/src-tauri/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ async fn main() -> Result<()> {
.plugin(tauri_plugin_dialog::init())
.plugin(tauri_plugin_updater::Builder::default().build())
.plugin(tauri_plugin_process::init())
.plugin(tauri_plugin_global_shortcut::Builder::new().build())
.plugin(tauri_plugin_global_shortcut::Builder::new().build())
.plugin(tauri_plugin_notification::init());

if analytics::is_aptabase_configured() {
Expand Down
15 changes: 15 additions & 0 deletions desktop/src/components/params.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,21 @@ export default function ModelOptions({ options, setOptions }: ParamsProps) {
/>
</Field>

<Field
label={
<>
<InfoTooltip text={t('common.info-max-input-chars')} />
{t('common.max-input-chars')}
</>
}>
<Input
type="number"
placeholder={String(config.llmDefaultMaxInputChars)}
value={llmConfig?.maxInputChars ?? config.llmDefaultMaxInputChars}
onChange={(e) => setLlmConfig({ ...llmConfig, maxInputChars: Number(e.target.value) || config.llmDefaultMaxInputChars })}
/>
</Field>

<Button onClick={checkLlm} size="sm" className="w-full">
{t('common.run-llm-check')}
</Button>
Expand Down
1 change: 1 addition & 0 deletions desktop/src/lib/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ export const vadModelUrl = 'https://huggingface.co/ggml-org/whisper-vad/resolve/

export const llmApiKeyUrl = 'https://console.anthropic.com/settings/keys'
export const llmDefaultMaxTokens = 8192 // https://docs.anthropic.com/en/docs/about-claude/models
export const llmDefaultMaxInputChars = 24_000 // 24000 chars ≈ 6000 tokens — safe for small local models
export const llmLimitsUrl = 'https://console.anthropic.com/settings/limits'
export const llmCostUrl = 'https://console.anthropic.com/settings/cost'

Expand Down
102 changes: 102 additions & 0 deletions desktop/src/lib/llm/chunking.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import { llmDefaultMaxInputChars } from '~/lib/config'
import { type Llm, type LlmConfig } from '~/lib/llm'
import { asText, type Segment } from '~/lib/transcript'

export type ChunkingProgress = { phase: 'chunk'; current: number; total: number } | { phase: 'synthesis'; total: number }

/**
 * Greedily packs transcript segments into chunks whose rendered text stays
 * within `maxCharsPerChunk` characters. Segment order is preserved and no
 * segment is ever split; a segment that alone exceeds the budget is emitted
 * as its own one-segment chunk.
 *
 * NOTE(review): the per-segment length estimate mirrors the speaker-prefix
 * format (`[<label> <n>] <text>\n`) — presumably the same shape `asText`
 * produces; confirm they stay in sync if the transcript renderer changes.
 */
function splitIntoChunks(segments: Segment[], maxCharsPerChunk: number, speakerLabel: string): Segment[][] {
  const result: Segment[][] = []
  let batch: Segment[] = []
  let batchChars = 0

  // Close out the in-progress chunk, if any.
  const flush = () => {
    if (batch.length > 0) {
      result.push(batch)
      batch = []
      batchChars = 0
    }
  }

  for (const seg of segments) {
    const prefix = seg.speaker != null ? `[${speakerLabel} ${seg.speaker + 1}] ` : ''
    const renderedLen = (prefix + seg.text.trim() + '\n').length

    // A segment too large to ever fit gets a chunk of its own.
    if (renderedLen > maxCharsPerChunk) {
      flush()
      result.push([seg])
      continue
    }

    // Start a new chunk when this segment would overflow the current one.
    if (batchChars + renderedLen > maxCharsPerChunk && batch.length > 0) {
      flush()
    }
    batch.push(seg)
    batchChars += renderedLen
  }
  flush()
  return result
}

/**
 * Builds the per-chunk prompt. For the first chunk this is just the user's
 * template with the chunk text substituted for `%s`; later chunks are
 * prefixed with the previous chunk's summary as rolling context so the model
 * keeps continuity across the split transcript.
 *
 * The substitution uses a replacer function so `$`-sequences in the
 * transcript (`$&`, `$'`, `$$`, …) are inserted literally instead of being
 * expanded as regex replacement patterns by `String.prototype.replace`.
 *
 * @param promptTemplate user prompt containing a `%s` placeholder
 * @param chunkText rendered transcript text for this chunk
 * @param previousSummary summary of prior chunks, or null for the first chunk
 * @param chunkIndex zero-based index of this chunk
 * @param totalChunks total number of chunks
 */
function buildChunkPrompt(promptTemplate: string, chunkText: string, previousSummary: string | null, chunkIndex: number, totalChunks: number): string {
  const base = promptTemplate.replace('%s', () => chunkText)
  if (!previousSummary) {
    return base
  }
  return `Summary of previous sections (use as context, do not repeat verbatim):\n${previousSummary}\n\nNow summarize section ${chunkIndex + 1} of ${totalChunks}:\n${base}`
}

/**
 * Builds the final "merge" prompt that asks the model to fold the partial
 * per-chunk summaries into one coherent summary of the whole transcript.
 * Each partial is rendered under a `### Part N` markdown heading.
 */
function buildSynthesisPrompt(partials: string[]): string {
  const sections: string[] = []
  partials.forEach((summary, index) => {
    sections.push(`### Part ${index + 1}\n${summary}`)
  })
  const header = [
    `You are combining ${partials.length} partial summaries of consecutive sections of a single transcript into one coherent final summary.`,
    '',
    'Synthesize them into a unified summary that:',
    '- Preserves all key topics, decisions, and action items from every part',
    '- Eliminates repetition',
    '- Follows the same markdown format as the partial summaries',
    '- Reads as if the entire transcript were summarized in one pass',
    '',
    '',
  ].join('\n')
  return header + sections.join('\n\n')
}

/**
 * Summarizes a transcript with the configured LLM, transparently splitting it
 * into chunks when the rendered text exceeds the per-request character budget.
 *
 * Fast path: if the whole transcript fits in one request, a single `llm.ask`
 * preserves the pre-chunking behavior. Otherwise each chunk is summarized
 * sequentially (the previous partial summary is carried forward as rolling
 * context) and the partials are merged by one final synthesis request.
 *
 * @param llm configured LLM client
 * @param segments transcript segments to summarize
 * @param config LLM config; `prompt` must contain a `%s` placeholder and
 *   `maxInputChars` falls back to `llmDefaultMaxInputChars`
 * @param speakerLabel localized speaker-prefix label used when rendering text
 * @param onProgress optional callback reporting per-chunk / synthesis progress
 * @returns the final summary text
 */
export async function summarizeWithChunking(
  llm: Llm,
  segments: Segment[],
  config: LlmConfig,
  speakerLabel: string,
  onProgress?: (progress: ChunkingProgress) => void,
): Promise<string> {
  const maxInputChars = config.maxInputChars ?? llmDefaultMaxInputChars
  const promptTemplate = config.prompt
  const promptOverhead = promptTemplate.replace('%s', '').length
  // Clamp so a prompt template longer than the whole budget cannot yield a
  // zero/negative chunk size (which would degenerate into one chunk per segment).
  const maxCharsPerChunk = Math.max(1, maxInputChars - promptOverhead)

  const fullText = asText(segments, speakerLabel)

  // Replacer function so `$`-sequences in the transcript ($&, $', $$, …) are
  // inserted literally instead of being expanded as replacement patterns.
  const fillTemplate = (text: string) => promptTemplate.replace('%s', () => text)

  // Fast path — fits in one request (pre-chunking behavior)
  if (fullText.length <= maxCharsPerChunk) {
    return llm.ask(fillTemplate(fullText))
  }

  // Chunk mode
  const chunks = splitIntoChunks(segments, maxCharsPerChunk, speakerLabel)

  if (chunks.length <= 1) {
    // Edge case: can't split further, send as-is
    return llm.ask(fillTemplate(fullText))
  }

  // Summarize each chunk sequentially, passing the previous summary as rolling context
  const partials: string[] = []
  let previousSummary: string | null = null
  for (let i = 0; i < chunks.length; i++) {
    onProgress?.({ phase: 'chunk', current: i + 1, total: chunks.length })
    const chunkText = asText(chunks[i], speakerLabel)
    const prompt = buildChunkPrompt(promptTemplate, chunkText, previousSummary, i, chunks.length)
    const partial = await llm.ask(prompt)
    partials.push(partial)
    previousSummary = partial
  }

  // Synthesize all partials into a single coherent summary
  onProgress?.({ phase: 'synthesis', total: chunks.length })
  return llm.ask(buildSynthesisPrompt(partials))
}
2 changes: 2 additions & 0 deletions desktop/src/lib/llm/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ export interface LlmConfig {
// OpenAI Compatible
openaiBaseUrl?: string
openaiApiKey?: string

maxInputChars?: number // INPUT char budget per LLM request; triggers chunking when exceeded
}

export { Ollama, Claude, OpenAICompatible, defaultClaudeConfig, defaultOllamaConfig, defaultOpenAIConfig }
6 changes: 3 additions & 3 deletions desktop/src/pages/batch/view-model.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ import { usePreferenceProvider } from '~/providers/preference'
import { useFilesContext } from '~/providers/files-provider'
import { basename } from '@tauri-apps/api/path'
import { Claude, Ollama, Llm, OpenAICompatible } from '~/lib/llm'
import * as transcript from '~/lib/transcript'
import { summarizeWithChunking } from '~/lib/llm/chunking'

import { path } from '@tauri-apps/api'
import { toDocx } from '~/lib/docx'
import { toast } from 'sonner'
Expand Down Expand Up @@ -235,8 +236,7 @@ export function viewModel() {
let llmSegments: Segment[] | null = null
if (llm && preference.llmConfig?.enabled) {
try {
const question = `${preference.llmConfig.prompt.replace('%s', transcript.asText(res.segments, speakerLabel))}`
const answer = await llm.ask(question)
const answer = await summarizeWithChunking(llm!, res.segments, preference.llmConfig, speakerLabel)
if (answer) {
llmSegments = [{ start: 0, stop: res.segments?.[res.segments?.length - 1].stop ?? 0, text: answer }]
}
Expand Down
35 changes: 19 additions & 16 deletions desktop/src/pages/home/view-model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,14 @@ import { ensureSystemAudioPermission } from '~/lib/permissions'
import { analyticsEvents, trackAnalyticsEvent } from '~/lib/analytics'
import * as config from '~/lib/config'
import { Claude, Llm, Ollama, OpenAICompatible } from '~/lib/llm'
import { summarizeWithChunking, type ChunkingProgress } from '~/lib/llm/chunking'

// Maps a chunking progress event to its localized toast message:
// per-chunk events interpolate current/total; synthesis has a fixed key.
function summarizeProgressMessage(p: ChunkingProgress, t: (key: string, opts?: Record<string, unknown>) => string): string {
	return p.phase === 'chunk' ? t('common.summarize-chunk-progress', { current: p.current, total: p.total }) : t('common.summarize-synthesis')
}
import * as transcript from '~/lib/transcript'
import { isUserError } from '~/lib/sona-errors'
import { useConfirmExit } from '~/lib/use-confirm-exit'
Expand Down Expand Up @@ -571,21 +579,17 @@ export function viewModel() {
}

if (newSegments && llm && preferenceRef.current.llmConfig?.enabled) {
const toastId = hotToast.loading(t('common.summarize-loading'))
try {
const question = `${preferenceRef.current.llmConfig.prompt.replace('%s', transcript.asText(newSegments, t('common.speaker-prefix')))}`
const answerPromise = llm.ask(question)
hotToast.promise(answerPromise, {
loading: t('common.summarize-loading'),
error: (error) => {
return String(error)
},
success: t('common.summarize-success'),
const answer = await summarizeWithChunking(llm, newSegments, preferenceRef.current.llmConfig, t('common.speaker-prefix'), (p) => {
hotToast.loading(summarizeProgressMessage(p, t), { id: toastId })
})
const answer = await answerPromise
hotToast.success(t('common.summarize-success'), { id: toastId })
if (answer) {
setSummarizeSegments([{ start: 0, stop: newSegments?.[newSegments?.length - 1].stop ?? 0, text: answer }])
}
} catch (e) {
hotToast.error(String(e), { id: toastId })
console.error(e)
}
}
Expand All @@ -596,20 +600,19 @@ export function viewModel() {
async function resummarize(prompt: string) {
if (!segments || !llm) return
setSummarizing(true)
const toastId = hotToast.loading(t('common.summarize-loading'))
try {
const question = prompt.replace('%s', transcript.asText(segments, t('common.speaker-prefix')))
const answerPromise = llm.ask(question)
hotToast.promise(answerPromise, {
loading: t('common.summarize-loading'),
error: (error) => String(error),
success: t('common.summarize-success'),
const llmConfig = preferenceRef.current.llmConfig
const answer = await summarizeWithChunking(llm, segments, { ...llmConfig, prompt }, t('common.speaker-prefix'), (p) => {
hotToast.loading(summarizeProgressMessage(p, t), { id: toastId })
})
const answer = await answerPromise
hotToast.success(t('common.summarize-success'), { id: toastId })
if (answer) {
setSummarizeSegments([{ start: 0, stop: segments[segments.length - 1]?.stop ?? 0, text: answer }])
setTranscriptTab('summary')
}
} catch (e) {
hotToast.error(String(e), { id: toastId })
console.error(e)
} finally {
setSummarizing(false)
Expand Down
File renamed without changes.
Loading