Skip to content
Open
Show file tree
Hide file tree
Changes from 46 commits
Commits
Show all changes
78 commits
Select commit Hold shift + click to select a range
135dbd9
feat: add native voice input support (Gemini zero-install + Whisper)
fayerman-source Feb 28, 2026
82a93d6
fix(voice): replace Alt+R/Ctrl+Q with double-space trigger; revert pa…
fayerman-source Mar 3, 2026
deb7f73
fix(voice): enable voice appending and add Ctrl+Space shortcut
fayerman-source Mar 3, 2026
97b657c
fix(voice): resolve Ink whitespace rendering crash
fayerman-source Mar 3, 2026
3c1f6bf
fix(voice): consume double-space characters properly when appending
fayerman-source Mar 3, 2026
5a44007
fix(voice): prevent voice toggle on continuous spacebar hold
fayerman-source Mar 3, 2026
6079a04
test(voice): use createMockConfig utility to fix strict typing violation
fayerman-source Mar 3, 2026
ea161cf
fix(voice): remove Ctrl+Space shortcut per reviewer feedback
fayerman-source Mar 3, 2026
933af01
fix(voice): update stale Alt+R/Ctrl+Q references in docs and UX strings
fayerman-source Mar 3, 2026
0c21255
fix(voice): cleanup duplicate core exports and use default flash mode…
fayerman-source Mar 3, 2026
1736fcd
fix(voice): eliminate layout shift and prevent misfires on hold
fayerman-source Mar 4, 2026
3bd199e
chore: trigger CI build
fayerman-source Mar 4, 2026
86b703e
Merge branch 'main' into feat/voice-input-clean
fayerman-source Mar 4, 2026
bcd63c3
Merge main into feat/voice-input-clean, resolving snapshot conflict
fayerman-source Mar 6, 2026
21fdfc6
Merge main into feat/voice-input-clean, resolve extensionsCommand con…
fayerman-source Mar 8, 2026
7c98772
fix(ci): resolve linting errors and sync documentation
fayerman-source Mar 9, 2026
ff0b985
Merge main into feat/voice-input-clean, resolving BuiltinCommandLoade…
fayerman-source Mar 9, 2026
3d26763
test(voice): update keyMatchers test to match new voice input behavior
fayerman-source Mar 10, 2026
cee638f
Merge main into feat/voice-input-clean, resolving keybinding and inpu…
fayerman-source Mar 10, 2026
24c72b0
Merge branch 'main' into feat/voice-input-clean
jacob314 Mar 10, 2026
911eaf6
fix(cli): restore non-interactive voice input flows
fayerman-source Mar 10, 2026
bc05702
docs(cli): refresh voice input references
fayerman-source Mar 10, 2026
bd1665f
Merge branch 'main' into feat/voice-input-clean
fayerman-source Mar 10, 2026
020b1da
fix(cli): resolve keybindings merge drift
fayerman-source Mar 11, 2026
1082384
Merge remote-tracking branch 'upstream/main' into feat/voice-input-clean
fayerman-source Mar 11, 2026
4ac4fd8
Merge remote-tracking branch 'upstream/main' into feat/voice-input-clean
fayerman-source Mar 11, 2026
f58ed15
Merge branch 'main' into feat/voice-input-clean
fayerman-source Mar 11, 2026
694f6d8
Merge branch 'main' into feat/voice-input-clean
fayerman-source Mar 11, 2026
e92cc6e
fix: address voice input review feedback and sync docs
fayerman-source Mar 11, 2026
5392617
Merge origin/main into feat/voice-input-clean
fayerman-source Mar 11, 2026
f59415e
fix(voice): resolve regressions in transcription insertion and proces…
fayerman-source Mar 12, 2026
365fdd1
Merge branch 'main' into feat/voice-input-clean
fayerman-source Mar 12, 2026
a48c73d
chore: remove accidentally committed broken ink snapshots and restore…
fayerman-source Mar 12, 2026
4546044
fix(voice): handle spaces in whisper path and ensure non-interactive …
fayerman-source Mar 12, 2026
b296dd5
Merge remote-tracking branch 'fork/feat/voice-input-clean' into feat/…
fayerman-source Mar 12, 2026
e71bc52
Merge branch 'main' into feat/voice-input-clean
fayerman-source Mar 12, 2026
656531b
fix(cli): print voice slash command output in headless mode
fayerman-source Mar 12, 2026
2eb05cd
Merge branch 'main' into feat/voice-input-clean
fayerman-source Mar 12, 2026
c71e42a
Merge branch 'main' into feat/voice-input-clean
fayerman-source Mar 12, 2026
faa6581
Merge branch 'main' into feat/voice-input-clean
fayerman-source Mar 13, 2026
d6b147b
Merge branch 'main' into feat/voice-input-clean
fayerman-source Mar 13, 2026
10e32ce
Merge branch 'main' into feat/voice-input-clean
fayerman-source Mar 14, 2026
34c7a0f
Merge branch 'main' into feat/voice-input-clean
fayerman-source Mar 15, 2026
eb1757e
Merge branch 'main' into feat/voice-input-clean
fayerman-source Mar 16, 2026
19e4b56
Merge branch 'main' into feat/voice-input-clean
fayerman-source Mar 16, 2026
9353a19
Merge branch 'main' into feat/voice-input-clean
fayerman-source Mar 17, 2026
96bee19
fix(voice): make cancellation non-blocking and add SIGKILL fallback
fayerman-source Mar 18, 2026
c0763b5
fix(cli): missing FinishReason members
fayerman-source Mar 18, 2026
91b174a
Merge branch 'main' into feat/voice-input-clean
fayerman-source Mar 18, 2026
e316d6e
Merge branch 'main' into feat/voice-input-clean
fayerman-source Mar 18, 2026
da0d3b5
Merge branch 'main' into feat/voice-input-clean
fayerman-source Mar 18, 2026
1677dae
Merge upstream/main and fix conflicts + lint
fayerman-source Mar 19, 2026
27e7c82
Merge upstream/main and fix non-interactive command conflict
fayerman-source Mar 19, 2026
3078d2a
fix(cli): resolve FinishReason build error and missing bundle depende…
fayerman-source Mar 19, 2026
f00d05c
Merge branch 'main' into feat/voice-input-clean
fayerman-source Mar 20, 2026
1fbc052
docs: regenerate settings documentation and schema
fayerman-source Mar 20, 2026
615ce40
Merge branch 'main' into feat/voice-input-clean
fayerman-source Mar 21, 2026
fc5aff7
Merge branch 'main' into feat/voice-input-clean
fayerman-source Mar 21, 2026
2f89ac2
Merge branch 'main' into feat/voice-input-clean
fayerman-source Mar 22, 2026
414674f
Merge branch 'main' into feat/voice-input-clean
fayerman-source Mar 23, 2026
d2ed487
fix(cli): update voice tests for async renderHook
fayerman-source Mar 23, 2026
c87dc06
Merge branch 'main' into feat/voice-input-clean
fayerman-source Mar 23, 2026
52b8352
Merge upstream/main and resolve conflicts
fayerman-source Mar 25, 2026
77b5dbe
chore(pr): drop accidental local artifacts from voice diff
fayerman-source Mar 25, 2026
94739e1
fix(voice): preserve final audio chunk on stop
fayerman-source Mar 25, 2026
015d28e
Merge branch 'main' into feat/voice-input-clean
fayerman-source Mar 25, 2026
a2f98f5
chore(pr): trim unrelated voice diff noise
fayerman-source Mar 25, 2026
25b4781
fix(ci): resolve voice input lint and test drift
fayerman-source Mar 25, 2026
0fab703
Merge branch 'main' into feat/voice-input-clean
fayerman-source Mar 25, 2026
644d4dc
Merge branch 'main' into feat/voice-input-clean
fayerman-source Mar 25, 2026
b3305c5
Merge branch 'main' into feat/voice-input-clean
fayerman-source Mar 25, 2026
e9a3156
Merge remote-tracking branch 'upstream/main' into feat/voice-input-clean
fayerman-source Mar 27, 2026
70a4a4c
Merge branch 'main' into feat/voice-input-clean
fayerman-source Mar 27, 2026
1b28c60
Merge remote-tracking branch 'upstream/main' into feat/voice-input-clean
fayerman-source Mar 28, 2026
c8ffca7
Merge upstream/main and resolve conflicts
fayerman-source Apr 1, 2026
a05117f
fix(cli): convert dynamic devtoolsService imports to static for robus…
fayerman-source Apr 1, 2026
a72d219
Merge remote-tracking branch 'upstream/main' into feat/voice-input-clean
fayerman-source Apr 4, 2026
d77d640
Merge branch 'main' into feat/voice-input-clean
fayerman-source Apr 4, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions docs/cli/settings.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,15 @@ they appear in the UI.
| Error Verbosity | `ui.errorVerbosity` | Controls whether recoverable errors are hidden (low) or fully shown (full). | `"low"` |
| Screen Reader Mode | `ui.accessibility.screenReader` | Render output in plain-text to be more screen reader accessible | `false` |

### Voice

| UI Label | Setting | Description | Default |
| --------------------------- | ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----------- |
| Enable Voice Input | `voice.enabled` | Enable voice input support. | `false` |
| Transcription Backend | `voice.provider` | Transcription backend: "gemini" (default, zero-install) or "whisper" (local). | `"gemini"` |
| Whisper Binary Path | `voice.whisperPath` | Path to the whisper executable. Only used when provider is "whisper". | `undefined` |
| Silence Detection Threshold | `voice.silenceThreshold` | RMS energy threshold (0–1000) below which audio is discarded as silence. Lower values allow quieter speech such as whispering. 0 disables silence detection. | `80` |

### IDE

| UI Label | Setting | Description | Default |
Expand Down
25 changes: 25 additions & 0 deletions docs/reference/commands.md
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,31 @@ Slash commands provide meta-level control over the CLI itself.
- **Description:** List configured MCP servers and tools with descriptions
and schemas.

### `/voice`

- **Description:** Manage voice input configuration and inspect current voice
settings.
- **Shortcuts:** Press **Space Space** on an empty prompt to start or stop
recording. Press **Esc** while recording to cancel.
- **Sub-commands:**
- **`enable`**:
- **Description:** Enable voice input.
- **`disable`**:
- **Description:** Disable voice input.
- **`provider [gemini|whisper]`**:
- **Description:** Set the transcription backend.
- **`sensitivity <0-1000>`**:
- **Description:** Set the silence detection threshold. `0` disables silence
filtering.
- **`set-path <path>`**:
- **Description:** Set the path to the Whisper binary when using the local
Whisper backend.
- **`help`**:
- **Description:** Show voice command help.
- **Default behavior:**
- **Description:** Running `/voice` with no sub-command shows the current
voice settings.

### `/memory`

- **Description:** Manage the AI's instructional context (hierarchical memory
Expand Down
28 changes: 28 additions & 0 deletions docs/reference/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,34 @@ their corresponding top-level category object in your `settings.json` file.
- **Default:** `false`
- **Requires restart:** Yes

#### `voice`

**Note:** Voice input is not natively supported in WSL2 (Windows Subsystem for
Linux) due to lack of microphone device pass-through. If you are using Windows,
please run Gemini CLI natively in PowerShell or Command Prompt to use voice
input.

- **`voice.enabled`** (boolean):
- **Description:** Enable voice input support.
- **Default:** `false`

- **`voice.provider`** (enum):
- **Description:** Transcription backend: "gemini" (default, zero-install) or
"whisper" (local).
- **Default:** `"gemini"`
- **Values:** `"gemini"`, `"whisper"`

- **`voice.whisperPath`** (string):
- **Description:** Path to the whisper executable. Only used when provider is
"whisper".
- **Default:** `undefined`

- **`voice.silenceThreshold`** (number):
- **Description:** RMS energy threshold (0–1000) below which audio is
discarded as silence. Lower values allow quieter speech such as whispering.
0 disables silence detection.
- **Default:** `80`

#### `ide`

- **`ide.enabled`** (boolean):
Expand Down
57 changes: 57 additions & 0 deletions packages/cli/src/config/settingsSchema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -816,6 +816,63 @@ const SETTINGS_SCHEMA = {
},
},

voice: {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's add an additional setting that indicates whether voice support is enabled.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done — voice.enabled is implemented in settingsSchema.ts and read in AppContainer.tsx. When false, the backend is not initialized and keyboard shortcuts are suppressed entirely (the key handler now checks isEnabled before consuming the keypress).

type: 'object',
label: 'Voice Input',
category: 'General',
requiresRestart: false,
default: {},
description:
'Settings for voice input. Note: Voice input is not natively supported in WSL2 (Windows Subsystem for Linux).',
showInDialog: false,
properties: {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to add `showInDialog: false` here;
otherwise there is a distracting entry for Voice Input with type Object.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done — added showInDialog: false to the root voice object in settingsSchema.ts and regenerated schemas/settings.schema.json.

enabled: {
type: 'boolean',
label: 'Enable Voice Input',
category: 'General',
requiresRestart: false,
default: false,
description: 'Enable voice input support.',
showInDialog: true,
},
provider: {
type: 'enum',
label: 'Transcription Backend',
category: 'General',
requiresRestart: false,
default: 'gemini',
description:
'Transcription backend: "gemini" (default, zero-install) or "whisper" (local).',
showInDialog: true,
options: [
{ value: 'gemini', label: 'Gemini (Cloud)' },
{ value: 'whisper', label: 'Whisper (Local)' },
],
},
whisperPath: {
type: 'string',
label: 'Whisper Binary Path',
category: 'General',
requiresRestart: false,
default: undefined as string | undefined,
description:
'Path to the whisper executable. Only used when provider is "whisper".',
showInDialog: true,
},
silenceThreshold: {
type: 'number',
label: 'Silence Detection Threshold',
category: 'General',
requiresRestart: false,
default: 80,
description:
'RMS energy threshold (0–1000) below which audio is discarded as silence. ' +
'Lower values allow quieter speech such as whispering. 0 disables silence detection.',
showInDialog: true,
},
},
},

ide: {
type: 'object',
label: 'IDE',
Expand Down
84 changes: 84 additions & 0 deletions packages/cli/src/integration-tests/voiceWhisperPtyRepro.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/

import { afterEach, describe, expect, it } from 'vitest';
import { TestRig } from '@google/gemini-cli-test-utils';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import stripAnsi from 'strip-ansi';

const __dirname = path.dirname(fileURLToPath(import.meta.url));

/**
 * Reproduction scenario for voice input with the whisper provider inside a
 * real PTY session. A fake `sox` recorder that ignores SIGINT is placed first
 * on PATH, recording is started with a double space, Esc is pressed, and the
 * captured terminal output is inspected for the "Speak now..." prompt.
 *
 * The suite is skipped on Windows.
 */
describe.skipIf(process.platform === 'win32')('Voice Whisper PTY repro', () => {
  let rig: TestRig | undefined;

  /** Resolves after `ms` milliseconds; used to space out simulated key presses. */
  const pause = (ms: number) =>
    new Promise((resolve) => setTimeout(resolve, ms));

  afterEach(async () => {
    // `rig` is only assigned once the test body has started, hence the `?.`.
    await rig?.cleanup();
  });

  it('repro: whisper can remain stuck in "Speak now..." after Esc in a real PTY session', async () => {
    const activeRig = new TestRig();
    rig = activeRig; // expose the rig to afterEach for cleanup

    activeRig.setup('voice-whisper-pty-repro', {
      fakeResponsesPath: path.join(
        __dirname,
        '../test-utils/fixtures/simple.responses',
      ),
    });

    // Node script that swallows SIGINT and keeps its event loop alive forever.
    const sigintIgnoringScript = activeRig.createScript(
      'ignore-sigint.js',
      "process.on('SIGINT', () => {});\n" + 'setInterval(() => {}, 1000);\n',
    );

    // Fake recorder: a `sox` shim that execs the SIGINT-ignoring script.
    activeRig.createScript(
      'sox',
      '#!/usr/bin/env bash\n' +
        `exec "${process.execPath}" "${sigintIgnoringScript}" "$@"\n`,
    );

    // Make the fake recorder executable.
    const { chmod } = await import('node:fs/promises');
    await chmod(path.join(activeRig.testDir!, 'sox'), 0o755);

    // Prepend the test directory to PATH so the shim is found before any real sox.
    const session = await activeRig.runInteractive({
      env: {
        PATH: `${activeRig.testDir}:${process.env['PATH'] ?? ''}`,
        GEMINI_API_KEY: 'test-key',
      },
    });

    /** Types a slash command, waits briefly, then presses Enter. */
    const submitCommand = async (command: string) => {
      await session.sendKeys(command);
      await pause(75);
      await session.sendKeys('\r');
    };

    await submitCommand('/voice provider whisper');
    await session.expectText(
      'Voice transcription backend set to: whisper',
      10000,
    );

    await submitCommand('/voice enable');
    await session.expectText('Voice input enabled', 10000);

    // Two separate space presses, 120 ms apart, trigger recording.
    await session.sendText(' ');
    await pause(120);
    await session.sendText(' ');
    await session.expectText('Speak now...', 10000);

    // Press Esc, then give the UI half a second to react.
    await session.sendText('\u001B');
    await pause(500);

    // NOTE(review): if `run.output` accumulates everything printed so far,
    // this check is already guaranteed by the expectText above and cannot
    // tell a stuck prompt from a recovered one — confirm against TestRig.
    const plainOutput = stripAnsi(session.output);
    expect(plainOutput).toContain('Speak now...');
  }, 60000);
});
Loading