google-gemini · fayerman-source · Feb 28, 2026 · Mar 3, 2026 · Mar 3, 2026 · Mar 3, 2026
@@ -77,6 +77,15 @@ they appear in the UI.
 | Error Verbosity                      | `ui.errorVerbosity`                    | Controls whether recoverable errors are hidden (low) or fully shown (full).                                                                                       | `"low"`  |
 | Screen Reader Mode                   | `ui.accessibility.screenReader`        | Render output in plain-text to be more screen reader accessible                                                                                                   | `false`  |
 
+### Voice
+
+| UI Label                    | Setting                  | Description                                                                                                                                                  | Default     |
+| --------------------------- | ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----------- |
+| Enable Voice Input          | `voice.enabled`          | Enable voice input support.                                                                                                                                  | `false`     |
+| Transcription Backend       | `voice.provider`         | Transcription backend: "gemini" (default, zero-install) or "whisper" (local).                                                                                | `"gemini"`  |
+| Whisper Binary Path         | `voice.whisperPath`      | Path to the whisper executable. Only used when provider is "whisper".                                                                                        | `undefined` |
+| Silence Detection Threshold | `voice.silenceThreshold` | RMS energy threshold (0–1000) below which audio is discarded as silence. Lower values allow quieter speech such as whispering. 0 disables silence detection. | `80`        |
+
 ### IDE
 
 | UI Label | Setting       | Description                  | Default |

@@ -257,6 +257,31 @@ Slash commands provide meta-level control over the CLI itself.
     - **Description:** List configured MCP servers and tools with descriptions
       and schemas.
 
+### `/voice`
+
+- **Description:** Manage voice input configuration and inspect current voice
+  settings.
+- **Shortcuts:** Press **Space Space** on an empty prompt to start or stop
+  recording. Press **Esc** while recording to cancel.
+- **Sub-commands:**
+  - **`enable`**:
+    - **Description:** Enable voice input.
+  - **`disable`**:
+    - **Description:** Disable voice input.
+  - **`provider [gemini|whisper]`**:
+    - **Description:** Set the transcription backend.
+  - **`sensitivity <0-1000>`**:
+    - **Description:** Set the silence detection threshold. Lower values allow
+      quieter speech such as whispering. `0` disables silence filtering.
+  - **`set-path <path>`**:
+    - **Description:** Set the path to the Whisper binary when using the local
+      Whisper backend.
+  - **`help`**:
+    - **Description:** Show voice command help.
+  - **Default behavior:**
+    - **Description:** Running `/voice` with no sub-command shows the current
+      voice settings.
+
 ### `/memory`
 
 - **Description:** Manage the AI's instructional context (hierarchical memory

@@ -363,6 +363,34 @@ their corresponding top-level category object in your `settings.json` file.
   - **Default:** `false`
   - **Requires restart:** Yes
 
+#### `voice`
+
+**Note:** Voice input is not natively supported in WSL2 (Windows Subsystem for
+Linux) because WSL2 lacks microphone device pass-through. If you are using
+Windows, run Gemini CLI natively in PowerShell or Command Prompt to use voice
+input.
+
+- **`voice.enabled`** (boolean):
+  - **Description:** Enable voice input support.
+  - **Default:** `false`
+
+- **`voice.provider`** (enum):
+  - **Description:** Transcription backend: "gemini" (default, zero-install) or
+    "whisper" (local).
+  - **Default:** `"gemini"`
+  - **Values:** `"gemini"`, `"whisper"`
+
+- **`voice.whisperPath`** (string):
+  - **Description:** Path to the whisper executable. Only used when provider is
+    "whisper".
+  - **Default:** `undefined`
+
+- **`voice.silenceThreshold`** (number):
+  - **Description:** RMS energy threshold (0–1000) below which audio is
+    discarded as silence. Lower values allow quieter speech such as whispering.
+    0 disables silence detection.
+  - **Default:** `80`
+
 #### `ide`
 
 - **`ide.enabled`** (boolean):

@@ -816,6 +816,63 @@ const SETTINGS_SCHEMA = {
     },
   },
 
+  voice: {
+    type: 'object',
+    label: 'Voice Input',
+    category: 'General',
+    requiresRestart: false,
+    default: {},
+    description:
+      'Settings for voice input. Note: Voice input is not natively supported in WSL2 (Windows Subsystem for Linux).',
+    showInDialog: false,
+    properties: {
+      enabled: {
+        type: 'boolean',
+        label: 'Enable Voice Input',
+        category: 'General',
+        requiresRestart: false,
+        default: false,
+        description: 'Enable voice input support.',
+        showInDialog: true,
+      },
+      provider: {
+        type: 'enum',
+        label: 'Transcription Backend',
+        category: 'General',
+        requiresRestart: false,
+        default: 'gemini',
+        description:
+          'Transcription backend: "gemini" (default, zero-install) or "whisper" (local).',
+        showInDialog: true,
+        options: [
+          { value: 'gemini', label: 'Gemini (Cloud)' },
+          { value: 'whisper', label: 'Whisper (Local)' },
+        ],
+      },
+      whisperPath: {
+        type: 'string',
+        label: 'Whisper Binary Path',
+        category: 'General',
+        requiresRestart: false,
+        default: undefined as string | undefined,
+        description:
+          'Path to the whisper executable. Only used when provider is "whisper".',
+        showInDialog: true,
+      },
+      silenceThreshold: {
+        type: 'number',
+        label: 'Silence Detection Threshold',
+        category: 'General',
+        requiresRestart: false,
+        default: 80,
+        description:
+          'RMS energy threshold (0–1000) below which audio is discarded as silence. ' +
+          'Lower values allow quieter speech such as whispering. 0 disables silence detection.',
+        showInDialog: true,
+      },
+    },
+  },
+
   ide: {
     type: 'object',
     label: 'IDE',

diff --git a/packages/cli/src/integration-tests/voiceWhisperPtyRepro.test.ts b/packages/cli/src/integration-tests/voiceWhisperPtyRepro.test.ts
@@ -0,0 +1,84 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { afterEach, describe, expect, it } from 'vitest';
+import { TestRig } from '@google/gemini-cli-test-utils';
+import path from 'node:path';
+import { fileURLToPath } from 'node:url';
+import stripAnsi from 'strip-ansi';
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url)); // Directory of this test file, derived from import.meta.url.
+
+describe.skipIf(process.platform === 'win32')('Voice Whisper PTY repro', () => { // Skipped on Windows; the fake `sox` created below is a bash script.
+  let rig: TestRig | undefined; // Assigned inside the test; stays undefined if the test never creates it.
+
+  afterEach(async () => {
+    await rig?.cleanup(); // Optional chaining makes this a no-op when no rig was created.
+  });
+
+  it('repro: whisper can remain stuck in "Speak now..." after Esc in a real PTY session', async () => { // Repro: asserts the stuck prompt is observable, not that it is fixed.
+    const fakeResponsesPath = path.join( // Fixture path resolved relative to this file (presumably canned model responses; confirm).
+      __dirname,
+      '../test-utils/fixtures/simple.responses',
+    );
+
+    rig = new TestRig();
+    rig.setup('voice-whisper-pty-repro', {
+      fakeResponsesPath,
+    });
+
+    const ignoreSigintScript = rig.createScript( // Node script that installs an empty SIGINT handler and keeps a timer running so it never exits on its own.
+      'ignore-sigint.js',
+      [
+        "process.on('SIGINT', () => {});",
+        'setInterval(() => {}, 1000);',
+        '',
+      ].join('\n'),
+    );
+
+    rig.createScript( // Fake recorder named `sox`: a bash shim that execs the SIGINT-ignoring script, forwarding all arguments.
+      'sox',
+      [
+        '#!/usr/bin/env bash',
+        `exec "${process.execPath}" "${ignoreSigintScript}" "$@"`,
+        '',
+      ].join('\n'),
+    );
+
+    // Make the fake recorder executable.
+    const fs = await import('node:fs/promises');
+    await fs.chmod(path.join(rig.testDir!, 'sox'), 0o755); // NOTE(review): non-null assertion assumes setup() has populated testDir.
+
+    const run = await rig.runInteractive({
+      env: {
+        PATH: `${rig.testDir}:${process.env['PATH'] ?? ''}`, // testDir is prepended so the fake `sox` is found first on PATH.
+        GEMINI_API_KEY: 'test-key', // Dummy value; presumably only needed so the CLI starts without real credentials (confirm).
+      },
+    });
+
+    const submitCommand = async (command: string) => { // Types the command, pauses 75 ms, then sends a carriage return to submit it.
+      await run.sendKeys(command);
+      await new Promise((resolve) => setTimeout(resolve, 75));
+      await run.sendKeys('\r');
+    };
+
+    await submitCommand('/voice provider whisper');
+    await run.expectText('Voice transcription backend set to: whisper', 10000);
+
+    await submitCommand('/voice enable');
+    await run.expectText('Voice input enabled', 10000);
+
+    await run.sendText(' '); // First of two spaces, sent 120 ms apart; the test then expects the recording prompt.
+    await new Promise((resolve) => setTimeout(resolve, 120));
+    await run.sendText(' ');
+    await run.expectText('Speak now...', 10000);
+
+    await run.sendText('\u001B'); // ESC key.
+    await new Promise((resolve) => setTimeout(resolve, 500)); // Fixed 500 ms wait before inspecting the output.
+
+    expect(stripAnsi(run.output)).toContain('Speak now...'); // NOTE(review): if run.output is cumulative, this already holds from the expectText above and says nothing about post-Esc state; confirm.
+  }, 60000); // Per-test timeout in milliseconds.
+});