diff --git a/.changeset/chatty-comics-lie.md b/.changeset/chatty-comics-lie.md new file mode 100644 index 0000000000..65a55289ad --- /dev/null +++ b/.changeset/chatty-comics-lie.md @@ -0,0 +1,7 @@ +--- +"@gradio/audio": minor +"@gradio/video": minor +"gradio": minor +--- + +feat:Add `playback_position` to gr.Audio and gr.Video, which can be updated and read diff --git a/demo/playback_position/run.ipynb b/demo/playback_position/run.ipynb new file mode 100644 index 0000000000..2617f6d73f --- /dev/null +++ b/demo/playback_position/run.ipynb @@ -0,0 +1 @@ +{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: playback_position"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["# Downloading files from the demo repo\n", "import os\n", "!wget -q https://github.com/gradio-app/gradio/raw/main/demo/playback_position/sax.wav\n", "!wget -q https://github.com/gradio-app/gradio/raw/main/demo/playback_position/world.mp4"]}, {"cell_type": "code", "execution_count": null, "id": "44380577570523278879349135829904343037", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "from gradio.media import get_audio, get_video\n", "\n", "# Demo: read and set the playback position of gr.Audio and gr.Video\n", "with gr.Blocks() as demo:\n", " with gr.Tab(\"Audio\"):\n", " gr.Markdown(\"## Audio Playback Position\")\n", " gr.Markdown(\"Click the button to see the current playback position of the audio.\")\n", "\n", " audio = gr.Audio(\n", " value=get_audio(\"sax.wav\"),\n", " playback_position=2.0,\n", " elem_id=\"audio\",\n", " )\n", " audio_btn = gr.Button(\"Get Audio Playback Position\")\n", " audio_position = gr.Number(label=\"Current Audio Position (seconds)\")\n", 
"\n", " def print_audio_playback_pos(a: gr.Audio):\n", " return a.playback_position\n", "\n", " audio_btn.click(print_audio_playback_pos, inputs=audio, outputs=audio_position)\n", "\n", " set_audio_time_btn = gr.Button(\"Set Audio Playback Position to 10 seconds\")\n", " def set_audio_playback_pos():\n", " return gr.Audio(playback_position=10.0)\n", " \n", " set_audio_time_btn.click(set_audio_playback_pos, outputs=audio)\n", "\n", " with gr.Tab(\"Video\"):\n", " gr.Markdown(\"## Video Playback Position\")\n", " gr.Markdown(\"Click the button to see the current playback position of the video.\")\n", "\n", " video = gr.Video(\n", " value=get_video(\"world.mp4\"),\n", " playback_position=5.0,\n", " elem_id=\"video\",\n", " )\n", " video_btn = gr.Button(\"Get Video Playback Position\")\n", " video_position = gr.Number(label=\"Current Video Position (seconds)\")\n", "\n", " def print_video_playback_pos(v: gr.Video):\n", " return v.playback_position\n", "\n", " video_btn.click(print_video_playback_pos, inputs=video, outputs=video_position)\n", "\n", " set_video_time_btn = gr.Button(\"Set Video Playback Position to 8 seconds\")\n", " def set_video_playback_pos():\n", " return gr.Video(playback_position=8.0)\n", " \n", " set_video_time_btn.click(set_video_playback_pos, outputs=video) \n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file diff --git a/demo/playback_position/run.py b/demo/playback_position/run.py new file mode 100644 index 0000000000..677496b1e6 --- /dev/null +++ b/demo/playback_position/run.py @@ -0,0 +1,53 @@ +import gradio as gr +from gradio.media import get_audio, get_video + +# Demo: read and set the playback position of gr.Audio and gr.Video +with gr.Blocks() as demo: + with gr.Tab("Audio"): + gr.Markdown("## Audio Playback Position") + gr.Markdown("Click the button to see the current playback position of the audio.") + + audio = gr.Audio( + value=get_audio("sax.wav"), + 
playback_position=2.0, + elem_id="audio", + ) + audio_btn = gr.Button("Get Audio Playback Position") + audio_position = gr.Number(label="Current Audio Position (seconds)") + + def print_audio_playback_pos(a: gr.Audio): + return a.playback_position + + audio_btn.click(print_audio_playback_pos, inputs=audio, outputs=audio_position) + + set_audio_time_btn = gr.Button("Set Audio Playback Position to 10 seconds") + def set_audio_playback_pos(): + return gr.Audio(playback_position=10.0) + + set_audio_time_btn.click(set_audio_playback_pos, outputs=audio) + + with gr.Tab("Video"): + gr.Markdown("## Video Playback Position") + gr.Markdown("Click the button to see the current playback position of the video.") + + video = gr.Video( + value=get_video("world.mp4"), + playback_position=5.0, + elem_id="video", + ) + video_btn = gr.Button("Get Video Playback Position") + video_position = gr.Number(label="Current Video Position (seconds)") + + def print_video_playback_pos(v: gr.Video): + return v.playback_position + + video_btn.click(print_video_playback_pos, inputs=video, outputs=video_position) + + set_video_time_btn = gr.Button("Set Video Playback Position to 8 seconds") + def set_video_playback_pos(): + return gr.Video(playback_position=8.0) + + set_video_time_btn.click(set_video_playback_pos, outputs=video) + +if __name__ == "__main__": + demo.launch() diff --git a/demo/playback_position/sax.wav b/demo/playback_position/sax.wav new file mode 100644 index 0000000000..de6d16abb0 Binary files /dev/null and b/demo/playback_position/sax.wav differ diff --git a/demo/playback_position/world.mp4 b/demo/playback_position/world.mp4 new file mode 100644 index 0000000000..b11552f9cb Binary files /dev/null and b/demo/playback_position/world.mp4 differ diff --git a/gradio/components/audio.py b/gradio/components/audio.py index 6c1114dc4a..402fd3c911 100644 --- a/gradio/components/audio.py +++ b/gradio/components/audio.py @@ -108,6 +108,7 @@ def __init__( loop: bool = False, recording: bool = 
False, subtitles: str | Path | list[dict[str, Any]] | None = None, + playback_position: float = 0, ): """ Parameters: @@ -137,6 +138,7 @@ def __init__( loop: If True, the audio will loop when it reaches the end and continue playing from the beginning. recording: If True, the audio component will be set to record audio from the microphone if the source is set to "microphone". Defaults to False. subtitles: A subtitle file (srt, vtt, or json) for the audio, or a list of subtitle dictionaries in the format [{"text": str, "timestamp": [start, end]}] where timestamps are in seconds. JSON files should contain an array of subtitle objects. + playback_position: The starting playback position in seconds. This value is also updated as the audio plays, reflecting the current playback position. """ valid_sources: list[Literal["upload", "microphone"]] = ["upload", "microphone"] if sources is None: @@ -182,6 +184,7 @@ def __init__( else: self.waveform_options = waveform_options self.recording = recording + self.playback_position = playback_position super().__init__( label=label, every=every, diff --git a/gradio/components/video.py b/gradio/components/video.py index 26ad5733b6..41588790a5 100644 --- a/gradio/components/video.py +++ b/gradio/components/video.py @@ -88,6 +88,7 @@ def __init__( streaming: bool = False, watermark: WatermarkOptions | None = None, subtitles: str | Path | list[dict[str, Any]] | None = None, + playback_position: float = 0, ): """ Parameters: @@ -118,6 +119,7 @@ def __init__( watermark: A `gr.WatermarkOptions` instance that includes an image file and position to be used as a watermark on the video. The image is not scaled and is displayed on the provided position on the video. Valid formats for the image are: jpeg, png. webcam_options: A `gr.WebcamOptions` instance that allows developers to specify custom media constraints for the webcam stream. 
This parameter provides flexibility to control the video stream's properties, such as resolution and front or rear camera on mobile devices. See $demo/webcam_constraints subtitles: A subtitle file (srt, vtt, or json) for the video, or a list of subtitle dictionaries in the format [{"text": str, "timestamp": [start, end]}] where timestamps are in seconds. JSON files should contain an array of subtitle objects. + playback_position: The starting playback position in seconds. This value is also updated as the video plays, reflecting the current playback position. """ valid_sources: list[Literal["upload", "webcam"]] = ["upload", "webcam"] if sources is None: @@ -156,6 +158,7 @@ def __init__( ) self.buttons = buttons self.streaming = streaming + self.playback_position = playback_position self.subtitles = None if subtitles is not None: if isinstance(subtitles, list): diff --git a/gradio/media_assets/audio/sax.wav b/gradio/media_assets/audio/sax.wav new file mode 100644 index 0000000000..de6d16abb0 Binary files /dev/null and b/gradio/media_assets/audio/sax.wav differ diff --git a/gradio/templates.py b/gradio/templates.py index c8d46526e8..b21ce651eb 100644 --- a/gradio/templates.py +++ b/gradio/templates.py @@ -401,6 +401,7 @@ def __init__( streaming: bool = False, watermark: str | Path | None = None, subtitles: str | Path | None = None, + playback_position: float = 0, ): sources = ["upload"] super().__init__( @@ -431,6 +432,7 @@ def __init__( watermark=watermark, webcam_options=webcam_options, subtitles=subtitles, + playback_position=playback_position, ) @@ -479,6 +481,7 @@ def __init__( loop: bool = False, recording: bool = False, subtitles: str | Path | None = None, + playback_position: float = 0, ): sources = ["microphone"] super().__init__( @@ -508,6 +511,7 @@ def __init__( loop=loop, recording=recording, subtitles=subtitles, + playback_position=playback_position, ) diff --git a/js/audio/Index.svelte b/js/audio/Index.svelte index acf118e984..6f7e463978 100644 --- 
a/js/audio/Index.svelte +++ b/js/audio/Index.svelte @@ -147,6 +147,7 @@ waveform_options={gradio.props.waveform_options} editable={gradio.props.editable} {minimal} + bind:playback_position={gradio.props.playback_position} on:share={(e) => gradio.dispatch("share", e.detail)} on:error={(e) => gradio.dispatch("error", e.detail)} on:play={() => gradio.dispatch("play")} @@ -201,6 +202,7 @@ {handle_reset_value} editable={gradio.props.editable} bind:dragging + bind:playback_position={gradio.props.playback_position} on:edit={() => gradio.dispatch("edit")} on:play={() => gradio.dispatch("play")} on:pause={() => gradio.dispatch("pause")} diff --git a/js/audio/interactive/InteractiveAudio.svelte b/js/audio/interactive/InteractiveAudio.svelte index c7255bb95a..50e4a57b29 100644 --- a/js/audio/interactive/InteractiveAudio.svelte +++ b/js/audio/interactive/InteractiveAudio.svelte @@ -47,6 +47,7 @@ export let class_name = ""; export let upload_promise: Promise | null = null; export let initial_value: FileData | null = null; + export let playback_position = 0; export let time_limit: number | null = null; export let stream_state: "open" | "waiting" | "closed" = "closed"; @@ -314,6 +315,7 @@ {handle_reset_value} {editable} {loop} + bind:playback_position interactive on:stop on:play diff --git a/js/audio/player/AudioPlayer.svelte b/js/audio/player/AudioPlayer.svelte index 1e20239f64..ce8190c292 100644 --- a/js/audio/player/AudioPlayer.svelte +++ b/js/audio/player/AudioPlayer.svelte @@ -29,9 +29,12 @@ export let mode = ""; export let loop: boolean; export let handle_reset_value: () => void = () => {}; + export let playback_position = 0; + let old_playback_position = 0; let container: HTMLDivElement; let waveform: WaveSurfer | undefined; + let waveform_ready = false; let waveform_component_wrapper: HTMLDivElement; let playing = false; @@ -62,6 +65,15 @@ $: use_waveform = waveform_options.show_recording_waveform && !value?.is_stream; + $: if ( + waveform_ready && + old_playback_position 
!== playback_position && + audio_duration + ) { + waveform?.seekTo(playback_position / audio_duration); + old_playback_position = playback_position; + } + const create_waveform = (): void => { waveform = WaveSurfer.create({ container: container, @@ -85,18 +97,24 @@ durationRef && (durationRef.textContent = format_time(duration)); }); - waveform?.on( - "timeupdate", - (currentTime: any) => - timeRef && (timeRef.textContent = format_time(currentTime)) - ); + let firstTimeUpdate = true; + waveform?.on("timeupdate", (currentTime: any) => { + timeRef && (timeRef.textContent = format_time(currentTime)); + if (firstTimeUpdate) { + firstTimeUpdate = false; + return; + } + old_playback_position = playback_position = currentTime; + }); waveform?.on("interaction", () => { const currentTime = waveform?.getCurrentTime() || 0; timeRef && (timeRef.textContent = format_time(currentTime)); + old_playback_position = playback_position = currentTime; }); waveform?.on("ready", () => { + waveform_ready = true; if (!waveform_settings.autoplay) { waveform?.stop(); } else { @@ -341,7 +359,8 @@ on:ended={() => dispatch("stop")} on:play={() => dispatch("play")} preload="metadata" -/> +> + {#if value === null} diff --git a/js/audio/shared/types.ts b/js/audio/shared/types.ts index 0bf9f10ff0..b52b190bbb 100644 --- a/js/audio/shared/types.ts +++ b/js/audio/shared/types.ts @@ -36,6 +36,7 @@ export interface AudioProps { stream_every: number; input_ready: boolean; minimal?: boolean; + playback_position: number; } export interface AudioEvents { diff --git a/js/audio/static/StaticAudio.svelte b/js/audio/static/StaticAudio.svelte index 7e6304c3a3..249ad8d7f2 100644 --- a/js/audio/static/StaticAudio.svelte +++ b/js/audio/static/StaticAudio.svelte @@ -30,6 +30,7 @@ export let loop: boolean; export let display_icon_button_wrapper_top_corner = false; export let minimal = false; + export let playback_position = 0; const dispatch = createEventDispatcher<{ change: FileData; @@ -90,6 +91,7 @@ 
{waveform_options} {editable} {loop} + bind:playback_position on:pause on:play on:stop diff --git a/js/spa/test/playback_position.spec.ts b/js/spa/test/playback_position.spec.ts new file mode 100644 index 0000000000..f7fdca8566 --- /dev/null +++ b/js/spa/test/playback_position.spec.ts @@ -0,0 +1,38 @@ +import { test, expect } from "@self/tootils"; + +test("Audio playback position is retrieved correctly and updates as audio plays.", async ({ + page +}) => { + await page.getByRole("tab", { name: "Audio" }).click(); + await page.waitForSelector('[data-testid="waveform-Audio"] svg'); + await page + .getByRole("button", { name: "Get Audio Playback Position" }) + .click(); + + const initialPositionBox = page.getByLabel( + "Current Audio Position (seconds)" + ); + await expect(initialPositionBox).not.toHaveValue("0"); + + const initialPosition = await initialPositionBox.inputValue(); + expect(parseFloat(initialPosition)).toBeGreaterThanOrEqual(1.5); + expect(parseFloat(initialPosition)).toBeLessThanOrEqual(2.5); + + await page + .getByTestId("waveform-Audio") + .getByLabel("Play", { exact: true }) + .click(); + await page.waitForTimeout(2000); + + await page + .getByRole("button", { name: "Get Audio Playback Position" }) + .click(); + await expect(initialPositionBox).not.toHaveValue(initialPosition); + + const updatedPosition = await page + .getByLabel("Current Audio Position (seconds)") + .inputValue(); + expect(parseFloat(updatedPosition)).toBeGreaterThan( + parseFloat(initialPosition) + ); +}); diff --git a/js/video/Index.svelte b/js/video/Index.svelte index 2ff533f718..aaa3b6b2f8 100644 --- a/js/video/Index.svelte +++ b/js/video/Index.svelte @@ -104,6 +104,7 @@ show_download_button={(gradio.props.buttons || ["download"]).includes( "download" )} + bind:playback_position={gradio.props.playback_position} on:play={() => gradio.dispatch("play")} on:pause={() => gradio.dispatch("pause")} on:stop={() => gradio.dispatch("stop")} @@ -158,6 +159,7 @@ root={gradio.shared.root} 
loop={gradio.props.loop} {handle_reset_value} + bind:playback_position={gradio.props.playback_position} on:clear={() => { gradio.props.value = null; gradio.dispatch("clear"); diff --git a/js/video/shared/InteractiveVideo.svelte b/js/video/shared/InteractiveVideo.svelte index cd10a85689..bb2ce315e1 100644 --- a/js/video/shared/InteractiveVideo.svelte +++ b/js/video/shared/InteractiveVideo.svelte @@ -34,6 +34,7 @@ export let loop: boolean; export let uploading = false; export let upload_promise: Promise | null = null; + export let playback_position = 0; let has_change_history = false; @@ -139,6 +140,7 @@ {show_download_button} {handle_clear} {has_change_history} + bind:playback_position /> {/key} {:else if value.size} diff --git a/js/video/shared/Player.svelte b/js/video/shared/Player.svelte index f89df4ab78..2f4a5f51db 100644 --- a/js/video/shared/Player.svelte +++ b/js/video/shared/Player.svelte @@ -25,6 +25,7 @@ export let value: FileData | null = null; export let handle_clear: () => void = () => {}; export let has_change_history = false; + export let playback_position = 0; const dispatch = createEventDispatcher<{ play: undefined; @@ -100,6 +101,10 @@ $: time = time || 0; $: duration = duration || 0; + $: playback_position = time; + $: if (playback_position !== time && video) { + video.currentTime = playback_position; + }
diff --git a/js/video/shared/VideoPreview.svelte b/js/video/shared/VideoPreview.svelte index 1052a1c9b1..9a2facbe4c 100644 --- a/js/video/shared/VideoPreview.svelte +++ b/js/video/shared/VideoPreview.svelte @@ -26,6 +26,7 @@ export let i18n: I18nFormatter; export let upload: Client["upload"]; export let display_icon_button_wrapper_top_corner = false; + export let playback_position = 0; let old_value: FileData | null = null; let old_subtitle: FileData | null = null; @@ -84,6 +85,7 @@ interactive={false} {upload} {i18n} + bind:playback_position /> {/key}
diff --git a/js/video/types.ts b/js/video/types.ts index 849d7d1457..995386ceb1 100644 --- a/js/video/types.ts +++ b/js/video/types.ts @@ -18,6 +18,7 @@ export interface VideoProps { loop: boolean; webcam_constraints: object; subtitles: FileData | null; + playback_position: number; } export interface VideoEvents { diff --git a/test/components/test_audio.py b/test/components/test_audio.py index c8993791c5..6474c8647f 100644 --- a/test/components/test_audio.py +++ b/test/components/test_audio.py @@ -50,6 +50,7 @@ async def test_component_functions(self, gradio_temp_dir, media_data): "elem_id": None, "elem_classes": [], "visible": True, + "playback_position": 0, "value": None, "interactive": None, "proxy_url": None, @@ -106,6 +107,7 @@ async def test_component_functions(self, gradio_temp_dir, media_data): "elem_id": None, "elem_classes": [], "visible": True, + "playback_position": 0, "value": None, "interactive": None, "proxy_url": None, diff --git a/test/components/test_video.py b/test/components/test_video.py index cfbf4506c1..ca78f4eea1 100644 --- a/test/components/test_video.py +++ b/test/components/test_video.py @@ -52,6 +52,7 @@ async def test_component_functions(self, media_data): "elem_id": None, "elem_classes": [], "visible": True, + "playback_position": 0, "value": None, "interactive": None, "proxy_url": None,