From 5aaad12436ad42950e6c74969286fb3381dcfd91 Mon Sep 17 00:00:00 2001 From: David Baker Date: Tue, 27 Sep 2022 16:22:30 +0100 Subject: [PATCH 01/10] Fix device selection in pre-join screen for Element Call video rooms As per https://github.com/vector-im/element-call/pull/609 --- src/MediaDeviceHandler.ts | 10 +++++++--- src/models/Call.ts | 4 ++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/MediaDeviceHandler.ts b/src/MediaDeviceHandler.ts index a5160484c8f..65501013951 100644 --- a/src/MediaDeviceHandler.ts +++ b/src/MediaDeviceHandler.ts @@ -50,10 +50,14 @@ export default class MediaDeviceHandler extends EventEmitter { return devices.some(d => Boolean(d.label)); } + // Gets the available audio input/output and video input devices + // from the browser: a thin wrapper around mediaDevices.enumerateDevices() + // that also returns results by type of devices. + // Note that common browser behaviour is for enumerateDevices() to not require + // user media permission, but if you don't have user media permission, all the + // all device names will be the empty string. If you care about device names, + // call and wait for requestPermission() above. public static async getDevices(): Promise { - // Only needed for Electron atm, though should work in modern browsers - // once permission has been granted to the webapp - try { const devices = await navigator.mediaDevices.enumerateDevices(); const output = { diff --git a/src/models/Call.ts b/src/models/Call.ts index 9b11261e85d..e71cef219eb 100644 --- a/src/models/Call.ts +++ b/src/models/Call.ts @@ -762,8 +762,8 @@ export class ElementCall extends Call { ): Promise { try { await this.messaging!.transport.send(ElementWidgetActions.JoinCall, { - audioInput: audioInput?.deviceId ?? null, - videoInput: videoInput?.deviceId ?? null, + audioInput: audioInput?.label ?? null, + videoInput: videoInput?.label ?? null, }); } catch (e) { throw new Error(`Failed to join call in room ${this.roomId}: ${e}`); From 71ae9898601b2e9ab895c5003a7d633a3bb4051b Mon Sep 17 00:00:00 2001 From: David Baker Date: Tue, 27 Sep 2022 16:36:43 +0100 Subject: [PATCH 02/10] Update unit test --- test/models/Call-test.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/models/Call-test.ts b/test/models/Call-test.ts index fbbf22eca88..6e792e31542 100644 --- a/test/models/Call-test.ts +++ b/test/models/Call-test.ts @@ -611,8 +611,8 @@ describe("ElementCall", () => { await call.connect(); expect(call.connectionState).toBe(ConnectionState.Connected); expect(messaging.transport.send).toHaveBeenCalledWith(ElementWidgetActions.JoinCall, { - audioInput: "1", - videoInput: "2", + audioInput: "Headphones", + videoInput: "Built-in webcam" }); }); From 000db940650f343d0082fc83fd04b915079581e7 Mon Sep 17 00:00:00 2001 From: David Baker Date: Tue, 27 Sep 2022 16:40:17 +0100 Subject: [PATCH 03/10] Lint --- test/models/Call-test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/models/Call-test.ts b/test/models/Call-test.ts index 6e792e31542..fb2f99c77eb 100644 --- a/test/models/Call-test.ts +++ b/test/models/Call-test.ts @@ -612,7 +612,7 @@ describe("ElementCall", () => { expect(call.connectionState).toBe(ConnectionState.Connected); expect(messaging.transport.send).toHaveBeenCalledWith(ElementWidgetActions.JoinCall, { audioInput: "Headphones", - videoInput: "Built-in webcam" + videoInput: "Built-in webcam", }); }); From 92e5978405c6cb4d01c5c784a555cd76830802a8 Mon Sep 17 00:00:00 2001 From: David Baker Date: Thu, 29 Sep 2022 13:20:49 +0100 Subject: [PATCH 04/10] Hold a media stream while we enumerate device so we can do so reliably. This means we can remove the device fallback labels. --- src/MediaDeviceHandler.ts | 20 ++++++---- src/components/views/voip/CallLobby.tsx | 52 ++++++++++++------------- 2 files changed, 38 insertions(+), 34 deletions(-) diff --git a/src/MediaDeviceHandler.ts b/src/MediaDeviceHandler.ts index 65501013951..0e6d2b98bc7 100644 --- a/src/MediaDeviceHandler.ts +++ b/src/MediaDeviceHandler.ts @@ -50,13 +50,19 @@ export default class MediaDeviceHandler extends EventEmitter { return devices.some(d => Boolean(d.label)); } - // Gets the available audio input/output and video input devices - // from the browser: a thin wrapper around mediaDevices.enumerateDevices() - // that also returns results by type of devices. - // Note that common browser behaviour is for enumerateDevices() to not require - // user media permission, but if you don't have user media permission, all the - // all device names will be the empty string. If you care about device names, - // call and wait for requestPermission() above. + /** + * Gets the available audio input/output and video input devices + * from the browser: a thin wrapper around mediaDevices.enumerateDevices() + * that also returns results by type of devices. Note that this requires + * user media permissions and an active stream, otherwise you'll get blank + * device labels. + * + * Once the Permissions API + * (https://developer.mozilla.org/en-US/docs/Web/API/Permissions_API) + * is ready for primetime, it might help make this simpler. + * + * @return Promise The available media devices + */ public static async getDevices(): Promise { try { const devices = await navigator.mediaDevices.enumerateDevices(); diff --git a/src/components/views/voip/CallLobby.tsx b/src/components/views/voip/CallLobby.tsx index 39cef9e4061..7d92dd362ec 100644 --- a/src/components/views/voip/CallLobby.tsx +++ b/src/components/views/voip/CallLobby.tsx @@ -41,7 +41,6 @@ interface DeviceButtonProps { devices: MediaDeviceInfo[]; setDevice: (device: MediaDeviceInfo) => void; deviceListLabel: string; - fallbackDeviceLabel: (n: number) => string; muted: boolean; disabled: boolean; toggle: () => void; @@ -50,7 +49,7 @@ interface DeviceButtonProps { } const DeviceButton: FC = ({ - kind, devices, setDevice, deviceListLabel, fallbackDeviceLabel, muted, disabled, toggle, unmutedTitle, mutedTitle, + kind, devices, setDevice, deviceListLabel, muted, disabled, toggle, unmutedTitle, mutedTitle, }) => { const [menuDisplayed, buttonRef, openMenu, closeMenu] = useContextMenu(); let contextMenu; @@ -63,10 +62,10 @@ const DeviceButton: FC = ({ const buttonRect = buttonRef.current!.getBoundingClientRect(); contextMenu = - { devices.map((d, index) => + { devices.map((d) => selectDevice(d)} />, ) } @@ -115,25 +114,8 @@ export const CallLobby: FC = ({ room, call }) => { const participants = useParticipants(call); const videoRef = useRef(null); - const [audioInputs, videoInputs] = useAsyncMemo(async () => { - try { - const devices = await MediaDeviceHandler.getDevices(); - return [devices[MediaDeviceKindEnum.AudioInput], devices[MediaDeviceKindEnum.VideoInput]]; - } catch (e) { - logger.warn(`Failed to get media device list`, e); - return [[], []]; - } - }, [], [[], []]); - const [videoInputId, setVideoInputId] = useState(() => MediaDeviceHandler.getVideoInput()); - - const setAudioInput = useCallback((device: MediaDeviceInfo) => { - MediaDeviceHandler.instance.setAudioInput(device.deviceId); - }, []); - const setVideoInput = useCallback((device: MediaDeviceInfo) => { - MediaDeviceHandler.instance.setVideoInput(device.deviceId); - setVideoInputId(device.deviceId); - }, []); + const [audioInputId] = useState(() => MediaDeviceHandler.getAudioInput()); const [audioMuted, setAudioMuted] = useState(() => MediaDeviceHandler.startWithAudioMuted); const [videoMuted, setVideoMuted] = useState(() => MediaDeviceHandler.startWithVideoMuted); @@ -147,18 +129,36 @@ export const CallLobby: FC = ({ room, call }) => { setVideoMuted(!videoMuted); }, [videoMuted, setVideoMuted]); - const videoStream = useAsyncMemo(async () => { + const [videoStream, audioInputs, videoInputs] = useAsyncMemo(async () => { if (videoInputId && !videoMuted) { try { - return await navigator.mediaDevices.getUserMedia({ + // We get the preview stream before requesting devices: this is because + // we need (in some browsers) an active media stream in order to get + // non-blank labels for the devices. According to the docs, we + // need a stream of each type (audio + video) if we want to enumerate + // audio & video devices, although this didn't seem to be the case + // in practice for me. We request both anyway. + const s = await navigator.mediaDevices.getUserMedia({ video: { deviceId: videoInputId }, + audio: { deviceId: audioInputId }, }); + + const devices = await MediaDeviceHandler.getDevices(); + return [s, devices[MediaDeviceKindEnum.AudioInput], devices[MediaDeviceKindEnum.VideoInput]]; } catch (e) { logger.error(`Failed to get stream for device ${videoInputId}`, e); } } return null; - }, [videoInputId, videoMuted]); + }, [videoInputId, videoMuted], [null, [], []]); + + const setAudioInput = useCallback((device: MediaDeviceInfo) => { + MediaDeviceHandler.instance.setAudioInput(device.deviceId); + }, []); + const setVideoInput = useCallback((device: MediaDeviceInfo) => { + MediaDeviceHandler.instance.setVideoInput(device.deviceId); + setVideoInputId(device.deviceId); + }, []); useEffect(() => { if (videoStream) { @@ -213,7 +213,6 @@ export const CallLobby: FC = ({ room, call }) => { devices={audioInputs} setDevice={setAudioInput} deviceListLabel={_t("Audio devices")} - fallbackDeviceLabel={n => _t("Audio input %(n)s", { n })} muted={audioMuted} disabled={connecting} toggle={toggleAudio} @@ -225,7 +224,6 @@ export const CallLobby: FC = ({ room, call }) => { devices={videoInputs} setDevice={setVideoInput} deviceListLabel={_t("Video devices")} - fallbackDeviceLabel={n => _t("Video input %(n)s", { n })} muted={videoMuted} disabled={connecting} toggle={toggleVideo} From d087a42ed86e1d807501586f38dd5c52bbde9faf Mon Sep 17 00:00:00 2001 From: David Baker Date: Thu, 29 Sep 2022 13:25:32 +0100 Subject: [PATCH 05/10] i18n --- src/i18n/strings/en_EN.json | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/i18n/strings/en_EN.json b/src/i18n/strings/en_EN.json index c1b25cb2ab2..c5b5de95c89 100644 --- a/src/i18n/strings/en_EN.json +++ b/src/i18n/strings/en_EN.json @@ -1047,11 +1047,9 @@ "Hint: Begin your message with // to start it with a slash.": "Hint: Begin your message with // to start it with a slash.", "Send as message": "Send as message", "Audio devices": "Audio devices", - "Audio input %(n)s": "Audio input %(n)s", "Mute microphone": "Mute microphone", "Unmute microphone": "Unmute microphone", "Video devices": "Video devices", - "Video input %(n)s": "Video input %(n)s", "Turn off camera": "Turn off camera", "Turn on camera": "Turn on camera", "Join": "Join", From 3f5ec0fe9c423895aee5b130cd8026d1d824b7e4 Mon Sep 17 00:00:00 2001 From: David Baker Date: Thu, 29 Sep 2022 17:16:25 +0100 Subject: [PATCH 06/10] Remove unnecessary useState --- src/components/views/voip/CallView.tsx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/components/views/voip/CallView.tsx b/src/components/views/voip/CallView.tsx index 1d79b00a493..02dceffd15b 100644 --- a/src/components/views/voip/CallView.tsx +++ b/src/components/views/voip/CallView.tsx @@ -119,7 +119,6 @@ export const Lobby: FC = ({ room, connect, children }) => { const videoRef = useRef(null); const [videoInputId, setVideoInputId] = useState(() => MediaDeviceHandler.getVideoInput()); - const [audioInputId] = useState(() => MediaDeviceHandler.getAudioInput()); const [audioMuted, setAudioMuted] = useState(() => MediaDeviceHandler.startWithAudioMuted); const [videoMuted, setVideoMuted] = useState(() => MediaDeviceHandler.startWithVideoMuted); @@ -144,7 +143,7 @@ export const Lobby: FC = ({ room, connect, children }) => { // in practice for me. We request both anyway. const s = await navigator.mediaDevices.getUserMedia({ video: { deviceId: videoInputId }, - audio: { deviceId: audioInputId }, + audio: { deviceId: MediaDeviceHandler.getAudioInput() }, }); const devices = await MediaDeviceHandler.getDevices(); From 69310a585d650a904931ba28aa4aec8ab99ee2b6 Mon Sep 17 00:00:00 2001 From: David Baker Date: Thu, 29 Sep 2022 17:47:43 +0100 Subject: [PATCH 07/10] Fix fetching video devices when video muted --- src/components/views/voip/CallView.tsx | 44 ++++++++++++++++---------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/src/components/views/voip/CallView.tsx b/src/components/views/voip/CallView.tsx index 02dceffd15b..d0b234a31db 100644 --- a/src/components/views/voip/CallView.tsx +++ b/src/components/views/voip/CallView.tsx @@ -133,24 +133,34 @@ export const Lobby: FC = ({ room, connect, children }) => { }, [videoMuted, setVideoMuted]); const [videoStream, audioInputs, videoInputs] = useAsyncMemo(async () => { - if (videoInputId && !videoMuted) { - try { - // We get the preview stream before requesting devices: this is because - // we need (in some browsers) an active media stream in order to get - // non-blank labels for the devices. According to the docs, we - // need a stream of each type (audio + video) if we want to enumerate - // audio & video devices, although this didn't seem to be the case - // in practice for me. We request both anyway. - const s = await navigator.mediaDevices.getUserMedia({ - video: { deviceId: videoInputId }, - audio: { deviceId: MediaDeviceHandler.getAudioInput() }, - }); - - const devices = await MediaDeviceHandler.getDevices(); - return [s, devices[MediaDeviceKindEnum.AudioInput], devices[MediaDeviceKindEnum.VideoInput]]; - } catch (e) { - logger.error(`Failed to get stream for device ${videoInputId}`, e); + try { + // We get the preview stream before requesting devices: this is because + // we need (in some browsers) an active media stream in order to get + // non-blank labels for the devices. According to the docs, we + // need a stream of each type (audio + video) if we want to enumerate + // audio & video devices, although this didn't seem to be the case + // in practice for me. We request both anyway. + // For similar reasons, we also request a stream even if video is muted, + // which could be a bit strange but allows us to get the device list + // reliably. One option could be to try & get devices without a stream, + // then try again with a stream if we get blank deviceids, but... ew. + let s = await navigator.mediaDevices.getUserMedia({ + video: { deviceId: videoInputId }, + audio: { deviceId: MediaDeviceHandler.getAudioInput() }, + }); + + const devices = await MediaDeviceHandler.getDevices(); + + // If video is muted, we don't actually want the stream, so we can get rid of + // it now. + if (videoMuted) { + s.getTracks().forEach(t => t.stop()); + s = null; } + + return [s, devices[MediaDeviceKindEnum.AudioInput], devices[MediaDeviceKindEnum.VideoInput]]; + } catch (e) { + logger.error(`Failed to get stream for device ${videoInputId}`, e); } return null; }, [videoInputId, videoMuted], [null, [], []]); From c5356d9a4660aa0464d44fb9a646e036dd744cf1 Mon Sep 17 00:00:00 2001 From: David Baker Date: Fri, 30 Sep 2022 09:52:09 +0100 Subject: [PATCH 08/10] Actually fix preview stream code --- src/components/views/voip/CallView.tsx | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/components/views/voip/CallView.tsx b/src/components/views/voip/CallView.tsx index d0b234a31db..fbab581e0f3 100644 --- a/src/components/views/voip/CallView.tsx +++ b/src/components/views/voip/CallView.tsx @@ -133,6 +133,7 @@ export const Lobby: FC = ({ room, connect, children }) => { }, [videoMuted, setVideoMuted]); const [videoStream, audioInputs, videoInputs] = useAsyncMemo(async () => { + let previewStream: MediaStream; try { // We get the preview stream before requesting devices: this is because // we need (in some browsers) an active media stream in order to get @@ -144,25 +145,24 @@ export const Lobby: FC = ({ room, connect, children }) => { // which could be a bit strange but allows us to get the device list // reliably. One option could be to try & get devices without a stream, // then try again with a stream if we get blank deviceids, but... ew. - let s = await navigator.mediaDevices.getUserMedia({ + previewStream = await navigator.mediaDevices.getUserMedia({ video: { deviceId: videoInputId }, audio: { deviceId: MediaDeviceHandler.getAudioInput() }, }); - - const devices = await MediaDeviceHandler.getDevices(); - - // If video is muted, we don't actually want the stream, so we can get rid of - // it now. - if (videoMuted) { - s.getTracks().forEach(t => t.stop()); - s = null; - } - - return [s, devices[MediaDeviceKindEnum.AudioInput], devices[MediaDeviceKindEnum.VideoInput]]; } catch (e) { logger.error(`Failed to get stream for device ${videoInputId}`, e); } - return null; + + const devices = await MediaDeviceHandler.getDevices(); + + // If video is muted, we don't actually want the stream, so we can get rid of + // it now. + if (videoMuted) { + previewStream.getTracks().forEach(t => t.stop()); + previewStream = undefined; + } + + return [previewStream, devices[MediaDeviceKindEnum.AudioInput], devices[MediaDeviceKindEnum.VideoInput]]; }, [videoInputId, videoMuted], [null, [], []]); const setAudioInput = useCallback((device: MediaDeviceInfo) => { From c536ccb4a98c4de7a5fa0be399b2611efb50ecaf Mon Sep 17 00:00:00 2001 From: David Baker Date: Fri, 30 Sep 2022 13:28:08 +0100 Subject: [PATCH 09/10] Fix unit test now fallback is no longer a thing --- test/components/views/voip/CallView-test.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/components/views/voip/CallView-test.tsx b/test/components/views/voip/CallView-test.tsx index 827b4d29c15..1549aee35c8 100644 --- a/test/components/views/voip/CallView-test.tsx +++ b/test/components/views/voip/CallView-test.tsx @@ -219,7 +219,7 @@ describe("CallLobby", () => { { deviceId: "2", groupId: "1", - label: "", // Should fall back to "Audio input 2" + label: "Tailphones", kind: "audioinput", toJSON: () => {}, }, @@ -229,7 +229,7 @@ describe("CallLobby", () => { screen.getByRole("button", { name: /microphone/ }); fireEvent.click(screen.getByRole("button", { name: "Audio devices" })); screen.getByRole("menuitem", { name: "Headphones" }); - screen.getByRole("menuitem", { name: "Audio input 2" }); + screen.getByRole("menuitem", { name: "Tailphones" }); }); }); }); From 96b7323ef7e64040d6c0d536c080383586ca78f4 Mon Sep 17 00:00:00 2001 From: David Baker Date: Fri, 30 Sep 2022 16:02:51 +0100 Subject: [PATCH 10/10] Test changing devices --- test/components/views/voip/CallView-test.tsx | 78 ++++++++++++++------ test/test-utils/test-utils.ts | 6 ++ 2 files changed, 63 insertions(+), 21 deletions(-) diff --git a/test/components/views/voip/CallView-test.tsx b/test/components/views/voip/CallView-test.tsx index 1549aee35c8..24d26e5d52a 100644 --- a/test/components/views/voip/CallView-test.tsx +++ b/test/components/views/voip/CallView-test.tsx @@ -187,6 +187,35 @@ describe("CallLobby", () => { }); describe("device buttons", () => { + const fakeVideoInput1: MediaDeviceInfo = { + deviceId: "v1", + groupId: "v1", + label: "Webcam", + kind: "videoinput", + toJSON: () => {}, + }; + const fakeVideoInput2: MediaDeviceInfo = { + deviceId: "v2", + groupId: "v2", + label: "Othercam", + kind: "videoinput", + toJSON: () => {}, + }; + const fakeAudioInput1: MediaDeviceInfo = { + deviceId: "v1", + groupId: "v1", + label: "Headphones", + kind: "audioinput", + toJSON: () => {}, + }; + const fakeAudioInput2: MediaDeviceInfo = { + deviceId: "v2", + groupId: "v2", + label: "Tailphones", + kind: "audioinput", + toJSON: () => {}, + }; + it("hide when no devices are available", async () => { await renderView(); expect(screen.queryByRole("button", { name: /microphone/ })).toBe(null); @@ -194,13 +223,7 @@ describe("CallLobby", () => { }); it("show without dropdown when only one device is available", async () => { - mocked(navigator.mediaDevices.enumerateDevices).mockResolvedValue([{ - deviceId: "1", - groupId: "1", - label: "Webcam", - kind: "videoinput", - toJSON: () => {}, - }]); + mocked(navigator.mediaDevices.enumerateDevices).mockResolvedValue([fakeVideoInput1]); await renderView(); screen.getByRole("button", { name: /camera/ }); @@ -209,20 +232,7 @@ describe("CallLobby", () => { it("show with dropdown when multiple devices are available", async () => { mocked(navigator.mediaDevices.enumerateDevices).mockResolvedValue([ - { - deviceId: "1", - groupId: "1", - label: "Headphones", - kind: "audioinput", - toJSON: () => {}, - }, - { - deviceId: "2", - groupId: "1", - label: "Tailphones", - kind: "audioinput", - toJSON: () => {}, - }, + fakeAudioInput1, fakeAudioInput2, ]); await renderView(); @@ -231,5 +241,31 @@ describe("CallLobby", () => { screen.getByRole("menuitem", { name: "Headphones" }); screen.getByRole("menuitem", { name: "Tailphones" }); }); + + it("sets video device when selected", async () => { + mocked(navigator.mediaDevices.enumerateDevices).mockResolvedValue([ + fakeVideoInput1, fakeVideoInput2, + ]); + + await renderView(); + screen.getByRole("button", { name: /camera/ }); + fireEvent.click(screen.getByRole("button", { name: "Video devices" })); + fireEvent.click(screen.getByRole("menuitem", { name: fakeVideoInput2.label })); + + expect(client.getMediaHandler().setVideoInput).toHaveBeenCalledWith(fakeVideoInput2.deviceId); + }); + + it("sets audio device when selected", async () => { + mocked(navigator.mediaDevices.enumerateDevices).mockResolvedValue([ + fakeAudioInput1, fakeAudioInput2, + ]); + + await renderView(); + screen.getByRole("button", { name: /microphone/ }); + fireEvent.click(screen.getByRole("button", { name: "Audio devices" })); + fireEvent.click(screen.getByRole("menuitem", { name: fakeAudioInput2.label })); + + expect(client.getMediaHandler().setAudioInput).toHaveBeenCalledWith(fakeAudioInput2.deviceId); + }); }); }); diff --git a/test/test-utils/test-utils.ts b/test/test-utils/test-utils.ts index 67c038502f2..0647f2604df 100644 --- a/test/test-utils/test-utils.ts +++ b/test/test-utils/test-utils.ts @@ -34,6 +34,7 @@ import { } from 'matrix-js-sdk/src/matrix'; import { normalize } from "matrix-js-sdk/src/utils"; import { ReEmitter } from "matrix-js-sdk/src/ReEmitter"; +import { MediaHandler } from "matrix-js-sdk/src/webrtc/mediaHandler"; import { MatrixClientPeg as peg } from '../../src/MatrixClientPeg'; import { makeType } from "../../src/utils/TypeUtils"; @@ -175,6 +176,11 @@ export function createTestClient(): MatrixClient { sendToDevice: jest.fn().mockResolvedValue(undefined), queueToDevice: jest.fn().mockResolvedValue(undefined), encryptAndSendToDevices: jest.fn().mockResolvedValue(undefined), + + getMediaHandler: jest.fn().mockReturnValue({ + setVideoInput: jest.fn(), + setAudioInput: jest.fn(), + } as unknown as MediaHandler), } as unknown as MatrixClient; client.reEmitter = new ReEmitter(client);