Skip to content

Commit a7c6c8e

Browse files
jkuri and claude committed
feat(captions): add speech-to-text captions using WhisperKit
Add on-device transcription with WhisperKit for generating captions from mic or system audio. Includes model management (base/small/medium/large), language selection, caption styling (font size/weight/position/colors/background opacity), real-time preview overlay, burned-in export via CoreText rendering, and SRT/VTT sidecar export. Short segments are auto-merged for natural subtitle display. Captions tab is hidden on Intel Macs since WhisperKit requires Apple Silicon. Co-Authored-By: Claude Opus 4.6 <[email protected]>
1 parent d44e7d4 commit a7c6c8e

21 files changed

Lines changed: 1712 additions & 8 deletions

Reframed.xcodeproj/project.pbxproj

Lines changed: 49 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
0255AD69477B42D2B028628B /* CaptureTarget.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3CFD921258364EA6A604537F /* CaptureTarget.swift */; };
1111
3DCA03E12F3B511C003FE309 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 3DCA03E02F3B511C003FE309 /* Assets.xcassets */; };
1212
3DFD8D482F4E1CA100B8BB8F /* RNNoise in Frameworks */ = {isa = PBXBuildFile; productRef = E10000000000000000000006 /* RNNoise */; };
13+
E10000000000000000000009 /* WhisperKit in Frameworks */ = {isa = PBXBuildFile; productRef = E10000000000000000000008 /* WhisperKit */; };
1314
A10000000000000000000001 /* ReframedApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = B10000000000000000000001 /* ReframedApp.swift */; };
1415
A10000000000000000000002 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = B10000000000000000000002 /* AppDelegate.swift */; };
1516
A10000000000000000000003 /* Permissions.swift in Sources */ = {isa = PBXBuildFile; fileRef = B10000000000000000000003 /* Permissions.swift */; };
@@ -176,6 +177,14 @@
176177
CD88FB9E14F75D2DA0513D3C /* CameraVideoCompositor+Cursor.swift in Sources */ = {isa = PBXBuildFile; fileRef = BDF80A89DF90D50CD417CEE5 /* CameraVideoCompositor+Cursor.swift */; };
177178
CAB001000000000000000001 /* CameraBackgroundStyle.swift in Sources */ = {isa = PBXBuildFile; fileRef = CAB002000000000000000001 /* CameraBackgroundStyle.swift */; };
178179
CAB001000000000000000002 /* PersonSegmentationProcessor.swift in Sources */ = {isa = PBXBuildFile; fileRef = CAB002000000000000000002 /* PersonSegmentationProcessor.swift */; };
180+
CA10000000000000000000A1 /* WhisperModelManager.swift in Sources */ = {isa = PBXBuildFile; fileRef = CA20000000000000000000A1 /* WhisperModelManager.swift */; };
181+
CA10000000000000000000A2 /* TranscriptionService.swift in Sources */ = {isa = PBXBuildFile; fileRef = CA20000000000000000000A2 /* TranscriptionService.swift */; };
182+
CA10000000000000000000A3 /* SubtitleExporter.swift in Sources */ = {isa = PBXBuildFile; fileRef = CA20000000000000000000A3 /* SubtitleExporter.swift */; };
183+
CA10000000000000000000A4 /* EditorState+Captions.swift in Sources */ = {isa = PBXBuildFile; fileRef = CA20000000000000000000A4 /* EditorState+Captions.swift */; };
184+
CA10000000000000000000A5 /* PropertiesPanel+CaptionsTab.swift in Sources */ = {isa = PBXBuildFile; fileRef = CA20000000000000000000A5 /* PropertiesPanel+CaptionsTab.swift */; };
185+
CA10000000000000000000A6 /* CaptionOverlayView.swift in Sources */ = {isa = PBXBuildFile; fileRef = CA20000000000000000000A6 /* CaptionOverlayView.swift */; };
186+
CA10000000000000000000A7 /* CaptionSegmentRow.swift in Sources */ = {isa = PBXBuildFile; fileRef = CA20000000000000000000A7 /* CaptionSegmentRow.swift */; };
187+
CA10000000000000000000A8 /* CameraVideoCompositor+Captions.swift in Sources */ = {isa = PBXBuildFile; fileRef = CA20000000000000000000A8 /* CameraVideoCompositor+Captions.swift */; };
179188
/* End PBXBuildFile section */
180189

181190
/* Begin PBXFileReference section */
@@ -349,13 +358,22 @@
349358
BDF80A89DF90D50CD417CEE5 /* CameraVideoCompositor+Cursor.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "CameraVideoCompositor+Cursor.swift"; sourceTree = "<group>"; };
350359
CAB002000000000000000001 /* CameraBackgroundStyle.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CameraBackgroundStyle.swift; sourceTree = "<group>"; };
351360
CAB002000000000000000002 /* PersonSegmentationProcessor.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PersonSegmentationProcessor.swift; sourceTree = "<group>"; };
361+
CA20000000000000000000A1 /* WhisperModelManager.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = WhisperModelManager.swift; sourceTree = "<group>"; };
362+
CA20000000000000000000A2 /* TranscriptionService.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TranscriptionService.swift; sourceTree = "<group>"; };
363+
CA20000000000000000000A3 /* SubtitleExporter.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SubtitleExporter.swift; sourceTree = "<group>"; };
364+
CA20000000000000000000A4 /* EditorState+Captions.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "EditorState+Captions.swift"; sourceTree = "<group>"; };
365+
CA20000000000000000000A5 /* PropertiesPanel+CaptionsTab.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "PropertiesPanel+CaptionsTab.swift"; sourceTree = "<group>"; };
366+
CA20000000000000000000A6 /* CaptionOverlayView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CaptionOverlayView.swift; sourceTree = "<group>"; };
367+
CA20000000000000000000A7 /* CaptionSegmentRow.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CaptionSegmentRow.swift; sourceTree = "<group>"; };
368+
CA20000000000000000000A8 /* CameraVideoCompositor+Captions.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "CameraVideoCompositor+Captions.swift"; sourceTree = "<group>"; };
352369
/* End PBXFileReference section */
353370

354371
/* Begin PBXFrameworksBuildPhase section */
355372
C10000000000000000000001 /* Frameworks */ = {
356373
isa = PBXFrameworksBuildPhase;
357374
files = (
358375
3DFD8D482F4E1CA100B8BB8F /* RNNoise in Frameworks */,
376+
E10000000000000000000009 /* WhisperKit in Frameworks */,
359377
A10000000000000000000017 /* ScreenCaptureKit.framework in Frameworks */,
360378
A10000000000000000000018 /* AVFoundation.framework in Frameworks */,
361379
A10000000000000000000019 /* Logging in Frameworks */,
@@ -473,6 +491,9 @@
473491
DA000000000000000000001D /* RNNoiseProcessor.swift */,
474492
B1000000000000000000003E /* KeyboardShortcut.swift */,
475493
B1000000000000000000004C /* UpdateChecker.swift */,
494+
CA20000000000000000000A1 /* WhisperModelManager.swift */,
495+
CA20000000000000000000A2 /* TranscriptionService.swift */,
496+
CA20000000000000000000A3 /* SubtitleExporter.swift */,
476497
);
477498
path = Utilities;
478499
sourceTree = "<group>";
@@ -532,6 +553,8 @@
532553
B1000000000000000000004B /* SettingsAboutTab.swift */,
533554
B1000000000000000000004D /* PrimaryButton.swift */,
534555
B1000000000000000000004E /* SelectButton.swift */,
556+
CA20000000000000000000A6 /* CaptionOverlayView.swift */,
557+
CA20000000000000000000A7 /* CaptionSegmentRow.swift */,
535558
);
536559
path = UI;
537560
sourceTree = "<group>";
@@ -643,6 +666,8 @@
643666
B17A303B9C60827322EAE0A9 /* VideoPreviewContainer+Cursor.swift */,
644667
5F3469C70747A78B7BDD2B5C /* VideoPreviewContainer+Interaction.swift */,
645668
BC84C35E4DD49FCEA9DE6CBB /* CursorRenderer+Shapes.swift */,
669+
CA20000000000000000000A4 /* EditorState+Captions.swift */,
670+
CA20000000000000000000A5 /* PropertiesPanel+CaptionsTab.swift */,
646671
);
647672
path = Editor;
648673
sourceTree = "<group>";
@@ -678,6 +703,7 @@
678703
BDF80A89DF90D50CD417CEE5 /* CameraVideoCompositor+Cursor.swift */,
679704
CAB002000000000000000001 /* CameraBackgroundStyle.swift */,
680705
CAB002000000000000000002 /* PersonSegmentationProcessor.swift */,
706+
CA20000000000000000000A8 /* CameraVideoCompositor+Captions.swift */,
681707
);
682708
path = Compositor;
683709
sourceTree = "<group>";
@@ -700,6 +726,7 @@
700726
E10000000000000000000001 /* Logging */,
701727
E10000000000000000000002 /* MenuBarExtraAccess */,
702728
E10000000000000000000006 /* RNNoise */,
729+
E10000000000000000000008 /* WhisperKit */,
703730
);
704731
productName = Reframed;
705732
productReference = B1000000000000000000001B /* Reframed.app */;
@@ -732,6 +759,7 @@
732759
E10000000000000000000003 /* XCRemoteSwiftPackageReference "swift-log" */,
733760
E10000000000000000000004 /* XCRemoteSwiftPackageReference "MenuBarExtraAccess" */,
734761
E10000000000000000000005 /* XCRemoteSwiftPackageReference "rnnoise-spm" */,
762+
E10000000000000000000007 /* XCRemoteSwiftPackageReference "WhisperKit" */,
735763
);
736764
preferredProjectObjectVersion = 90;
737765
productRefGroup = D1000000000000000000000B /* Products */;
@@ -920,6 +948,14 @@
920948
CD88FB9E14F75D2DA0513D3C /* CameraVideoCompositor+Cursor.swift in Sources */,
921949
CAB001000000000000000001 /* CameraBackgroundStyle.swift in Sources */,
922950
CAB001000000000000000002 /* PersonSegmentationProcessor.swift in Sources */,
951+
CA10000000000000000000A1 /* WhisperModelManager.swift in Sources */,
952+
CA10000000000000000000A2 /* TranscriptionService.swift in Sources */,
953+
CA10000000000000000000A3 /* SubtitleExporter.swift in Sources */,
954+
CA10000000000000000000A4 /* EditorState+Captions.swift in Sources */,
955+
CA10000000000000000000A5 /* PropertiesPanel+CaptionsTab.swift in Sources */,
956+
CA10000000000000000000A6 /* CaptionOverlayView.swift in Sources */,
957+
CA10000000000000000000A7 /* CaptionSegmentRow.swift in Sources */,
958+
CA10000000000000000000A8 /* CameraVideoCompositor+Captions.swift in Sources */,
923959
);
924960
};
925961
/* End PBXSourcesBuildPhase section */
@@ -1183,6 +1219,14 @@
11831219
minimumVersion = 1.0.0;
11841220
};
11851221
};
1222+
E10000000000000000000007 /* XCRemoteSwiftPackageReference "WhisperKit" */ = {
1223+
isa = XCRemoteSwiftPackageReference;
1224+
repositoryURL = "https://github.com/argmaxinc/WhisperKit.git";
1225+
requirement = {
1226+
kind = upToNextMajorVersion;
1227+
minimumVersion = 0.15.0;
1228+
};
1229+
};
11861230
/* End XCRemoteSwiftPackageReference section */
11871231

11881232
/* Begin XCSwiftPackageProductDependency section */
@@ -1201,6 +1245,11 @@
12011245
package = E10000000000000000000005 /* XCRemoteSwiftPackageReference "rnnoise-spm" */;
12021246
productName = RNNoise;
12031247
};
1248+
E10000000000000000000008 /* WhisperKit */ = {
1249+
isa = XCSwiftPackageProductDependency;
1250+
package = E10000000000000000000007 /* XCRemoteSwiftPackageReference "WhisperKit" */;
1251+
productName = WhisperKit;
1252+
};
12041253
/* End XCSwiftPackageProductDependency section */
12051254
};
12061255
rootObject = F10000000000000000000002 /* Project object */;

Reframed.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved

Lines changed: 73 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)