Skip to content

Commit b78dc2d

Browse files
jkuri and claude committed
fix(captions): fix segment grouping, display linger, and export sync
Improve mergeShortSegments to merge bidirectionally with a 16-word cap, add 1.5s caption linger so text stays visible during speech gaps, and fix export compositor to apply trimStartSeconds offset for correct caption timing in trimmed exports. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent d5750f4 commit b78dc2d

File tree

3 files changed

+60
-23
lines changed

3 files changed

+60
-23
lines changed

Reframed/Compositor/CameraVideoCompositor+Captions.swift

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,12 @@ extension CameraVideoCompositor {
1111
) {
1212
guard instruction.captionsEnabled, !instruction.captionSegments.isEmpty else { return }
1313

14-
let time = CMTimeGetSeconds(compositionTime)
14+
let time = CMTimeGetSeconds(compositionTime) + instruction.trimStartSeconds
1515
guard
16-
let segment = instruction.captionSegments.first(where: {
17-
time >= $0.startSeconds && time < $0.endSeconds
18-
})
16+
let segment = captionSegmentAt(
17+
time: time,
18+
in: instruction.captionSegments
19+
)
1920
else { return }
2021

2122
let displayText = visibleText(
@@ -133,6 +134,30 @@ extension CameraVideoCompositor {
133134
context.restoreGState()
134135
}
135136

137+
/// Returns the caption segment that should be shown at `time`, if any.
///
/// A segment whose half-open range `[startSeconds, endSeconds)` contains `time` always wins.
/// When no segment is active, the most recently ended segment is kept on screen ("lingers")
/// for up to `maxLinger` seconds after its `endSeconds`.
///
/// - Parameters:
///   - time: Position in seconds, compared directly against each segment's
///     `startSeconds` / `endSeconds`.
///   - segments: Candidate segments. NOTE(review): the linger lookup picks the *last* array
///     element that has already ended, so it presumably expects segments ordered by time —
///     confirm against callers.
///   - maxLinger: Maximum number of seconds a finished segment stays visible when nothing
///     else is active. Defaults to 1.5.
/// - Returns: The active segment, otherwise a lingering previous segment, otherwise `nil`.
static func captionSegmentAt(
  time: Double,
  in segments: [CaptionSegment],
  maxLinger: Double = 1.5
) -> CaptionSegment? {
  // An active segment takes priority over any lingering one.
  if let active = segments.first(where: { time >= $0.startSeconds && time < $0.endSeconds }) {
    return active
  }

  guard
    let previous = segments.last(where: { $0.endSeconds <= time }),
    time - previous.endSeconds < maxLinger
  else { return nil }

  // No extra "has the next segment started?" test is needed here: a following segment can
  // only cut the linger short by becoming active, which the lookup above already handles.
  // Comparing `time` against the first segment that starts strictly after `time` can never
  // succeed, so that branch was unreachable and has been dropped.
  return previous
}
160+
136161
private static func visibleText(
137162
for segment: CaptionSegment,
138163
at time: Double,

Reframed/Editor/EditorState+Captions.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ extension EditorState {
6262
}
6363

6464
/// Looks up the caption segment to show at `time` among this state's `captionSegments`.
///
/// The lookup is delegated to `CameraVideoCompositor.captionSegmentAt(time:in:)`, so this
/// method returns whatever that shared routine returns for the same inputs.
///
/// - Parameter time: Position in seconds, passed through unchanged.
/// - Returns: The segment chosen by the shared lookup, or `nil` when it finds none.
func captionAtTime(_ time: Double) -> CaptionSegment? {
  return CameraVideoCompositor.captionSegmentAt(time: time, in: captionSegments)
}
6767

6868
func visibleCaptionText(at time: Double) -> String? {

Reframed/Utilities/TranscriptionService.swift

Lines changed: 30 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -112,31 +112,43 @@ enum TranscriptionService {
112112
guard segments.count > 1 else { return segments }
113113

114114
let minWordCount = 4
115-
let maxGap = 2.0
115+
let maxMergedWordCount = 16
116+
let maxGap = 1.5
116117

117118
var merged: [CaptionSegment] = []
118119
for segment in segments {
119-
let wordCount = segment.text.split(separator: " ").count
120-
if wordCount < minWordCount,
121-
let last = merged.last,
122-
segment.startSeconds - last.endSeconds < maxGap
123-
{
124-
let combinedText = last.text + " " + segment.text
125-
let combinedWords: [CaptionWord]? = {
126-
guard let lw = last.words, let sw = segment.words else { return last.words ?? segment.words }
127-
return lw + sw
128-
}()
129-
merged[merged.count - 1] = CaptionSegment(
130-
id: last.id,
131-
startSeconds: last.startSeconds,
132-
endSeconds: segment.endSeconds,
133-
text: combinedText,
134-
words: combinedWords
135-
)
120+
let currentWordCount = segment.text.split(separator: " ").count
121+
122+
if let lastIdx = merged.indices.last {
123+
let last = merged[lastIdx]
124+
let lastWordCount = last.text.split(separator: " ").count
125+
let gap = segment.startSeconds - last.endSeconds
126+
let canMerge =
127+
gap < maxGap && (lastWordCount + currentWordCount) <= maxMergedWordCount
128+
129+
if canMerge && (currentWordCount < minWordCount || lastWordCount < minWordCount) {
130+
let combinedText = last.text + " " + segment.text
131+
let combinedWords: [CaptionWord]? = {
132+
guard let lw = last.words, let sw = segment.words else {
133+
return last.words ?? segment.words
134+
}
135+
return lw + sw
136+
}()
137+
merged[lastIdx] = CaptionSegment(
138+
id: last.id,
139+
startSeconds: last.startSeconds,
140+
endSeconds: segment.endSeconds,
141+
text: combinedText,
142+
words: combinedWords
143+
)
144+
} else {
145+
merged.append(segment)
146+
}
136147
} else {
137148
merged.append(segment)
138149
}
139150
}
151+
140152
return merged
141153
}
142154

0 commit comments

Comments
 (0)