Skip to content

Commit 7829aef

Browse files
authored
Merge pull request #387 from ShaneIsrael/copilot/investigate-file-corruption-issue
Fix transcoding corruption handling: distinguish corruption vs encoder failures, add AV1 leniency
2 parents 1cf3611 + 9419a78 commit 7829aef

2 files changed

Lines changed: 75 additions & 15 deletions

File tree

app/server/fireshare/cli.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -548,11 +548,13 @@ def transcode_videos(regenerate, video):
548548
logger.info(f"Transcoding {vi.video_id} to 1080p")
549549
# Pass None for timeout to use smart calculation, or pass base_timeout if needed
550550
timeout = None # Uses smart calculation based on video duration
551-
success = util.transcode_video_quality(video_path, transcode_1080p_path, 1080, use_gpu, timeout)
551+
success, failure_reason = util.transcode_video_quality(video_path, transcode_1080p_path, 1080, use_gpu, timeout)
552552
if success:
553553
vi.has_1080p = True
554554
db.session.add(vi)
555555
db.session.commit()
556+
elif failure_reason == 'corruption':
557+
logger.warning(f"Skipping video {vi.video_id} 1080p transcode - source file appears corrupt")
556558
else:
557559
logger.warning(f"Skipping video {vi.video_id} 1080p transcode - all encoders failed")
558560
elif transcode_1080p_path.exists():
@@ -567,11 +569,13 @@ def transcode_videos(regenerate, video):
567569
logger.info(f"Transcoding {vi.video_id} to 720p")
568570
# Pass None for timeout to use smart calculation, or pass base_timeout if needed
569571
timeout = None # Uses smart calculation based on video duration
570-
success = util.transcode_video_quality(video_path, transcode_720p_path, 720, use_gpu, timeout)
572+
success, failure_reason = util.transcode_video_quality(video_path, transcode_720p_path, 720, use_gpu, timeout)
571573
if success:
572574
vi.has_720p = True
573575
db.session.add(vi)
574576
db.session.commit()
577+
elif failure_reason == 'corruption':
578+
logger.warning(f"Skipping video {vi.video_id} 720p transcode - source file appears corrupt")
575579
else:
576580
logger.warning(f"Skipping video {vi.video_id} 720p transcode - all encoders failed")
577581
elif transcode_720p_path.exists():

app/server/fireshare/util.py

Lines changed: 69 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,29 @@
2222
"Could not find codec parameters",
2323
]
2424

25+
# Corruption indicators that are known false positives for AV1 files
26+
# These warnings can occur during initial frame decoding of valid AV1 files
27+
# and should be ignored if the decode test succeeds (returncode 0)
28+
# Note: Values are lowercase for consistent case-insensitive matching
29+
AV1_FALSE_POSITIVE_INDICATORS = frozenset([
30+
"corrupt frame detected",
31+
"no sequence header",
32+
"error submitting packet to decoder",
33+
"decode error rate",
34+
"invalid nal unit size",
35+
"non-existing pps",
36+
])
37+
38+
# Known AV1 codec names as reported by ffprobe (lowercase for matching)
39+
# These are used to detect AV1-encoded source files for special handling
40+
AV1_CODEC_NAMES = frozenset([
41+
'av1',
42+
'libaom-av1',
43+
'libsvtav1',
44+
'av1_nvenc',
45+
'av1_qsv',
46+
])
47+
2548
def lock_exists(path: Path):
2649
"""
2750
Checks if a lockfile currently exists
@@ -93,6 +116,9 @@ def validate_video_file(path, timeout=30):
93116
This function performs a quick decode test on the first few seconds of the video
94117
to detect corruption issues like missing sequence headers, corrupt frames, etc.
95118
119+
For AV1 files, validation is more lenient as some AV1 encoders produce files
120+
that generate warnings during initial frame decoding but play back correctly.
121+
96122
Args:
97123
path: Path to the video file
98124
timeout: Maximum time in seconds to wait for validation (default: 30)
@@ -124,13 +150,24 @@ def validate_video_file(path, timeout=30):
124150
return False, f"ffprobe failed: {error_msg}"
125151

126152
# Check if we got valid stream data
153+
# Note: -select_streams v:0 in probe_cmd ensures only video streams are returned
127154
try:
128155
probe_data = json.loads(probe_result.stdout)
129-
if not probe_data.get('streams') or len(probe_data['streams']) == 0:
156+
streams = probe_data.get('streams', [])
157+
if not streams:
130158
return False, "No video streams found in file"
131159
except json.JSONDecodeError:
132160
return False, "Failed to parse video metadata"
133161

162+
# Get the codec name from the video stream
163+
# Safe to access streams[0] because we checked for empty streams above
164+
video_stream = streams[0]
165+
codec_name = video_stream.get('codec_name', '').lower()
166+
167+
# Detect if the source file is AV1-encoded
168+
# AV1 files may produce false positive corruption warnings during initial frame decoding
169+
is_av1_source = codec_name in AV1_CODEC_NAMES
170+
134171
# Now perform a quick decode test by decoding the first 2 seconds
135172
# This catches issues like "No sequence header" or "Corrupt frame detected"
136173
decode_cmd = [
@@ -145,20 +182,36 @@ def validate_video_file(path, timeout=30):
145182
# Check for decode errors - only treat as error if return code is non-zero
146183
# or if stderr contains known corruption indicators
147184
stderr = decode_result.stderr.strip() if decode_result.stderr else ""
185+
stderr_lower = stderr.lower()
148186

149187
if decode_result.returncode != 0:
150188
# Decode failed - check for specific corruption indicators
151189
for indicator in VIDEO_CORRUPTION_INDICATORS:
152-
if indicator.lower() in stderr.lower():
190+
if indicator.lower() in stderr_lower:
153191
return False, f"Video file appears to be corrupt: {indicator}"
154192
# Generic decode failure
155193
return False, f"Decode test failed: {stderr[:200] if stderr else 'Unknown error'}"
156194

157-
# Return code is 0, but check for corruption indicators in warnings
158-
# These are serious enough to indicate corruption even if ffmpeg "succeeded"
159-
for indicator in VIDEO_CORRUPTION_INDICATORS:
160-
if indicator.lower() in stderr.lower():
161-
return False, f"Video file appears to be corrupt: {indicator}"
195+
# Return code is 0 (success), but check for corruption indicators in warnings
196+
# For AV1 files, be more lenient - if ffmpeg succeeded (returncode 0),
197+
# don't fail on warnings that are known false positives for AV1
198+
if is_av1_source:
199+
# For AV1 files, only fail on indicators that are NOT known false positives
200+
# Check each corruption indicator, but skip known false positives for AV1
201+
for indicator in VIDEO_CORRUPTION_INDICATORS:
202+
indicator_lower = indicator.lower()
203+
if indicator_lower in AV1_FALSE_POSITIVE_INDICATORS:
204+
continue # Skip known false positives for AV1
205+
if indicator_lower in stderr_lower:
206+
return False, f"Video file appears to be corrupt: {indicator}"
207+
# Log a debug message if we're ignoring warnings for AV1
208+
if stderr:
209+
logger.debug(f"AV1 file had warnings during validation (ignoring since decode succeeded): {stderr[:100]}")
210+
else:
211+
# For non-AV1 files, check all corruption indicators as before
212+
for indicator in VIDEO_CORRUPTION_INDICATORS:
213+
if indicator.lower() in stderr_lower:
214+
return False, f"Video file appears to be corrupt: {indicator}"
162215

163216
return True, None
164217

@@ -435,7 +488,10 @@ def transcode_video_quality(video_path, out_path, height, use_gpu=False, timeout
435488
timeout_seconds: Maximum time allowed for encoding (default: calculated based on video duration)
436489
437490
Returns:
438-
bool: True if transcoding succeeded, False if all encoders failed
491+
tuple: (success: bool, failure_reason: str or None)
492+
- (True, None) if transcoding succeeded
493+
- (False, 'corruption') if source file appears corrupt
494+
- (False, 'encoders') if all encoders failed
439495
"""
440496
global _working_encoder_cache
441497
s = time.time()
@@ -446,7 +502,7 @@ def transcode_video_quality(video_path, out_path, height, use_gpu=False, timeout
446502
if not is_valid:
447503
logger.error(f"Source video validation failed: {error_msg}")
448504
logger.warning("Skipping transcoding for this video due to file corruption or read errors")
449-
return False
505+
return (False, 'corruption')
450506

451507
# Calculate smart timeout based on video duration if not provided
452508
if timeout_seconds is None:
@@ -475,7 +531,7 @@ def transcode_video_quality(video_path, out_path, height, use_gpu=False, timeout
475531
if result.returncode == 0:
476532
e = time.time()
477533
logger.info(f'Transcoded {str(out_path)} to {height}p in {e-s:.2f}s')
478-
return True
534+
return (True, None)
479535
else:
480536
# Cached encoder failed - clear cache and fall through to try all encoders
481537
logger.warning(f"Cached encoder {encoder['name']} failed with exit code {result.returncode}")
@@ -605,7 +661,7 @@ def transcode_video_quality(video_path, out_path, height, use_gpu=False, timeout
605661
_working_encoder_cache[mode] = encoder
606662
e = time.time()
607663
logger.info(f'Transcoded {str(out_path)} to {height}p in {e-s:.2f}s')
608-
return True
664+
return (True, None)
609665
else:
610666
logger.warning(f"✗ {encoder['name']} failed with exit code {result.returncode}")
611667
last_exception = Exception(f"Transcode failed with exit code {result.returncode}")
@@ -643,9 +699,9 @@ def transcode_video_quality(video_path, out_path, height, use_gpu=False, timeout
643699
if last_exception:
644700
logger.error(f"Last error was: {last_exception}")
645701

646-
# Return False to indicate failure instead of raising exception
702+
# Return failure with 'encoders' reason to indicate encoder failure (not corruption)
647703
# This allows the calling code to continue processing other videos
648-
return False
704+
return (False, 'encoders')
649705

650706
def create_boomerang_preview(video_path, out_path, clip_duration=1.5):
651707
# https://stackoverflow.com/questions/65874316/trim-a-video-and-add-the-boomerang-effect-on-it-with-ffmpeg

0 commit comments

Comments
 (0)