Skip to content

Commit 56f7579

Browse files
JoshClark-git, jca351, afourney
authored
FIX YouTube transcript errors (#1241)
* FIX YouTube transcript errors
* Fixed formatting.

---------

Co-authored-by: Josh <[email protected]>
Co-authored-by: afourney <[email protected]>
1 parent cb421cf commit 56f7579

File tree

1 file changed

+18
-2
lines changed

1 file changed

+18
-2
lines changed

packages/markitdown/src/markitdown/converters/_youtube_converter.py

Lines changed: 18 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -151,9 +151,14 @@ def convert(
151151
params = parse_qs(parsed_url.query) # type: ignore
152152
if "v" in params and params["v"][0]:
153153
video_id = str(params["v"][0])
154+
transcript_list = ytt_api.list(video_id)
155+
languages = ["en"]
156+
for transcript in transcript_list:
157+
languages.append(transcript.language_code)
158+
break
154159
try:
155160
youtube_transcript_languages = kwargs.get(
156-
"youtube_transcript_languages", ("en",)
161+
"youtube_transcript_languages", languages
157162
)
158163
# Retry the transcript fetching operation
159164
transcript = self._retry_operation(
@@ -163,12 +168,23 @@ def convert(
163168
retries=3, # Retry 3 times
164169
delay=2, # 2 seconds delay between retries
165170
)
171+
166172
if transcript:
167173
transcript_text = " ".join(
168174
[part.text for part in transcript]
169175
) # type: ignore
170176
except Exception as e:
171-
print(f"Error fetching transcript: {e}")
177+
# No transcript available
178+
if len(languages) == 1:
179+
print(f"Error fetching transcript: {e}")
180+
else:
181+
# Translate transcript into first kwarg
182+
transcript = (
183+
transcript_list.find_transcript(languages)
184+
.translate(youtube_transcript_languages[0])
185+
.fetch()
186+
)
187+
transcript_text = " ".join([part.text for part in transcript])
172188
if transcript_text:
173189
webpage_text += f"\n### Transcript\n{transcript_text}\n"
174190

0 commit comments

Comments
 (0)