|
1 | | -import warnings |
2 | 1 | from typing import Optional, Iterable |
3 | 2 |
|
4 | 3 | from requests import Session |
5 | 4 | from requests.adapters import HTTPAdapter |
6 | 5 | from urllib3 import Retry |
7 | 6 |
|
8 | | -from .proxies import ProxyConfig, GenericProxyConfig |
| 7 | +from .proxies import ProxyConfig |
9 | 8 |
|
10 | 9 | from ._transcripts import TranscriptListFetcher, FetchedTranscript, TranscriptList |
11 | 10 |
|
@@ -126,169 +125,3 @@ def list( |
126 | 125 | Make sure that this is the actual ID, NOT the full URL to the video! |
127 | 126 | """ |
128 | 127 | return self._fetcher.fetch(video_id) |
129 | | - |
130 | | - @classmethod |
131 | | - def list_transcripts(cls, video_id, proxies=None): |
132 | | - """ |
133 | | - DEPRECATED: use the `list` method instead! |
134 | | -
|
135 | | - Retrieves the list of transcripts which are available for a given video. It returns a `TranscriptList` object |
136 | | - which is iterable and provides methods to filter the list of transcripts for specific languages. While iterating |
137 | | - over the `TranscriptList` the individual transcripts are represented by `Transcript` objects, which provide |
138 | | - metadata and can either be fetched by calling `transcript.fetch()` or translated by calling |
139 | | - `transcript.translate('en')`. Example: |
140 | | -
|
141 | | - # retrieve the available transcripts |
142 | | - transcript_list = YouTubeTranscriptApi.list_transcripts('video_id') |
143 | | -
|
144 | | - # iterate over all available transcripts |
145 | | - for transcript in transcript_list: |
146 | | - # the Transcript object provides metadata properties |
147 | | - print( |
148 | | - transcript.video_id, |
149 | | - transcript.language, |
150 | | - transcript.language_code, |
151 | | - # whether it has been manually created or generated by YouTube |
152 | | - transcript.is_generated, |
153 | | - # a list of languages the transcript can be translated to |
154 | | - transcript.translation_languages, |
155 | | - ) |
156 | | -
|
157 | | - # fetch the actual transcript data |
158 | | - print(transcript.fetch()) |
159 | | -
|
160 | | - # translating the transcript will return another transcript object |
161 | | - print(transcript.translate('en').fetch()) |
162 | | -
|
163 | | - # you can also directly filter for the language you are looking for, using the transcript list |
164 | | - transcript = transcript_list.find_transcript(['de', 'en']) |
165 | | -
|
166 | | - # or just filter for manually created transcripts |
167 | | - transcript = transcript_list.find_manually_created_transcript(['de', 'en']) |
168 | | -
|
169 | | - # or automatically generated ones |
170 | | - transcript = transcript_list.find_generated_transcript(['de', 'en']) |
171 | | -
|
172 | | - :param video_id: the youtube video id |
173 | | - :type video_id: str |
174 | | - :param proxies: a dictionary mapping of http and https proxies to be used for the network requests |
175 | | - :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies |
176 | | - :return: the list of available transcripts |
177 | | - :rtype TranscriptList: |
178 | | - """ |
179 | | - warnings.warn( |
180 | | - "`list_transcripts` is deprecated and will be removed in a future version. " |
181 | | - "Use the `list` method instead!", |
182 | | - DeprecationWarning, |
183 | | - ) |
184 | | - |
185 | | - proxy_config = None |
186 | | - if proxies: |
187 | | - if isinstance(proxies, ProxyConfig): |
188 | | - proxy_config = proxies |
189 | | - else: |
190 | | - proxy_config = GenericProxyConfig( |
191 | | - http_url=proxies.get("http"), https_url=proxies.get("https") |
192 | | - ) |
193 | | - |
194 | | - ytt_api = YouTubeTranscriptApi( |
195 | | - proxy_config=proxy_config, |
196 | | - ) |
197 | | - return ytt_api.list(video_id) |
198 | | - |
199 | | - @classmethod |
200 | | - def get_transcripts( |
201 | | - cls, |
202 | | - video_ids, |
203 | | - languages=("en",), |
204 | | - continue_after_error=False, |
205 | | - proxies=None, |
206 | | - preserve_formatting=False, |
207 | | - ): |
208 | | - """ |
209 | | - DEPRECATED: use the `fetch` method instead! |
210 | | -
|
211 | | - Retrieves the transcripts for a list of videos. |
212 | | -
|
213 | | - :param video_ids: a list of youtube video ids |
214 | | - :type video_ids: list[str] |
215 | | - :param languages: A list of language codes in a descending priority. For example, if this is set to ['de', 'en'] |
216 | | - it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if it fails to |
217 | | - do so. |
218 | | - :type languages: list[str] |
219 | | - :param continue_after_error: if this is set, the execution won't be stopped when an error occurs while retrieving |
220 | | - one of the video transcripts |
221 | | - :type continue_after_error: bool |
222 | | - :param proxies: a dictionary mapping of http and https proxies to be used for the network requests |
223 | | - :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies |
224 | | - :param preserve_formatting: whether to keep select HTML text formatting |
225 | | - :type preserve_formatting: bool |
226 | | - :return: a tuple containing a dictionary mapping video ids onto their corresponding transcripts, and a list of |
227 | | - video ids, which could not be retrieved |
228 | | - :rtype ({str: [{'text': str, 'start': float, 'duration': float}]}, [str]): |
229 | | - """ |
230 | | - warnings.warn( |
231 | | - "`get_transcripts` is deprecated and will be removed in a future version. " |
232 | | - "Use the `fetch` method instead!", |
233 | | - DeprecationWarning, |
234 | | - ) |
235 | | - |
236 | | - assert isinstance(video_ids, list), "`video_ids` must be a list of strings" |
237 | | - |
238 | | - data = {} |
239 | | - unretrievable_videos = [] |
240 | | - |
241 | | - for video_id in video_ids: |
242 | | - try: |
243 | | - data[video_id] = cls.get_transcript( |
244 | | - video_id, languages, proxies, preserve_formatting |
245 | | - ) |
246 | | - except Exception as exception: |
247 | | - if not continue_after_error: |
248 | | - raise exception |
249 | | - |
250 | | - unretrievable_videos.append(video_id) |
251 | | - |
252 | | - return data, unretrievable_videos |
253 | | - |
254 | | - @classmethod |
255 | | - def get_transcript( |
256 | | - cls, |
257 | | - video_id, |
258 | | - languages=("en",), |
259 | | - proxies=None, |
260 | | - preserve_formatting=False, |
261 | | - ): |
262 | | - """ |
263 | | - DEPRECATED: use the `fetch` method instead! |
264 | | -
|
265 | | - Retrieves the transcript for a single video. This is just a shortcut for calling:: |
266 | | -
|
267 | | - YouTubeTranscriptApi.list_transcripts(video_id, proxies).find_transcript(languages).fetch() |
268 | | -
|
269 | | - :param video_id: the youtube video id |
270 | | - :type video_id: str |
271 | | - :param languages: A list of language codes in a descending priority. For example, if this is set to ['de', 'en'] |
272 | | - it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if it fails to |
273 | | - do so. |
274 | | - :type languages: list[str] |
275 | | - :param proxies: a dictionary mapping of http and https proxies to be used for the network requests |
276 | | - :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies |
277 | | - :param preserve_formatting: whether to keep select HTML text formatting |
278 | | - :type preserve_formatting: bool |
279 | | - :return: a list of dictionaries containing the 'text', 'start' and 'duration' keys |
280 | | - :rtype [{'text': str, 'start': float, 'duration': float}]: |
281 | | - """ |
282 | | - warnings.warn( |
283 | | - "`get_transcript` is deprecated and will be removed in a future version. " |
284 | | - "Use the `fetch` method instead!", |
285 | | - DeprecationWarning, |
286 | | - ) |
287 | | - |
288 | | - assert isinstance(video_id, str), "`video_id` must be a string" |
289 | | - return ( |
290 | | - cls.list_transcripts(video_id, proxies) |
291 | | - .find_transcript(languages) |
292 | | - .fetch(preserve_formatting=preserve_formatting) |
293 | | - .to_raw_data() |
294 | | - ) |
0 commit comments