Skip to content

Commit 67aa5b7

Browse files
committed
Added specific error messages for when RequestBlocked is raised even though a proxy is being used
1 parent bdf96f6 commit 67aa5b7

4 files changed

Lines changed: 93 additions & 15 deletions

File tree

README.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,8 @@ therefore integrated it into this module, to make setting it up as easy as possi
284284

285285
Once you have created a [Webshare account](https://www.webshare.io/?referral_code=w0xno53eb50g) and purchased a
286286
"Residential" proxy package that suits your workload (make sure NOT to purchase "Proxy Server" or
287-
"Static Residential"!), open the [Webshare Proxy Settings](https://dashboard.webshare.io/proxy/settings) to retrieve
287+
"Static Residential"!), open the
288+
[Webshare Proxy Settings](https://dashboard.webshare.io/proxy/settings?referral_code=w0xno53eb50g) to retrieve
288289
your "Proxy Username" and "Proxy Password". Using this information you can initialize the `YouTubeTranscriptApi` as
289290
follows:
290291

@@ -306,8 +307,8 @@ ytt_api.fetch(video_id)
306307
Using the `WebshareProxyConfig` will default to using rotating residential proxies and requires no further
307308
configuration.
308309

309-
Note that referral links are used here and any purchases made through these links will support this Open Source
310-
project, which is very much appreciated! 💖😊🙏💖
310+
Note that [referral links are used here](https://www.webshare.io/?referral_code=w0xno53eb50g) and any purchases
311+
made through these links will support this Open Source project, which is very much appreciated! 💖😊🙏💖
311312

312313
However, you are of course free to integrate your own proxy solution using the `GenericProxyConfig` class, if you
313314
prefer using another provider or want to implement your own solution, as covered by the following section.
@@ -511,7 +512,7 @@ using residential proxies as explained in
511512
create a [Webshare account](https://www.webshare.io/?referral_code=w0xno53eb50g) and purchase a "Residential" proxy
512513
package that suits your workload (make sure NOT to purchase "Proxy Server" or "Static Residential"!). Then you can use
513514
the "Proxy Username" and "Proxy Password" which you can find in your
514-
[Webshare Proxy Settings](https://dashboard.webshare.io/proxy/settings), to run the following command:
515+
[Webshare Proxy Settings](https://dashboard.webshare.io/proxy/settings?referral_code=w0xno53eb50g) to run the following command:
515516

516517
```
517518
youtube_transcript_api <first_video_id> <second_video_id> --webshare-proxy-username "username" --webshare-proxy-password "password"

youtube_transcript_api/_errors.py

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from requests import HTTPError
55

66
from ._settings import WATCH_URL
7+
from .proxies import ProxyConfig, GenericProxyConfig, WebshareProxyConfig
78

89

910
class YouTubeTranscriptApiException(Exception):
@@ -45,7 +46,7 @@ class CouldNotRetrieveTranscript(YouTubeTranscriptApiException):
4546

4647
def __init__(self, video_id: str):
4748
self.video_id = video_id
48-
super().__init__(self._build_error_message())
49+
super().__init__()
4950

5051
def _build_error_message(self) -> str:
5152
error_message = self.ERROR_MESSAGE.format(
@@ -64,6 +65,9 @@ def _build_error_message(self) -> str:
6465
def cause(self) -> str:
6566
return self.CAUSE_MESSAGE
6667

68+
def __str__(self) -> str:
69+
return self._build_error_message()
70+
6771

6872
class YouTubeRequestFailed(CouldNotRetrieveTranscript):
6973
CAUSE_MESSAGE = "Request to YouTube failed: {reason}"
@@ -135,6 +139,51 @@ class RequestBlocked(CouldNotRetrieveTranscript):
135139
"eventually permanently ban the account that you have used to authenticate "
136140
"with! So only do this if you don't mind your account being banned!"
137141
)
142+
WITH_GENERIC_PROXY_CAUSE_MESSAGE = (
143+
"YouTube is blocking your requests, despite you using proxies. Keep in mind "
144+
"a proxy is just a way to hide your real IP behind the IP of that proxy, but "
145+
"there is no guarantee that the IP of that proxy won't be blocked as well.\n\n"
146+
"The only truly reliable way to prevent IP blocks is rotating through a large "
147+
"pool of residential IPs, by using a provider like Webshare "
148+
"(https://www.webshare.io/?referral_code=w0xno53eb50g), which provides you "
149+
"with a pool of >30M residential IPs (make sure to purchase "
150+
'"Residential" proxies, NOT "Proxy Server" or "Static Residential"!).\n\n'
151+
"You will find more information on how to easily integrate Webshare here: "
152+
"https://github.com/jdepoix/youtube-transcript-api"
153+
"?tab=readme-ov-file#using-webshare"
154+
)
155+
WITH_WEBSHARE_PROXY_CAUSE_MESSAGE = (
156+
"YouTube is blocking your requests, despite you using Webshare proxies. "
157+
'Please make sure that you have purchased "Residential" proxies and '
158+
'NOT "Proxy Server" or "Static Residential", as those won\'t work as '
159+
'reliably! The free tier also uses "Proxy Server" and will NOT work!\n\n'
160+
'The only reliable option is using "Residential" proxies (not "Static '
161+
'Residential"), as this allows you to rotate through a pool of over 30M IPs, '
162+
"which means you will always find an IP that hasn't been blocked by YouTube "
163+
"yet!\n\n"
164+
"You can support the development of this open source project by making your "
165+
"Webshare purchases through this affiliate link: "
166+
"https://www.webshare.io/?referral_code=w0xno53eb50g \n\n"
167+
"Thank you for your support! <3"
168+
)
169+
170+
def __init__(self, video_id: str):
171+
self._proxy_config = None
172+
super().__init__(video_id)
173+
174+
def with_proxy_config(
175+
self, proxy_config: Optional[ProxyConfig]
176+
) -> "RequestBlocked":
177+
self._proxy_config = proxy_config
178+
return self
179+
180+
@property
181+
def cause(self) -> str:
182+
if isinstance(self._proxy_config, WebshareProxyConfig):
183+
return self.WITH_WEBSHARE_PROXY_CAUSE_MESSAGE
184+
if isinstance(self._proxy_config, GenericProxyConfig):
185+
return self.WITH_GENERIC_PROXY_CAUSE_MESSAGE
186+
return super().cause
138187

139188

140189
class IpBlocked(RequestBlocked):

youtube_transcript_api/_transcripts.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -364,7 +364,7 @@ def _fetch_captions_json(self, video_id: str, try_number: int = 0) -> Dict:
364364
)
365365
if try_number + 1 < retries:
366366
return self._fetch_captions_json(video_id, try_number=try_number + 1)
367-
raise exception
367+
raise exception.with_proxy_config(self._proxy_config)
368368

369369
def _extract_captions_json(self, html: str, video_id: str) -> Dict:
370370
splitted_html = html.split("var ytInitialPlayerResponse = ")

youtube_transcript_api/test/test_api.py

Lines changed: 37 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -247,9 +247,11 @@ def test_fetch__exception_if_youtube_request_fails(self):
247247
httpretty.GET, "https://www.youtube.com/watch", status=500
248248
)
249249

250-
with self.assertRaises(YouTubeRequestFailed):
250+
with self.assertRaises(YouTubeRequestFailed) as cm:
251251
YouTubeTranscriptApi().fetch("abc")
252252

253+
self.assertIn("Request to YouTube failed: ", str(cm.exception))
254+
253255
def test_fetch__exception_if_age_restricted(self):
254256
httpretty.register_uri(
255257
httpretty.GET,
@@ -277,21 +279,24 @@ def test_fetch__exception_request_blocked(self):
277279
body=load_asset("youtube_request_blocked.html.static"),
278280
)
279281

280-
with self.assertRaises(RequestBlocked):
282+
with self.assertRaises(RequestBlocked) as cm:
281283
YouTubeTranscriptApi().fetch("Njp5uhTorCo")
282284

285+
self.assertIn("YouTube is blocking requests from your IP", str(cm.exception))
286+
283287
def test_fetch__exception_unplayable(self):
284288
httpretty.register_uri(
285289
httpretty.GET,
286290
"https://www.youtube.com/watch",
287291
body=load_asset("youtube_unplayable.html.static"),
288292
)
289293

290-
with self.assertRaises(VideoUnplayable) as error:
294+
with self.assertRaises(VideoUnplayable) as cm:
291295
YouTubeTranscriptApi().fetch("Njp5uhTorCo")
292-
error = error.exception
293-
self.assertEqual(error.reason, "Custom Reason")
294-
self.assertEqual(error.sub_reasons, ["Sub Reason 1", "Sub Reason 2"])
296+
exception = cm.exception
297+
self.assertEqual(exception.reason, "Custom Reason")
298+
self.assertEqual(exception.sub_reasons, ["Sub Reason 1", "Sub Reason 2"])
299+
self.assertIn("Custom Reason", str(exception))
295300

296301
def test_fetch__exception_if_transcripts_disabled(self):
297302
httpretty.register_uri(
@@ -312,9 +317,11 @@ def test_fetch__exception_if_transcripts_disabled(self):
312317
YouTubeTranscriptApi().fetch("Fjg5lYqvzUs")
313318

314319
def test_fetch__exception_if_language_unavailable(self):
315-
with self.assertRaises(NoTranscriptFound):
320+
with self.assertRaises(NoTranscriptFound) as cm:
316321
YouTubeTranscriptApi().fetch("GJLlxj_dtq8", languages=["cz"])
317322

323+
self.assertIn("No transcripts were found for", str(cm.exception))
324+
318325
@patch("youtube_transcript_api.proxies.GenericProxyConfig.to_requests_dict")
319326
def test_fetch__with_proxy(self, to_requests_dict):
320327
proxy_config = GenericProxyConfig(
@@ -359,7 +366,7 @@ def test_fetch__with_proxy_retry_when_blocked(self, to_requests_dict):
359366
self.assertEqual(len(httpretty.latest_requests()), 3 + 2)
360367

361368
@patch("youtube_transcript_api.proxies.GenericProxyConfig.to_requests_dict")
362-
def test_fetch__with_proxy_reraise_when_blocked(self, to_requests_dict):
369+
def test_fetch__with_webshare_proxy_reraise_when_blocked(self, to_requests_dict):
363370
retries = 5
364371
for _ in range(retries):
365372
httpretty.register_uri(
@@ -373,10 +380,31 @@ def test_fetch__with_proxy_reraise_when_blocked(self, to_requests_dict):
373380
retries_when_blocked=retries,
374381
)
375382

376-
with self.assertRaises(RequestBlocked):
383+
with self.assertRaises(RequestBlocked) as cm:
377384
YouTubeTranscriptApi(proxy_config=proxy_config).fetch("Njp5uhTorCo")
378385

379386
self.assertEqual(len(httpretty.latest_requests()), retries)
387+
self.assertEqual(cm.exception._proxy_config, proxy_config)
388+
self.assertIn("Webshare", str(cm.exception))
389+
390+
@patch("youtube_transcript_api.proxies.GenericProxyConfig.to_requests_dict")
391+
def test_fetch__with_generic_proxy_reraise_when_blocked(self, to_requests_dict):
392+
httpretty.register_uri(
393+
httpretty.GET,
394+
"https://www.youtube.com/watch",
395+
body=load_asset("youtube_request_blocked.html.static"),
396+
)
397+
proxy_config = GenericProxyConfig(
398+
http_url="http://localhost:8080",
399+
https_url="http://localhost:8080",
400+
)
401+
402+
with self.assertRaises(RequestBlocked) as cm:
403+
YouTubeTranscriptApi(proxy_config=proxy_config).fetch("Njp5uhTorCo")
404+
405+
self.assertEqual(len(httpretty.latest_requests()), 1)
406+
self.assertEqual(cm.exception._proxy_config, proxy_config)
407+
self.assertIn("YouTube is blocking your requests", str(cm.exception))
380408

381409
def test_fetch__with_cookies(self):
382410
cookie_path = get_asset_path("example_cookies.txt")

0 commit comments

Comments
 (0)