-
-
Notifications
You must be signed in to change notification settings - Fork 765
Expand file tree
/
Copy path_cli.py
More file actions
218 lines (198 loc) · 7.69 KB
/
_cli.py
File metadata and controls
218 lines (198 loc) · 7.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
import argparse
from importlib.metadata import PackageNotFoundError, version
from typing import List
from .proxies import GenericProxyConfig, WebshareProxyConfig
from .formatters import FormatterLoader
from ._api import YouTubeTranscriptApi, FetchedTranscript, TranscriptList
class YouTubeTranscriptCli:
    """Command line front end built on top of ``YouTubeTranscriptApi``.

    The instance is created with the raw argument strings; :meth:`run`
    parses them, retrieves the requested data and returns the text that
    the caller is expected to print.
    """

    def __init__(self, args: List[str]):
        """Remember the raw command line arguments.

        :param args: argument strings that :meth:`run` hands to ``argparse``.
        """
        self._args = args

    def run(self) -> str:
        """Execute the command described by the stored arguments.

        Each video ID is handled independently: an exception raised while
        listing or fetching one video is collected and rendered as text
        instead of aborting the remaining videos.

        :return: the collected error messages followed by either the
            transcript listings or the formatted transcripts, separated by
            blank lines. An empty string if both ``--exclude-generated`` and
            ``--exclude-manually-created`` are set.
        """
        parsed_args = self._parse_args()

        # Excluding both kinds of transcript leaves nothing to retrieve.
        if parsed_args.exclude_manually_created and parsed_args.exclude_generated:
            return ""

        proxy_config = self._build_proxy_config(parsed_args)

        transcripts = []
        exceptions = []

        ytt_api = YouTubeTranscriptApi(
            proxy_config=proxy_config,
            cookies_from_browser=parsed_args.cookies_from_browser,
        )

        for video_id in parsed_args.video_ids:
            try:
                transcript_list = ytt_api.list(video_id)
                if parsed_args.list_transcripts:
                    transcripts.append(transcript_list)
                else:
                    transcripts.append(
                        self._fetch_transcript(parsed_args, transcript_list)
                    )
            except Exception as exception:
                # Deliberately broad: one failing video must not prevent the
                # others from being processed; the error is reported as text.
                exceptions.append(exception)

        return self._render_output(parsed_args, transcripts, exceptions)

    def _build_proxy_config(self, parsed_args):
        """Create the proxy configuration selected on the command line.

        :param parsed_args: namespace returned by :meth:`_parse_args`.
        :return: ``None`` if no proxy option was given, otherwise a
            ``GenericProxyConfig`` or ``WebshareProxyConfig``. If both kinds
            of options are present the Webshare configuration wins.
        """
        proxy_config = None

        if parsed_args.http_proxy != "" or parsed_args.https_proxy != "":
            proxy_config = GenericProxyConfig(
                http_url=parsed_args.http_proxy,
                https_url=parsed_args.https_proxy,
            )

        # NOTE(review): a single Webshare credential is enough to select the
        # Webshare proxy; the missing one is passed on as None. Confirm that
        # WebshareProxyConfig copes with (or rejects) a None credential.
        if (
            parsed_args.webshare_proxy_username is not None
            or parsed_args.webshare_proxy_password is not None
        ):
            proxy_config = WebshareProxyConfig(
                proxy_username=parsed_args.webshare_proxy_username,
                proxy_password=parsed_args.webshare_proxy_password,
            )

        return proxy_config

    def _render_output(self, parsed_args, transcripts, exceptions) -> str:
        """Join error messages and results into the final output text.

        :param parsed_args: namespace returned by :meth:`_parse_args`.
        :param transcripts: transcript lists (when ``--list-transcripts`` is
            set) or fetched transcripts, in processing order.
        :param exceptions: exceptions collected while processing the videos.
        :return: all sections separated by a blank line, errors first.
        """
        print_sections = [str(exception) for exception in exceptions]

        if transcripts:
            if parsed_args.list_transcripts:
                print_sections.extend(
                    str(transcript_list) for transcript_list in transcripts
                )
            else:
                # All fetched transcripts are formatted together as one section.
                formatter = FormatterLoader().load(parsed_args.format)
                print_sections.append(formatter.format_transcripts(transcripts))

        return "\n\n".join(print_sections)

    def _fetch_transcript(
        self,
        parsed_args,
        transcript_list: "TranscriptList",
    ) -> "FetchedTranscript":
        """Pick a transcript according to the CLI options and fetch it.

        :param parsed_args: namespace returned by :meth:`_parse_args`.
        :param transcript_list: transcripts available for a single video.
        :return: the fetched (and, if ``--translate`` is set, translated)
            transcript.
        """
        if parsed_args.exclude_manually_created:
            transcript = transcript_list.find_generated_transcript(
                parsed_args.languages
            )
        elif parsed_args.exclude_generated:
            transcript = transcript_list.find_manually_created_transcript(
                parsed_args.languages
            )
        else:
            transcript = transcript_list.find_transcript(parsed_args.languages)

        # The default of --translate is "", which means "do not translate".
        if parsed_args.translate:
            transcript = transcript.translate(parsed_args.translate)

        return transcript.fetch()

    def _get_version(self):
        """Return the installed package version, or ``"unknown"``.

        ``"unknown"`` is returned when no distribution metadata for
        ``youtube-transcript-api`` can be found.
        """
        try:
            return version("youtube-transcript-api")
        except PackageNotFoundError:
            return "unknown"

    def _parse_args(self):
        """Build the argument parser and parse the stored arguments.

        :return: the parsed ``argparse.Namespace`` with sanitized video IDs.
        """
        parser = argparse.ArgumentParser(
            description=(
                "This is a python API which allows you to get the transcripts/subtitles for a given YouTube video. "
                "It also works for automatically generated subtitles and it does not require a headless browser, like "
                "other selenium based solutions do!"
            )
        )
        parser.add_argument(
            "--version",
            action="version",
            version=f"%(prog)s, version {self._get_version()}",
        )
        parser.add_argument(
            "--list-transcripts",
            action="store_true",
            help="This will list the languages in which the given videos are available in.",
        )
        parser.add_argument(
            "video_ids", nargs="+", type=str, help="List of YouTube video IDs."
        )
        parser.add_argument(
            "--languages",
            nargs="*",
            default=[
                "en",
            ],
            type=str,
            help=(
                'A list of language codes in a descending priority. For example, if this is set to "de en" it will '
                "first try to fetch the german transcript (de) and then fetch the english transcript (en) if it fails "
                "to do so. As I can't provide a complete list of all working language codes with full certainty, you "
                "may have to play around with the language codes a bit, to find the one which is working for you!"
            ),
        )
        parser.add_argument(
            "--exclude-generated",
            action="store_true",
            help="If this flag is set transcripts which have been generated by YouTube will not be retrieved.",
        )
        parser.add_argument(
            "--exclude-manually-created",
            action="store_true",
            help="If this flag is set transcripts which have been manually created will not be retrieved.",
        )
        parser.add_argument(
            "--format",
            type=str,
            default="pretty",
            choices=tuple(FormatterLoader.TYPES.keys()),
            help="The formatter used to render the fetched transcripts (default: %(default)s).",
        )
        parser.add_argument(
            "--translate",
            default="",
            help=(
                "The language code for the language you want this transcript to be translated to. Use the "
                "--list-transcripts feature to find out which languages are translatable and which translation "
                "languages are available."
            ),
        )
        parser.add_argument(
            "--webshare-proxy-username",
            default=None,
            type=str,
            help='Specify your Webshare "Proxy Username" found at https://dashboard.webshare.io/proxy/settings',
        )
        parser.add_argument(
            "--webshare-proxy-password",
            default=None,
            type=str,
            help='Specify your Webshare "Proxy Password" found at https://dashboard.webshare.io/proxy/settings',
        )
        parser.add_argument(
            "--http-proxy",
            default="",
            metavar="URL",
            help="Use the specified HTTP proxy.",
        )
        parser.add_argument(
            "--https-proxy",
            default="",
            metavar="URL",
            help="Use the specified HTTPS proxy.",
        )
        parser.add_argument(
            "--cookies-from-browser",
            dest="cookies_from_browser",
            default=None,
            choices=[
                "chrome",
                "firefox",
                "edge",
                "brave",
                "chromium",
                "opera",
                "vivaldi",
            ],
            help=(
                "Extract cookies from the specified browser for authentication. "
                "This enables access to age-restricted videos. "
                "Supported browsers: chrome, firefox, edge, brave, chromium, opera, vivaldi. "
                "Note: Chrome-based browsers require the 'cryptography' package. "
                "Install with: pip install 'youtube-transcript-api[cookies]'"
            ),
        )

        return self._sanitize_video_ids(parser.parse_args(self._args))

    def _sanitize_video_ids(self, args):
        """Remove every backslash from the given video IDs.

        The namespace is modified in place and returned for convenience.

        :param args: parsed namespace carrying a ``video_ids`` list.
        :return: the same namespace with backslash-free ``video_ids``.
        """
        # Presumably undoes shell escaping such as "\-abc" for IDs that start
        # with a dash -- TODO confirm against the usage documentation.
        args.video_ids = [video_id.replace("\\", "") for video_id in args.video_ids]
        return args