diff --git a/README.md b/README.md
index 87d3723..ad271f3 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,3 @@
-
✨ YouTube Transcript API ✨
@@ -365,6 +364,30 @@ Using the CLI:
youtube_transcript_api --cookies /path/to/your/cookies.txt
```
+## SSL Verification
+
+You can customize SSL certificate verification by providing a path to a custom certificate bundle or disabling verification entirely:
+
+```python
+from youtube_transcript_api import YouTubeTranscriptApi
+
+# Using custom certificate bundle
+YouTubeTranscriptApi.get_transcript(video_id, verify='/path/to/cacert.pem')
+
+# Disabling SSL verification (not recommended for production)
+YouTubeTranscriptApi.get_transcript(video_id, verify=False)
+```
+
+Using the CLI:
+
+```
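+# Using custom certificate bundle
+youtube_transcript_api <first_video_id> <second_video_id> --verify /path/to/cacert.pem
+
+# Disabling SSL verification (not recommended for production)
+youtube_transcript_api <first_video_id> <second_video_id> --verify False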
+```
+
## Warning
This code uses an undocumented part of the YouTube API, which is called by the YouTube web-client. So there is no guarantee that it won't stop working tomorrow, if they change how things work. I will however do my best to make things working again as soon as possible if that happens. So if it stops working, let me know!
diff --git a/youtube_transcript_api/_api.py b/youtube_transcript_api/_api.py
index bf1f240..80a61fa 100644
--- a/youtube_transcript_api/_api.py
+++ b/youtube_transcript_api/_api.py
@@ -16,7 +16,7 @@
class YouTubeTranscriptApi(object):
@classmethod
- def list_transcripts(cls, video_id, proxies=None, cookies=None):
+ def list_transcripts(cls, video_id, proxies=None, cookies=None, verify=None):
"""
Retrieves the list of transcripts which are available for a given video. It returns a `TranscriptList` object
which is iterable and provides methods to filter the list of transcripts for specific languages. While iterating
@@ -61,6 +61,8 @@ def list_transcripts(cls, video_id, proxies=None, cookies=None):
:type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies
:param cookies: a string of the path to a text file containing youtube authorization cookies
:type cookies: str
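+ :param verify: path to a custom certificate bundle, or a boolean that enables/disables SSL certificate verification; if None, the HTTP client's own setting is left untouched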
+ :type verify: str|bool|None
:return: the list of available transcripts
:rtype TranscriptList:
"""
@@ -68,6 +70,8 @@ def list_transcripts(cls, video_id, proxies=None, cookies=None):
if cookies:
http_client.cookies = cls._load_cookies(cookies, video_id)
http_client.proxies = proxies if proxies else {}
+ if verify is not None:
+ http_client.verify = verify
return TranscriptListFetcher(http_client).fetch(video_id)
@classmethod
@@ -79,6 +83,7 @@ def get_transcripts(
proxies=None,
cookies=None,
preserve_formatting=False,
+ verify=None,
):
"""
Retrieves the transcripts for a list of videos.
@@ -98,6 +103,8 @@ def get_transcripts(
:type cookies: str
:param preserve_formatting: whether to keep select HTML text formatting
:type preserve_formatting: bool
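+ :param verify: path to a custom certificate bundle, or a boolean that enables/disables SSL certificate verification; passed on unchanged to `get_transcript` for every video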
+ :type verify: str|bool|None
:return: a tuple containing a dictionary mapping video ids onto their corresponding transcripts, and a list of
video ids, which could not be retrieved
:rtype ({str: [{'text': str, 'start': float, 'end': float}]}, [str]}):
@@ -110,7 +117,7 @@ def get_transcripts(
for video_id in video_ids:
try:
data[video_id] = cls.get_transcript(
- video_id, languages, proxies, cookies, preserve_formatting
+ video_id, languages, proxies, cookies, preserve_formatting, verify
)
except Exception as exception:
if not continue_after_error:
@@ -128,6 +135,7 @@ def get_transcript(
proxies=None,
cookies=None,
preserve_formatting=False,
+ verify=None,
):
"""
Retrieves the transcript for a single video. This is just a shortcut for calling::
@@ -146,12 +154,14 @@ def get_transcript(
:type cookies: str
:param preserve_formatting: whether to keep select HTML text formatting
:type preserve_formatting: bool
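+ :param verify: path to a custom certificate bundle, or a boolean that enables/disables SSL certificate verification; passed on unchanged to `list_transcripts`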
+ :type verify: str|bool|None
:return: a list of dictionaries containing the 'text', 'start' and 'duration' keys
:rtype [{'text': str, 'start': float, 'end': float}]:
"""
assert isinstance(video_id, str), "`video_id` must be a string"
return (
- cls.list_transcripts(video_id, proxies, cookies)
+ cls.list_transcripts(video_id, proxies, cookies, verify)
.find_transcript(languages)
.fetch(preserve_formatting=preserve_formatting)
)
diff --git a/youtube_transcript_api/_cli.py b/youtube_transcript_api/_cli.py
index 09f76ba..3d1adec 100644
--- a/youtube_transcript_api/_cli.py
+++ b/youtube_transcript_api/_cli.py
@@ -47,7 +47,7 @@ def run(self):
def _fetch_transcript(self, parsed_args, proxies, cookies, video_id):
transcript_list = YouTubeTranscriptApi.list_transcripts(
- video_id, proxies=proxies, cookies=cookies
+ video_id, proxies=proxies, cookies=cookies, verify=parsed_args.verify
)
if parsed_args.list_transcripts:
@@ -147,9 +147,23 @@ def _parse_args(self):
default=None,
help="The cookie file that will be used for authorization with youtube.",
)
+ parser.add_argument(
+ "--verify",
+ default=None,
+ type=self._parse_verify,
+ help="Path to a custom SSL certificate bundle or False to disable verification.",
+ )
return self._sanitize_video_ids(parser.parse_args(self._args))
+ def _parse_verify(self, value):
+ """
+ Converts the raw string given to `--verify` into the value that is handed on as `verify`.
+
+ The strings "true" and "false" (compared case-insensitively) become the booleans True and False. Any other
+ string is returned unchanged; per the `--verify` help text it is a path to a custom SSL certificate bundle.
+
+ :param value: the raw command line value of `--verify`
+ :type value: str
+ :return: True, False or the unchanged input string
+ :rtype bool|str:
+ """
+ if value.lower() == 'false':
+ return False
+ elif value.lower() == 'true':
+ return True
+ else:
+ return value
+
def _sanitize_video_ids(self, args):
+ """
+ Removes every backslash from each entry of `args.video_ids` and returns the same `args` object.
+
+ NOTE(review): presumably this undoes shell escaping of video ids (e.g. ids starting with a dash) - confirm.
+
+ :param args: the parsed command line arguments; its `video_ids` attribute is replaced by the cleaned list
+ :return: the `args` object that was passed in, with `video_ids` updated
+ """
args.video_ids = [video_id.replace("\\", "") for video_id in args.video_ids]
return args
diff --git a/youtube_transcript_api/test/test_cli.py b/youtube_transcript_api/test/test_cli.py
index dd21b39..7beffff 100644
--- a/youtube_transcript_api/test/test_cli.py
+++ b/youtube_transcript_api/test/test_cli.py
@@ -309,8 +309,29 @@ def test_run__cookies(self):
("v1 v2 --languages de en " "--cookies blahblah.txt").split()
).run()
YouTubeTranscriptApi.list_transcripts.assert_any_call(
- "v1", proxies=None, cookies="blahblah.txt"
+ "v1", proxies=None, cookies="blahblah.txt", verify=None
)
YouTubeTranscriptApi.list_transcripts.assert_any_call(
- "v2", proxies=None, cookies="blahblah.txt"
+ "v2", proxies=None, cookies="blahblah.txt", verify=None
+ )
+
+ def test_run__verify(self):
+ # A certificate bundle path must reach `list_transcripts` unchanged, as a string.
+ YouTubeTranscriptCli(
+ ("v1 v2 --languages de en " "--verify /path/to/cert.pem").split()
+ ).run()
+ YouTubeTranscriptApi.list_transcripts.assert_any_call(
+ "v1", proxies=None, cookies=None, verify="/path/to/cert.pem"
+ )
+ YouTubeTranscriptApi.list_transcripts.assert_any_call(
+ "v2", proxies=None, cookies=None, verify="/path/to/cert.pem"
+ )
+
+ # `--verify False` is converted by `_parse_verify` into the boolean False, so the
+ # expected keyword argument is the bool False, not the string "False".
+ YouTubeTranscriptCli(
+ ("v1 v2 --languages de en " "--verify False").split()
+ ).run()
+ YouTubeTranscriptApi.list_transcripts.assert_any_call(
+ "v1", proxies=None, cookies=None, verify=False
+ )
+ YouTubeTranscriptApi.list_transcripts.assert_any_call(
+ "v2", proxies=None, cookies=None, verify=False
)