diff --git a/README.md b/README.md index 87d3723..ad271f3 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,3 @@ -

✨ YouTube Transcript API ✨

@@ -365,6 +364,30 @@ Using the CLI: youtube_transcript_api --cookies /path/to/your/cookies.txt ``` +## SSL Verification + +You can customize SSL certificate verification by providing a path to a custom certificate bundle or disabling verification entirely: + +```python +from youtube_transcript_api import YouTubeTranscriptApi + +# Using custom certificate bundle +YouTubeTranscriptApi.get_transcript(video_id, verify='/path/to/cacert.pem') + +# Disabling SSL verification (not recommended for production) +YouTubeTranscriptApi.get_transcript(video_id, verify=False) +``` + +Using the CLI: + +``` +# Using custom certificate bundle +youtube_transcript_api --verify /path/to/cacert.pem + +# Disabling SSL verification +youtube_transcript_api --verify False +``` + ## Warning This code uses an undocumented part of the YouTube API, which is called by the YouTube web-client. So there is no guarantee that it won't stop working tomorrow, if they change how things work. I will however do my best to make things working again as soon as possible if that happens. So if it stops working, let me know! diff --git a/youtube_transcript_api/_api.py b/youtube_transcript_api/_api.py index bf1f240..80a61fa 100644 --- a/youtube_transcript_api/_api.py +++ b/youtube_transcript_api/_api.py @@ -16,7 +16,7 @@ class YouTubeTranscriptApi(object): @classmethod - def list_transcripts(cls, video_id, proxies=None, cookies=None): + def list_transcripts(cls, video_id, proxies=None, cookies=None, verify=None): """ Retrieves the list of transcripts which are available for a given video. It returns a `TranscriptList` object which is iterable and provides methods to filter the list of transcripts for specific languages. 
While iterating @@ -61,6 +61,8 @@ def list_transcripts(cls, video_id, proxies=None, cookies=None): :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies :param cookies: a string of the path to a text file containing youtube authorization cookies :type cookies: str + :param verify: custom SSL verification path or boolean + :type verify: str|bool|None :return: the list of available transcripts :rtype TranscriptList: """ @@ -68,6 +70,8 @@ def list_transcripts(cls, video_id, proxies=None, cookies=None): if cookies: http_client.cookies = cls._load_cookies(cookies, video_id) http_client.proxies = proxies if proxies else {} + if verify is not None: + http_client.verify = verify return TranscriptListFetcher(http_client).fetch(video_id) @classmethod @@ -79,6 +83,7 @@ def get_transcripts( proxies=None, cookies=None, preserve_formatting=False, + verify=None, ): """ Retrieves the transcripts for a list of videos. @@ -98,6 +103,8 @@ def get_transcripts( :type cookies: str :param preserve_formatting: whether to keep select HTML text formatting :type preserve_formatting: bool + :param verify: custom SSL verification path or boolean + :type verify: str|bool|None :return: a tuple containing a dictionary mapping video ids onto their corresponding transcripts, and a list of video ids, which could not be retrieved :rtype ({str: [{'text': str, 'start': float, 'end': float}]}, [str]}): @@ -110,7 +117,7 @@ def get_transcripts( for video_id in video_ids: try: data[video_id] = cls.get_transcript( - video_id, languages, proxies, cookies, preserve_formatting + video_id, languages, proxies, cookies, preserve_formatting, verify ) except Exception as exception: if not continue_after_error: @@ -128,6 +135,7 @@ def get_transcript( proxies=None, cookies=None, preserve_formatting=False, + verify=None, ): """ Retrieves the transcript for a single video. 
This is just a shortcut for calling:: @@ -146,12 +154,14 @@ def get_transcript( :type cookies: str :param preserve_formatting: whether to keep select HTML text formatting :type preserve_formatting: bool + :param verify: custom SSL verification path or boolean + :type verify: str|bool|None :return: a list of dictionaries containing the 'text', 'start' and 'duration' keys :rtype [{'text': str, 'start': float, 'end': float}]: """ assert isinstance(video_id, str), "`video_id` must be a string" return ( - cls.list_transcripts(video_id, proxies, cookies) + cls.list_transcripts(video_id, proxies, cookies, verify) .find_transcript(languages) .fetch(preserve_formatting=preserve_formatting) ) diff --git a/youtube_transcript_api/_cli.py b/youtube_transcript_api/_cli.py index 09f76ba..3d1adec 100644 --- a/youtube_transcript_api/_cli.py +++ b/youtube_transcript_api/_cli.py @@ -47,7 +47,7 @@ def run(self): def _fetch_transcript(self, parsed_args, proxies, cookies, video_id): transcript_list = YouTubeTranscriptApi.list_transcripts( - video_id, proxies=proxies, cookies=cookies + video_id, proxies=proxies, cookies=cookies, verify=parsed_args.verify ) if parsed_args.list_transcripts: @@ -147,9 +147,23 @@ def _parse_args(self): default=None, help="The cookie file that will be used for authorization with youtube.", ) + parser.add_argument( + "--verify", + default=None, + type=self._parse_verify, + help="Path to a custom SSL certificate bundle or False to disable verification.", + ) return self._sanitize_video_ids(parser.parse_args(self._args)) + def _parse_verify(self, value): + if value.lower() == 'false': + return False + elif value.lower() == 'true': + return True + else: + return value + def _sanitize_video_ids(self, args): args.video_ids = [video_id.replace("\\", "") for video_id in args.video_ids] return args diff --git a/youtube_transcript_api/test/test_cli.py b/youtube_transcript_api/test/test_cli.py index dd21b39..7beffff 100644 --- a/youtube_transcript_api/test/test_cli.py 
+++ b/youtube_transcript_api/test/test_cli.py @@ -309,8 +309,29 @@ def test_run__cookies(self): ("v1 v2 --languages de en " "--cookies blahblah.txt").split() ).run() YouTubeTranscriptApi.list_transcripts.assert_any_call( - "v1", proxies=None, cookies="blahblah.txt" + "v1", proxies=None, cookies="blahblah.txt", verify=None ) YouTubeTranscriptApi.list_transcripts.assert_any_call( - "v2", proxies=None, cookies="blahblah.txt" + "v2", proxies=None, cookies="blahblah.txt", verify=None + ) + + def test_run__verify(self): + YouTubeTranscriptCli( + ("v1 v2 --languages de en " "--verify /path/to/cert.pem").split() + ).run() + YouTubeTranscriptApi.list_transcripts.assert_any_call( + "v1", proxies=None, cookies=None, verify="/path/to/cert.pem" + ) + YouTubeTranscriptApi.list_transcripts.assert_any_call( + "v2", proxies=None, cookies=None, verify="/path/to/cert.pem" + ) + + YouTubeTranscriptCli( + ("v1 v2 --languages de en " "--verify False").split() + ).run() + YouTubeTranscriptApi.list_transcripts.assert_any_call( + "v1", proxies=None, cookies=None, verify=False + ) + YouTubeTranscriptApi.list_transcripts.assert_any_call( + "v2", proxies=None, cookies=None, verify=False )