From 69a40d3eb06316ac944aea2a0d28d2ce15d447cd Mon Sep 17 00:00:00 2001 From: Yukan Zhang Date: Thu, 10 Jun 2021 00:58:34 -0700 Subject: [PATCH] [sohu] fix extractor conflict: certain URLs would cause a conflict between SohuPlaylistIE and SohuIE --- youtube_dl/extractor/sohu.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py index b3158d806..54094b52b 100644 --- a/youtube_dl/extractor/sohu.py +++ b/youtube_dl/extractor/sohu.py @@ -262,7 +262,7 @@ class SohuIE(InfoExtractor): class SohuPlaylistIE(InfoExtractor): - _VALID_URL = r'https?://(?:my\.)?tv\.sohu\.com/pl/(?P\d+)' + _VALID_URL = r'https?://(?:my\.)?tv\.sohu\.com/pl/(?P\d+)$' _URL_IN_PLAYLIST = re.compile(r'.*?') parser = HTMLAttributeParser() _TESTS = [{ @@ -311,14 +311,13 @@ class SohuPlaylistIE(InfoExtractor): def _get_all_pages_in_playlist(self, first_page, url): pgcount = int(self._search_regex(r'var pgcount = \'(\d+)\'', first_page, 'pgcount')) pgsize = int(self._search_regex(r'var pgsize = \'(\d+)\'', first_page, 'pgsize')) - return [url + '/index%d.shtml' % (i+1) for i in range(0, math.ceil(pgcount/pgsize))] + return [url + '/index%d.shtml' % (i + 1) for i in range(0, math.ceil(pgcount / pgsize))] def _get_video_list(self, all_pages, playlist_id): video_list = '' for i, url in enumerate(all_pages): - webpage = self._download_webpage(url, "playlist " + playlist_id + " page: %d" % (1+i)) + webpage = self._download_webpage(url, "playlist " + playlist_id + " page: %d" % (1 + i)) video_list += self._search_regex( r'
    (.*?)
', webpage, 'video list', NO_DEFAULT, True, re.DOTALL) return video_list -