[sohu] fix extractor conflict

Certain URLs would cause a conflict between SohuPlaylistIE and SohuIE; anchor the playlist _VALID_URL with `$` to avoid it.
2024-11-16 06:15:46 +00:00 · 2021-06-10 00:58:34 -07:00 · 2021-06-10 00:58:34 -07:00 · 69a40d3eb0
commit 69a40d3eb0
parent 22c0a90a7a
1 changed files with 3 additions and 4 deletions
--- a/youtube_dl/extractor/sohu.py
+++ b/youtube_dl/extractor/sohu.py
@ -262,7 +262,7 @@ class SohuIE(InfoExtractor):


 class SohuPlaylistIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:my\.)?tv\.sohu\.com/pl/(?P<pl_id>\d+)'
+    _VALID_URL = r'https?://(?:my\.)?tv\.sohu\.com/pl/(?P<pl_id>\d+)$'
    _URL_IN_PLAYLIST = re.compile(r'<strong>.*?</strong>')
    parser = HTMLAttributeParser()
    _TESTS = [{
@ -311,14 +311,13 @@ class SohuPlaylistIE(InfoExtractor):
    def _get_all_pages_in_playlist(self, first_page, url):
        pgcount = int(self._search_regex(r'var pgcount = \'(\d+)\'', first_page, 'pgcount'))
        pgsize = int(self._search_regex(r'var pgsize = \'(\d+)\'', first_page, 'pgsize'))
-        return [url + '/index%d.shtml' % (i+1) for i in range(0, math.ceil(pgcount/pgsize))]
+        return [url + '/index%d.shtml' % (i + 1) for i in range(0, math.ceil(pgcount / pgsize))]

    def _get_video_list(self, all_pages, playlist_id):
        video_list = ''
        for i, url in enumerate(all_pages):
-            webpage = self._download_webpage(url, "playlist " + playlist_id + " page: %d" % (1+i))
+            webpage = self._download_webpage(url, "playlist " + playlist_id + " page: %d" % (1 + i))
            video_list += self._search_regex(
                r'<ul class="uList cfix">(.*?)</ul>',
                webpage, 'video list', NO_DEFAULT, True, re.DOTALL)
        return video_list
-