1
0
Fork 0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2024-11-22 02:01:50 +00:00

[pornhub:playlistbase] Skip videos from drop-down menu for all playlists (closes #12819, closes #13902)

This commit is contained in:
Sergey M․ 2017-08-13 07:53:02 +07:00
parent b3c6515365
commit 475bcb225f
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D

View file

@@ -227,20 +227,6 @@ class PornHubIE(InfoExtractor):
class PornHubPlaylistBaseIE(InfoExtractor): class PornHubPlaylistBaseIE(InfoExtractor):
def _extract_entries(self, webpage): def _extract_entries(self, webpage):
return [
self.url_result(
'http://www.pornhub.com/%s' % video_url,
PornHubIE.ie_key(), video_title=title)
for video_url, title in orderedSet(re.findall(
r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
webpage))
]
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
# Only process container div with main playlist content skipping # Only process container div with main playlist content skipping
# drop-down menu that uses similar pattern for videos (see # drop-down menu that uses similar pattern for videos (see
# https://github.com/rg3/youtube-dl/issues/11594). # https://github.com/rg3/youtube-dl/issues/11594).
@@ -248,7 +234,21 @@ class PornHubPlaylistBaseIE(InfoExtractor):
r'(?s)(<div[^>]+class=["\']container.+)', webpage, r'(?s)(<div[^>]+class=["\']container.+)', webpage,
'container', default=webpage) 'container', default=webpage)
entries = self._extract_entries(container) return [
self.url_result(
'http://www.pornhub.com/%s' % video_url,
PornHubIE.ie_key(), video_title=title)
for video_url, title in orderedSet(re.findall(
r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
container))
]
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
entries = self._extract_entries(webpage)
playlist = self._parse_json( playlist = self._parse_json(
self._search_regex( self._search_regex(