mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-11-18 16:26:23 +00:00
[XVideos] Update XVideosIE including features from PR #30689
* add uploader, tag, performer and view_count extraction (closes #30689)
* add like_count and dislike_count extraction
This commit is contained in:
parent
35230a2b5a
commit
b04d78068d
1 changed file with 117 additions and 22 deletions
|
@ -4,22 +4,39 @@ from __future__ import unicode_literals
|
|||
import re
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from math import isinf
|
||||
|
||||
from .common import (
|
||||
InfoExtractor,
|
||||
SearchInfoExtractor,
|
||||
)
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_kwargs,
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
get_element_by_class,
|
||||
get_element_by_id,
|
||||
get_elements_by_class,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
LazyList,
|
||||
merge_dicts,
|
||||
parse_count,
|
||||
parse_duration,
|
||||
try_get,
|
||||
remove_end,
|
||||
remove_start,
|
||||
T,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
txt_or_none,
|
||||
url_basename,
|
||||
urljoin,
|
||||
)
|
||||
|
@ -27,14 +44,18 @@ from ..utils import (
|
|||
|
||||
class XVideosIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:[^/]+\.)?xvideos2?\.com/(?:video|prof-video-click/model/[^/]+/)|
|
||||
(?:www\.)?xvideos\.es/video|
|
||||
(?:www|flashservice)\.xvideos\.com/embedframe/|
|
||||
static-hw\.xvideos\.com/swf/xv-player\.swf\?.*?\bid_video=
|
||||
)
|
||||
(?P<id>\d+)
|
||||
(?:
|
||||
https?://
|
||||
(?:
|
||||
# xvideos\d+\.com redirects to xvideos.com
|
||||
# (?P<country>[a-z]{2})\.xvideos.com too: catch it anyway
|
||||
(?:[^/]+\.)?xvideos\.com/(?:video|prof-video-click/model/[^/]+/)|
|
||||
(?:www\.)?xvideos\.es/video|
|
||||
(?:www|flashservice)\.xvideos\.com/embedframe/|
|
||||
static-hw\.xvideos\.com/swf/xv-player\.swf\?.*?\bid_video=
|
||||
)|
|
||||
xvideos:
|
||||
)(?P<id>\d+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://www.xvideos.com/video4588838/biker_takes_his_girl',
|
||||
|
@ -45,19 +66,51 @@ class XVideosIE(InfoExtractor):
|
|||
'title': 'Biker Takes his Girl',
|
||||
'duration': 108,
|
||||
'age_limit': 18,
|
||||
}
|
||||
},
|
||||
'skip': 'Sorry, this video has been deleted',
|
||||
}, {
|
||||
'url': 'https://www.xvideos.com/video78250973/hot_blonde_gets_excited_in_the_middle_of_the_club.',
|
||||
'md5': '0bc6e46ef55907533ffa0542e45958b6',
|
||||
'info_dict': {
|
||||
'id': '78250973',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hot blonde gets excited in the middle of the club.',
|
||||
'uploader': 'Deny Barbie Official',
|
||||
'age_limit': 18,
|
||||
'duration': 302,
|
||||
},
|
||||
}, {
|
||||
# Broken HLS formats
|
||||
'url': 'https://www.xvideos.com/video65982001/what_s_her_name',
|
||||
'md5': 'b82d7d7ef7d65a84b1fa6965f81f95a5',
|
||||
'md5': '18ff7d57d4edc3c908fc5b06166dd63d',
|
||||
'info_dict': {
|
||||
'id': '65982001',
|
||||
'ext': 'mp4',
|
||||
'title': 'what\'s her name?',
|
||||
'duration': 120,
|
||||
'uploader': 'Skakdjskdk',
|
||||
'age_limit': 18,
|
||||
'thumbnail': r're:^https://img-hw.xvideos-cdn.com/.+\.jpg',
|
||||
'duration': 120,
|
||||
'thumbnail': r're:^https://img-[a-z]+.xvideos-cdn.com/.+\.jpg',
|
||||
}
|
||||
}, {
|
||||
# from PR #30689
|
||||
'url': 'https://www.xvideos.com/video50011247/when_girls_play_-_adriana_chechik_abella_danger_-_tradimento_-_twistys',
|
||||
'md5': 'aa54f96311768b3a8bfe54b8c8fda070',
|
||||
'info_dict': {
|
||||
'id': '50011247',
|
||||
'ext': 'mp4',
|
||||
'title': 'When Girls Play - (Adriana Chechik, Abella Danger) - Betrayal - Twistys',
|
||||
'duration': 720,
|
||||
'age_limit': 18,
|
||||
'tags': ['lesbian', 'teen', 'hardcore', 'latina', 'rough', 'squirt', 'big-ass', 'cheater', 'twistys', 'cheat', 'ass-play', 'when-girls-play'],
|
||||
'creator': 'Twistys',
|
||||
'uploader': 'Twistys',
|
||||
'uploader_url': 'https://www.xvideos.com/channels/twistys1',
|
||||
'cast': [{'given_name': 'Adriana Chechik', 'url': 'https://www.xvideos.com/pornstars/adriana-chechik'}, {'given_name': 'Abella Danger', 'url': 'https://www.xvideos.com/pornstars/abella-danger'}],
|
||||
'view_count': 'lambda c: c >= 4038715',
|
||||
'like_count': 'lambda c: c >= 8800',
|
||||
'dislike_count': 'lambda c: c >= 3100',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://flashservice.xvideos.com/embedframe/4588838',
|
||||
'only_matching': True,
|
||||
|
@ -138,7 +191,7 @@ class XVideosIE(InfoExtractor):
|
|||
duration = int_or_none(self._og_search_property(
|
||||
'duration', webpage, default=None)) or parse_duration(
|
||||
self._search_regex(
|
||||
r'<span[^>]+class=["\']duration["\'][^>]*>.*?(\d[^<]+)',
|
||||
r'''<span [^>]*\bclass\s*=\s*["']duration\b[^>]+>.*?(\d[^<]+)''',
|
||||
webpage, 'duration', fatal=False))
|
||||
|
||||
formats = []
|
||||
|
@ -169,15 +222,57 @@ class XVideosIE(InfoExtractor):
|
|||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
# adapted from PR #30689
|
||||
ignore_tags = set(('xvideos', 'xvideos.com', 'x videos', 'x video', 'porn', 'video', 'videos'))
|
||||
tags = self._html_search_meta('keywords', webpage) or ''
|
||||
tags = [t for t in re.split(r'\s*,\s*', tags) if t not in ignore_tags]
|
||||
|
||||
mobj = re.search(
|
||||
r'''(?sx)
|
||||
(?P<ul><a\b[^>]+\bclass\s*=\s*["'](?:[\w-]+\s+)*uploader-tag(?:\s+[\w-]+)*[^>]+>)
|
||||
\s*<span\s+class\s*=\s*["']name\b[^>]+>\s*(?P<name>.+?)\s*<
|
||||
''', webpage)
|
||||
creator = None
|
||||
uploader_url = None
|
||||
if mobj:
|
||||
uploader_url = urljoin(url, extract_attributes(mobj.group('ul')).get('href'))
|
||||
creator = mobj.group('name')
|
||||
|
||||
def get_actor_data(mobj):
    """Build a cast entry from a regex match on a profile link.

    `mobj` must provide the named groups `ul` (the opening `<a ...>` tag)
    and `name` (the text captured from the following name `<span>`).

    Returns a dict with `given_name` and an absolute `url` when the link's
    href contains `/pornstars/`; returns None for any other link,
    including an anchor that has no href attribute at all.
    """
    ul_url = extract_attributes(mobj.group('ul')).get('href')
    # `.get('href')` is None when the anchor carries no href; testing
    # `'/pornstars/' in None` would raise TypeError, so reject that case
    # explicitly instead of depending on the caller to absorb the error.
    if not ul_url or '/pornstars/' not in ul_url:
        return None
    return {
        'given_name': mobj.group('name'),
        # href may be relative: resolve it against the page URL taken
        # from the enclosing scope
        'url': urljoin(url, ul_url),
    }
|
||||
|
||||
actors = traverse_obj(re.finditer(
|
||||
r'''(?sx)
|
||||
(?P<ul><a\b[^>]+\bclass\s*=\s*["'](?:[\w-]+\s+)*profile(?:\s+[\w-]+)*[^>]+>)
|
||||
\s*<span\s+class\s*=\s*["']name\b[^>]+>\s*(?P<name>.+?)\s*<
|
||||
''', webpage), (Ellipsis, T(get_actor_data)))
|
||||
|
||||
return merge_dicts({
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'duration': duration,
|
||||
'thumbnails': thumbnails,
|
||||
'age_limit': 18,
|
||||
}
|
||||
|
||||
}, {
|
||||
'duration': duration,
|
||||
'thumbnails': thumbnails or None,
|
||||
'tags': tags or None,
|
||||
'creator': creator,
|
||||
'uploader': creator,
|
||||
'uploader_url': uploader_url,
|
||||
'cast': actors or None,
|
||||
'view_count': parse_count(get_element_by_class(
|
||||
'mobile-hide', get_element_by_id('v-views', webpage))),
|
||||
'like_count': parse_count(get_element_by_class('rating-good-nbr', webpage)),
|
||||
'dislike_count': parse_count(get_element_by_class('rating-bad-nbr', webpage)),
|
||||
}, {
|
||||
'channel': creator,
|
||||
'channel_url': uploader_url,
|
||||
} if '/channels/' in (uploader_url or '') else {})
|
||||
|
||||
class XVideosPlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
|
|
Loading…
Reference in a new issue