Mirror of https://github.com/ytdl-org/youtube-dl.git (synced 2024-11-25 03:32:05 +00:00)
[extractor/common] Improve JSON-LD interaction statistic extraction (refs #23306)
This commit is contained in:
parent 45b0a0d11b
commit 1727541315
2 changed files with 60 additions and 2 deletions
|
@ -98,6 +98,56 @@ class TestInfoExtractor(unittest.TestCase):
|
|||
self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True)
|
||||
self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True)
|
||||
|
||||
def test_search_json_ld_realworld(self):
    # Regression test for
    # https://github.com/ytdl-org/youtube-dl/issues/23306
    #
    # The markup below is JSON-LD taken from a real page. Two things make it
    # interesting: "interactionStatistic" is a single object rather than a
    # list, and its "interactionType" is a nested object that carries the
    # action under "@type" instead of being a plain string.
    webpage = r'''<script type="application/ld+json">
{
"@context": "http://schema.org/",
"@type": "VideoObject",
"name": "1 On 1 With Kleio",
"url": "https://www.eporner.com/hd-porn/xN49A1cT3eB/1-On-1-With-Kleio/",
"duration": "PT0H12M23S",
"thumbnailUrl": ["https://static-eu-cdn.eporner.com/thumbs/static4/7/78/780/780814/9_360.jpg", "https://imggen.eporner.com/780814/1920/1080/9.jpg"],
"contentUrl": "https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4",
"embedUrl": "https://www.eporner.com/embed/xN49A1cT3eB/1-On-1-With-Kleio/",
"image": "https://static-eu-cdn.eporner.com/thumbs/static4/7/78/780/780814/9_360.jpg",
"width": "1920",
"height": "1080",
"encodingFormat": "mp4",
"bitrate": "6617kbps",
"isFamilyFriendly": "False",
"description": "Kleio Valentien",
"uploadDate": "2015-12-05T21:24:35+01:00",
"interactionStatistic": {
"@type": "InteractionCounter",
"interactionType": { "@type": "http://schema.org/WatchAction" },
"userInteractionCount": 1120958
}, "aggregateRating": {
"@type": "AggregateRating",
"ratingValue": "88",
"ratingCount": "630",
"bestRating": "100",
"worstRating": "0"
}, "actor": [{
"@type": "Person",
"name": "Kleio Valentien",
"url": "https://www.eporner.com/pornstar/kleio-valentien/"
}]}
</script>'''

    # Fields the extractor is expected to derive from the markup above.
    expected_info = {
        'title': '1 On 1 With Kleio',
        'description': 'Kleio Valentien',
        'url': 'https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4',
        'timestamp': 1449347075,
        'duration': 743.0,
        'view_count': 1120958,
        'width': 1920,
        'height': 1080,
    }

    extracted_info = self.ie._search_json_ld(webpage, None)
    expect_dict(self, extracted_info, expected_info)
|
||||
|
||||
|
||||
def test_download_json(self):
|
||||
uri = encode_data_uri(b'{"foo": "blah"}', 'application/json')
|
||||
self.assertEqual(self.ie._download_json(uri, None), {'foo': 'blah'})
|
||||
|
|
|
@ -1237,8 +1237,16 @@ class InfoExtractor(object):
|
|||
'ViewAction': 'view',
|
||||
}
|
||||
|
||||
def extract_interaction_type(e):
    # Return the interaction type of the JSON-LD entry `e`, passed through
    # str_or_none.
    #
    # 'interactionType' may be given directly, or wrapped in an object that
    # holds the actual type under '@type', e.g.
    #   {"@type": "http://schema.org/WatchAction"}
    raw_type = e.get('interactionType')
    if not isinstance(raw_type, dict):
        return str_or_none(raw_type)
    # Wrapped form: unwrap one level and normalise the inner value.
    return str_or_none(raw_type.get('@type'))
|
||||
|
||||
def extract_interaction_statistic(e):
|
||||
interaction_statistic = e.get('interactionStatistic')
|
||||
if isinstance(interaction_statistic, dict):
|
||||
interaction_statistic = [interaction_statistic]
|
||||
if not isinstance(interaction_statistic, list):
|
||||
return
|
||||
for is_e in interaction_statistic:
|
||||
|
@ -1246,8 +1254,8 @@ class InfoExtractor(object):
|
|||
continue
|
||||
if is_e.get('@type') != 'InteractionCounter':
|
||||
continue
|
||||
interaction_type = is_e.get('interactionType')
|
||||
if not isinstance(interaction_type, compat_str):
|
||||
interaction_type = extract_interaction_type(is_e)
|
||||
if not interaction_type:
|
||||
continue
|
||||
# For interaction count some sites provide string instead of
|
||||
# an integer (as per spec) with non digit characters (e.g. ",")
|
||||
|
|
Loading…
Reference in a new issue