1
0
Fork 0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2024-11-22 10:11:52 +00:00

Merge branch 'peugeot-hellporno'

This commit is contained in:
Sergey M․ 2014-12-29 21:33:57 +06:00
commit 479514d015
2 changed files with 72 additions and 0 deletions

View file

@ -169,6 +169,7 @@ from .grooveshark import GroovesharkIE
from .groupon import GrouponIE
from .hark import HarkIE
from .heise import HeiseIE
from .hellporno import HellPornoIE
from .helsinki import HelsinkiIE
from .hentaistigma import HentaiStigmaIE
from .hornbunny import HornBunnyIE

View file

@ -0,0 +1,71 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
js_to_json,
remove_end,
)
class HellPornoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?hellporno\.com/videos/(?P<id>[^/]+)'
_TEST = {
'url': 'http://hellporno.com/videos/dixie-is-posing-with-naked-ass-very-erotic/',
'md5': '1fee339c610d2049699ef2aa699439f1',
'info_dict': {
'id': '149116',
'display_id': 'dixie-is-posing-with-naked-ass-very-erotic',
'ext': 'mp4',
'title': 'Dixie is posing with naked ass very erotic',
'thumbnail': 're:https?://.*\.jpg$',
'age_limit': 18,
}
}
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
title = remove_end(self._html_search_regex(
r'<title>([^<]+)</title>', webpage, 'title'), ' - Hell Porno')
flashvars = self._parse_json(self._search_regex(
r'var\s+flashvars\s*=\s*({.+?});', webpage, 'flashvars'),
display_id, transform_source=js_to_json)
video_id = flashvars.get('video_id')
thumbnail = flashvars.get('preview_url')
ext = flashvars.get('postfix', '.mp4')[1:]
formats = []
for video_url_key in ['video_url', 'video_alt_url']:
video_url = flashvars.get(video_url_key)
if not video_url:
continue
video_text = flashvars.get('%s_text' % video_url_key)
fmt = {
'url': video_url,
'ext': ext,
'format_id': video_text,
}
m = re.search(r'^(?P<height>\d+)[pP]', video_text)
if m:
fmt['height'] = int(m.group('height'))
formats.append(fmt)
self._sort_formats(formats)
categories = self._html_search_meta(
'keywords', webpage, 'categories', default='').split(',')
return {
'id': video_id,
'display_id': display_id,
'title': title,
'thumbnail': thumbnail,
'categories': categories,
'age_limit': 18,
'formats': formats,
}