Merge remote-tracking branch 'peugeot/hornbunny'

2024-11-17 07:45:58 +00:00 · 2014-08-31 23:45:24 +02:00 · 2014-08-31 23:45:24 +02:00 · 5a3f0d9aee
commit 5a3f0d9aee
parent d10548b691 bbc9dc56f6
2 changed files with 45 additions and 0 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -135,6 +135,7 @@ from .grooveshark import GroovesharkIE
 from .hark import HarkIE
 from .helsinki import HelsinkiIE
 from .hentaistigma import HentaiStigmaIE
 from .hornbunny import HornBunnyIE
 from .hotnewhiphop import HotNewHipHopIE
 from .howcast import HowcastIE
 from .howstuffworks import HowStuffWorksIE
--- a/youtube_dl/extractor/hornbunny.py
+++ b/youtube_dl/extractor/hornbunny.py
@@ -0,0 +1,44 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import int_or_none
 class HornBunnyIE(InfoExtractor):
    """Information extractor for hornbunny.com video pages.

    Handles URLs of the form
    ``http(s)://[www.]hornbunny.com/videos/<title-dash>-<id>.html``.
    """

    # Accept both http and https. The previous 'http?' made the final 'p'
    # optional instead of allowing an optional trailing 's'.
    _VALID_URL = r'https?://(?:www\.)?hornbunny\.com/videos/(?P<title_dash>[a-z-]+)-(?P<id>\d+)\.html'
    _TEST = {
        'url': 'http://hornbunny.com/videos/panty-slut-jerk-off-instruction-5227.html',
        'md5': '95e40865aedd08eff60272b704852ad7',
        'info_dict': {
            'id': '5227',
            'ext': 'flv',
            'title': 'panty slut jerk off instruction',
            'duration': 550
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video page at *url*.

        The video page is downloaded first; it embeds the address of a
        second ("settings") document, which is downloaded in turn and
        contains the actual media URL in its ``flvMask`` entry.

        The title, the settings URL and the media URL are looked up with
        the default (fatal) behaviour of ``_html_search_regex``; duration
        and view count are optional and fall back to ``None`` when the
        page does not contain them.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        title = self._html_search_regex(
            r'class="title">(.*?)</h2>', webpage, 'title')

        # Address of the settings document. The display name used to be
        # 'title' (copy-paste), which produced a misleading error message
        # when this lookup failed.
        redirect_url = self._html_search_regex(
            r'pg&settings=(.*?)\|0"\);', webpage, 'redirect url')
        webpage2 = self._download_webpage(redirect_url, video_id)
        video_url = self._html_search_regex(
            r'flvMask:(.*?);', webpage2, 'video_url')

        # Runtime is rendered as "<minutes>:<seconds>"; tolerate any amount
        # of whitespace after the label.
        mobj = re.search(
            r'<strong>Runtime:</strong>\s*(?P<minutes>\d+):(?P<seconds>\d+)</div>',
            webpage)
        if mobj:
            duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds'))
        else:
            duration = None

        # Optional field: fatal=False, and int_or_none handles the missing
        # case when building the result.
        view_count = self._html_search_regex(
            r'<strong>Views:</strong>\s*(\d+)</div>', webpage,
            'view count', fatal=False)

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'ext': 'flv',
            'duration': duration,
            'view_count': int_or_none(view_count),
        }