[lifenews] Add support for lifenews.ru

2024-12-23 01:08:10 +00:00 · 2014-01-31 03:48:00 +07:00 · 2014-01-31 03:48:00 +07:00 · 659aa21ba1
commit 659aa21ba1
parent 8ccda826d5
2 changed files with 64 additions and 0 deletions
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -110,6 +110,7 @@ from .khanacademy import KhanAcademyIE
 from .kickstarter import KickStarterIE
 from .keek import KeekIE
 from .la7 import LA7IE
 from .lifenews import LifeNewsIE
 from .liveleak import LiveLeakIE
 from .livestream import LivestreamIE, LivestreamOriginalIE
 from .lynda import (
--- a/youtube_dl/extractor/lifenews.py
+++ b/youtube_dl/extractor/lifenews.py
@ -0,0 +1,63 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import unified_strdate
 class LifeNewsIE(InfoExtractor):
    IE_NAME = 'lifenews'
    IE_DESC = 'LIFE | NEWS'
    _VALID_URL = r'http://lifenews\.ru/(?:mobile/)?news/(?P<id>\d+)'
    _TEST = {
        'url': 'http://lifenews.ru/news/126342',
        'file': '126342.mp4',
        'md5': 'e1b50a5c5fb98a6a544250f2e0db570a',
        'info_dict': {
            'title': 'МВД разыскивает троих мужчин, оставивших в IKEA сумку с автоматом',
            'description': 'Камеры наблюдения гипермаркета зафиксировали троих мужчин, спрятавших оружейный арсенал в камере хранения.',
            'thumbnail': 'http://lifenews.ru/static/posts/2014/1/126342/.video.jpg',
            'upload_date': '20140130',
        }
    }
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage('http://lifenews.ru/mobile/news/%s' % video_id, video_id, 'Downloading page')
        video_url = self._html_search_regex(
            r'<video.*?src="([^"]+)"></video>', webpage, 'video URL')
        thumbnail = self._html_search_regex(
            r'<video.*?poster="([^"]+)".*?"></video>', webpage, 'video thumbnail')
        title = self._og_search_title(webpage)
        TITLE_SUFFIX = ' - Первый по срочным новостям — LIFE | NEWS'
        if title.endswith(TITLE_SUFFIX):
            title = title[:-len(TITLE_SUFFIX)]
        description = self._og_search_description(webpage)
        view_count = self._html_search_regex(
            r'<div class=\'views\'>(\d+)</div>', webpage, 'view count')
        comment_count = self._html_search_regex(
            r'<div class=\'comments\'>(\d+)</div>', webpage, 'comment count')
        upload_date = self._html_search_regex(
            r'<time datetime=\'([^\']+)\'>', webpage, 'upload date')
        return {
            'id': video_id,
            'url': video_url,
            'thumbnail': thumbnail,
            'title': title,
            'description': description,
            'view_count': view_count,
            'comment_count': comment_count,
            'upload_date': unified_strdate(upload_date),
        }