[echomsk] Add extractor (Closes #4600)

2024-12-22 16:57:40 +00:00 · 2014-12-31 18:03:51 +06:00 · 2014-12-31 18:03:51 +06:00 · 1931a73f39
commit 1931a73f39
parent 966ea3aebd
2 changed files with 47 additions and 0 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -99,6 +99,7 @@ from .discovery import DiscoveryIE
 from .divxstage import DivxStageIE
 from .dropbox import DropboxIE
 from .ebaumsworld import EbaumsWorldIE
 from .echomsk import EchoMskIE
 from .ehow import EHowIE
 from .eighttracks import EightTracksIE
 from .einthusan import EinthusanIE
--- a/youtube_dl/extractor/echomsk.py
+++ b/youtube_dl/extractor/echomsk.py
@@ -0,0 +1,46 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 class EchoMskIE(InfoExtractor):
    """Extractor for audio recordings published under echo.msk.ru/sounds/.

    The numeric component of the URL path is used as the media id. The
    result is a single audio entry whose title is the programme name,
    followed by the air date when the page provides one.
    """

    # The scheme is matched as ``https?`` so that links to the TLS version
    # of the site are recognised in addition to plain ``http`` ones.
    _VALID_URL = r'https?://(?:www\.)?echo\.msk\.ru/sounds/(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.echo.msk.ru/sounds/1464134.html',
        'md5': '2e44b3b78daff5b458e4dbc37f191f7c',
        'info_dict': {
            'id': '1464134',
            'ext': 'mp3',
            'title': 'Особое мнение - 29 декабря 2014, 19:08',
        },
    }

    def _real_extract(self, url):
        """Download the page at *url* and build the info dict for its audio.

        Returns a dict with the keys ``id`` (numeric id taken from the
        URL), ``url`` (the ``href`` of the page's ``rel="mp3"`` link) and
        ``title`` (programme name, plus `` - <air date>`` when a date block
        is found).

        NOTE(review): the audio URL and title lookups are issued without
        ``fatal=False``, so a miss is presumably reported as an extraction
        error by the inherited helpers -- confirm against InfoExtractor.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        audio_url = self._search_regex(
            r'<a rel="mp3" href="([^"]+)">', webpage, 'audio URL')

        title = self._html_search_regex(
            r'<a href="/programs/[^"]+" target="_blank">([^<]+)</a>',
            webpage, 'title')

        # The air date is optional decoration for the title, so a page
        # without a date block must not make the extraction fail.
        air_date = self._html_search_regex(
            r'(?s)<div class="date">(.+?)</div>',
            webpage, 'date', fatal=False, default=None)

        if air_date:
            # Collapse every run of whitespace (including mixed runs such
            # as space + tab) into one space; the date block spans several
            # lines of markup and would otherwise leak layout whitespace
            # into the title.
            air_date = re.sub(r'\s+', ' ', air_date).strip()
        if air_date:
            title = '%s - %s' % (title, air_date)

        return {
            'id': video_id,
            'url': audio_url,
            'title': title,
        }