[theonion] Modernize

2024-11-26 04:02:11 +00:00 · 2015-02-19 04:12:40 +01:00 · 2015-02-19 04:12:40 +01:00 · e880c66bd8
commit e880c66bd8
parent 383456aa29
1 changed files with 5 additions and 12 deletions
--- a/youtube_dl/extractor/theonion.py
+++ b/youtube_dl/extractor/theonion.py
@ -4,11 +4,10 @@ from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import ExtractorError
 class TheOnionIE(InfoExtractor):
-    _VALID_URL = r'(?x)https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<article_id>[0-9]+)/?'
+    _VALID_URL = r'https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<id>[0-9]+)/?'
    _TEST = {
        'url': 'http://www.theonion.com/video/man-wearing-mm-jacket-gods-image,36918/',
        'md5': '19eaa9a39cf9b9804d982e654dc791ee',
@ -22,10 +21,8 @@ class TheOnionIE(InfoExtractor):
    }
    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        display_id = self._match_id(url)
-        article_id = mobj.group('article_id')
+        webpage = self._download_webpage(url, display_id)
        webpage = self._download_webpage(url, article_id)
        video_id = self._search_regex(
            r'"videoId":\s(\d+),', webpage, 'video ID')
@ -34,10 +31,6 @@ class TheOnionIE(InfoExtractor):
        thumbnail = self._og_search_thumbnail(webpage)
        sources = re.findall(r'<source src="([^"]+)" type="([^"]+)"', webpage)
        if not sources:
            raise ExtractorError(
                'No sources found for video %s' % video_id, expected=True)
        formats = []
        for src, type_ in sources:
            if type_ == 'video/mp4':
@ -54,15 +47,15 @@ class TheOnionIE(InfoExtractor):
                })
            elif type_ == 'application/x-mpegURL':
                formats.extend(
-                    self._extract_m3u8_formats(src, video_id, preference=-1))
+                    self._extract_m3u8_formats(src, display_id, preference=-1))
            else:
                self.report_warning(
                    'Encountered unexpected format: %s' % type_)
        self._sort_formats(formats)
        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,