youtube-dl/youtube_dl/extractor/amp.py

# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    determine_ext,
    ExtractorError,
    int_or_none,
    mimetype2ext,
    parse_iso8601,
    unified_timestamp,
    url_or_none,
)


class AMPIE(InfoExtractor):
    # Shared helper for extractors that read Akamai Adaptive Media Player
    # (AMP) JSON feeds.

    # parse Akamai Adaptive Media Player feed
    def _extract_feed_info(self, url):
        """Download the AMP feed at ``url`` and build an info dict from it.

        The returned dict has the keys ``id``, ``title``, ``description``,
        ``thumbnails``, ``timestamp``, ``duration``, ``subtitles`` and
        ``formats``.

        Raises ExtractorError when the feed has no ``channel.item`` entry;
        the feed's own ``error`` value is quoted in the message when the
        feed provides one.
        """
        feed = self._download_json(
            url, None, 'Downloading Akamai AMP feed',
            'Unable to download Akamai AMP feed')
        # `channel` may be present but null, so fall back to an empty dict
        # instead of relying on the .get() default.
        item = (feed.get('channel') or {}).get('item')
        if not item:
            # Only quote the server-side message when there is one; a feed
            # with neither item nor error must not surface as a KeyError.
            error = feed.get('error')
            if error:
                raise ExtractorError('%s said: %s' % (self.IE_NAME, error))
            raise ExtractorError(
                '%s: no item found in Akamai AMP feed' % self.IE_NAME)

        video_id = item['guid']

        def get_media_node(name, default=None):
            # Look for `media-<name>` inside the media group first, then on
            # the item itself, and finally for the bare `<name>` key.
            media_name = 'media-%s' % name
            media_group = item.get('media-group') or item
            return media_group.get(media_name) or item.get(media_name) or item.get(name, default)

        thumbnails = []
        media_thumbnail = get_media_node('thumbnail')
        if media_thumbnail:
            # A single entry is delivered as a dict, several as a list.
            if isinstance(media_thumbnail, dict):
                media_thumbnail = [media_thumbnail]
            for thumbnail_data in media_thumbnail:
                thumbnail = thumbnail_data.get('@attributes', {})
                thumbnail_url = url_or_none(thumbnail.get('url'))
                if not thumbnail_url:
                    continue
                thumbnails.append({
                    'url': self._proto_relative_url(thumbnail_url, 'http:'),
                    'width': int_or_none(thumbnail.get('width')),
                    'height': int_or_none(thumbnail.get('height')),
                })

        subtitles = {}
        media_subtitle = get_media_node('subTitle')
        if media_subtitle:
            if isinstance(media_subtitle, dict):
                media_subtitle = [media_subtitle]
            for subtitle_data in media_subtitle:
                subtitle = subtitle_data.get('@attributes', {})
                subtitle_href = url_or_none(subtitle.get('href'))
                if not subtitle_href:
                    continue
                # Entries without a language are filed under 'en'.
                subtitles.setdefault(subtitle.get('lang') or 'en', []).append({
                    'url': subtitle_href,
                    'ext': mimetype2ext(subtitle.get('type')) or determine_ext(subtitle_href),
                })

        formats = []
        # The content node may be missing entirely; treat that as "no media"
        # rather than failing with a TypeError when iterating over None.
        media_content = get_media_node('content') or []
        if isinstance(media_content, dict):
            media_content = [media_content]
        for media_data in media_content:
            media = media_data.get('@attributes', {})
            media_url = url_or_none(media.get('url'))
            if not media_url:
                continue
            # Prefer the declared MIME type, fall back to the URL extension.
            ext = mimetype2ext(media.get('type')) or determine_ext(media_url)
            if ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    media_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
                    video_id, f4m_id='hds', fatal=False))
            elif ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    media_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
            else:
                # `media-category` may be present but null.
                media_category = media_data.get('media-category') or {}
                formats.append({
                    'format_id': media_category.get('@attributes', {}).get('label'),
                    'url': media_url,
                    'tbr': int_or_none(media.get('bitrate')),
                    'filesize': int_or_none(media.get('fileSize')),
                    'ext': ext,
                })

        # NOTE(review): _sort_formats is presumably what reports an empty
        # format list to the user - confirm against InfoExtractor.
        self._sort_formats(formats)

        timestamp = unified_timestamp(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))

        # Duration is read from the first content entry, when there is one.
        first_media = media_content[0] if media_content else {}

        return {
            'id': video_id,
            'title': get_media_node('title'),
            'description': get_media_node('description'),
            'thumbnails': thumbnails,
            'timestamp': timestamp,
            'duration': int_or_none(first_media.get('@attributes', {}).get('duration')),
            'subtitles': subtitles,
            'formats': formats,
        }
[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors 2015-11-07 15:54:35 +00:00			`# coding: utf-8`
			`from __future__ import unicode_literals`

			`from .common import InfoExtractor`
			`from ..utils import (`
use mimetype2ext to determine manifest ext in multiple extractors 2016-07-06 08:11:46 +00:00			`determine_ext,`
[amp] extract error message(closes #12795) 2017-04-20 04:16:41 +00:00			`ExtractorError,`
Improve URL extraction 2018-07-21 12:08:28 +00:00			`int_or_none,`
			`mimetype2ext,`
			`parse_iso8601,`
[AMP] Fix upload_date and timestamp extraction (#27970) 2021-01-26 21:43:11 +00:00			`unified_timestamp,`
Improve URL extraction 2018-07-21 12:08:28 +00:00			`url_or_none,`
[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors 2015-11-07 15:54:35 +00:00			`)`


			`class AMPIE(InfoExtractor):`
			`# parse Akamai Adaptive Media Player feed`
			`def _extract_feed_info(self, url):`
[amp] extract error message(closes #12795) 2017-04-20 04:16:41 +00:00			`feed = self._download_json(`
[bleacherreport] fix style issues and simplify 2015-12-21 10:12:58 +00:00			`url, None, 'Downloading Akamai AMP feed',`
[amp] extract error message(closes #12795) 2017-04-20 04:16:41 +00:00			`'Unable to download Akamai AMP feed')`
			`item = feed.get('channel', {}).get('item')`
			`if not item:`
			`raise ExtractorError('%s said: %s' % (self.IE_NAME, feed['error']))`
[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors 2015-11-07 15:54:35 +00:00
			`video_id = item['guid']`
[bleacherreport] fix style issues and simplify 2015-12-21 10:12:58 +00:00
			`def get_media_node(name, default=None):`
			`media_name = 'media-%s' % name`
			`media_group = item.get('media-group') or item`
			`return media_group.get(media_name) or item.get(media_name) or item.get(name, default)`

[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors 2015-11-07 15:54:35 +00:00			`thumbnails = []`
[bleacherreport] fix style issues and simplify 2015-12-21 10:12:58 +00:00			`media_thumbnail = get_media_node('thumbnail')`
[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors 2015-11-07 15:54:35 +00:00			`if media_thumbnail:`
			`if isinstance(media_thumbnail, dict):`
			`media_thumbnail = [media_thumbnail]`
			`for thumbnail_data in media_thumbnail:`
[amp] imporove thumbnail and subtitle extraction 2017-05-01 23:06:19 +00:00			`thumbnail = thumbnail_data.get('@attributes', {})`
Improve URL extraction 2018-07-21 12:08:28 +00:00			`thumbnail_url = url_or_none(thumbnail.get('url'))`
[amp] imporove thumbnail and subtitle extraction 2017-05-01 23:06:19 +00:00			`if not thumbnail_url:`
			`continue`
[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors 2015-11-07 15:54:35 +00:00			`thumbnails.append({`
[amp] imporove thumbnail and subtitle extraction 2017-05-01 23:06:19 +00:00			`'url': self._proto_relative_url(thumbnail_url, 'http:'),`
[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors 2015-11-07 15:54:35 +00:00			`'width': int_or_none(thumbnail.get('width')),`
			`'height': int_or_none(thumbnail.get('height')),`
			`})`

			`subtitles = {}`
[bleacherreport] fix style issues and simplify 2015-12-21 10:12:58 +00:00			`media_subtitle = get_media_node('subTitle')`
[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors 2015-11-07 15:54:35 +00:00			`if media_subtitle:`
			`if isinstance(media_subtitle, dict):`
			`media_subtitle = [media_subtitle]`
			`for subtitle_data in media_subtitle:`
[amp] imporove thumbnail and subtitle extraction 2017-05-01 23:06:19 +00:00			`subtitle = subtitle_data.get('@attributes', {})`
Improve URL extraction 2018-07-21 12:08:28 +00:00			`subtitle_href = url_or_none(subtitle.get('href'))`
[amp] imporove thumbnail and subtitle extraction 2017-05-01 23:06:19 +00:00			`if not subtitle_href:`
			`continue`
			`subtitles.setdefault(subtitle.get('lang') or 'en', []).append({`
			`'url': subtitle_href,`
			`'ext': mimetype2ext(subtitle.get('type')) or determine_ext(subtitle_href),`
			`})`
[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors 2015-11-07 15:54:35 +00:00
			`formats = []`
[bleacherreport] fix style issues and simplify 2015-12-21 10:12:58 +00:00			`media_content = get_media_node('content')`
[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors 2015-11-07 15:54:35 +00:00			`if isinstance(media_content, dict):`
			`media_content = [media_content]`
			`for media_data in media_content:`
use mimetype2ext to determine manifest ext in multiple extractors 2016-07-06 08:11:46 +00:00			`media = media_data.get('@attributes', {})`
Improve URL extraction 2018-07-21 12:08:28 +00:00			`media_url = url_or_none(media.get('url'))`
use mimetype2ext to determine manifest ext in multiple extractors 2016-07-06 08:11:46 +00:00			`if not media_url:`
			`continue`
[amp] Fix a typo 2016-07-06 12:10:47 +00:00			`ext = mimetype2ext(media.get('type')) or determine_ext(media_url)`
use mimetype2ext to determine manifest ext in multiple extractors 2016-07-06 08:11:46 +00:00			`if ext == 'f4m':`
Simplify formats accumulation for f4m/m3u8/smil formats Now all _extract_*_formats routines return a list 2015-12-28 18:58:24 +00:00			`formats.extend(self._extract_f4m_formats(`
use mimetype2ext to determine manifest ext in multiple extractors 2016-07-06 08:11:46 +00:00			`media_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',`
Simplify formats accumulation for f4m/m3u8/smil formats Now all _extract_*_formats routines return a list 2015-12-28 18:58:24 +00:00			`video_id, f4m_id='hds', fatal=False))`
use mimetype2ext to determine manifest ext in multiple extractors 2016-07-06 08:11:46 +00:00			`elif ext == 'm3u8':`
Simplify formats accumulation for f4m/m3u8/smil formats Now all _extract_*_formats routines return a list 2015-12-28 18:58:24 +00:00			`formats.extend(self._extract_m3u8_formats(`
use mimetype2ext to determine manifest ext in multiple extractors 2016-07-06 08:11:46 +00:00			`media_url, video_id, 'mp4', m3u8_id='hls', fatal=False))`
[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors 2015-11-07 15:54:35 +00:00			`else:`
			`formats.append({`
[abcnews] Added a new extractor (closes #3992) Related: #6108, #8664, #9459 2016-05-17 07:38:57 +00:00			`'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'),`
Improve URL extraction 2018-07-21 12:08:28 +00:00			`'url': media_url,`
[bleacherreport] fix style issues and simplify 2015-12-21 10:12:58 +00:00			`'tbr': int_or_none(media.get('bitrate')),`
[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors 2015-11-07 15:54:35 +00:00			`'filesize': int_or_none(media.get('fileSize')),`
use mimetype2ext to determine manifest ext in multiple extractors 2016-07-06 08:11:46 +00:00			`'ext': ext,`
[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors 2015-11-07 15:54:35 +00:00			`})`

			`self._sort_formats(formats)`

[AMP] Fix upload_date and timestamp extraction (#27970) 2021-01-26 21:43:11 +00:00			`timestamp = unified_timestamp(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))`
[amp] Fix upload timestamp extraction (Closes #9007) 2016-03-27 19:13:47 +00:00
[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors 2015-11-07 15:54:35 +00:00			`return {`
			`'id': video_id,`
[bleacherreport] fix style issues and simplify 2015-12-21 10:12:58 +00:00			`'title': get_media_node('title'),`
			`'description': get_media_node('description'),`
[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors 2015-11-07 15:54:35 +00:00			`'thumbnails': thumbnails,`
[amp] Fix upload timestamp extraction (Closes #9007) 2016-03-27 19:13:47 +00:00			`'timestamp': timestamp,`
[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors 2015-11-07 15:54:35 +00:00			`'duration': int_or_none(media_content[0].get('@attributes', {}).get('duration')),`
[amp] Add missing subtitles to info dict 2016-01-04 19:05:37 +00:00			`'subtitles': subtitles,`
[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors 2015-11-07 15:54:35 +00:00			`'formats': formats,`
			`}`