youtube-dl/youtube_dl/extractor/svt.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    determine_ext,
    dict_get,
    int_or_none,
    unified_timestamp,
    str_or_none,
    strip_or_none,
    try_get,
)


class SVTBaseIE(InfoExtractor):
    """Common base for the SVT extractors.

    Provides `_extract_video`, which converts an SVT video metadata dict
    into an info dict.
    """

    _GEO_COUNTRIES = ['SE']

    def _extract_video(self, video_info, video_id):
        """Build an info dict from SVT video metadata.

        `video_info` is the metadata dict; it must contain a
        'videoReferences' list (a KeyError is raised otherwise).
        `video_id` is used as the resulting 'id' and for the manifest
        download helpers.

        Video references and subtitle references that carry no URL are
        skipped.  If no format could be collected and the 'rights' dict
        flags 'geoBlockedSweden', a geo restriction error is raised via
        `raise_geo_restricted`.

        Returns a dict with the keys id, title, formats, subtitles,
        duration, timestamp, age_limit, series, season_number, episode,
        episode_number and is_live.
        """
        is_live = dict_get(video_info, ('live', 'simulcast'), default=False)
        m3u8_protocol = 'm3u8' if is_live else 'm3u8_native'
        formats = []
        for vr in video_info['videoReferences']:
            player_type = vr.get('playerType') or vr.get('format')
            vurl = vr.get('url')
            if not vurl:
                # A reference without a URL is unusable; skip it instead of
                # aborting the extraction of the remaining references.
                continue
            ext = determine_ext(vurl)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    vurl, video_id,
                    ext='mp4', entry_protocol=m3u8_protocol,
                    m3u8_id=player_type, fatal=False))
            elif ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    vurl + '?hdcore=3.3.0', video_id,
                    f4m_id=player_type, fatal=False))
            elif ext == 'mpd':
                # Only the 'dashhbbtv' DASH manifests are extracted; other
                # mpd references are deliberately ignored.
                if player_type == 'dashhbbtv':
                    formats.extend(self._extract_mpd_formats(
                        vurl, video_id, mpd_id=player_type, fatal=False))
            else:
                formats.append({
                    'format_id': player_type,
                    'url': vurl,
                })
        rights = try_get(video_info, lambda x: x['rights'], dict) or {}
        if not formats and rights.get('geoBlockedSweden'):
            self.raise_geo_restricted(
                'This video is only available in Sweden',
                countries=self._GEO_COUNTRIES)
        self._sort_formats(formats)

        subtitles = {}
        subtitle_references = dict_get(video_info, ('subtitles', 'subtitleReferences'))
        if isinstance(subtitle_references, list):
            for sr in subtitle_references:
                if not isinstance(sr, dict):
                    continue
                subtitle_url = sr.get('url')
                if not subtitle_url:
                    continue
                if determine_ext(subtitle_url) == 'm3u8':
                    # TODO(yan12125): handle WebVTT in m3u8 manifests
                    continue
                # `or` (not a .get default) so that an explicit null
                # language also falls back to Swedish.
                subtitle_lang = sr.get('language') or 'sv'
                subtitles.setdefault(subtitle_lang, []).append({'url': subtitle_url})

        title = video_info.get('title')

        series = video_info.get('programTitle')
        season_number = int_or_none(video_info.get('season'))
        episode = video_info.get('episodeTitle')
        episode_number = int_or_none(video_info.get('episodeNumber'))

        timestamp = unified_timestamp(rights.get('validFrom'))
        duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration')))
        # age_limit stays None when dict_get yields no value for either flag;
        # otherwise a truthy flag maps to 18 and a falsy one to 0.
        age_limit = None
        adult = dict_get(
            video_info, ('inappropriateForChildren', 'blockedForChildren'),
            skip_false_values=False)
        if adult is not None:
            age_limit = 18 if adult else 0

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'subtitles': subtitles,
            'duration': duration,
            'timestamp': timestamp,
            'age_limit': age_limit,
            'series': series,
            'season_number': season_number,
            'episode': episode,
            'episode_number': episode_number,
            'is_live': is_live,
        }


class SVTIE(SVTBaseIE):
    """Extractor for svt.se widget embeds (`/wd?widgetId=...&articleId=...`)."""

    _VALID_URL = r'https?://(?:www\.)?svt\.se/wd\?(?:.*?&)?widgetId=(?P<widget_id>\d+)&.*?\barticleId=(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.svt.se/wd?widgetId=23991&sectionId=541&articleId=2900353&type=embed&contextSectionId=123&autostart=false',
        'md5': '33e9a5d8f646523ce0868ecfb0eed77d',
        'info_dict': {
            'id': '2900353',
            'ext': 'mp4',
            'title': 'Stjärnorna skojar till det - under SVT-intervjun',
            'duration': 27,
            'age_limit': 0,
        },
    }

    @staticmethod
    def _extract_url(webpage):
        """Return the first widget URL found in an iframe src/href attribute of `webpage`, or None."""
        embed_re = r'(?:<iframe src|href)="(?P<url>%s[^"]*)"' % SVTIE._VALID_URL
        found = re.search(embed_re, webpage)
        return found.group('url') if found else None

    def _real_extract(self, url):
        """Download the widget JSON for `url` and return its info dict."""
        widget_id, article_id = re.match(self._VALID_URL, url).group('widget_id', 'id')

        json_url = 'http://www.svt.se/wd?widgetId=%s&articleId=%s&format=json&type=embed&output=json' % (widget_id, article_id)
        info = self._download_json(json_url, article_id)

        result = self._extract_video(info['video'], article_id)
        # The widget context title replaces whatever title the video metadata had.
        result['title'] = info['context']['title']
        return result


class SVTPlayBaseIE(SVTBaseIE):
    """Base class that holds the regex for the JSON blob embedded in a page."""

    # Matches an assignment of the form `root["__svtplay"] = {...};` that is
    # followed by a newline (either quote style, any number of leading
    # underscores before `svtplay`) and captures the object literal in the
    # named group `json`.
    _SVTPLAY_RE = r'root\s*\[\s*(["\'])_*svtplay\1\s*\]\s*=\s*(?P<json>{.+?})\s*;\s*\n'


class SVTPlayIE(SVTPlayBaseIE):
    """Extractor for svtplay.se / oppetarkiv.se videos, `svt:<id>` and barnplay URLs."""

    IE_DESC = 'SVT Play and Öppet arkiv'
    _VALID_URL = r'''(?x)
                    (?:
                        (?:
                            svt:|
                            https?://(?:www\.)?svt\.se/barnkanalen/barnplay/[^/]+/
                        )
                        (?P<svt_id>[^/?#&]+)|
                        https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)
                    )
                    '''
    _TESTS = [{
        'url': 'https://www.svtplay.se/video/26194546/det-har-ar-himlen',
        'md5': '2382036fd6f8c994856c323fe51c426e',
        'info_dict': {
            'id': 'jNwpV9P',
            'ext': 'mp4',
            'title': 'Det här är himlen',
            'timestamp': 1586044800,
            'upload_date': '20200405',
            'duration': 3515,
            'thumbnail': r're:^https?://(?:.*[\.-]jpg|www.svtstatic.se/image/.*)$',
            'age_limit': 0,
            'subtitles': {
                'sv': [{
                    'ext': 'vtt',
                }]
            },
        },
        'params': {
            'format': 'bestvideo',
            # skip for now due to download test asserts that segment is > 10000 bytes and svt uses
            # init segments that are smaller
            # AssertionError: Expected test_SVTPlay_jNwpV9P.mp4 to be at least 9.77KiB, but it's only 864.00B
            'skip_download': True,
        },
    }, {
        # geo restricted to Sweden
        'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
        'only_matching': True,
    }, {
        'url': 'http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg',
        'only_matching': True,
    }, {
        'url': 'https://www.svtplay.se/kanaler/svt1',
        'only_matching': True,
    }, {
        'url': 'svt:1376446-003A',
        'only_matching': True,
    }, {
        'url': 'svt:14278044',
        'only_matching': True,
    }, {
        'url': 'https://www.svt.se/barnkanalen/barnplay/kar/eWv5MLX/',
        'only_matching': True,
    }, {
        'url': 'svt:eWv5MLX',
        'only_matching': True,
    }]

    def _adjust_title(self, info):
        """Rewrite info['title'] through `_live_title` when the entry is live."""
        if info['is_live']:
            info['title'] = self._live_title(info['title'])

    def _extract_by_video_id(self, video_id, webpage=None):
        """Fetch metadata for `video_id` from the videoplayer API and return an info dict.

        When the API metadata has no title, the title falls back, in order,
        to the episode or series name, the page's OpenGraph title with
        everything from the last-resort ' | ...' suffix removed (only if
        `webpage` is given), and finally `video_id` itself.
        """
        data = self._download_json(
            'https://api.svt.se/videoplayer-api/video/%s' % video_id,
            video_id, headers=self.geo_verification_headers())
        info_dict = self._extract_video(data, video_id)
        if not info_dict.get('title'):
            title = dict_get(info_dict, ('episode', 'series'))
            if not title and webpage:
                # default=None keeps a missing og:title from aborting the
                # extraction, so the video_id fallback below stays reachable.
                og_title = self._og_search_title(webpage, default=None)
                if og_title:
                    title = re.sub(r'\s*\|\s*.+?$', '', og_title)
            if not title:
                title = video_id
            info_dict['title'] = title
        self._adjust_title(info_dict)
        return info_dict

    def _real_extract(self, url):
        """Extract a single video.

        URLs that carry the SVT id directly go straight to the API.  For
        page URLs the webpage is downloaded and, in order of preference,
        the video metadata embedded in the page is used, or an SVT id is
        located (embedded statistics data first, then a set of regexes) and
        resolved through the API.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id, svt_id = mobj.group('id', 'svt_id')

        if svt_id:
            return self._extract_by_video_id(svt_id)

        webpage = self._download_webpage(url, video_id)

        # default='{}' means a page without the embedded blob yields an
        # empty dict instead of an error.
        data = self._parse_json(
            self._search_regex(
                self._SVTPLAY_RE, webpage, 'embedded data', default='{}',
                group='json'),
            video_id, fatal=False)

        thumbnail = self._og_search_thumbnail(webpage)

        if data:
            video_info = try_get(
                data, lambda x: x['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video'],
                dict)
            if video_info:
                info_dict = self._extract_video(video_info, video_id)
                info_dict.update({
                    'title': data['context']['dispatcher']['stores']['MetaStore']['title'],
                    'thumbnail': thumbnail,
                })
                self._adjust_title(info_dict)
                return info_dict

            svt_id = try_get(
                data, lambda x: x['statistics']['dataLake']['content']['id'],
                compat_str)

        if not svt_id:
            # Last resort: look for the id in the raw page markup.
            svt_id = self._search_regex(
                (r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
                 r'["\']videoSvtId["\']\s*:\s*["\']([\da-zA-Z-]+)',
                 r'"content"\s*:\s*{.*?"id"\s*:\s*"([\da-zA-Z-]+)"',
                 r'["\']svtId["\']\s*:\s*["\']([\da-zA-Z-]+)'),
                webpage, 'video id')

        info_dict = self._extract_by_video_id(svt_id, webpage)
        info_dict['thumbnail'] = thumbnail

        return info_dict


class SVTSeriesIE(SVTPlayBaseIE):
    """Playlist extractor for svtplay.se series pages, optionally limited to one season tab."""

    _VALID_URL = r'https?://(?:www\.)?svtplay\.se/(?P<id>[^/?&#]+)(?:.+?\btab=(?P<season_slug>[^&#]+))?'
    _TESTS = [{
        'url': 'https://www.svtplay.se/rederiet',
        'info_dict': {
            'id': '14445680',
            'title': 'Rederiet',
            'description': 'md5:d9fdfff17f5d8f73468176ecd2836039',
        },
        'playlist_mincount': 318,
    }, {
        'url': 'https://www.svtplay.se/rederiet?tab=season-2-14445680',
        'info_dict': {
            'id': 'season-2-14445680',
            'title': 'Rederiet - Säsong 2',
            'description': 'md5:d9fdfff17f5d8f73468176ecd2836039',
        },
        'playlist_mincount': 12,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs that SVTIE or SVTPlayIE already handle."""
        if SVTIE.suitable(url) or SVTPlayIE.suitable(url):
            return False
        return super(SVTSeriesIE, cls).suitable(url)

    def _real_extract(self, url):
        """Query the contento GraphQL API and return a playlist of `svt:` entries."""
        mobj = re.match(self._VALID_URL, url)
        series_slug = mobj.group('id')
        season_id = mobj.group('season_slug')

        graphql_query = '''{
  listablesBySlug(slugs: ["%s"]) {
    associatedContent(include: [productionPeriod, season]) {
      items {
        item {
          ... on Episode {
            videoSvtId
          }
        }
      }
      id
      name
    }
    id
    longDescription
    name
    shortDescription
  }
}''' % series_slug

        response = self._download_json(
            'https://api.svt.se/contento/graphql', series_slug,
            'Downloading series page', query={'query': graphql_query})
        series = response['data']['listablesBySlug'][0]

        season_name = None
        entries = []
        for season in series['associatedContent']:
            if not isinstance(season, dict):
                continue
            if season_id:
                # With a season tab requested, every other season is dropped.
                if season.get('id') != season_id:
                    continue
                season_name = season.get('name')
            episodes = season.get('items')
            if not isinstance(episodes, list):
                continue
            for episode in episodes:
                svt_id = (episode.get('item') or {}).get('videoSvtId')
                if svt_id and isinstance(svt_id, compat_str):
                    entries.append(self.url_result(
                        'svt:' + svt_id, SVTPlayIE.ie_key(), svt_id))

        playlist_title = series.get('name')
        season_label = season_name or season_id
        if playlist_title and season_label:
            playlist_title = '%s - %s' % (playlist_title, season_label)
        elif season_id:
            playlist_title = season_id

        playlist_id = season_id or series.get('id')
        description = dict_get(series, ('longDescription', 'shortDescription'))
        return self.playlist_result(
            entries, playlist_id, playlist_title, description)


class SVTPageIE(InfoExtractor):
    """Playlist extractor for svt.se article pages that contain video clips or episodes."""

    _VALID_URL = r'https?://(?:www\.)?svt\.se/(?P<path>(?:[^/]+/)*(?P<id>[^/?&#]+))'
    _TESTS = [{
        'url': 'https://www.svt.se/sport/ishockey/bakom-masken-lehners-kamp-mot-mental-ohalsa',
        'info_dict': {
            'id': '25298267',
            'title': 'Bakom masken – Lehners kamp mot mental ohälsa',
        },
        'playlist_count': 4,
    }, {
        'url': 'https://www.svt.se/nyheter/utrikes/svenska-andrea-ar-en-mil-fran-branderna-i-kalifornien',
        'info_dict': {
            'id': '24243746',
            'title': 'Svenska Andrea redo att fly sitt hem i Kalifornien',
        },
        'playlist_count': 2,
    }, {
        # only programTitle
        'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
        'info_dict': {
            'id': '8439V2K',
            'ext': 'mp4',
            'title': 'Stjärnorna skojar till det - under SVT-intervjun',
            'duration': 27,
            'age_limit': 0,
        },
    }, {
        'url': 'https://www.svt.se/nyheter/lokalt/vast/svt-testar-tar-nagon-upp-skrapet-1',
        'only_matching': True,
    }, {
        'url': 'https://www.svt.se/vader/manadskronikor/maj2018',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs that SVTIE or SVTPlayIE already handle."""
        if SVTIE.suitable(url) or SVTPlayIE.suitable(url):
            return False
        return super(SVTPageIE, cls).suitable(url)

    def _real_extract(self, url):
        """Fetch the article from the nss-api and return a playlist of its videos."""
        mobj = re.match(self._VALID_URL, url)
        path = mobj.group('path')
        display_id = mobj.group('id')

        page = self._download_json(
            'https://api.svt.se/nss-api/page/' + path, display_id,
            query={'q': 'articles'})
        article = page['articles']['content'][0]

        def _iter_contents():
            # Top-level media objects first, then the content of each
            # structured body element; evaluated lazily, one at a time.
            for media in article.get('media', []):
                yield media
            for obj in article.get('structuredBody', []):
                yield obj.get('content') or {}

        entries = []
        for content in _iter_contents():
            if content.get('_type') not in ('VIDEOCLIP', 'VIDEOEPISODE'):
                continue
            video_id = compat_str(content['image']['svtId'])
            entries.append(self.url_result(
                'svt:' + video_id, SVTPlayIE.ie_key(), video_id))

        playlist_id = str_or_none(article.get('id'))
        playlist_title = strip_or_none(article.get('title'))
        return self.playlist_result(entries, playlist_id, playlist_title)
-												[svtplay] Correct test case

											
										
										
											2015-02-09 15:05:01 +00:00
+								# coding: utf-8
-												[svtplay] Add new extractor (Fixes #4914)

											
										
										
											2015-02-09 14:56:46 +00:00
+								from __future__ import unicode_literals
-												[oppetarkiv] Merge with svtplay

											
										
										
											2015-02-28 15:25:04 +00:00
+								import re
-												[svtplay] Add new extractor (Fixes #4914)

											
										
										
											2015-02-09 14:56:46 +00:00
+								from .common import InfoExtractor
-												[svt] fix series extraction(closes #22297)

											
										
										
											2020-01-26 15:17:51 +00:00
+								from ..compat import compat_str
-												[svtplay] Add new extractor (Fixes #4914)

											
										
										
											2015-02-09 14:56:46 +00:00
+								from ..utils import (
 								    determine_ext,
-												[svt] Fix extraction for SVTPlay (closes #9809)

											
										
										
											2016-06-21 09:55:53 +00:00
+								    dict_get,
-												[svt] Various improvements

+ [svt:play] Add fallback path looking for video id and fix extraction for oppetarkiv
* [svt:base] Detect geo restriction
* [svt:base] Extract series related metadata

											
										
										
											2016-06-22 16:36:07 +00:00
+								    int_or_none,
-												[svt] Extract timestamp and thumbnail in more cases (#27130)

Add timestamp, set to "valid from" which i think could been seen as publish time.
Add thumbnail in more cases, seems to was only done in the embedded data case for some reason.
Switch svtplay test url to an existing video and also one with no expire date.
Also add an additional thumbnail url test regex.
											
										
										
											2020-11-21 16:24:37 +00:00
+								    unified_timestamp,
-												[svt] fix article extraction(closes #22897)(closes #22919)

											
										
										
											2020-01-26 13:15:49 +00:00
+								    str_or_none,
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											2018-06-27 21:29:11 +00:00
+								    strip_or_none,
-												[svt] Various improvements

+ [svt:play] Add fallback path looking for video id and fix extraction for oppetarkiv
* [svt:base] Detect geo restriction
* [svt:base] Extract series related metadata

											
										
										
											2016-06-22 16:36:07 +00:00
+								    try_get,
-												[svtplay] Add new extractor (Fixes #4914)

											
										
										
											2015-02-09 14:56:46 +00:00
+								)
-												[svtplay] Generalize svt extractors and add svt.se extractor

											
										
										
											2015-05-08 18:12:42 +00:00
+								class SVTBaseIE(InfoExtractor):
-												Improve geo bypass mechanism
* Rename options to preffixly match with --geo-verification-proxy
* Introduce _GEO_COUNTRIES for extractors
* Implement faking IP right away for sites with known geo restriction

											
										
										
											2017-02-18 20:53:23 +00:00
+								    _GEO_COUNTRIES = ['SE']
-												[svt] PEP 8

											
										
										
											2017-02-19 19:25:55 +00:00
-												[svt] Various improvements

+ [svt:play] Add fallback path looking for video id and fix extraction for oppetarkiv
* [svt:base] Detect geo restriction
* [svt:base] Extract series related metadata

											
										
										
											2016-06-22 16:36:07 +00:00
+								    def _extract_video(self, video_info, video_id):
-												[svt] Add support for TV channel live streams (Closes #15279)

											
										
										
											2018-03-17 15:14:20 +00:00
+								        is_live = dict_get(video_info, ('live', 'simulcast'), default=False)
 								        m3u8_protocol = 'm3u8' if is_live else 'm3u8_native'
-												[svtplay] Add new extractor (Fixes #4914)

											
										
										
											2015-02-09 14:56:46 +00:00
+								        formats = []
 								        for vr in video_info['videoReferences']:
-												[svt] Fix DASH formats extraction

											
										
										
											2016-09-17 12:25:31 +00:00
+								            player_type = vr.get('playerType') or vr.get('format')
-												[svtplay] Add new extractor (Fixes #4914)

											
										
										
											2015-02-09 14:56:46 +00:00
+								            vurl = vr['url']
-												[oppetarkiv] Merge with svtplay

											
										
										
											2015-02-28 15:25:04 +00:00
+								            ext = determine_ext(vurl)
 								            if ext == 'm3u8':
-												[svtplay] Add new extractor (Fixes #4914)

											
										
										
											2015-02-09 14:56:46 +00:00
+								                formats.extend(self._extract_m3u8_formats(
 								                    vurl, video_id,
-												[svt] Add support for TV channel live streams (Closes #15279)

											
										
										
											2018-03-17 15:14:20 +00:00
+								                    ext='mp4', entry_protocol=m3u8_protocol,
-												[svt] extract dashhbbtv formats(#8867)

											
										
										
											2016-03-15 18:33:09 +00:00
+								                    m3u8_id=player_type, fatal=False))
-												[oppetarkiv] Merge with svtplay

											
										
										
											2015-02-28 15:25:04 +00:00
+								            elif ext == 'f4m':
 								                formats.extend(self._extract_f4m_formats(
 								                    vurl + '?hdcore=3.3.0', video_id,
-												[svt] extract dashhbbtv formats(#8867)

											
										
										
											2016-03-15 18:33:09 +00:00
+								                    f4m_id=player_type, fatal=False))
 								            elif ext == 'mpd':
 								                if player_type == 'dashhbbtv':
 								                    formats.extend(self._extract_mpd_formats(
 								                        vurl, video_id, mpd_id=player_type, fatal=False))
-												[svtplay] Add new extractor (Fixes #4914)

											
										
										
											2015-02-09 14:56:46 +00:00
+								            else:
 								                formats.append({
-												[svt] extract dashhbbtv formats(#8867)

											
										
										
											2016-03-15 18:33:09 +00:00
+								                    'format_id': player_type,
-												[svtplay] Add new extractor (Fixes #4914)

											
										
										
											2015-02-09 14:56:46 +00:00
+								                    'url': vurl,
 								                })
-												[svt] Extract timestamp and thumbnail in more cases (#27130)

Add timestamp, set to "valid from" which i think could been seen as publish time.
Add thumbnail in more cases, seems to was only done in the embedded data case for some reason.
Switch svtplay test url to an existing video and also one with no expire date.
Also add an additional thumbnail url test regex.
											
										
										
											2020-11-21 16:24:37 +00:00
+								        rights = try_get(video_info, lambda x: x['rights'], dict) or {}
 								        if not formats and rights.get('geoBlockedSweden'):
-												[svt] Improve geo restriction detection and use geo bypass mechanism

											
										
										
											2017-02-04 11:52:33 +00:00
+								            self.raise_geo_restricted(
-												Improve geo bypass mechanism
* Rename options to preffixly match with --geo-verification-proxy
* Introduce _GEO_COUNTRIES for extractors
* Implement faking IP right away for sites with known geo restriction

											
										
										
											2017-02-18 20:53:23 +00:00
+								                'This video is only available in Sweden',
 								                countries=self._GEO_COUNTRIES)
-												[svtplay] Add new extractor (Fixes #4914)

											
										
										
											2015-02-09 14:56:46 +00:00
+								        self._sort_formats(formats)
-												[SVTPlay] Add subtitle support

											
										
										
											2016-01-12 16:08:55 +00:00
+								        subtitles = {}
-												[svt] Fix extraction for SVTPlay (closes #9809)

											
										
										
											2016-06-21 09:55:53 +00:00
+								        subtitle_references = dict_get(video_info, ('subtitles', 'subtitleReferences'))
-												[svt] Improve subtitles extraction and add test (Closes #8265)

											
										
										
											2016-01-22 19:47:54 +00:00
+								        if isinstance(subtitle_references, list):
 								            for sr in subtitle_references:
 								                subtitle_url = sr.get('url')
-												[svt] Fix extraction for SVTPlay (closes #9809)

											
										
										
											2016-06-21 09:55:53 +00:00
+								                subtitle_lang = sr.get('language', 'sv')
-												[svt] Improve subtitles extraction and add test (Closes #8265)

											
										
										
											2016-01-22 19:47:54 +00:00
+								                if subtitle_url:
-												[svt] Fix extraction for SVTPlay (closes #9809)

											
										
										
											2016-06-21 09:55:53 +00:00
+								                    if determine_ext(subtitle_url) == 'm3u8':
 								                        # TODO(yan12125): handle WebVTT in m3u8 manifests
 								                        continue
 								                    subtitles.setdefault(subtitle_lang, []).append({'url': subtitle_url})
-												[SVTPlay] Add subtitle support

											
										
										
											2016-01-12 16:08:55 +00:00
-												[svt] Various improvements

+ [svt:play] Add fallback path looking for video id and fix extraction for oppetarkiv
* [svt:base] Detect geo restriction
* [svt:base] Extract series related metadata

											
										
										
											2016-06-22 16:36:07 +00:00
+								        title = video_info.get('title')
 								        series = video_info.get('programTitle')
 								        season_number = int_or_none(video_info.get('season'))
 								        episode = video_info.get('episodeTitle')
 								        episode_number = int_or_none(video_info.get('episodeNumber'))
-												[svt] Extract timestamp and thumbnail in more cases (#27130)

Add timestamp, set to "valid from" which i think could been seen as publish time.
Add thumbnail in more cases, seems to was only done in the embedded data case for some reason.
Switch svtplay test url to an existing video and also one with no expire date.
Also add an additional thumbnail url test regex.
											
										
										
											2020-11-21 16:24:37 +00:00
+								        timestamp = unified_timestamp(rights.get('validFrom'))
-												[svt] Various improvements

+ [svt:play] Add fallback path looking for video id and fix extraction for oppetarkiv
* [svt:base] Detect geo restriction
* [svt:base] Extract series related metadata

											
										
										
											2016-06-22 16:36:07 +00:00
+								        duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration')))
 								        age_limit = None
 								        adult = dict_get(
 								            video_info, ('inappropriateForChildren', 'blockedForChildren'),
 								            skip_false_values=False)
 								        if adult is not None:
 								            age_limit = 18 if adult else 0
-												[svtplay] Add new extractor (Fixes #4914)

											
										
										
											2015-02-09 14:56:46 +00:00
 								        return {
 								            'id': video_id,
-												[svt] Various improvements

+ [svt:play] Add fallback path looking for video id and fix extraction for oppetarkiv
* [svt:base] Detect geo restriction
* [svt:base] Extract series related metadata

											
										
										
											2016-06-22 16:36:07 +00:00
+								            'title': title,
-												[svtplay] Add new extractor (Fixes #4914)

											
										
										
											2015-02-09 14:56:46 +00:00
+								            'formats': formats,
-												[SVTPlay] Add subtitle support

											
										
										
											2016-01-12 16:08:55 +00:00
+								            'subtitles': subtitles,
-												[svtplay] Add new extractor (Fixes #4914)

											
										
										
											2015-02-09 14:56:46 +00:00
+								            'duration': duration,
-												[svt] Extract timestamp and thumbnail in more cases (#27130)

Add timestamp, set to "valid from" which i think could been seen as publish time.
Add thumbnail in more cases, seems to was only done in the embedded data case for some reason.
Switch svtplay test url to an existing video and also one with no expire date.
Also add an additional thumbnail url test regex.
											
										
										
											2020-11-21 16:24:37 +00:00
+								            'timestamp': timestamp,
-												[oppetarkiv] Merge with svtplay

											
										
										
											2015-02-28 15:25:04 +00:00
+								            'age_limit': age_limit,
-												[svt] Various improvements

+ [svt:play] Add fallback path looking for video id and fix extraction for oppetarkiv
* [svt:base] Detect geo restriction
* [svt:base] Extract series related metadata

											
										
										
											2016-06-22 16:36:07 +00:00
+								            'series': series,
 								            'season_number': season_number,
 								            'episode': episode,
 								            'episode_number': episode_number,
-												[svt] Add support for TV channel live streams (Closes #15279)

											
										
										
											2018-03-17 15:14:20 +00:00
+								            'is_live': is_live,
-												[svtplay] Add new extractor (Fixes #4914)

											
										
										
											2015-02-09 14:56:46 +00:00
+								        }
-												[svtplay] Generalize svt extractors and add svt.se extractor

											
										
										
											2015-05-08 18:12:42 +00:00
 								class SVTIE(SVTBaseIE):
 								    _VALID_URL = r'https?://(?:www\.)?svt\.se/wd\?(?:.*?&)?widgetId=(?P<widget_id>\d+)&.*?\barticleId=(?P<id>\d+)'
 								    _TEST = {
 								        'url': 'http://www.svt.se/wd?widgetId=23991&sectionId=541&articleId=2900353&type=embed&contextSectionId=123&autostart=false',
-												[svt] Fix extraction for SVTPlay (closes #9809)

											
										
										
											2016-06-21 09:55:53 +00:00
+								        'md5': '33e9a5d8f646523ce0868ecfb0eed77d',
-												[svtplay] Generalize svt extractors and add svt.se extractor

											
										
										
											2015-05-08 18:12:42 +00:00
+								        'info_dict': {
 								            'id': '2900353',
-												[svt] Fix extraction for SVTPlay (closes #9809)

											
										
										
											2016-06-21 09:55:53 +00:00
+								            'ext': 'mp4',
 								            'title': 'Stjärnorna skojar till det - under SVT-intervjun',
-												[svtplay] Generalize svt extractors and add svt.se extractor

											
										
										
											2015-05-08 18:12:42 +00:00
+								            'duration': 27,
 								            'age_limit': 0,
 								        },
 								    }
-												[extractor/generic] Add support for svt embeds (Closes #5622)

											
										
										
											2015-05-08 18:23:35 +00:00
+								    @staticmethod
 								    def _extract_url(webpage):
 								        mobj = re.search(
 								            r'(?:<iframe src|href)="(?P<url>%s[^"]*)"' % SVTIE._VALID_URL, webpage)
 								        if mobj:
 								            return mobj.group('url')
-												[svtplay] Generalize svt extractors and add svt.se extractor

											
										
										
											2015-05-08 18:12:42 +00:00
+								    def _real_extract(self, url):
 								        mobj = re.match(self._VALID_URL, url)
 								        widget_id = mobj.group('widget_id')
 								        article_id = mobj.group('id')
-												[svt] Fix extraction for SVTPlay (closes #9809)

											
										
										
											2016-06-21 09:55:53 +00:00
 								        info = self._download_json(
-												[svtplay] Generalize svt extractors and add svt.se extractor

											
										
										
											2015-05-08 18:12:42 +00:00
+								            'http://www.svt.se/wd?widgetId=%s&articleId=%s&format=json&type=embed&output=json' % (widget_id, article_id),
 								            article_id)
-												[svt] Various improvements

+ [svt:play] Add fallback path looking for video id and fix extraction for oppetarkiv
* [svt:base] Detect geo restriction
* [svt:base] Extract series related metadata

											
										
										
											2016-06-22 16:36:07 +00:00
+								        info_dict = self._extract_video(info['video'], article_id)
-												[svt] Fix extraction for SVTPlay (closes #9809)

											
										
										
											2016-06-21 09:55:53 +00:00
+								        info_dict['title'] = info['context']['title']
 								        return info_dict
-												[svtplay] Generalize svt extractors and add svt.se extractor

											
										
										
											2015-05-08 18:12:42 +00:00
-												[svtplay] Share svtplay regex

											
										
										
											2018-04-04 17:28:36 +00:00
+								class SVTPlayBaseIE(SVTBaseIE):
 								    # Shared pattern for the page-embedded state object: matches an assignment
 								    # of the form  root['__svtplay'] = {...};  (either quote style, any number
 								    # of leading underscores) terminated by a newline, and captures the object
 								    # literal non-greedily in the `json` group.
 								    _SVTPLAY_RE = r'root\s*\[\s*(["\'])_*svtplay\1\s*\]\s*=\s*(?P<json>{.+?})\s*;\s*\n'
 								class SVTPlayIE(SVTPlayBaseIE):
-												[svtplay] Generalize svt extractors and add svt.se extractor

											
										
										
											2015-05-08 18:12:42 +00:00
+								    IE_DESC = 'SVT Play and Öppet arkiv'
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											2018-06-27 21:29:11 +00:00
+								    _VALID_URL = r'''(?x)
 								                    (?:
-												[svtplay] Add support for svt.se/barnkanalen (closes #24817)

											
										
										
											2020-11-21 16:35:01 +00:00
+								                        (?:
 								                            svt:|
 								                            https?://(?:www\.)?svt\.se/barnkanalen/barnplay/[^/]+/
 								                        )
 								                        (?P<svt_id>[^/?#&]+)|
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											2018-06-27 21:29:11 +00:00
+								                        https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)
 								                    )
 								                    '''
-												[svt] Various improvements

+ [svt:play] Add fallback path looking for video id and fix extraction for oppetarkiv
* [svt:base] Detect geo restriction
* [svt:base] Extract series related metadata

											
										
										
											2016-06-22 16:36:07 +00:00
+								    _TESTS = [{
-												[svt] Extract timestamp and thumbnail in more cases (#27130)

Add timestamp, set to "valid from", which I think can be seen as the publish time.
Add thumbnail in more cases; it seems this was previously only done in the embedded data case for some reason.
Switch svtplay test url to an existing video and also one with no expire date.
Also add an additional thumbnail url test regex.
											
										
										
											2020-11-21 16:24:37 +00:00
+								        'url': 'https://www.svtplay.se/video/26194546/det-har-ar-himlen',
 								        'md5': '2382036fd6f8c994856c323fe51c426e',
-												[svtplay] Generalize svt extractors and add svt.se extractor

											
										
										
											2015-05-08 18:12:42 +00:00
+								        'info_dict': {
-												[svt] Extract timestamp and thumbnail in more cases (#27130)

Add timestamp, set to "valid from", which I think can be seen as the publish time.
Add thumbnail in more cases; it seems this was previously only done in the embedded data case for some reason.
Switch svtplay test url to an existing video and also one with no expire date.
Also add an additional thumbnail url test regex.
											
										
										
											2020-11-21 16:24:37 +00:00
+								            'id': 'jNwpV9P',
-												[svt] Improve subtitles extraction and add test (Closes #8265)

											
										
										
											2016-01-22 19:47:54 +00:00
+								            'ext': 'mp4',
-												[svtplay] Fix test title

											
										
										
											2020-11-21 16:36:25 +00:00
+								            'title': 'Det här är himlen',
-												[svt] Extract timestamp and thumbnail in more cases (#27130)

Add timestamp, set to "valid from", which I think can be seen as the publish time.
Add thumbnail in more cases; it seems this was previously only done in the embedded data case for some reason.
Switch svtplay test url to an existing video and also one with no expire date.
Also add an additional thumbnail url test regex.
											
										
										
											2020-11-21 16:24:37 +00:00
+								            'timestamp': 1586044800,
 								            'upload_date': '20200405',
 								            'duration': 3515,
 								            'thumbnail': r're:^https?://(?:.*[\.-]jpg|www.svtstatic.se/image/.*)$',
-												[svtplay] Generalize svt extractors and add svt.se extractor

											
										
										
											2015-05-08 18:12:42 +00:00
+								            'age_limit': 0,
-												[svt] Improve subtitles extraction and add test (Closes #8265)

											
										
										
											2016-01-22 19:47:54 +00:00
+								            'subtitles': {
 								                'sv': [{
-												[svt] Extract timestamp and thumbnail in more cases (#27130)

Add timestamp, set to "valid from", which I think can be seen as the publish time.
Add thumbnail in more cases; it seems this was previously only done in the embedded data case for some reason.
Switch svtplay test url to an existing video and also one with no expire date.
Also add an additional thumbnail url test regex.
											
										
										
											2020-11-21 16:24:37 +00:00
+								                    'ext': 'vtt',
-												[svt] Improve subtitles extraction and add test (Closes #8265)

											
										
										
											2016-01-22 19:47:54 +00:00
+								                }]
 								            },
-												[svtplay] Generalize svt extractors and add svt.se extractor

											
										
										
											2015-05-08 18:12:42 +00:00
+								        },
-												[svt] Extract timestamp and thumbnail in more cases (#27130)

Add timestamp, set to "valid from", which I think can be seen as the publish time.
Add thumbnail in more cases; it seems this was previously only done in the embedded data case for some reason.
Switch svtplay test url to an existing video and also one with no expire date.
Also add an additional thumbnail url test regex.
											
										
										
											2020-11-21 16:24:37 +00:00
+								        'params': {
 								            'format': 'bestvideo',
 								            # skip for now due to download test asserts that segment is > 10000 bytes and svt uses
 								            # init segments that are smaller
 								            # AssertionError: Expected test_SVTPlay_jNwpV9P.mp4 to be at least 9.77KiB, but it's only 864.00B
 								            'skip_download': True,
 								        },
-												[svt] Various improvements

+ [svt:play] Add fallback path looking for video id and fix extraction for oppetarkiv
* [svt:base] Detect geo restriction
* [svt:base] Extract series related metadata

											
										
										
											2016-06-22 16:36:07 +00:00
+								    }, {
 								        # geo restricted to Sweden
 								        'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
 								        'only_matching': True,
-												[svtplay] Extend _VALID_URL (#9900)

											
										
										
											2016-06-25 17:29:53 +00:00
+								    }, {
 								        'url': 'http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg',
 								        'only_matching': True,
-												[svt] Add support for TV channel live streams (Closes #15279)

											
										
										
											2018-03-17 15:14:20 +00:00
+								    }, {
 								        'url': 'https://www.svtplay.se/kanaler/svt1',
 								        'only_matching': True,
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											2018-06-27 21:29:11 +00:00
+								    }, {
 								        'url': 'svt:1376446-003A',
 								        'only_matching': True,
 								    }, {
 								        'url': 'svt:14278044',
 								        'only_matching': True,
-												[svtplay] Add support for svt.se/barnkanalen (closes #24817)

											
										
										
											2020-11-21 16:35:01 +00:00
+								    }, {
 								        'url': 'https://www.svt.se/barnkanalen/barnplay/kar/eWv5MLX/',
 								        'only_matching': True,
 								    }, {
 								        'url': 'svt:eWv5MLX',
 								        'only_matching': True,
-												[svt] Various improvements

+ [svt:play] Add fallback path looking for video id and fix extraction for oppetarkiv
* [svt:base] Detect geo restriction
* [svt:base] Extract series related metadata

											
										
										
											2016-06-22 16:36:07 +00:00
+								    }]
-												[svt] Fix extraction for SVTPlay (closes #9809)

											
										
										
											2016-06-21 09:55:53 +00:00
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											2018-06-27 21:29:11 +00:00
+								    def _adjust_title(self, info):
 								        # Mutates `info` in place and returns nothing: when info['is_live'] is
 								        # truthy, the title is replaced by the result of self._live_title().
 								        # 'is_live' and 'title' are indexed directly, so both keys must exist.
 								        if info['is_live']:
 								            info['title'] = self._live_title(info['title'])
 								    def _extract_by_video_id(self, video_id, webpage=None):
 								        # Build an info dict for an SVT video id via the videoplayer API.
 								        #   video_id: id substituted into the API URL.
 								        #   webpage:  optional page HTML, used only as a title fallback.
 								        # The request carries self.geo_verification_headers(); the JSON
 								        # response is passed to _extract_video().
 								        data = self._download_json(
-												[svtplay] Update API URL (closes #21075)

											
										
										
											2019-05-12 16:23:29 +00:00
+								            'https://api.svt.se/videoplayer-api/video/%s' % video_id,
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											2018-06-27 21:29:11 +00:00
+								            video_id, headers=self.geo_verification_headers())
 								        info_dict = self._extract_video(data, video_id)
 								        # Title fallback chain, tried in order when no title was extracted:
 								        # the 'episode'/'series' fields of the info dict, then the page's
 								        # Open Graph title with a trailing " | ..." suffix removed (only if
 								        # `webpage` was supplied), and finally the bare video id.
 								        if not info_dict.get('title'):
 								            title = dict_get(info_dict, ('episode', 'series'))
 								            if not title and webpage:
 								                title = re.sub(
 								                    r'\s*\|\s*.+?$', '', self._og_search_title(webpage))
 								            if not title:
 								                title = video_id
 								            info_dict['title'] = title
 								        self._adjust_title(info_dict)
 								        return info_dict
-												[svtplay] Generalize svt extractors and add svt.se extractor

											
										
										
											2015-05-08 18:12:42 +00:00
+								    def _real_extract(self, url):
 								        # Entry point for SVT Play / Oppet arkiv URLs and `svt:` pseudo-URLs.
 								        # Three paths, tried in order:
 								        #   1. the URL matched the `svt_id` group -> go straight to the API;
 								        #   2. the page embeds a state object (see _SVTPLAY_RE) holding a
 								        #      VideoTitlePageStore video -> extract from that object;
 								        #   3. otherwise find an SVT id (embedded data first, then regexes over
 								        #      the page markup) and extract through the API.
 								        # The Open Graph thumbnail is attached in paths 2 and 3.
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											2018-06-27 21:29:11 +00:00
+								        mobj = re.match(self._VALID_URL, url)
 								        video_id, svt_id = mobj.group('id', 'svt_id')
 								        if svt_id:
 								            return self._extract_by_video_id(svt_id)
-												[svt] Fix extraction for SVTPlay (closes #9809)

											
										
										
											2016-06-21 09:55:53 +00:00
 								        webpage = self._download_webpage(url, video_id)
-												[svt] Various improvements

+ [svt:play] Add fallback path looking for video id and fix extraction for oppetarkiv
* [svt:base] Detect geo restriction
* [svt:base] Extract series related metadata

											
										
										
											2016-06-22 16:36:07 +00:00
+								        data = self._parse_json(
 								            self._search_regex(
-												[svtplay] Share svtplay regex

											
										
										
											2018-04-04 17:28:36 +00:00
+								                self._SVTPLAY_RE, webpage, 'embedded data', default='{}',
 								                group='json'),
-												[svt] Various improvements

+ [svt:play] Add fallback path looking for video id and fix extraction for oppetarkiv
* [svt:base] Detect geo restriction
* [svt:base] Extract series related metadata

											
										
										
											2016-06-22 16:36:07 +00:00
+								            video_id, fatal=False)
-												[svt] Fix extraction for SVTPlay (closes #9809)

											
										
										
											2016-06-21 09:55:53 +00:00
 								        thumbnail = self._og_search_thumbnail(webpage)
-												[svt] Various improvements

+ [svt:play] Add fallback path looking for video id and fix extraction for oppetarkiv
* [svt:base] Detect geo restriction
* [svt:base] Extract series related metadata

											
										
										
											2016-06-22 16:36:07 +00:00
 								        # `data` is falsy when the regex fell back to '{}' (or, presumably,
 								        # when non-fatal JSON parsing failed); svt_id then keeps the falsy
 								        # value it got from the URL match above.
+								        if data:
 								            video_info = try_get(
 								                data, lambda x: x['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video'],
 								                dict)
 								            if video_info:
 								                info_dict = self._extract_video(video_info, video_id)
 								                info_dict.update({
 								                    'title': data['context']['dispatcher']['stores']['MetaStore']['title'],
 								                    'thumbnail': thumbnail,
 								                })
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											2018-06-27 21:29:11 +00:00
+								                self._adjust_title(info_dict)
-												[svt] Various improvements

+ [svt:play] Add fallback path looking for video id and fix extraction for oppetarkiv
* [svt:base] Detect geo restriction
* [svt:base] Extract series related metadata

											
										
										
											2016-06-22 16:36:07 +00:00
+								                return info_dict
-												[svtplay] Fix svt id extraction (closes #26425, closes #26428, closes #26438)

											
										
										
											2020-09-06 03:45:57 +00:00
+								            svt_id = try_get(
 								                data, lambda x: x['statistics']['dataLake']['content']['id'],
 								                compat_str)
 								        # Last resort: scrape the id from the page markup. No default is
 								        # passed to _search_regex here, unlike the 'embedded data' search.
 								        if not svt_id:
 								            svt_id = self._search_regex(
 								                (r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
-												[svtplay] Fix id extraction (closes #26576)

											
										
										
											2020-09-13 11:59:37 +00:00
+								                 r'["\']videoSvtId["\']\s*:\s*["\']([\da-zA-Z-]+)',
 								                 r'"content"\s*:\s*{.*?"id"\s*:\s*"([\da-zA-Z-]+)"',
 								                 r'["\']svtId["\']\s*:\s*["\']([\da-zA-Z-]+)'),
-												[svtplay] Fix svt id extraction (closes #26425, closes #26428, closes #26438)

											
										
										
											2020-09-06 03:45:57 +00:00
+								                webpage, 'video id')
-												[svt] Various improvements

+ [svt:play] Add fallback path looking for video id and fix extraction for oppetarkiv
* [svt:base] Detect geo restriction
* [svt:base] Extract series related metadata

											
										
										
											2016-06-22 16:36:07 +00:00
-												[svt] Extract timestamp and thumbnail in more cases (#27130)

Add timestamp, set to "valid from" which i think could been seen as publish time.
Add thumbnail in more cases, seems to was only done in the embedded data case for some reason.
Switch svtplay test url to an existing video and also one with no expire date.
Also add an additional thumbnail url test regex.
											
										
										
											2020-11-21 16:24:37 +00:00
+								        info_dict = self._extract_by_video_id(svt_id, webpage)
 								        info_dict['thumbnail'] = thumbnail
 								        return info_dict
-												[svtplay:series] Add extractor

Related to #11130

											
										
										
											2018-03-30 18:02:09 +00:00
-												[svtplay] Share svtplay regex

											
										
										
											2018-04-04 17:28:36 +00:00
+								class SVTSeriesIE(SVTPlayBaseIE):
-												[svt] fix series extraction(closes #22297)

											
										
										
											2020-01-26 15:17:51 +00:00
+								    _VALID_URL = r'https?://(?:www\.)?svtplay\.se/(?P<id>[^/?&#]+)(?:.+?\btab=(?P<season_slug>[^&#]+))?'
-												[svtplay:series] Add extractor

Related to #11130

											
										
										
											2018-03-30 18:02:09 +00:00
+								    _TESTS = [{
 								        'url': 'https://www.svtplay.se/rederiet',
 								        'info_dict': {
-												[svt] fix series extraction(closes #22297)

											
										
										
											2020-01-26 15:17:51 +00:00
+								            'id': '14445680',
-												[svtplay:series] Add extractor

Related to #11130

											
										
										
											2018-03-30 18:02:09 +00:00
+								            'title': 'Rederiet',
-												[svt] fix series extraction(closes #22297)

											
										
										
											2020-01-26 15:17:51 +00:00
+								            'description': 'md5:d9fdfff17f5d8f73468176ecd2836039',
-												[svtplay:series] Add extractor

Related to #11130

											
										
										
											2018-03-30 18:02:09 +00:00
+								        },
 								        'playlist_mincount': 318,
-												[svtplay:series] Add support for season URLs

											
										
										
											2018-04-04 17:05:09 +00:00
+								    }, {
-												[svt] fix series extraction(closes #22297)

											
										
										
											2020-01-26 15:17:51 +00:00
+								        'url': 'https://www.svtplay.se/rederiet?tab=season-2-14445680',
-												[svtplay:series] Add support for season URLs

											
										
										
											2018-04-04 17:05:09 +00:00
+								        'info_dict': {
-												[svt] fix series extraction(closes #22297)

											
										
										
											2020-01-26 15:17:51 +00:00
+								            'id': 'season-2-14445680',
-												[svtplay:series] Add support for season URLs

											
										
										
											2018-04-04 17:05:09 +00:00
+								            'title': 'Rederiet - Säsong 2',
-												[svt] fix series extraction(closes #22297)

											
										
										
											2020-01-26 15:17:51 +00:00
+								            'description': 'md5:d9fdfff17f5d8f73468176ecd2836039',
-												[svtplay:series] Add support for season URLs

											
										
										
											2018-04-04 17:05:09 +00:00
+								        },
-												[svt] fix series extraction(closes #22297)

											
										
										
											2020-01-26 15:17:51 +00:00
+								        'playlist_mincount': 12,
-												[svtplay:series] Add extractor

Related to #11130

											
										
										
											2018-03-30 18:02:09 +00:00
+								    }]
 								    @classmethod
 								    def suitable(cls, url):
 								        # Decline any URL that SVTIE or SVTPlayIE already accepts; only
 								        # otherwise defer to the inherited suitable() check.
-												[svtplay:series] Improve extraction (closes #16059)

											
										
										
											2018-04-04 16:52:00 +00:00
+								        return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTSeriesIE, cls).suitable(url)
-												[svtplay:series] Add extractor

Related to #11130

											
										
										
											2018-03-30 18:02:09 +00:00
 								    def _real_extract(self, url):
 								        # Build a playlist for a series page, or for one season of it.
 								        # The two _VALID_URL groups (`id`, `season_slug`) are unpacked
 								        # positionally as series_slug and season_id; season_id is None when
 								        # the URL has no `tab=` part. The series is fetched with a single
 								        # GraphQL query and every episode carrying a string `videoSvtId`
 								        # becomes an `svt:<id>` entry delegated to SVTPlayIE.
 								        # NOTE(review): series_slug is interpolated into the GraphQL query
 								        # text as-is; the URL pattern does not exclude '"' -- confirm whether
 								        # escaping is needed.
-												[svt] fix series extraction(closes #22297)

											
										
										
											2020-01-26 15:17:51 +00:00
+								        series_slug, season_id = re.match(self._VALID_URL, url).groups()
 								        series = self._download_json(
 								            'https://api.svt.se/contento/graphql', series_slug,
 								            'Downloading series page', query={
 								                'query': '''{
 								  listablesBySlug(slugs: ["%s"]) {
 								    associatedContent(include: [productionPeriod, season]) {
 								      items {
 								        item {
 								          ... on Episode {
 								            videoSvtId
 								          }
 								        }
 								      }
 								      id
 								      name
 								    }
 								    id
 								    longDescription
 								    name
 								    shortDescription
 								  }
 								}''' % series_slug,
 								            })['data']['listablesBySlug'][0]
-												[svtplay:series] Add support for season URLs

											
										
										
											2018-04-04 17:05:09 +00:00
 								        season_name = None
-												[svtplay:series] Add extractor

Related to #11130

											
										
										
											2018-03-30 18:02:09 +00:00
 								        entries = []
-												[svt] fix series extraction(closes #22297)

											
										
										
											2020-01-26 15:17:51 +00:00
+								        for season in series['associatedContent']:
-												[svtplay:series] Add support for season URLs

											
										
										
											2018-04-04 17:05:09 +00:00
+								            if not isinstance(season, dict):
 								                continue
-												[svt] fix series extraction(closes #22297)

											
										
										
											2020-01-26 15:17:51 +00:00
+								            if season_id:
 								                if season.get('id') != season_id:
-												[svtplay:series] Add support for season URLs

											
										
										
											2018-04-04 17:05:09 +00:00
+								                    continue
 								                season_name = season.get('name')
-												[svt] fix series extraction(closes #22297)

											
										
										
											2020-01-26 15:17:51 +00:00
+								            items = season.get('items')
 								            if not isinstance(items, list):
-												[svtplay:series] Add extractor

Related to #11130

											
										
										
											2018-03-30 18:02:09 +00:00
+								                continue
-												[svt] fix series extraction(closes #22297)

											
										
										
											2020-01-26 15:17:51 +00:00
+								            for item in items:
 								                video = item.get('item') or {}
 								                content_id = video.get('videoSvtId')
 								                if not content_id or not isinstance(content_id, compat_str):
-												[svtplay:series] Add extractor

Related to #11130

											
										
										
											2018-03-30 18:02:09 +00:00
+								                    continue
-												[svt] fix series extraction(closes #22297)

											
										
										
											2020-01-26 15:17:51 +00:00
+								                entries.append(self.url_result(
 								                    'svt:' + content_id, SVTPlayIE.ie_key(), content_id))
-												[svtplay:series] Improve extraction (closes #16059)

											
										
										
											2018-04-04 16:52:00 +00:00
-												[svt] fix series extraction(closes #22297)

											
										
										
											2020-01-26 15:17:51 +00:00
+								        title = series.get('name')
 								        # Playlist title: "<series> - <season>" when both parts are known.
 								        # The season part falls back to the raw season id when no matching
 								        # season supplied a name; with no series name at all, the season id
 								        # alone is used.
 								        season_name = season_name or season_id
-												[svtplay:series] Add support for season URLs

											
										
										
											2018-04-04 17:05:09 +00:00
 								        if title and season_name:
 								            title = '%s - %s' % (title, season_name)
-												[svt] fix series extraction(closes #22297)

											
										
										
											2020-01-26 15:17:51 +00:00
+								        elif season_id:
 								            title = season_id
-												[svtplay:series] Add support for season URLs

											
										
										
											2018-04-04 17:05:09 +00:00
-												[svtplay:series] Add extractor

Related to #11130

											
										
										
											2018-03-30 18:02:09 +00:00
+								        return self.playlist_result(
-												[svt] fix series extraction(closes #22297)

											
										
										
											2020-01-26 15:17:51 +00:00
+								            entries, season_id or series.get('id'), title,
 								            dict_get(series, ('longDescription', 'shortDescription')))
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											2018-06-27 21:29:11 +00:00
 								class SVTPageIE(InfoExtractor):
-												[svt] fix article extraction(closes #22897)(closes #22919)

											
										
										
											2020-01-26 13:15:49 +00:00
+								    _VALID_URL = r'https?://(?:www\.)?svt\.se/(?P<path>(?:[^/]+/)*(?P<id>[^/?&#]+))'
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											2018-06-27 21:29:11 +00:00
+								    _TESTS = [{
-												[svt] fix article extraction(closes #22897)(closes #22919)

											
										
										
											2020-01-26 13:15:49 +00:00
+								        'url': 'https://www.svt.se/sport/ishockey/bakom-masken-lehners-kamp-mot-mental-ohalsa',
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											2018-06-27 21:29:11 +00:00
+								        'info_dict': {
-												[svt] fix article extraction(closes #22897)(closes #22919)

											
										
										
											2020-01-26 13:15:49 +00:00
+								            'id': '25298267',
 								            'title': 'Bakom masken – Lehners kamp mot mental ohälsa',
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											2018-06-27 21:29:11 +00:00
+								        },
-												[svt] fix article extraction(closes #22897)(closes #22919)

											
										
										
											2020-01-26 13:15:49 +00:00
+								        'playlist_count': 4,
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											2018-06-27 21:29:11 +00:00
+								    }, {
-												[svt] fix article extraction(closes #22897)(closes #22919)

											
										
										
											2020-01-26 13:15:49 +00:00
+								        'url': 'https://www.svt.se/nyheter/utrikes/svenska-andrea-ar-en-mil-fran-branderna-i-kalifornien',
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											2018-06-27 21:29:11 +00:00
+								        'info_dict': {
-												[svt] fix article extraction(closes #22897)(closes #22919)

											
										
										
											2020-01-26 13:15:49 +00:00
+								            'id': '24243746',
 								            'title': 'Svenska Andrea redo att fly sitt hem i Kalifornien',
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											2018-06-27 21:29:11 +00:00
+								        },
-												[svt] fix article extraction(closes #22897)(closes #22919)

											
										
										
											2020-01-26 13:15:49 +00:00
+								        'playlist_count': 2,
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											2018-06-27 21:29:11 +00:00
+								    }, {
 								        # only programTitle
 								        'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
 								        'info_dict': {
-												[svt] fix article extraction(closes #22897)(closes #22919)

											
										
										
											2020-01-26 13:15:49 +00:00
+								            'id': '8439V2K',
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											2018-06-27 21:29:11 +00:00
+								            'ext': 'mp4',
 								            'title': 'Stjärnorna skojar till det - under SVT-intervjun',
 								            'duration': 27,
 								            'age_limit': 0,
 								        },
 								    }, {
 								        'url': 'https://www.svt.se/nyheter/lokalt/vast/svt-testar-tar-nagon-upp-skrapet-1',
 								        'only_matching': True,
 								    }, {
 								        'url': 'https://www.svt.se/vader/manadskronikor/maj2018',
 								        'only_matching': True,
 								    }]
 								    @classmethod
 								    def suitable(cls, url):
 								        # Decline any URL that SVTIE or SVTPlayIE already accepts; only
 								        # otherwise defer to the inherited suitable() check.
-												[svtplay] Add support for svt.se/barnkanalen (closes #24817)

											
										
										
											2020-11-21 16:35:01 +00:00
+								        return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTPageIE, cls).suitable(url)
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											2018-06-27 21:29:11 +00:00
 								    def _real_extract(self, url):
 								        # Build a playlist from the videos referenced by an svt.se article.
 								        # The two _VALID_URL groups are unpacked positionally: the full page
 								        # path (appended to the nss-api URL) and its last segment (used as the
 								        # display id). Only the first element of ['articles']['content'] is
 								        # used; the response is indexed directly, so a missing key or an empty
 								        # list raises.
-												[svt] fix article extraction(closes #22897)(closes #22919)

											
										
										
											2020-01-26 13:15:49 +00:00
+								        path, display_id = re.match(self._VALID_URL, url).groups()
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											2018-06-27 21:29:11 +00:00
-												[svt] fix article extraction(closes #22897)(closes #22919)

											
										
										
											2020-01-26 13:15:49 +00:00
+								        article = self._download_json(
 								            'https://api.svt.se/nss-api/page/' + path, display_id,
 								            query={'q': 'articles'})['articles']['content'][0]
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											2018-06-27 21:29:11 +00:00
-												[svt] fix article extraction(closes #22897)(closes #22919)

											
										
										
											2020-01-26 13:15:49 +00:00
+								        entries = []
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											2018-06-27 21:29:11 +00:00
-												[svt] fix article extraction(closes #22897)(closes #22919)

											
										
										
											2020-01-26 13:15:49 +00:00
+								        def _process_content(content):
 								            # Append an `svt:<id>` entry (delegated to SVTPlayIE) when
 								            # `content` is a video clip or episode; any other `_type` is
 								            # ignored. The id is read from content['image']['svtId'] by
 								            # direct indexing, so a video item without it raises.
 								            if content.get('_type') in ('VIDEOCLIP', 'VIDEOEPISODE'):
 								                video_id = compat_str(content['image']['svtId'])
 								                entries.append(self.url_result(
 								                    'svt:' + video_id, SVTPlayIE.ie_key(), video_id))
-												[svt] Improve extraction and add support for pages (closes #16802)

											
										
										
											2018-06-27 21:29:11 +00:00
-												[svt] fix article extraction(closes #22897)(closes #22919)

											
										
										
											2020-01-26 13:15:49 +00:00
 								        # Videos can appear both in the article's top-level 'media' list and
 								        # inside 'structuredBody' items (under their 'content' key).
+								        for media in article.get('media', []):
 								            _process_content(media)
 								        for obj in article.get('structuredBody', []):
 								            _process_content(obj.get('content') or {})
 								        return self.playlist_result(
 								            entries, str_or_none(article.get('id')),
 								            strip_or_none(article.get('title')))