Use relative paths for DASH fragments (closes #12990)

Reduces JSON size 10x (refs #13810)
2025-01-21 23:35:38 +00:00 · 2017-08-05 06:57:19 +07:00 · 2017-08-05 06:57:19 +07:00 · 1141e9104b
commit 1141e9104b
parent 8519b88f67
2 changed files with 20 additions and 10 deletions
--- a/youtube_dl/downloader/dash.py
+++ b/youtube_dl/downloader/dash.py
@ -2,6 +2,7 @@ from __future__ import unicode_literals

 from .fragment import FragmentFD
 from ..compat import compat_urllib_error
+from ..utils import urljoin


 class DashSegmentsFD(FragmentFD):
@ -12,12 +13,13 @@ class DashSegmentsFD(FragmentFD):
    FD_NAME = 'dashsegments'

    def real_download(self, filename, info_dict):
-        segments = info_dict['fragments'][:1] if self.params.get(
+        fragment_base_url = info_dict.get('fragment_base_url')
+        fragments = info_dict['fragments'][:1] if self.params.get(
            'test', False) else info_dict['fragments']

        ctx = {
            'filename': filename,
-            'total_frags': len(segments),
+            'total_frags': len(fragments),
        }

        self._prepare_and_start_frag_download(ctx)
@ -26,7 +28,7 @@ class DashSegmentsFD(FragmentFD):
        skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)

        frag_index = 0
-        for i, segment in enumerate(segments):
+        for i, fragment in enumerate(fragments):
            frag_index += 1
            if frag_index <= ctx['fragment_index']:
                continue
@ -36,7 +38,11 @@ class DashSegmentsFD(FragmentFD):
            count = 0
            while count <= fragment_retries:
                try:
-                    success, frag_content = self._download_fragment(ctx, segment['url'], info_dict)
+                    fragment_url = fragment.get('url')
+                    if not fragment_url:
+                        assert fragment_base_url
+                        fragment_url = urljoin(fragment_base_url, fragment['path'])
+                    success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
                    if not success:
                        return False
                    self._append_fragment(ctx, frag_content)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -1892,9 +1892,13 @@ class InfoExtractor(object):
                                'Bandwidth': bandwidth,
                            }

+                        def location_key(location):
+                            return 'url' if re.match(r'^https?://', location) else 'path'
+
                        if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:

                            media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
+                            media_location_key = location_key(media_template)

                            # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
                            # can't be used at the same time
@ -1904,7 +1908,7 @@ class InfoExtractor(object):
                                    segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
                                    representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
                                representation_ms_info['fragments'] = [{
-                                    'url': media_template % {
+                                    media_location_key: media_template % {
                                        'Number': segment_number,
                                        'Bandwidth': bandwidth,
                                    },
@ -1928,7 +1932,7 @@ class InfoExtractor(object):
                                        'Number': segment_number,
                                    }
                                    representation_ms_info['fragments'].append({
-                                        'url': segment_url,
+                                        media_location_key: segment_url,
                                        'duration': float_or_none(segment_d, representation_ms_info['timescale']),
                                    })

@ -1952,8 +1956,9 @@ class InfoExtractor(object):
                            for s in representation_ms_info['s']:
                                duration = float_or_none(s['d'], timescale)
                                for r in range(s.get('r', 0) + 1):
+                                    segment_uri = representation_ms_info['segment_urls'][segment_index]
                                    fragments.append({
-                                        'url': representation_ms_info['segment_urls'][segment_index],
+                                        location_key(segment_uri): segment_uri,
                                        'duration': duration,
                                    })
                                    segment_index += 1
@ -1962,6 +1967,7 @@ class InfoExtractor(object):
                        # No fragments key is present in this case.
                        if 'fragments' in representation_ms_info:
                            f.update({
+                                'fragment_base_url': base_url,
                                'fragments': [],
                                'protocol': 'http_dash_segments',
                            })
@ -1969,10 +1975,8 @@ class InfoExtractor(object):
                                initialization_url = representation_ms_info['initialization_url']
                                if not f.get('url'):
                                    f['url'] = initialization_url
-                                f['fragments'].append({'url': initialization_url})
+                                f['fragments'].append({location_key(initialization_url): initialization_url})
                            f['fragments'].extend(representation_ms_info['fragments'])
-                            for fragment in f['fragments']:
-                                fragment['url'] = urljoin(base_url, fragment['url'])
                        try:
                            existing_format = next(
                                fo for fo in formats