mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-12-22 08:47:41 +00:00
[Vbox7IE] Sanitise ld+json containing unexpected characters
* based on PR #29680 * added hack to force invoking `transform_source` * fixes #26218
This commit is contained in:
parent
bdda6b81df
commit
4416f82c80
1 changed file with 22 additions and 0 deletions
|
@ -5,6 +5,7 @@ import re
|
|||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_kwargs
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
|
@ -75,6 +76,27 @@ class Vbox7IE(InfoExtractor):
|
|||
if mobj:
|
||||
return mobj.group('url')
|
||||
|
||||
# transform_source=None, fatal=True
|
||||
def _parse_json(self, json_string, video_id, *args, **kwargs):
    """Parse JSON, repairing raw line breaks inside ld+json string values.

    When `json_string` looks like ld+json (the text '"@context"' occurs
    within its first 30 characters) and the caller has not supplied a
    truthy `transform_source` (first extra positional argument, or the
    keyword of that name), a repair function is installed as the
    `transform_source` so that raw CR/LF sequences inside JSON string
    literals are replaced by the escaped sequence `\\n` before parsing.

    All other arguments are passed through unchanged to the parent
    class's `_parse_json`, whose result is returned.
    """
    if '"@context"' in json_string[:30]:
        # this is ld+json, or that's the way to bet
        transform_source = args[0] if len(args) > 0 else kwargs.get('transform_source')
        if not transform_source:

            def fix_chars(src):
                # fix malformed ld+json: replace raw CRLFs with escaped LFs,
                # touching only the inside of string literals.
                # The pattern matches one complete JSON string: an opening
                # quote, runs of ordinary characters interleaved with
                # backslash escape pairs, and the closing quote. Unlike
                # '"[^"]+"' it also matches the empty string "" and does not
                # stop at an escaped quote (\"), either of which would
                # otherwise shift the quote pairing for the rest of the text.
                # (?s) lets an escape pair span a raw line break so pairing
                # stays aligned; it is given inline rather than via `flags`
                # for the benefit of old Pythons.
                return re.sub(
                    r'(?s)"[^"\\]*(?:\\.[^"\\]*)*"',
                    lambda m: re.sub(r'\r?\n', r'\\n', m.group(0)), src)

            if len(args) > 0:
                # transform_source was passed positionally (as a falsy
                # value): substitute it in place, keeping later positionals
                args = (fix_chars,) + args[1:]
            else:
                kwargs['transform_source'] = fix_chars
                # a key was added from a literal here, so normalise the
                # keyword names before expanding with **
                kwargs = compat_kwargs(kwargs)

    return super(Vbox7IE, self)._parse_json(
        json_string, video_id, *args, **kwargs)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
url = 'https://vbox7.com/play:%s' % (video_id,)
|
||||
|
|
Loading…
Reference in a new issue