mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-11-25 11:41:52 +00:00
[Vbox7IE] Sanitise ld+json containing unexpected characters
* based on PR #29680 * added hack to force invoking `transform_source` * fixes #26218
This commit is contained in:
parent
bdda6b81df
commit
4416f82c80
1 changed files with 22 additions and 0 deletions
|
@ -5,6 +5,7 @@ import re
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_kwargs
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
@ -75,6 +76,27 @@ class Vbox7IE(InfoExtractor):
|
||||||
if mobj:
|
if mobj:
|
||||||
return mobj.group('url')
|
return mobj.group('url')
|
||||||
|
|
||||||
|
# transform_source=None, fatal=True
|
||||||
|
def _parse_json(self, json_string, video_id, *args, **kwargs):
|
||||||
|
if '"@context"' in json_string[:30]:
|
||||||
|
# this is ld+json, or that's the way to bet
|
||||||
|
transform_source = args[0] if len(args) > 0 else kwargs.get('transform_source')
|
||||||
|
if not transform_source:
|
||||||
|
|
||||||
|
def fix_chars(src):
|
||||||
|
# fix malformed ld+json: replace raw CRLFs with escaped LFs
|
||||||
|
return re.sub(
|
||||||
|
r'"[^"]+"', lambda m: re.sub(r'\r?\n', r'\\n', m.group(0)), src)
|
||||||
|
|
||||||
|
if len(args) > 0:
|
||||||
|
args = (fix_chars,) + args[1:]
|
||||||
|
else:
|
||||||
|
kwargs['transform_source'] = fix_chars
|
||||||
|
kwargs = compat_kwargs(kwargs)
|
||||||
|
|
||||||
|
return super(Vbox7IE, self)._parse_json(
|
||||||
|
json_string, video_id, *args, **kwargs)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
url = 'https://vbox7.com/play:%s' % (video_id,)
|
url = 'https://vbox7.com/play:%s' % (video_id,)
|
||||||
|
|
Loading…
Reference in a new issue