mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-10-31 22:55:26 +00:00
[facebook] Fix extraction (closes #11926)
This commit is contained in:
parent
000f207944
commit
b83ef507b4
1 changed file with 27 additions and 9 deletions
|
@ -12,14 +12,16 @@ from ..compat import (
|
||||||
compat_urllib_parse_unquote_plus,
|
compat_urllib_parse_unquote_plus,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
error_to_compat_str,
|
error_to_compat_str,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
get_element_by_id,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
limit_length,
|
limit_length,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
|
try_get,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
get_element_by_id,
|
|
||||||
clean_html,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -243,14 +245,30 @@ class FacebookIE(InfoExtractor):
|
||||||
|
|
||||||
video_data = None
|
video_data = None
|
||||||
|
|
||||||
server_js_data = self._parse_json(self._search_regex(
|
def extract_video_data(instances):
|
||||||
r'handleServerJS\(({.+})(?:\);|,")', webpage, 'server js data', default='{}'), video_id)
|
for item in instances:
|
||||||
for item in server_js_data.get('instances', []):
|
|
||||||
if item[1][0] == 'VideoConfig':
|
if item[1][0] == 'VideoConfig':
|
||||||
video_item = item[2][0]
|
video_item = item[2][0]
|
||||||
if video_item.get('video_id') == video_id:
|
if video_item.get('video_id') == video_id:
|
||||||
video_data = video_item['videoData']
|
return video_item['videoData']
|
||||||
break
|
|
||||||
|
server_js_data = self._parse_json(self._search_regex(
|
||||||
|
r'handleServerJS\(({.+})(?:\);|,")', webpage,
|
||||||
|
'server js data', default='{}'), video_id, fatal=False)
|
||||||
|
|
||||||
|
if server_js_data:
|
||||||
|
video_data = extract_video_data(server_js_data.get('instances', []))
|
||||||
|
|
||||||
|
if not video_data:
|
||||||
|
server_js_data = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+stream_pagelet',
|
||||||
|
webpage, 'js data', default='{}'),
|
||||||
|
video_id, transform_source=js_to_json, fatal=False)
|
||||||
|
if server_js_data:
|
||||||
|
video_data = extract_video_data(try_get(
|
||||||
|
server_js_data, lambda x: x['jsmods']['instances'],
|
||||||
|
list) or [])
|
||||||
|
|
||||||
if not video_data:
|
if not video_data:
|
||||||
if not fatal_if_no_video:
|
if not fatal_if_no_video:
|
||||||
|
|
Loading…
Reference in a new issue