[facebook] Fix extraction (closes #11926)

This commit is contained in:
Sergey M․ 2017-02-01 23:15:38 +07:00
parent 000f207944
commit b83ef507b4
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D

View File

@@ -12,14 +12,16 @@ from ..compat import (
compat_urllib_parse_unquote_plus,
)
from ..utils import (
clean_html,
error_to_compat_str,
ExtractorError,
get_element_by_id,
int_or_none,
js_to_json,
limit_length,
sanitized_Request,
try_get,
urlencode_postdata,
get_element_by_id,
clean_html,
)
@@ -243,14 +245,30 @@ class FacebookIE(InfoExtractor):
video_data = None
server_js_data = self._parse_json(self._search_regex(
r'handleServerJS\(({.+})(?:\);|,")', webpage, 'server js data', default='{}'), video_id)
for item in server_js_data.get('instances', []):
# Nested helper: walk `instances` looking for an entry whose item[1][0] is
# 'VideoConfig' and whose item[2][0] mapping reports the wanted `video_id`
# (a name taken from the enclosing scope, not a parameter), then yield that
# mapping's 'videoData' value.
# NOTE(review): this view lost its indentation and +/- diff markers. The
# `video_data = ...` / `break` pair below looks like the removed side of the
# hunk and the `return` line the added side -- confirm against the real file.
# NOTE(review): presumably the helper falls through (returning None) when no
# entry matches, since the caller tests `if not video_data` -- verify.
def extract_video_data(instances):
for item in instances:
if item[1][0] == 'VideoConfig':
video_item = item[2][0]
if video_item.get('video_id') == video_id:
video_data = video_item['videoData']
break
return video_item['videoData']
server_js_data = self._parse_json(self._search_regex(
r'handleServerJS\(({.+})(?:\);|,")', webpage,
'server js data', default='{}'), video_id, fatal=False)
if server_js_data:
video_data = extract_video_data(server_js_data.get('instances', []))
if not video_data:
server_js_data = self._parse_json(
self._search_regex(
r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+stream_pagelet',
webpage, 'js data', default='{}'),
video_id, transform_source=js_to_json, fatal=False)
if server_js_data:
video_data = extract_video_data(try_get(
server_js_data, lambda x: x['jsmods']['instances'],
list) or [])
if not video_data:
if not fatal_if_no_video: