2016-12-20 11:34:46 +00:00
# coding: utf-8
from __future__ import unicode_literals
import re
from . common import InfoExtractor
from . . compat import compat_str
from . . utils import (
dict_get ,
2020-12-28 09:50:29 +00:00
ExtractorError ,
2016-12-20 11:34:46 +00:00
int_or_none ,
parse_iso8601 ,
2020-12-28 09:50:29 +00:00
try_get ,
unescapeHTML ,
2023-05-19 18:31:52 +00:00
urljoin
2016-12-20 11:34:46 +00:00
)
class PikselIE(InfoExtractor):
    """Extractor for videos served by Piksel-hosted players.

    A watch page embeds an application token; that token is used to query
    the Piksel web-service API for the program metadata, from which direct
    HTTP, HLS and SMIL formats as well as caption tracks are collected.
    """

    _VALID_URL = r'''(?x)https?://
        (?:
            (?:
                player\.
                    (?:
                        olympusattelecom|
                        vibebyvista
                    )|
                (?:api|player)\.multicastmedia|
                (?:api-ovp|player)\.piksel
            )\.com|
            (?:
                mz-edge\.stream\.co|
                movie-s\.nhk\.or
            )\.jp|
            vidego\.baltimorecity\.gov
        )/v/(?:refid/(?P<refid>[^/]+)/prefid/)?(?P<id>[\w-]+)'''
    _TESTS = [
        {
            'url': 'http://player.piksel.com/v/ums2867l',
            'md5': '34e34c8d89dc2559976a6079db531e85',
            'info_dict': {
                'id': 'ums2867l',
                'ext': 'mp4',
                'title': 'GX-005 with Caption',
                'timestamp': 1481335659,
                'upload_date': '20161210'
            }
        },
        {
            # Original source: http://www.uscourts.gov/cameras-courts/state-washington-vs-donald-j-trump-et-al
            'url': 'https://player.piksel.com/v/v80kqp41',
            'md5': '753ddcd8cc8e4fa2dda4b7be0e77744d',
            'info_dict': {
                'id': 'v80kqp41',
                'ext': 'mp4',
                'title': 'WAW- State of Washington vs. Donald J. Trump, et al',
                'description': 'State of Washington vs. Donald J. Trump, et al, Case Number 17-CV-00141-JLR, TRO Hearing, Civil Rights Case, 02/3/2017, 1:00 PM (PST), Seattle Federal Courthouse, Seattle, WA, Judge James L. Robart presiding.',
                'timestamp': 1486171129,
                'upload_date': '20170204'
            }
        },
        {
            # https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2019240/
            'url': 'http://player.piksel.com/v/refid/nhkworld/prefid/nw_vod_v_en_2019_240_20190823233000_02_1566873477',
            'only_matching': True,
        }
    ]

    @staticmethod
    def _extract_url(webpage):
        """Return the first embedded player.piksel.com iframe URL found in
        *webpage*, or None when the page contains no such iframe."""
        mobj = re.search(
            r'<iframe[^>]+src=["\'](?P<url>(?:https?:)?//player\.piksel\.com/v/[a-z0-9]+)',
            webpage)
        if mobj:
            return mobj.group('url')

    def _call_api(self, app_token, resource, display_id, query, host='https://player.piksel.com', fatal=True):
        """Query the ``ws_<resource>`` web service and return its 'response'.

        app_token  -- application token scraped from the watch page
        resource   -- web service name suffix (e.g. 'program', 'asset_file')
        display_id -- identifier used for progress/error messages
        query      -- dict of query-string parameters
        host       -- base URL the API path is joined onto
        fatal      -- when true, a missing response or an API-reported
                      failure raises ExtractorError; otherwise it is only
                      reported as a warning

        Returns the 'response' dict of the JSON payload, or None when no
        usable response was obtained (only possible with fatal=False).
        """
        url = urljoin(base=host, path='/ws/ws_%s/api/%s/mode/json/apiv/5' % (resource, app_token))
        payload = self._download_json(url, display_id, query=query, fatal=fatal)
        # The payload is not guaranteed to be a dict (non-fatal downloads
        # yield a falsy value, and the server could send any JSON type), so
        # check the types before calling .get() on it.
        response = payload.get('response') if isinstance(payload, dict) else None
        if not isinstance(response, dict):
            response = None
        if response:
            failure = try_get(response, lambda x: x['failure']['reason'])
        else:
            # Surface a missing response here instead of letting the caller
            # fail with an opaque TypeError when subscripting None.
            failure = 'Empty response from API'
        if failure:
            if fatal:
                raise ExtractorError(failure, expected=True)
            self.report_warning(failure)
        return response

    @staticmethod
    def _asset_file_format(asset_file, asset_type):
        """Build a format dict for one asset file entry.

        Returns None when the entry is empty or has no 'http_url'.
        """
        if not asset_file:
            return None
        # TODO: extract rtmp formats
        http_url = asset_file.get('http_url')
        if not http_url:
            return None
        # Bitrates are divided by 1024 through int_or_none's scale argument.
        vbr = int_or_none(asset_file.get('videoBitrate'), 1024)
        abr = int_or_none(asset_file.get('audioBitrate'), 1024)
        tbr = None
        if asset_type == 'video':
            # Either bitrate may be missing (None); only add up the ones
            # that are known rather than crashing on None + int.
            known_bitrates = [br for br in (vbr, abr) if br is not None]
            if known_bitrates:
                tbr = sum(known_bitrates)
        elif asset_type == 'audio':
            tbr = abr
        format_id = ['http']
        if tbr:
            format_id.append(compat_str(tbr))
        return {
            'format_id': '-'.join(format_id),
            'url': unescapeHTML(http_url),
            'vbr': vbr,
            'abr': abr,
            'width': int_or_none(asset_file.get('videoWidth')),
            'height': int_or_none(asset_file.get('videoHeight')),
            'filesize': int_or_none(asset_file.get('filesize')),
            'tbr': tbr,
        }

    @staticmethod
    def _collect_captions(video_data):
        """Group the caption URLs of *video_data* by locale.

        Returns a dict mapping locale to a list of ``{'url': ...}`` dicts;
        captions without a locale are filed under 'en'.
        """
        subtitles = {}
        # 'captions' may be absent or explicitly null.
        for caption in video_data.get('captions') or []:
            if not isinstance(caption, dict):
                continue
            caption_url = caption.get('url')
            if not caption_url:
                continue
            subtitles.setdefault(caption.get('locale') or 'en', []).append({
                'url': caption_url,
            })
        return subtitles

    def _real_extract(self, url):
        """Extract formats, subtitles and metadata for the video at *url*."""
        ref_id, display_id = re.match(self._VALID_URL, url).groups()
        webpage = self._download_webpage(url, display_id)
        app_token = self._search_regex([
            r'clientAPI\s*:\s*"([^"]+)"',
            r'data-de-api-key\s*=\s*"([^"]+)"'
        ], webpage, 'app token')

        # refid-style URLs are looked up by (refid, prefid), plain ones by v.
        if ref_id:
            query = {'refid': ref_id, 'prefid': display_id}
        else:
            query = {'v': display_id}
        # The page URL is passed as host so the API is queried on the same
        # domain the player page was served from.
        program = self._call_api(
            app_token, 'program', display_id, query, url)['WsProgramResponse']['program']
        video_id = program['uuid']
        video_data = program['asset']
        title = video_data['title']
        asset_type = dict_get(video_data, ['assetType', 'asset_type'])

        formats = []

        def add_asset_files(asset_files):
            for asset_file in (asset_files or []):
                fmt = self._asset_file_format(asset_file, asset_type)
                if fmt:
                    formats.append(fmt)

        add_asset_files(video_data.get('assetFiles'))
        add_asset_files([video_data.get('referenceFile')])

        if not formats:
            # Nothing inline: ask the asset_file service (non-fatal).
            asset_id = video_data.get('assetid') or program.get('assetid')
            if asset_id:
                asset_file_response = self._call_api(
                    app_token, 'asset_file', display_id, {
                        'assetid': asset_id,
                    }, url, False)
                add_asset_files(try_get(
                    asset_file_response,
                    lambda x: x['WsAssetFileResponse']['AssetFiles']))

        m3u8_url = dict_get(video_data, [
            'm3u8iPadURL',
            'ipadM3u8Url',
            'm3u8AndroidURL',
            'm3u8iPhoneURL',
            'iphoneM3u8Url'])
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False))

        smil_url = dict_get(video_data, ['httpSmil', 'hdSmil', 'rtmpSmil'])
        if smil_url:
            transform_source = None
            if ref_id == 'nhkworld':
                # TODO: figure out if this is something to be fixed in urljoin,
                # _parse_smil_formats or keep it here
                def transform_source(smil):
                    return smil.replace('src="/', 'src="').replace('/media"', '/media/"')
            formats.extend(self._extract_smil_formats(
                re.sub(r'/od/[^/]+/', '/od/http/', smil_url), video_id,
                transform_source=transform_source, fatal=False))

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': video_data.get('description'),
            'thumbnail': video_data.get('thumbnailUrl'),
            'timestamp': parse_iso8601(video_data.get('dateadd')),
            'formats': formats,
            'subtitles': self._collect_captions(video_data),
        }