# 2016-02-11 17:10:32 +00:00
# coding: utf-8
from __future__ import unicode_literals
import re
from . common import InfoExtractor
# 2016-05-20 05:53:14 +00:00
from . . utils import (
js_to_json ,
smuggle_url ,
)
# 2016-02-11 17:10:32 +00:00
class CBCIE(InfoExtractor):
    """Extractor for cbc.ca content pages that embed CBC player media.

    Player URLs are excluded: the URL pattern rejects paths starting with
    ``player/`` and ``suitable`` defers to ``CBCPlayerIE`` for anything that
    extractor accepts. Extraction never builds formats itself; every result
    is a ``cbcplayer:<media id>`` reference handled by ``CBCPlayerIE``.
    """

    _VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?!player/)(?:[^/]+/)+(?P<id>[^/?#]+)'
    _TESTS = [{
        # with mediaId
        'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs',
        'md5': '97e24d09672fc4cf56256d6faa6c25bc',
        'info_dict': {
            'id': '2682904050',
            'ext': 'mp4',
            'title': 'Don Cherry – All-Stars',
            'description': 'Don Cherry has a bee in his bonnet about AHL player John Scott because that guy’s got heart.',
            'timestamp': 1454463000,
            'upload_date': '20160203',
            'uploader': 'CBCC-NEW',
        },
    }, {
        # with clipId
        'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
        'info_dict': {
            'id': '2487345465',
            'ext': 'mp4',
            'title': 'Robin Williams freestyles on 90 Minutes Live',
            'description': 'Wacky American comedian Robin Williams shows off his infamous "freestyle" comedic talents while being interviewed on CBC\'s 90 Minutes Live.',
            'upload_date': '19780210',
            'uploader': 'CBCC-NEW',
            'timestamp': 255977160,
        },
    }, {
        # multiple iframes
        'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot',
        'playlist': [{
            'md5': '377572d0b49c4ce0c9ad77470e0b96b4',
            'info_dict': {
                'id': '2680832926',
                'ext': 'mp4',
                'title': 'An Eagle\'s-Eye View Off Burrard Bridge',
                'description': 'Hercules the eagle flies from Vancouver\'s Burrard Bridge down to a nearby park with a mini-camera strapped to his back.',
                'upload_date': '20160201',
                'timestamp': 1454342820,
                'uploader': 'CBCC-NEW',
            },
        }, {
            'md5': '415a0e3f586113894174dfb31aa5bb1a',
            'info_dict': {
                'id': '2658915080',
                'ext': 'mp4',
                'title': 'Fly like an eagle!',
                'description': 'Eagle equipped with a mini camera flies from the world\'s tallest tower',
                'upload_date': '20150315',
                'timestamp': 1426443984,
                'uploader': 'CBCC-NEW',
            },
        }],
    }]

    @classmethod
    def suitable(cls, url):
        """Return False for any URL that CBCPlayerIE accepts, else defer to the base check."""
        return False if CBCPlayerIE.suitable(url) else super(CBCIE, cls).suitable(url)

    def _real_extract(self, url):
        """Resolve the page's embedded media to ``cbcplayer:`` URL results.

        Returns a single URL result when the page contains a
        ``CBC.APP.Caffeine.initInstance({...});`` call, otherwise a playlist
        with one entry per ``<iframe>`` whose ``src`` carries a numeric
        ``mediaId``.

        Raises ``KeyError`` if the player init object has neither ``mediaId``
        nor ``clipId``; the feed lookup raises ``KeyError``/``IndexError`` if
        its response has no usable entry.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The init call is optional: pages without it fall through to the
        # iframe scan below instead of failing.
        player_init = self._search_regex(
            r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage, 'player init',
            default=None)
        if player_init:
            # The argument is a JavaScript object literal, so it is converted
            # with js_to_json before parsing.
            player_info = self._parse_json(player_init, display_id, js_to_json)
            media_id = player_info.get('mediaId')
            if not media_id:
                # Only a clip id is available: look the media id up in the
                # feed; it is the last path segment of the entry's 'id'.
                clip_id = player_info['clipId']
                media_id = self._download_json(
                    'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
                    clip_id)['entries'][0]['id'].split('/')[-1]
            return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)

        # No inline player: collect every iframe-embedded media id.
        entries = [
            self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
            for media_id in re.findall(
                r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)]
        return self.playlist_result(entries)
class CBCPlayerIE(InfoExtractor):
    """Extractor for CBC player media addressed by numeric id.

    Accepts the internal ``cbcplayer:<id>`` scheme as well as
    ``cbc.ca/player/play/<id>`` and
    ``cbc.ca/i/caffeine/syndicate/?mediaId=<id>`` URLs.
    """

    _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.cbc.ca/player/play/2683190193',
        'info_dict': {
            'id': '2683190193',
            'ext': 'mp4',
            'title': 'Gerry Runs a Sweat Shop',
            'description': 'md5:b457e1c01e8ff408d9d801c1c2cd29b0',
            'timestamp': 1455071400,
            'upload_date': '20160210',
            'uploader': 'CBCC-NEW',
        },
    }

    def _real_extract(self, url):
        """Return a ``url_transparent`` result handing the media to ThePlatform.

        The media id from the URL is substituted into a link.theplatform.com
        guid URL; ``force_smil_url`` is smuggled along with that URL for the
        ThePlatform extractor to read.
        """
        video_id = self._match_id(url)
        return {
            '_type': 'url_transparent',
            'ie_key': 'ThePlatform',
            'url': smuggle_url(
                'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/%s?mbr=true' % video_id, {
                    'force_smil_url': True
                }),
            'id': video_id,
        }