[piksel] Add new extractor (closes #11246)

This commit is contained in:
Remita Amine 2016-12-20 12:34:46 +01:00
parent d8c507c9e2
commit b1c357975d
3 changed files with 113 additions and 0 deletions

View file

@ -699,6 +699,7 @@ from .periscope import (
from .philharmoniedeparis import PhilharmonieDeParisIE from .philharmoniedeparis import PhilharmonieDeParisIE
from .phoenix import PhoenixIE from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE from .photobucket import PhotobucketIE
from .piksel import PikselIE
from .pinkbike import PinkbikeIE from .pinkbike import PinkbikeIE
from .pladform import PladformIE from .pladform import PladformIE
from .playfm import PlayFMIE from .playfm import PlayFMIE

View file

@ -75,6 +75,7 @@ from .facebook import FacebookIE
from .soundcloud import SoundcloudIE from .soundcloud import SoundcloudIE
from .vbox7 import Vbox7IE from .vbox7 import Vbox7IE
from .dbtv import DBTVIE from .dbtv import DBTVIE
from .piksel import PikselIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -2225,6 +2226,11 @@ class GenericIE(InfoExtractor):
if arkena_url: if arkena_url:
return self.url_result(arkena_url, ArkenaIE.ie_key()) return self.url_result(arkena_url, ArkenaIE.ie_key())
# Look for Piksel embeds
piksel_url = PikselIE._extract_url(webpage)
if piksel_url:
return self.url_result(piksel_url, PikselIE.ie_key())
# Look for Limelight embeds # Look for Limelight embeds
mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage) mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
if mobj: if mobj:

View file

@ -0,0 +1,106 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
dict_get,
int_or_none,
unescapeHTML,
parse_iso8601,
)
class PikselIE(InfoExtractor):
    """Information extractor for videos hosted on player.piksel.com.

    The extractor downloads the player page, reads the ``clientAPI`` token
    embedded in it and queries the ``ws_program`` JSON web service with that
    token to obtain the asset metadata and the available media URLs.
    """

    _VALID_URL = r'https?://player\.piksel\.com/v/(?P<id>[a-z0-9]+)'
    _TEST = {
        'url': 'http://player.piksel.com/v/nv60p12f',
        'md5': 'd9c17bbe9c3386344f9cfd32fad8d235',
        'info_dict': {
            'id': 'nv60p12f',
            'ext': 'mp4',
            'title': 'فن الحياة - الحلقة 1',
            'description': 'احدث برامج الداعية الاسلامي " مصطفي حسني " فى رمضان 2016علي النهار نور',
            'timestamp': 1465231790,
            'upload_date': '20160606',
        }
    }

    @staticmethod
    def _extract_url(webpage):
        """Return the URL of the first Piksel player iframe in *webpage*.

        The URL may be protocol-relative (``//player.piksel.com/...``).
        Returns None when no matching ``<iframe>`` is present.
        """
        mobj = re.search(
            r'<iframe[^>]+src=["\'](?P<url>(?:https?:)?//player\.piksel\.com/v/[a-z0-9]+)',
            webpage)
        if mobj:
            return mobj.group('url')

    @staticmethod
    def _total_bitrate(asset_type, vbr, abr):
        """Return the total bitrate for an asset file, or None if unknown.

        For ``'video'`` assets the total is video + audio bitrate and is only
        reported when both components are known; for ``'audio'`` assets it is
        the audio bitrate. Any other asset type yields None.
        """
        if asset_type == 'video':
            # Either bitrate may be None when the API omits the field;
            # adding None would raise TypeError, so report "unknown" instead.
            if vbr is None or abr is None:
                return None
            return vbr + abr
        if asset_type == 'audio':
            return abr
        return None

    def _real_extract(self, url):
        """Extract the info dict for the Piksel video at *url*.

        Raises ExtractorError (expected) when the web service response
        contains a ``failure`` entry.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The API token is embedded in the player page's JS configuration.
        app_token = self._search_regex(
            r'clientAPI\s*:\s*"([^"]+)"', webpage, 'app token')

        response = self._download_json(
            'http://player.piksel.com/ws/ws_program/api/%s/mode/json/apiv/5' % app_token,
            video_id, query={
                'v': video_id
            })['response']

        failure = response.get('failure')
        if failure:
            raise ExtractorError(failure['reason'], expected=True)

        video_data = response['WsProgramResponse']['program']['asset']
        title = video_data['title']

        formats = []

        # The HLS manifest is exposed under several alternative keys;
        # dict_get is given them in order of preference.
        m3u8_url = dict_get(video_data, [
            'm3u8iPadURL',
            'ipadM3u8Url',
            'm3u8AndroidURL',
            'm3u8iPhoneURL',
            'iphoneM3u8Url'])
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False))

        asset_type = dict_get(video_data, ['assetType', 'asset_type'])
        # "or []" also covers an explicit null value for the key, which a
        # plain .get(key, []) default would not.
        for asset_file in video_data.get('assetFiles') or []:
            # TODO: extract rtmp formats
            http_url = asset_file.get('http_url')
            if not http_url:
                continue
            # NOTE(review): the second argument is presumably int_or_none's
            # scale divisor (converting to kbit/s) - confirm against utils.
            vbr = int_or_none(asset_file.get('videoBitrate'), 1024)
            abr = int_or_none(asset_file.get('audioBitrate'), 1024)
            tbr = self._total_bitrate(asset_type, vbr, abr)

            format_id = ['http']
            if tbr:
                format_id.append(compat_str(tbr))

            formats.append({
                'format_id': '-'.join(format_id),
                'url': unescapeHTML(http_url),
                'vbr': vbr,
                'abr': abr,
                'width': int_or_none(asset_file.get('videoWidth')),
                'height': int_or_none(asset_file.get('videoHeight')),
                'filesize': int_or_none(asset_file.get('filesize')),
                'tbr': tbr,
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': video_data.get('description'),
            'thumbnail': video_data.get('thumbnailUrl'),
            'timestamp': parse_iso8601(video_data.get('dateadd')),
            'formats': formats,
        }