[younow] Fix issues and improve extraction (closes #9255, closes #9432, closes #12436)

This commit is contained in:
Sergey M․ 2017-10-29 04:16:07 +07:00
parent 47a8587915
commit eb4b5818e2
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D
2 changed files with 114 additions and 109 deletions

View file

@ -1336,7 +1336,7 @@ from .youku import (
YoukuShowIE, YoukuShowIE,
) )
from .younow import ( from .younow import (
YouNowIE, YouNowLiveIE,
YouNowChannelIE, YouNowChannelIE,
YouNowMomentIE, YouNowMomentIE,
) )

View file

@ -1,17 +1,22 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
from datetime import date, datetime
import itertools
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str from ..compat import compat_str
from ..utils import int_or_none, UnsupportedError from ..utils import (
ExtractorError,
int_or_none,
try_get,
)
MOMENT_URL_FORMAT = 'https://cdn.younow.com/php/api/moment/fetch/id=%s' CDN_API_BASE = 'https://cdn.younow.com/php/api'
STREAM_URL_FORMAT = 'https://hls.younow.com/momentsplaylists/live/%s/%s.m3u8' MOMENT_URL_FORMAT = '%s/moment/fetch/id=%%s' % CDN_API_BASE
class YouNowIE(InfoExtractor): class YouNowLiveIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?younow\.com/(?P<id>[^/]+)' _VALID_URL = r'https?://(?:www\.)?younow\.com/(?P<id>[^/?#&]+)'
_TEST = { _TEST = {
'url': 'https://www.younow.com/AmandaPadeezy', 'url': 'https://www.younow.com/AmandaPadeezy',
'info_dict': { 'info_dict': {
@ -19,179 +24,179 @@ class YouNowIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'is_live': True, 'is_live': True,
'title': 'March 26, 2017', 'title': 'March 26, 2017',
'description': 'YouNow is the best way to broadcast live and get an audience to watch you.', 'thumbnail': r're:^https?://.*\.jpg$',
'thumbnail': 'https://ynassets.s3.amazonaws.com/broadcast/live/157869188/157869188.jpg',
'tags': ['girls'], 'tags': ['girls'],
'categories': ['girls'], 'categories': ['girls'],
'uploader': 'AmandaPadeezy', 'uploader': 'AmandaPadeezy',
'uploader_id': '6716501', 'uploader_id': '6716501',
'uploader_url': 'https://www.younow.com/AmandaPadeezy', 'uploader_url': 'https://www.younow.com/AmandaPadeezy',
'creator': 'AmandaPadeezy', 'creator': 'AmandaPadeezy',
'formats': [{ },
'url': 'https://cdn.younow.com/php/api/broadcast/videoPath/hls=1/broadcastId=157869188/channelId=6716501', 'skip': True,
'ext': 'mp4',
'protocol': 'm3u8',
}],
}
} }
@classmethod
def suitable(cls, url):
return (False
if YouNowChannelIE.suitable(url) or YouNowMomentIE.suitable(url)
else super(YouNowLiveIE, cls).suitable(url))
def _real_extract(self, url): def _real_extract(self, url):
username = self._match_id(url) username = self._match_id(url)
data = self._download_json('https://api.younow.com/php/api/broadcast/info/curId=0/user=%s' % (username), username)
if data.get('media'): data = self._download_json(
stream_url = 'https://cdn.younow.com/php/api/broadcast/videoPath/hls=1/broadcastId=%s/channelId=%s' % ( 'https://api.younow.com/php/api/broadcast/info/curId=0/user=%s'
data.get('broadcastId'), % username, username)
data.get('userId'),
)
else:
raise UnsupportedError('Unsupported stream or user is not streaming at this time')
webpage = self._download_webpage(url, username) if data.get('errorCode') != 0:
try: raise ExtractorError(data['errorMsg'], expected=True)
uploader = data['user']['profileUrlString']
except KeyError: uploader = try_get(
uploader = username data, lambda x: x['user']['profileUrlString'],
try: compat_str) or username
title = data['title']
except KeyError:
title = date.today().strftime('%B %d, %Y')
return { return {
'id': uploader, 'id': uploader,
'is_live': True, 'is_live': True,
'title': title, 'title': self._live_title(uploader),
'description': self._og_search_description(webpage),
'thumbnail': data.get('awsUrl'), 'thumbnail': data.get('awsUrl'),
'tags': data.get('tags'), 'tags': data.get('tags'),
'categories': data.get('tags'), 'categories': data.get('tags'),
'uploader': uploader, 'uploader': uploader,
'uploader_id': data.get('userId'), 'uploader_id': data.get('userId'),
'uploader_url': 'https://www.younow.com/%s' % (data['user']['profileUrlString'],), 'uploader_url': 'https://www.younow.com/%s' % username,
'creator': uploader, 'creator': uploader,
'view_count': int_or_none(data.get('viewers')), 'view_count': int_or_none(data.get('viewers')),
'like_count': int_or_none(data.get('likes')), 'like_count': int_or_none(data.get('likes')),
'formats': [{ 'formats': [{
'url': stream_url, 'url': '%s/broadcast/videoPath/hls=1/broadcastId=%s/channelId=%s'
% (CDN_API_BASE, data['broadcastId'], data['userId']),
'ext': 'mp4', 'ext': 'mp4',
'protocol': 'm3u8', 'protocol': 'm3u8',
}], }],
} }
def _moment_to_entry(item): def _extract_moment(item, fatal=True):
moment_id = item.get('momentId')
if not moment_id:
if not fatal:
return
raise ExtractorError('Unable to extract moment id')
moment_id = compat_str(moment_id)
title = item.get('text') title = item.get('text')
title_type = item.get('titleType')
if not title: if not title:
if title_type: title = 'YouNow %s' % (
title = 'YouNow %s' % item.get('titleType') item.get('momentType') or item.get('titleType') or 'moment')
else:
title = 'YouNow moment' uploader = try_get(item, lambda x: x['owner']['name'], compat_str)
uploader_id = try_get(item, lambda x: x['owner']['userId'])
uploader_url = 'https://www.younow.com/%s' % uploader if uploader else None
entry = { entry = {
'id': compat_str(item['momentId']), 'extractor_key': 'YouNowMoment',
'id': moment_id,
'title': title, 'title': title,
'view_count': int_or_none(item.get('views')), 'view_count': int_or_none(item.get('views')),
'like_count': int_or_none(item.get('likes')), 'like_count': int_or_none(item.get('likes')),
'timestamp': int_or_none(item.get('created')), 'timestamp': int_or_none(item.get('created')),
'creator': uploader,
'uploader': uploader,
'uploader_id': uploader_id,
'uploader_url': uploader_url,
'formats': [{ 'formats': [{
'url': STREAM_URL_FORMAT % (item['momentId'], item['momentId']), 'url': 'https://hls.younow.com/momentsplaylists/live/%s/%s.m3u8'
% (moment_id, moment_id),
'ext': 'mp4', 'ext': 'mp4',
'protocol': 'm3u8', 'protocol': 'm3u8_native',
}], }],
} }
try:
entry['uploader'] = entry['creator'] = item['owner']['name']
entry['uploader_url'] = 'https://www.younow.com/%s' % (item['owner']['name'],)
entry['uploader_id'] = item['owner']['userId']
except KeyError:
pass
return entry return entry
class YouNowChannelIE(InfoExtractor): class YouNowChannelIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?younow\.com/(?P<id>[^/]+)/channel' _VALID_URL = r'https?://(?:www\.)?younow\.com/(?P<id>[^/]+)/channel'
_TEST = { _TEST = {
'url': 'https://www.younow.com/Kate_Swiz/channel', 'url': 'https://www.younow.com/its_Kateee_/channel',
'info_dict': { 'info_dict': {
'title': 'Kate_Swiz moments' 'id': '14629760',
'title': 'its_Kateee_ moments'
}, },
'playlist_count': 6, 'playlist_mincount': 8,
} }
MOMENTS_URL_FORMAT = 'https://cdn.younow.com/php/api/moment/profile/channelId=%s/createdBefore=%d/records=20' def _entries(self, username, channel_id):
created_before = 0
for page_num in itertools.count(1):
if created_before is None:
break
info = self._download_json(
'%s/moment/profile/channelId=%s/createdBefore=%d/records=20'
% (CDN_API_BASE, channel_id, created_before), username,
note='Downloading moments page %d' % page_num)
items = info.get('items')
if not items or not isinstance(items, list):
break
for item in items:
if not isinstance(item, dict):
continue
item_type = item.get('type')
if item_type == 'moment':
entry = _extract_moment(item, fatal=False)
if entry:
yield entry
elif item_type == 'collection':
moments = item.get('momentsIds')
if isinstance(moments, list):
for moment_id in moments:
m = self._download_json(
MOMENT_URL_FORMAT % moment_id, username,
note='Downloading %s moment JSON' % moment_id,
fatal=False)
if m and isinstance(m, dict) and m.get('item'):
entry = _extract_moment(m['item'])
if entry:
yield entry
created_before = int_or_none(item.get('created'))
def _real_extract(self, url): def _real_extract(self, url):
entries = []
username = self._match_id(url) username = self._match_id(url)
user_info = self._download_json('https://api.younow.com/php/api/broadcast/info/curId=0/user=%s' % (username), username, note='Downloading user information') channel_id = compat_str(self._download_json(
channel_id = user_info['userId'] 'https://api.younow.com/php/api/broadcast/info/curId=0/user=%s'
created_before = 0 % username, username, note='Downloading user information')['userId'])
moment_ids = [] return self.playlist_result(
moment_ids_processed = [] self._entries(username, channel_id), channel_id,
err = False '%s moments' % username)
while True:
if created_before:
cb = datetime.fromtimestamp(created_before)
else:
cb = datetime.now()
info = self._download_json(self.MOMENTS_URL_FORMAT % (channel_id, created_before), username, note='Downloading moments data (created before %s)' % (cb))
for item in info['items']:
if item['type'] == 'moment':
entry = _moment_to_entry(item)
moment_ids_processed.append(entry['id'])
entries.append(entry)
elif item['type'] == 'collection':
moment_ids += [compat_str(x) for x in item['momentsIds']]
try:
created_before = int_or_none(item['created'])
except KeyError:
err = True
break
if (err or
not info['hasMore'] or
'items' not in info or
not info['items']):
break
for mid in set(moment_ids):
if mid in moment_ids_processed:
continue
item = self._download_json(MOMENT_URL_FORMAT % (mid), mid)
entries.append(_moment_to_entry(item['item']))
return self.playlist_result(entries, playlist_title='%s moments' % (username))
class YouNowMomentIE(InfoExtractor): class YouNowMomentIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?younow\.com/[^/]+/(?P<id>[^/]+)/[^/]+' _VALID_URL = r'https?://(?:www\.)?younow\.com/[^/]+/(?P<id>[^/?#&]+)'
_TEST = { _TEST = {
'url': 'https://www.younow.com/GABO.../20712117/36319236/3b316doc/m', 'url': 'https://www.younow.com/GABO.../20712117/36319236/3b316doc/m',
'md5': 'a30c70eadb9fb39a1aa3c8c0d22a0807',
'info_dict': { 'info_dict': {
'id': '20712117', 'id': '20712117',
'ext': 'mp4', 'ext': 'mp4',
'title': 'YouNow capture', 'title': 'YouNow capture',
'view_count': 19, 'view_count': int,
'like_count': 0, 'like_count': int,
'timestamp': 1490432040, 'timestamp': 1490432040,
'formats': [{
'url': 'https://hls.younow.com/momentsplaylists/live/20712117/20712117.m3u8',
'ext': 'mp4',
'protocol': 'm3u8',
}],
'upload_date': '20170325', 'upload_date': '20170325',
'uploader': 'GABO...', 'uploader': 'GABO...',
'uploader_id': 35917228, 'uploader_id': 35917228,
}, },
} }
@classmethod
def suitable(cls, url):
return (False
if YouNowChannelIE.suitable(url)
else super(YouNowMomentIE, cls).suitable(url))
def _real_extract(self, url): def _real_extract(self, url):
mid = self._match_id(url) video_id = self._match_id(url)
item = self._download_json(MOMENT_URL_FORMAT % (mid), mid) item = self._download_json(MOMENT_URL_FORMAT % video_id, video_id)
return _moment_to_entry(item['item']) return _extract_moment(item['item'])