[nhk:vod] Improve extraction (Closes #10424)

This commit is contained in:
Sergey M․ 2016-08-26 04:48:40 +07:00
parent 298a120ab7
commit f9b373afda
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
1 changed files with 32 additions and 11 deletions

View File

@ -4,26 +4,47 @@ from .common import InfoExtractor
class NhkVodIE(InfoExtractor): class NhkVodIE(InfoExtractor):
_VALID_URL = r'http://www3\.nhk\.or\.jp/nhkworld/en/vod/(?P<id>.+)\.html' _VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/en/vod/(?P<id>.+?)\.html'
_TESTS = [{ _TEST = {
# Videos available only for a limited period of time. Visit
# http://www3.nhk.or.jp/nhkworld/en/vod/ for working samples.
'url': 'http://www3.nhk.or.jp/nhkworld/en/vod/tokyofashion/20160815.html', 'url': 'http://www3.nhk.or.jp/nhkworld/en/vod/tokyofashion/20160815.html',
'info_dict': { 'info_dict': {
'id': 'A1bnNiNTE6nY3jLllS-BIISfcC_PpvF5', 'id': 'A1bnNiNTE6nY3jLllS-BIISfcC_PpvF5',
'ext': 'flv', 'ext': 'flv',
'title': '[nhkworld]VOD;2009-251-2016;TOKYO FASHION EXPRESS;The Kimono as Global Fashion;en', 'title': 'TOKYO FASHION EXPRESS - The Kimono as Global Fashion',
'description': 'md5:db338ee6ce8204f415b754782f819824',
'series': 'TOKYO FASHION EXPRESS',
'episode': 'The Kimono as Global Fashion',
}, },
'params': { 'skip': 'Videos available only for a limited period of time',
'skip_download': True # Videos available only for a limited period of time. }
},
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
embed_code = self._search_regex( embed_code = self._search_regex(
r'''nw_vod_ooplayer\('movie-area', '([^']+)'\);''', r'nw_vod_ooplayer\([^,]+,\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
webpage, webpage, 'ooyala embed code', group='id')
'ooyala embed code')
return self.url_result('ooyala:' + embed_code, 'Ooyala') title = self._search_regex(
r'<div[^>]+class=["\']episode-detail["\']>\s*<h\d+>([^<]+)',
webpage, 'title', default=None)
description = self._html_search_regex(
r'(?s)<p[^>]+class=["\']description["\'][^>]*>(.+?)</p>',
webpage, 'description', default=None)
series = self._search_regex(
r'<h2[^>]+class=["\']detail-top-player-title[^>]+><a[^>]+>([^<]+)',
webpage, 'series', default=None)
return {
'_type': 'url_transparent',
'ie_key': 'Ooyala',
'url': 'ooyala:%s' % embed_code,
'title': '%s - %s' % (series, title) if series and title else title,
'description': description,
'series': series,
'episode': title,
}