[vier] Add support for vijf.be

vier.be and vijf.be run on the same CMS and are owned by the same company,
so the same extractor can be used for both of them.
This commit is contained in:
Lars Vierbergen 2017-03-04 17:47:19 +01:00 committed by Sergey M
parent 054a587de8
commit a3ba8a7acf
1 changed file with 27 additions and 6 deletions

View File

@ -9,7 +9,7 @@ from .common import InfoExtractor
class VierIE(InfoExtractor): class VierIE(InfoExtractor):
IE_NAME = 'vier' IE_NAME = 'vier'
_VALID_URL = r'https?://(?:www\.)?vier\.be/(?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|video/v3/embed/(?P<embed_id>\d+))' _VALID_URL = r'https?://(?:www\.)?(?P<site>vier|vijf)\.be/(?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|video/v3/embed/(?P<embed_id>\d+))'
_TESTS = [{ _TESTS = [{
'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129', 'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129',
'info_dict': { 'info_dict': {
@ -23,6 +23,19 @@ class VierIE(InfoExtractor):
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
}, },
}, {
'url': 'http://www.vijf.be/temptationisland/videos/zo-grappig-temptation-island-hosts-moeten-kiezen-tussen-onmogelijke-dilemmas/2561614',
'info_dict': {
'id': '2561614',
'display_id': 'zo-grappig-temptation-island-hosts-moeten-kiezen-tussen-onmogelijke-dilemmas',
'ext': 'mp4',
'title': 'ZO grappig: Temptation Island hosts moeten kiezen tussen onmogelijke dilemma\'s',
'description': 'Het spel is simpel: Annelien Coorevits en Rick Brandsteder krijgen telkens 2 dilemma\'s voorgeschoteld en ze MOETEN een keuze maken.',
},
'params': {
# m3u8 download
'skip_download': True,
},
}, { }, {
'url': 'http://www.vier.be/planb/videos/mieren-herders-van-de-bladluizen', 'url': 'http://www.vier.be/planb/videos/mieren-herders-van-de-bladluizen',
'only_matching': True, 'only_matching': True,
@ -35,6 +48,7 @@ class VierIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
embed_id = mobj.group('embed_id') embed_id = mobj.group('embed_id')
display_id = mobj.group('display_id') or embed_id display_id = mobj.group('display_id') or embed_id
site = mobj.group('site')
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
@ -43,7 +57,7 @@ class VierIE(InfoExtractor):
webpage, 'video id') webpage, 'video id')
application = self._search_regex( application = self._search_regex(
[r'data-application="([^"]+)"', r'"application"\s*:\s*"([^"]+)"'], [r'data-application="([^"]+)"', r'"application"\s*:\s*"([^"]+)"'],
webpage, 'application', default='vier_vod') webpage, 'application', default=site + '_vod')
filename = self._search_regex( filename = self._search_regex(
[r'data-filename="([^"]+)"', r'"filename"\s*:\s*"([^"]+)"'], [r'data-filename="([^"]+)"', r'"filename"\s*:\s*"([^"]+)"'],
webpage, 'filename') webpage, 'filename')
@ -68,13 +82,19 @@ class VierIE(InfoExtractor):
class VierVideosIE(InfoExtractor): class VierVideosIE(InfoExtractor):
IE_NAME = 'vier:videos' IE_NAME = 'vier:videos'
_VALID_URL = r'https?://(?:www\.)?vier\.be/(?P<program>[^/]+)/videos(?:\?.*\bpage=(?P<page>\d+)|$)' _VALID_URL = r'https?://(?:www\.)?(?P<site>vier|vijf)\.be/(?P<program>[^/]+)/videos(?:\?.*\bpage=(?P<page>\d+)|$)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.vier.be/demoestuin/videos', 'url': 'http://www.vier.be/demoestuin/videos',
'info_dict': { 'info_dict': {
'id': 'demoestuin', 'id': 'demoestuin',
}, },
'playlist_mincount': 153, 'playlist_mincount': 153,
}, {
'url': 'http://www.vijf.be/temptationisland/videos',
'info_dict': {
'id': 'temptationisland',
},
'playlist_mincount': 159,
}, { }, {
'url': 'http://www.vier.be/demoestuin/videos?page=6', 'url': 'http://www.vier.be/demoestuin/videos?page=6',
'info_dict': { 'info_dict': {
@ -92,6 +112,7 @@ class VierVideosIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
program = mobj.group('program') program = mobj.group('program')
site = mobj.group('site')
page_id = mobj.group('page') page_id = mobj.group('page')
if page_id: if page_id:
@ -105,13 +126,13 @@ class VierVideosIE(InfoExtractor):
entries = [] entries = []
for current_page_id in itertools.count(start_page): for current_page_id in itertools.count(start_page):
current_page = self._download_webpage( current_page = self._download_webpage(
'http://www.vier.be/%s/videos?page=%d' % (program, current_page_id), 'http://www.%s.be/%s/videos?page=%d' % (site, program, current_page_id),
program, program,
'Downloading page %d' % (current_page_id + 1)) 'Downloading page %d' % (current_page_id + 1))
page_entries = [ page_entries = [
self.url_result('http://www.vier.be' + video_url, 'Vier') self.url_result('http://www.' + site + '.be' + video_url, 'Vier')
for video_url in re.findall( for video_url in re.findall(
r'<h3><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">', current_page)] r'<h[23]><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">', current_page)]
entries.extend(page_entries) entries.extend(page_entries)
if page_id or '>Meer<' not in current_page: if page_id or '>Meer<' not in current_page:
break break