youtube-dl/youtube_dl/extractor/srmediathek.py

60 lines
2.3 KiB
Python
Raw Normal View History

2016-10-02 11:39:18 +00:00
# coding: utf-8
2014-10-26 22:23:10 +00:00
from __future__ import unicode_literals
2015-12-25 16:37:50 +00:00
from .ard import ARDMediathekIE
from ..utils import (
ExtractorError,
get_element_by_attribute,
)
2014-10-26 22:23:10 +00:00
2015-12-25 16:37:50 +00:00
class SRMediathekIE(ARDMediathekIE):
    """Information extractor for SR Mediathek video pages.

    Page URLs are matched by ``_VALID_URL``; the numeric ``id`` query
    parameter becomes the video id.  Format extraction is delegated to
    ``_extract_media_info``, which is not defined in this class
    (presumably inherited from ``ARDMediathekIE`` -- confirm against
    the ``.ard`` module).
    """

    IE_NAME = 'sr:mediathek'
    IE_DESC = 'Saarländischer Rundfunk'
    # The pattern expects "id" to be preceded by "&", i.e. at least one
    # other query parameter must come before it in the URL.
    _VALID_URL = r'https?://sr-mediathek(?:\.sr-online)?\.de/index\.php\?.*?&id=(?P<id>[0-9]+)'

    _TESTS = [{
        'url': 'http://sr-mediathek.sr-online.de/index.php?seite=7&id=28455',
        'info_dict': {
            'id': '28455',
            'ext': 'mp4',
            'title': 'sportarena (26.10.2014)',
            'description': 'Ringen: KSV Köllerbach gegen Aachen-Walheim; Frauen-Fußball: 1. FC Saarbrücken gegen Sindelfingen; Motorsport: Rallye in Losheim; dazu: Interview mit Timo Bernhard; Turnen: TG Saar; Reitsport: Deutscher Voltigier-Pokal; Badminton: Interview mit Michael Fuchs ',
            # Raw string: "\." is not a valid escape sequence in a normal
            # string literal and triggers a warning on modern Python.
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'skip': 'no longer available',
    }, {
        'url': 'http://sr-mediathek.sr-online.de/index.php?seite=7&id=37682',
        'info_dict': {
            'id': '37682',
            'ext': 'mp4',
            'title': 'Love, Cakes and Rock\'n\'Roll',
            'description': 'md5:18bf9763631c7d326c22603681e1123d',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://sr-mediathek.de/index.php?seite=7&id=7480',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video page at ``url``.

        Downloads the page, bails out if it carries the "no longer
        available" notice, then reads the media collection URL from the
        player markup and hands it to ``_extract_media_info``.  The
        resulting dict is finally updated with ``id``, ``title``,
        ``description`` and ``thumbnail`` taken from the page itself.

        Raises:
            ExtractorError: (with ``expected=True``) when the page states
                that the requested item is no longer available.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The site reports removed items with a notice inside the page; the
        # umlauts appear as HTML entities in the markup being matched.
        if '>Der gew&uuml;nschte Beitrag ist leider nicht mehr verf&uuml;gbar.<' in webpage:
            raise ExtractorError('Video %s is no longer available' % video_id, expected=True)

        media_collection_url = self._search_regex(
            r'data-mediacollection-ardplayer="([^"]+)"', webpage, 'media collection url')
        info = self._extract_media_info(media_collection_url, webpage, video_id)

        # Values read from this page override whatever the media info
        # returned for the same keys.
        # NOTE(review): get_element_by_attribute presumably returns None when
        # no element with class "ardplayer-title" exists -- confirm; the
        # title would then be None.
        info.update({
            'id': video_id,
            'title': get_element_by_attribute('class', 'ardplayer-title', webpage),
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
        })
        return info