youtube-dl/youtube_dl/extractor/metacritic.py

from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    fix_xml_ampersands,
    int_or_none,
)


class MetacriticIE(InfoExtractor):
    """Extractor for trailer pages on metacritic.com.

    The numeric trailer id is taken from the page URL. Media URLs, title
    and duration are read from the site's ``video_data`` XML document; the
    description is scraped from the trailer page itself.
    """

    _VALID_URL = r'https?://(?:www\.)?metacritic\.com/.+?/trailers/(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
        'info_dict': {
            'id': '3698222',
            'ext': 'mp4',
            'title': 'inFamous: Second Son - inSide Sucker Punch: Smoke & Mirrors',
            'description': 'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
            'duration': 221,
        },
        'skip': 'Not providing trailers anymore',
    }, {
        'url': 'http://www.metacritic.com/game/playstation-4/tales-from-the-borderlands-a-telltale-game-series/trailers/5740315',
        'info_dict': {
            'id': '5740315',
            'ext': 'mp4',
            'title': 'Tales from the Borderlands - Finale: The Vault of the Traveler',
            'description': 'In the final episode of the season, all hell breaks loose. Jack is now in control of Helios\' systems, and he\'s ready to reclaim his rightful place as king of Hyperion (with or without you).',
            'duration': 114,
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the trailer at ``url``.

        Downloads the trailer page and the ``video_data`` XML, selects the
        clip whose ``<id>`` equals the id from the URL, and collects one
        format per ``httpURI/videoFile`` entry that carries a file path.

        Raises ExtractorError if the XML contains no clip with the
        requested id.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # The xml is not well formatted, there are raw '&'
        info = self._download_xml(
            'http://www.metacritic.com/video_data?video=' + video_id,
            video_id, 'Downloading info xml',
            transform_source=fix_xml_ampersands)

        # findtext() yields None for a clip without an <id> child, so such
        # clips are simply skipped instead of raising AttributeError.
        clip = next(
            (c for c in info.findall('playList/clip')
             if c.findtext('id') == video_id),
            None)
        if clip is None:
            raise ExtractorError(
                'Unable to find clip %s in the video data' % video_id)

        formats = []
        for video_file in clip.findall('httpURI/videoFile'):
            video_url = video_file.findtext('filePath')
            if not video_url:
                # An entry without a URL cannot be downloaded.
                continue
            rate_str = video_file.findtext('rate')
            formats.append({
                'url': video_url,
                'ext': 'mp4',
                'format_id': rate_str,
                # The bitrate is optional metadata; tolerate a missing or
                # empty <rate> rather than aborting the extraction.
                'tbr': int_or_none(rate_str),
            })
        self._sort_formats(formats)

        # The description is optional: do not fail the whole extraction
        # when the page markup no longer matches.
        description = self._html_search_regex(
            r'<b>Description:</b>(.*?)</p>',
            webpage, 'description', fatal=False, flags=re.DOTALL)

        return {
            'id': video_id,
            'title': clip.find('title').text,
            'formats': formats,
            'description': description,
            'duration': int_or_none(clip.findtext('duration')),
        }
[metacritic] Use centralized sorting and unicode_literals 2014-01-07 09:27:20 +00:00			`from __future__ import unicode_literals`

Add an extractor for Metacritic 2013-09-06 16:08:07 +00:00			`import re`

			`from .common import InfoExtractor`
Use `_download_xml` in more extractors 2013-12-10 20:03:53 +00:00			`from ..utils import (`
Correct XML ampersand fixup 2014-01-20 21:11:34 +00:00			`fix_xml_ampersands,`
Use `_download_xml` in more extractors 2013-12-10 20:03:53 +00:00			`)`
Add an extractor for Metacritic 2013-09-06 16:08:07 +00:00

			`class MetacriticIE(InfoExtractor):`
Improve some _VALID_URLs 2016-09-08 11:29:05 +00:00			`_VALID_URL = r'https?://(?:www\.)?metacritic\.com/.+?/trailers/(?P<id>\d+)'`
Add an extractor for Metacritic 2013-09-06 16:08:07 +00:00
[metacritic] Add a new valid test case 2016-04-14 07:12:59 +00:00			`_TESTS = [{`
[metacritic] Use centralized sorting and unicode_literals 2014-01-07 09:27:20 +00:00			`'url': 'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',`
			`'info_dict': {`
[metacritic] Modernize test 2014-03-29 13:46:05 +00:00			`'id': '3698222',`
			`'ext': 'mp4',`
[metacritic] Use centralized sorting and unicode_literals 2014-01-07 09:27:20 +00:00			`'title': 'inFamous: Second Son - inSide Sucker Punch: Smoke & Mirrors',`
			`'description': 'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',`
			`'duration': 221,`
Add an extractor for Metacritic 2013-09-06 16:08:07 +00:00			`},`
[metacritic] Add a new valid test case 2016-04-14 07:12:59 +00:00			`'skip': 'Not providing trailers anymore',`
			`}, {`
			`'url': 'http://www.metacritic.com/game/playstation-4/tales-from-the-borderlands-a-telltale-game-series/trailers/5740315',`
			`'info_dict': {`
			`'id': '5740315',`
			`'ext': 'mp4',`
			`'title': 'Tales from the Borderlands - Finale: The Vault of the Traveler',`
			`'description': 'In the final episode of the season, all hell breaks loose. Jack is now in control of Helios\' systems, and he\'s ready to reclaim his rightful place as king of Hyperion (with or without you).',`
			`'duration': 114,`
			`},`
			`}]`
Add an extractor for Metacritic 2013-09-06 16:08:07 +00:00
			`def _real_extract(self, url):`
			`mobj = re.match(self._VALID_URL, url)`
			`video_id = mobj.group('id')`
			`webpage = self._download_webpage(url, video_id)`
			`# The xml is not well formatted, there are raw '&'`
Use `_download_xml` in more extractors 2013-12-10 20:03:53 +00:00			`info = self._download_xml('http://www.metacritic.com/video_data?video=' + video_id,`
PEP8: applied even more rules 2014-11-23 20:39:15 +00:00			`video_id, 'Downloading info xml', transform_source=fix_xml_ampersands)`
Add an extractor for Metacritic 2013-09-06 16:08:07 +00:00
			`clip = next(c for c in info.findall('playList/clip') if c.find('id').text == video_id)`
			`formats = []`
			`for videoFile in clip.findall('httpURI/videoFile'):`
			`rate_str = videoFile.find('rate').text`
			`video_url = videoFile.find('filePath').text`
			`formats.append({`
			`'url': video_url,`
			`'ext': 'mp4',`
			`'format_id': rate_str,`
[metacritic] Use centralized sorting and unicode_literals 2014-01-07 09:27:20 +00:00			`'tbr': int(rate_str),`
Add an extractor for Metacritic 2013-09-06 16:08:07 +00:00			`})`
[metacritic] Use centralized sorting and unicode_literals 2014-01-07 09:27:20 +00:00			`self._sort_formats(formats)`
Add an extractor for Metacritic 2013-09-06 16:08:07 +00:00
			`description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>',`
PEP8: applied even more rules 2014-11-23 20:39:15 +00:00			`webpage, 'description', flags=re.DOTALL)`
Add an extractor for Metacritic 2013-09-06 16:08:07 +00:00
Remove the compatibility code used before the new format system was implemented 2013-12-03 13:21:06 +00:00			`return {`
Add an extractor for Metacritic 2013-09-06 16:08:07 +00:00			`'id': video_id,`
			`'title': clip.find('title').text,`
			`'formats': formats,`
			`'description': description,`
			`'duration': int(clip.find('duration').text),`
			`}`