[xminus] Simplify and extend (#4302)

2014-11-25 09:54:54 +01:00 · 2014-11-25 09:54:54 +01:00 · be64b5b098
parent c3e74731c2
commit be64b5b098
3 changed files with 95 additions and 20 deletions
--- a/test/test_utils.py
+++ b/test/test_utils.py
@ -47,6 +47,7 @@ from youtube_dl.utils import (
    js_to_json,
    intlist_to_bytes,
    args_to_str,
    parse_filesize,
 )
@ -367,5 +368,14 @@ class TestUtil(unittest.TestCase):
            'foo ba/r -baz \'2 be\' \'\''
        )
    def test_parse_filesize(self):
        # Each pair is (raw size string, expected byte count); an expected
        # value of None marks input that parse_filesize must reject.
        expectations = (
            (None, None),
            ('', None),
            ('91 B', 91),
            ('foobar', None),
            ('2 MiB', 2097152),
            ('5 GB', 5000000000),
            ('1.2Tb', 1200000000000),
        )
        for raw, expected in expectations:
            self.assertEqual(parse_filesize(raw), expected)
 # Run the whole test suite when this file is executed as a script.
 if __name__ == '__main__':
    unittest.main()
--- a/youtube_dl/extractor/xminus.py
+++ b/youtube_dl/extractor/xminus.py
@ -2,7 +2,14 @@
 from __future__ import unicode_literals
 from .common import InfoExtractor
-from ..utils import int_or_none
+from ..compat import (
    compat_chr,
    compat_ord,
 )
 from ..utils import (
    int_or_none,
    parse_filesize,
 )
 class XMinusIE(InfoExtractor):
@ -15,39 +22,46 @@ class XMinusIE(InfoExtractor):
            'ext': 'mp3',
            'title': 'Леонид Агутин-Песенка шофера',
            'duration': 156,
            'tbr': 320,
            'filesize_approx': 5900000,
            'view_count': int,
        }
    }
    def _real_extract(self, url):
        # Scrape track metadata from the page at `url` and build the MP3
        # download URL; returns the info dict assembled at the end.
        # NOTE(review): _match_id presumably derives the id from the URL via
        # the extractor's URL pattern -- confirm in InfoExtractor.
        video_id = self._match_id(url)
        # TODO more code goes here, for example ...
        webpage = self._download_webpage(url, video_id)
        # Artist and title are looked up without fatal=False; the title is
        # the artist and the track title joined by '-'.
        artist = self._html_search_regex(
-            r'minus_track.artist="(.+?)"', webpage, 'artist')
+            r'minus_track\.artist="(.+?)"', webpage, 'artist')
        title = artist + '-' + self._html_search_regex(
-            r'minus_track.title="(.+?)"', webpage, 'title')
+            r'minus_track\.title="(.+?)"', webpage, 'title')
        # The remaining metadata lookups pass fatal=False; presumably a
        # failed match then yields None instead of an error (confirm against
        # InfoExtractor._html_search_regex).
        duration = int_or_none(self._html_search_regex(
-            r'minus_track.dur_sec=\'([0-9]+?)\'', webpage, 'duration'))
+            r'minus_track\.dur_sec=\'([0-9]*?)\'',
            webpage, 'duration', fatal=False))
        # Captures a number followed by a two-letter unit such as "MB".
        filesize_approx = parse_filesize(self._html_search_regex(
            r'<div class="filesize[^"]*"></div>\s*([0-9.]+\s*[a-zA-Z][bB])',
            webpage, 'approximate filesize', fatal=False))
        # Bitrate is the integer in front of "kbps".
        tbr = int_or_none(self._html_search_regex(
            r'<div class="quality[^"]*"></div>\s*([0-9]+)\s*kbps',
            webpage, 'bitrate', fatal=False))
        view_count = int_or_none(self._html_search_regex(
            r'<div class="quality.*?► ([0-9]+)',
            webpage, 'view count', fatal=False))
        # The page carries an encoded token in a data-mt attribute.  Decoding
        # walks the token back to front and lowers every character by one
        # code point, except the character at index 3 of the reversed
        # sequence, which is kept unchanged.
        enc_token = self._html_search_regex(
            r'data-mt="(.*?)"', webpage, 'enc_token')
-        token = self._decode_token(enc_token)
+        token = ''.join(
-        url = 'http://x-minus.org/dwlf/{}/{}.mp3'.format(video_id, token)
+            c if pos == 3 else compat_chr(compat_ord(c) - 1)
            for pos, c in enumerate(reversed(enc_token)))
        # The download URL is built from the id and the decoded token.
        video_url = 'http://x-minus.org/dwlf/%s/%s.mp3' % (video_id, token)
        return {
            'id': video_id,
            'title': title,
-            'url': url,
+            'url': video_url,
            'duration': duration,
            'filesize_approx': filesize_approx,
            'tbr': tbr,
            'view_count': view_count,
        }
    def _decode_token(self, enc_token):
        token = ''
        pos = 0
        for c in reversed(enc_token):
            if pos != 3:
                token += chr(ord(c) - 1)
            else:
                token += c
            pos += 1
        return token
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -1046,6 +1046,57 @@ def format_bytes(bytes):
    return '%.2f%s' % (converted, suffix)
 def parse_filesize(s):
    """Parse a human-readable file size such as '5 GB' into a byte count.

    The string must start with a decimal number, optionally followed by
    whitespace and then one of the unit symbols in _UNIT_TABLE; anything
    after the unit is ignored.  Returns the size in bytes as an integer
    (fractions of a byte are truncated), or None if s is None or does not
    match that format.
    """
    if s is None:
        return None
    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'KiB': 1024,
        'KB': 1000,
        'kB': 1024,
        'Kb': 1000,
        'MiB': 1024 ** 2,
        'MB': 1000 ** 2,
        'mB': 1024 ** 2,
        'Mb': 1000 ** 2,
        'GiB': 1024 ** 3,
        'GB': 1000 ** 3,
        'gB': 1024 ** 3,
        'Gb': 1000 ** 3,
        'TiB': 1024 ** 4,
        'TB': 1000 ** 4,
        'tB': 1024 ** 4,
        'Tb': 1000 ** 4,
        'PiB': 1024 ** 5,
        'PB': 1000 ** 5,
        'pB': 1024 ** 5,
        'Pb': 1000 ** 5,
        'EiB': 1024 ** 6,
        'EB': 1000 ** 6,
        'eB': 1024 ** 6,
        'Eb': 1000 ** 6,
        'ZiB': 1024 ** 7,
        'ZB': 1000 ** 7,
        'zB': 1024 ** 7,
        'Zb': 1000 ** 7,
        'YiB': 1024 ** 8,
        'YB': 1000 ** 8,
        'yB': 1024 ** 8,
        'Yb': 1000 ** 8,
    }
    units_re = '|'.join(re.escape(u) for u in _UNIT_TABLE)
    m = re.match(r'(?P<num>[0-9]+(?:\.[0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
    if not m:
        return None
    # Scale with integer arithmetic only.  Going through float() loses
    # precision: 1.001 * 1000 evaluates to 1000.9999999999999, which would
    # truncate '1.001 KB' to 1000 bytes instead of 1001.
    whole, _, frac = m.group('num').partition('.')
    scaled = int(whole + frac) * _UNIT_TABLE[m.group('unit')]
    # Dropping len(frac) decimal places undoes the shift applied by joining
    # the integer and fractional digits; floor division truncates any
    # remaining fraction of a byte.
    return scaled // 10 ** len(frac)
 def get_term_width():
    columns = compat_getenv('COLUMNS', None)
    if columns: