[thesixtyone] Add new extractor (closes #3781)

2014-10-04 22:40:36 +03:00 · 2014-10-04 22:40:36 +03:00 · 5e69192ef7
parent e9be9a6acd
commit 5e69192ef7
2 changed files with 101 additions and 0 deletions
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -371,6 +371,7 @@ from .tenplay import TenPlayIE
 from .testurl import TestURLIE
 from .tf1 import TF1IE
 from .theplatform import ThePlatformIE
+from .thesixtyone import TheSixtyOneIE
 from .thisav import ThisAVIE
 from .tinypic import TinyPicIE
 from .tlc import TlcIE, TlcDeIE
--- a/youtube_dl/extractor/thesixtyone.py
+++ b/youtube_dl/extractor/thesixtyone.py
@ -0,0 +1,100 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import unified_strdate
+
+
+class TheSixtyOneIE(InfoExtractor):
+    _VALID_URL = r'''(?x)https?://(?:www\.)?thesixtyone\.com/
+        (?:.*?/)*
+        (?:
+            s|
+            song/comments/list|
+            song
+        )/(?P<id>[A-Za-z0-9]+)/?$'''
+    _SONG_URL_TEMPLATE = 'http://thesixtyone.com/s/{0:}'
+    _SONG_FILE_URL_TEMPLATE = 'http://{audio_server:}.thesixtyone.com/thesixtyone_production/audio/{0:}_stream'
+    _THUMBNAIL_URL_TEMPLATE = '{photo_base_url:}_desktop'
+    _TESTS = [
+        {
+            'url': 'http://www.thesixtyone.com/s/SrE3zD7s1jt/',
+            'md5': '821cc43b0530d3222e3e2b70bb4622ea',
+            'info_dict': {
+                'id': 'SrE3zD7s1jt',
+                'ext': 'mp3',
+                'title': 'CASIO - Unicorn War Mixtape',
+                'thumbnail': 're:^https?://.*_desktop$',
+                'upload_date': '20071217',
+                'duration': 3208,
+            }
+        },
+        {
+            'url': 'http://www.thesixtyone.com/song/comments/list/SrE3zD7s1jt',
+            'only_matching': True,
+        },
+        {
+            'url': 'http://www.thesixtyone.com/s/ULoiyjuJWli#/s/SrE3zD7s1jt/',
+            'only_matching': True,
+        },
+        {
+            'url': 'http://www.thesixtyone.com/#/s/SrE3zD7s1jt/',
+            'only_matching': True,
+        },
+        {
+            'url': 'http://www.thesixtyone.com/song/SrE3zD7s1jt/',
+            'only_matching': True,
+        },
+    ]
+
+    _DECODE_MAP = {
+        "x": "a",
+        "m": "b",
+        "w": "c",
+        "q": "d",
+        "n": "e",
+        "p": "f",
+        "a": "0",
+        "h": "1",
+        "e": "2",
+        "u": "3",
+        "s": "4",
+        "i": "5",
+        "o": "6",
+        "y": "7",
+        "r": "8",
+        "c": "9"
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        song_id = mobj.group('id')
+
+        webpage = self._download_webpage(
+            self._SONG_URL_TEMPLATE.format(song_id), song_id)
+
+        song_data = json.loads(self._search_regex(
+            r'"%s":\s(\{.*?\})' % song_id, webpage, 'song_data'))
+        keys = [self._DECODE_MAP.get(s, s) for s in song_data['key']]
+        url = self._SONG_FILE_URL_TEMPLATE.format(
+            "".join(reversed(keys)), **song_data)
+
+        formats = [{
+            'format_id': 'sd',
+            'url': url,
+            'ext': 'mp3',
+        }]
+
+        return {
+            'id': song_id,
+            'title': '{artist:} - {name:}'.format(**song_data),
+            'formats': formats,
+            'comment_count': song_data.get('comments_count'),
+            'duration': song_data.get('play_time'),
+            'like_count': song_data.get('score'),
+            'thumbnail': self._THUMBNAIL_URL_TEMPLATE.format(**song_data),
+            'upload_date': unified_strdate(song_data.get('publish_date')),
+        }