Enable artist playlists in YoutubePlaylistIE

Artist playlist pages have different format compared to user playlists,
thus more format checking is needed to construct the correct URL.

From the artist playlist this method downloads all listed below the
"Videos by [Artist Name]" header, plus usually there's one more
video on the side, titled "Youtube Mix for [Artist Name]", which
has a link format that currently cannot be distinguished from the other
videos in the list.
This commit is contained in:
Gergely Imreh 2011-01-31 18:54:47 +08:00
parent 16c73c2e51
commit f74e22ae28

View file

@ -2096,8 +2096,8 @@ class YahooSearchIE(InfoExtractor):
class YoutubePlaylistIE(InfoExtractor): class YoutubePlaylistIE(InfoExtractor):
"""Information Extractor for YouTube playlists.""" """Information Extractor for YouTube playlists."""
_VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/|p/)([^&]+).*' _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist)\?.*?(p|a)=|user/.*?/user/|p/)([^&]+).*'
_TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en' _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
_VIDEO_INDICATOR = r'/watch\?v=(.+?)&' _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
_MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>' _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
_youtube_ie = None _youtube_ie = None
@ -2125,13 +2125,19 @@ class YoutubePlaylistIE(InfoExtractor):
return return
# Download playlist pages # Download playlist pages
playlist_id = mobj.group(1) # prefix is 'p' as default for playlists but there are other types that need extra care
playlist_prefix = mobj.group(1)
if playlist_prefix == 'a':
playlist_access = 'artist'
else:
playlist_access = 'view_play_list'
playlist_id = mobj.group(2)
video_ids = [] video_ids = []
pagenum = 1 pagenum = 1
while True: while True:
self.report_download_page(playlist_id, pagenum) self.report_download_page(playlist_id, pagenum)
request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum)) request = urllib2.Request(self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum))
try: try:
page = urllib2.urlopen(request).read() page = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err: