diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 2a566eabe..8181bca09 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -5,8 +5,8 @@ import re import sys import time +from ..compat import compat_str from ..utils import ( - compat_str, encodeFilename, format_bytes, timeconvert, diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 7cd22c504..00f3a026c 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -9,10 +9,12 @@ import xml.etree.ElementTree as etree from .common import FileDownloader from .http import HttpFD +from ..compat import ( + compat_urlparse, +) from ..utils import ( struct_pack, struct_unpack, - compat_urlparse, format_bytes, encodeFilename, sanitize_open, diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index ad26cfa40..5bb0f3cfd 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -6,9 +6,11 @@ import subprocess from ..postprocessor.ffmpeg import FFmpegPostProcessor from .common import FileDownloader -from ..utils import ( +from ..compat import ( compat_urlparse, compat_urllib_request, +) +from ..utils import ( check_executable, encodeFilename, ) diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 224962e86..e68f20c9f 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -4,11 +4,12 @@ import os import time from .common import FileDownloader -from ..utils import ( +from ..compat import ( compat_urllib_request, compat_urllib_error, +) +from ..utils import ( ContentTooShortError, - encodeFilename, sanitize_open, format_bytes, diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py index 58ae2005c..575912675 100644 --- a/youtube_dl/downloader/rtmp.py +++ b/youtube_dl/downloader/rtmp.py @@ -7,9 +7,9 @@ import sys import time from .common import FileDownloader +from ..compat import compat_str from ..utils import ( check_executable, - compat_str, encodeFilename, format_bytes, get_exe_version, diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py index 398e93bfb..623aeaf34 100644 --- a/youtube_dl/extractor/allocine.py +++ b/youtube_dl/extractor/allocine.py @@ -5,10 +5,9 @@ import re import json from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( - compat_str, qualities, - determine_ext, ) @@ -75,9 +74,7 @@ class AllocineIE(InfoExtractor): 'format_id': format_id, 'quality': quality(format_id), 'url': v, - 'ext': determine_ext(v), }) - self._sort_formats(formats) return { diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py index 185ee3693..b51eafc45 100644 --- a/youtube_dl/extractor/aol.py +++ b/youtube_dl/extractor/aol.py @@ -68,4 +68,3 @@ class AolIE(InfoExtractor): 'title': title, 'entries': entries, } - diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index 0c01fa1a1..7cd0482c7 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -4,8 +4,8 @@ import re import json from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import ( - compat_urlparse, int_or_none, ) diff --git a/youtube_dl/extractor/auengine.py b/youtube_dl/extractor/auengine.py index 1c765532a..014a21952 100644 --- a/youtube_dl/extractor/auengine.py +++ b/youtube_dl/extractor/auengine.py @@ -3,8 +3,8 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_urllib_parse from ..utils import ( - compat_urllib_parse, determine_ext, ExtractorError, ) diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py index 1ca0b7cf2..98e1443ab 100644 --- a/youtube_dl/extractor/bambuser.py +++ b/youtube_dl/extractor/bambuser.py @@ -5,7 +5,7 @@ import json import itertools from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_request, ) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index acddbc8f1..9fb770cb1 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -4,9 +4,11 @@ import json import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_str, compat_urlparse, +) +from ..utils import ( ExtractorError, ) diff --git a/youtube_dl/extractor/bet.py b/youtube_dl/extractor/bet.py index c1fc433f7..003e50002 100644 --- a/youtube_dl/extractor/bet.py +++ b/youtube_dl/extractor/bet.py @@ -1,8 +1,8 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_urllib_parse from ..utils import ( - compat_urllib_parse, xpath_text, xpath_with_ns, int_or_none, diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 0d5889f5d..241b904a9 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -4,8 +4,8 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_parse_qs from ..utils import ( - compat_parse_qs, ExtractorError, int_or_none, unified_strdate, @@ -29,10 +29,9 @@ class BiliBiliIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + video_code = self._search_regex( r'(?s)
(.*?)
', webpage, 'video code') diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index bf18a97e0..1eca00470 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -6,20 +6,21 @@ import json import xml.etree.ElementTree from .common import InfoExtractor -from ..utils import ( - compat_urllib_parse, - find_xpath_attr, - fix_xml_ampersands, - compat_urlparse, - compat_str, - compat_urllib_request, +from ..compat import ( compat_parse_qs, + compat_str, + compat_urllib_parse, compat_urllib_parse_urlparse, - + compat_urllib_request, + compat_urlparse, +) +from ..utils import ( determine_ext, ExtractorError, - unsmuggle_url, + find_xpath_attr, + fix_xml_ampersands, unescapeHTML, + unsmuggle_url, ) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index 97feb6704..2f866f3ef 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -4,10 +4,12 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_request, compat_urllib_parse, compat_urllib_parse_urlparse, +) +from ..utils import ( ExtractorError, ) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 2e3ef3fda..48e2410b6 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -3,9 +3,11 @@ from __future__ import unicode_literals import re from .mtv import MTVServicesInfoExtractor -from ..utils import ( +from ..compat import ( compat_str, compat_urllib_parse, +) +from ..utils import ( ExtractorError, float_or_none, unified_strdate, diff --git a/youtube_dl/extractor/condenast.py b/youtube_dl/extractor/condenast.py index 7a7e79360..3db4db4e4 100644 --- a/youtube_dl/extractor/condenast.py +++ b/youtube_dl/extractor/condenast.py @@ -5,12 +5,14 @@ import re import json from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, - orderedSet, compat_urllib_parse_urlparse, compat_urlparse, ) +from ..utils import ( + orderedSet, +) class CondeNastIE(InfoExtractor): diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index d7e2b841e..8f1ea02e7 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -10,10 +10,12 @@ import xml.etree.ElementTree from hashlib import sha1 from math import pow, sqrt, floor from .subtitles import SubtitlesInfoExtractor -from ..utils import ( - ExtractorError, +from ..compat import ( compat_urllib_parse, compat_urllib_request, +) +from ..utils import ( + ExtractorError, bytes_to_intlist, intlist_to_bytes, unified_strdate, diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 936c13cd6..cf5841a7c 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -8,13 +8,15 @@ import itertools from .common import InfoExtractor from .subtitles import SubtitlesInfoExtractor -from ..utils import ( - compat_urllib_request, +from ..compat import ( compat_str, + compat_urllib_request, +) +from ..utils import ( + ExtractorError, + int_or_none, orderedSet, str_to_int, - int_or_none, - ExtractorError, unescapeHTML, ) diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py index 45d66e2e6..c6b813f58 100644 --- a/youtube_dl/extractor/daum.py +++ b/youtube_dl/extractor/daum.py @@ -5,7 +5,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, ) diff --git a/youtube_dl/extractor/ehow.py b/youtube_dl/extractor/ehow.py index b766e17f2..9cb1bf301 100644 --- a/youtube_dl/extractor/ehow.py +++ b/youtube_dl/extractor/ehow.py @@ -1,8 +1,6 @@ from __future__ import unicode_literals -import re - -from ..utils import ( +from ..compat import ( compat_urllib_parse, ) from .common import InfoExtractor @@ -24,11 +22,10 @@ class EHowIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - video_url = self._search_regex(r'(?:file|source)=(http[^\'"&]*)', - webpage, 'video URL') + video_url = self._search_regex( + r'(?:file|source)=(http[^\'"&]*)', webpage, 'video URL') final_url = compat_urllib_parse.unquote(video_url) uploader = self._html_search_meta('uploader', webpage) title = self._og_search_title(webpage).replace(' | eHow', '') diff --git a/youtube_dl/extractor/eighttracks.py b/youtube_dl/extractor/eighttracks.py index f4c1e2a72..a30a1f330 100644 --- a/youtube_dl/extractor/eighttracks.py +++ b/youtube_dl/extractor/eighttracks.py @@ -6,7 +6,7 @@ import random import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_str, ) diff --git a/youtube_dl/extractor/escapist.py b/youtube_dl/extractor/escapist.py index 476fc22b9..e240cb859 100644 --- a/youtube_dl/extractor/escapist.py +++ b/youtube_dl/extractor/escapist.py @@ -3,9 +3,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, - +) +from ..utils import ( ExtractorError, ) diff --git a/youtube_dl/extractor/everyonesmixtape.py b/youtube_dl/extractor/everyonesmixtape.py index d237a8281..d872d828f 100644 --- a/youtube_dl/extractor/everyonesmixtape.py +++ b/youtube_dl/extractor/everyonesmixtape.py @@ -3,8 +3,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_request, +) +from ..utils import ( ExtractorError, ) diff --git a/youtube_dl/extractor/extremetube.py b/youtube_dl/extractor/extremetube.py index aacbf1414..36ba33128 100644 --- a/youtube_dl/extractor/extremetube.py +++ b/youtube_dl/extractor/extremetube.py @@ -3,16 +3,18 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse_urlparse, compat_urllib_request, compat_urllib_parse, +) +from ..utils import ( str_to_int, ) class ExtremeTubeIE(InfoExtractor): - _VALID_URL = r'^(?:https?://)?(?:www\.)?(?Pextremetube\.com/.*?video/.+?(?P[0-9]+))(?:[/?&]|$)' + _VALID_URL = r'https?://(?:www\.)?(?Pextremetube\.com/.*?video/.+?(?P[0-9]+))(?:[/?&]|$)' _TESTS = [{ 'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431', 'md5': '1fb9228f5e3332ec8c057d6ac36f33e0', @@ -31,7 +33,7 @@ class ExtremeTubeIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('videoid') + video_id = mobj.group('id') url = 'http://www.' + mobj.group('url') req = compat_urllib_request.Request(url) diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py index 6f5d23559..81ceace53 100644 --- a/youtube_dl/extractor/fc2.py +++ b/youtube_dl/extractor/fc2.py @@ -1,19 +1,20 @@ #! -*- coding: utf-8 -*- from __future__ import unicode_literals -import re import hashlib from .common import InfoExtractor -from ..utils import ( - ExtractorError, +from ..compat import ( compat_urllib_request, compat_urlparse, ) +from ..utils import ( + ExtractorError, +) class FC2IE(InfoExtractor): - _VALID_URL = r'^http://video\.fc2\.com/((?P[^/]+)/)?content/(?P[^/]+)' + _VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)?content/(?P[^/]+)' IE_NAME = 'fc2' _TEST = { 'url': 'http://video.fc2.com/en/content/20121103kUan1KHs', @@ -26,9 +27,7 @@ class FC2IE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) self._downloader.cookiejar.clear_session_cookies() # must clear diff --git a/youtube_dl/extractor/firedrive.py b/youtube_dl/extractor/firedrive.py index af439ccfe..3191116d9 100644 --- a/youtube_dl/extractor/firedrive.py +++ b/youtube_dl/extractor/firedrive.py @@ -4,11 +4,13 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( - ExtractorError, +from ..compat import ( compat_urllib_parse, compat_urllib_request, ) +from ..utils import ( + ExtractorError, +) class FiredriveIE(InfoExtractor): @@ -28,11 +30,8 @@ class FiredriveIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) url = 'http://firedrive.com/file/%s' % video_id - webpage = self._download_webpage(url, video_id) if re.search(self._FILE_DELETED_REGEX, webpage) is not None: diff --git a/youtube_dl/extractor/fourtube.py b/youtube_dl/extractor/fourtube.py index b22ce2acb..7187e0752 100644 --- a/youtube_dl/extractor/fourtube.py +++ b/youtube_dl/extractor/fourtube.py @@ -3,12 +3,14 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_request, - unified_strdate, - str_to_int, - parse_duration, +) +from ..utils import ( clean_html, + parse_duration, + str_to_int, + unified_strdate, ) @@ -31,9 +33,7 @@ class FourTubeIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - - video_id = mobj.group('id') + video_id = self._match_id(url) webpage_url = 'http://www.4tube.com/videos/' + video_id webpage = self._download_webpage(webpage_url, video_id) diff --git a/youtube_dl/extractor/franceculture.py b/youtube_dl/extractor/franceculture.py index 898e0dda7..0c2972162 100644 --- a/youtube_dl/extractor/franceculture.py +++ b/youtube_dl/extractor/franceculture.py @@ -5,7 +5,7 @@ import json import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_parse_qs, compat_urlparse, ) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index e0420a48f..bbc760a49 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -6,13 +6,15 @@ import re import json from .common import InfoExtractor -from ..utils import ( - compat_urlparse, - ExtractorError, - clean_html, - parse_duration, +from ..compat import ( compat_urllib_parse_urlparse, + compat_urlparse, +) +from ..utils import ( + clean_html, + ExtractorError, int_or_none, + parse_duration, ) diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index d570e3f6a..47373e215 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -4,9 +4,11 @@ import re import json from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, compat_urlparse, +) +from ..utils import ( unescapeHTML, ) diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dl/extractor/gdcvault.py index de14ae1fb..d453ec010 100644 --- a/youtube_dl/extractor/gdcvault.py +++ b/youtube_dl/extractor/gdcvault.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, compat_urllib_request, ) diff --git a/youtube_dl/extractor/golem.py b/youtube_dl/extractor/golem.py index 53714f47f..2bfb99040 100644 --- a/youtube_dl/extractor/golem.py +++ b/youtube_dl/extractor/golem.py @@ -2,8 +2,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urlparse, +) +from ..utils import ( determine_ext, ) diff --git a/youtube_dl/extractor/googlesearch.py b/youtube_dl/extractor/googlesearch.py index 469e1f935..498304cb2 100644 --- a/youtube_dl/extractor/googlesearch.py +++ b/youtube_dl/extractor/googlesearch.py @@ -4,7 +4,7 @@ import itertools import re from .common import SearchInfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, ) diff --git a/youtube_dl/extractor/gorillavid.py b/youtube_dl/extractor/gorillavid.py index 1ac1da856..ae24aff84 100644 --- a/youtube_dl/extractor/gorillavid.py +++ b/youtube_dl/extractor/gorillavid.py @@ -4,11 +4,12 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( - ExtractorError, - determine_ext, +from ..compat import ( compat_urllib_parse, compat_urllib_request, +) +from ..utils import ( + ExtractorError, int_or_none, ) @@ -106,7 +107,6 @@ class GorillaVidIE(InfoExtractor): formats = [{ 'format_id': 'sd', 'url': video_url, - 'ext': determine_ext(video_url), 'quality': 1, }] diff --git a/youtube_dl/extractor/hostingbulk.py b/youtube_dl/extractor/hostingbulk.py index 8e812b669..704d0285d 100644 --- a/youtube_dl/extractor/hostingbulk.py +++ b/youtube_dl/extractor/hostingbulk.py @@ -4,9 +4,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import ( + compat_urllib_request, +) from ..utils import ( ExtractorError, - compat_urllib_request, int_or_none, urlencode_postdata, ) @@ -30,9 +32,7 @@ class HostingBulkIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) url = 'http://hostingbulk.com/{0:}.html'.format(video_id) # Custom request with cookie to set language to English, so our file diff --git a/youtube_dl/extractor/hypem.py b/youtube_dl/extractor/hypem.py index 6d0d847c6..aa0724a02 100644 --- a/youtube_dl/extractor/hypem.py +++ b/youtube_dl/extractor/hypem.py @@ -1,20 +1,20 @@ from __future__ import unicode_literals import json -import re import time from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, compat_urllib_request, - +) +from ..utils import ( ExtractorError, ) class HypemIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)' + _VALID_URL = r'http://(?:www\.)?hypem\.com/track/(?P[^/]+)/' _TEST = { 'url': 'http://hypem.com/track/1v6ga/BODYWORK+-+TAME', 'md5': 'b9cc91b5af8995e9f0c1cee04c575828', @@ -27,8 +27,7 @@ class HypemIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - track_id = mobj.group(1) + track_id = self._match_id(url) data = {'ax': 1, 'ts': time.time()} data_encoded = compat_urllib_parse.urlencode(data) diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index f2c1c10f5..13a53a0cb 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -4,7 +4,7 @@ import re import json from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urlparse, ) diff --git a/youtube_dl/extractor/infoq.py b/youtube_dl/extractor/infoq.py index e76dd222d..f25f43664 100644 --- a/youtube_dl/extractor/infoq.py +++ b/youtube_dl/extractor/infoq.py @@ -1,10 +1,9 @@ from __future__ import unicode_literals import base64 -import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, ) @@ -24,9 +23,7 @@ class InfoQIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) video_title = self._html_search_regex(r'(.*?)', webpage, 'title') diff --git a/youtube_dl/extractor/internetvideoarchive.py b/youtube_dl/extractor/internetvideoarchive.py index 1e4799187..c813d4b82 100644 --- a/youtube_dl/extractor/internetvideoarchive.py +++ b/youtube_dl/extractor/internetvideoarchive.py @@ -3,9 +3,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urlparse, compat_urllib_parse, +) +from ..utils import ( xpath_with_ns, ) diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py index 4247d6391..8529bedfc 100644 --- a/youtube_dl/extractor/iprima.py +++ b/youtube_dl/extractor/iprima.py @@ -6,8 +6,10 @@ from random import random from math import floor from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_request, +) +from ..utils import ( ExtractorError, ) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index f0fba1adb..7a400323d 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -5,8 +5,10 @@ import re import json from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_request, +) +from ..utils import ( ExtractorError, ) diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py index 75b63cffb..97dcb518a 100644 --- a/youtube_dl/extractor/keezmovies.py +++ b/youtube_dl/extractor/keezmovies.py @@ -4,7 +4,7 @@ import os import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse_urlparse, compat_urllib_request, compat_urllib_parse, @@ -15,7 +15,7 @@ from ..aes import ( class KeezMoviesIE(InfoExtractor): - _VALID_URL = r'^https?://(?:www\.)?keezmovies\.com/video/.+?(?P[0-9]+)(?:[/?&]|$)' + _VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/.+?(?P[0-9]+)(?:[/?&]|$)' _TEST = { 'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711', 'file': '1214711.mp4', @@ -27,8 +27,7 @@ class KeezMoviesIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('videoid') + video_id = self._match_id(url) req = compat_urllib_request.Request(url) req.add_header('Cookie', 'age_verified=1') diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index 03c4691c6..5247c6f58 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -4,10 +4,12 @@ import re import json from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_str, compat_urllib_parse_urlparse, compat_urlparse, +) +from ..utils import ( ExtractorError, find_xpath_attr, int_or_none, diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index 2160d6cb0..26e84970d 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -5,12 +5,14 @@ import json from .subtitles import SubtitlesInfoExtractor from .common import InfoExtractor -from ..utils import ( +from ..compat import ( + compat_str, compat_urllib_parse, compat_urllib_request, +) +from ..utils import ( ExtractorError, int_or_none, - compat_str, ) diff --git a/youtube_dl/extractor/malemotion.py b/youtube_dl/extractor/malemotion.py index 1abf6e4f8..0b85a59d1 100644 --- a/youtube_dl/extractor/malemotion.py +++ b/youtube_dl/extractor/malemotion.py @@ -1,43 +1,33 @@ +# coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, ) class MalemotionIE(InfoExtractor): - _VALID_URL = r'^(?:https?://)?malemotion\.com/video/(.+?)\.(?P.+?)(#|$)' + _VALID_URL = r'https?://malemotion\.com/video/(.+?)\.(?P.+?)(#|$)' _TEST = { - 'url': 'http://malemotion.com/video/bien-dur.10ew', - 'file': '10ew.mp4', - 'md5': 'b3cc49f953b107e4a363cdff07d100ce', + 'url': 'http://malemotion.com/video/bete-de-concours.ltc', + 'md5': '3013e53a0afbde2878bc39998c33e8a5', 'info_dict': { - "title": "Bien dur", - "age_limit": 18, + 'id': 'ltc', + 'ext': 'mp4', + 'title': 'BĂȘte de Concours', + 'age_limit': 18, }, - 'skip': 'This video has been deleted.' } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group("id") - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - self.report_extraction(video_id) - - # Extract video URL - video_url = compat_urllib_parse.unquote( - self._search_regex(r'(.*?)[^/]+)/' + _VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P[^/]+)/' _TEST = { 'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/', @@ -31,12 +32,10 @@ class MiTeleIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - episode = mobj.group('episode') + episode = self._match_id(url) webpage = self._download_webpage(url, episode) embed_data_json = self._search_regex( - r'MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data', - flags=re.DOTALL + r'(?s)MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data', ).replace('\'', '"') embed_data = json.loads(embed_data_json) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 55cc33a3e..07d194562 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -3,8 +3,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, +) +from ..utils import ( ExtractorError, HEADRequest, int_or_none, diff --git a/youtube_dl/extractor/moevideo.py b/youtube_dl/extractor/moevideo.py index 2ff79b9b8..184f9c2c9 100644 --- a/youtube_dl/extractor/moevideo.py +++ b/youtube_dl/extractor/moevideo.py @@ -5,10 +5,12 @@ import json import re from .common import InfoExtractor -from ..utils import ( - ExtractorError, +from ..compat import ( compat_urllib_parse, compat_urllib_request, +) +from ..utils import ( + ExtractorError, int_or_none, ) diff --git a/youtube_dl/extractor/mofosex.py b/youtube_dl/extractor/mofosex.py index d658647e6..2cec12d35 100644 --- a/youtube_dl/extractor/mofosex.py +++ b/youtube_dl/extractor/mofosex.py @@ -4,7 +4,7 @@ import os import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse_urlparse, compat_urllib_request, compat_urllib_parse, @@ -12,7 +12,7 @@ from ..utils import ( class MofosexIE(InfoExtractor): - _VALID_URL = r'^https?://(?:www\.)?(?Pmofosex\.com/videos/(?P[0-9]+)/.*?\.html)' + _VALID_URL = r'https?://(?:www\.)?(?Pmofosex\.com/videos/(?P[0-9]+)/.*?\.html)' _TEST = { 'url': 'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html', 'md5': '1b2eb47ac33cc75d4a80e3026b613c5a', @@ -26,7 +26,7 @@ class MofosexIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('videoid') + video_id = mobj.group('id') url = 'http://www.' + mobj.group('url') req = compat_urllib_request.Request(url) diff --git a/youtube_dl/extractor/moniker.py b/youtube_dl/extractor/moniker.py index 1c4f589cc..5de719bdc 100644 --- a/youtube_dl/extractor/moniker.py +++ b/youtube_dl/extractor/moniker.py @@ -5,7 +5,7 @@ import os.path import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, compat_urllib_request, ) @@ -37,10 +37,9 @@ class MonikerIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) orig_webpage = self._download_webpage(url, video_id) + fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage) data = dict(fields) diff --git a/youtube_dl/extractor/mooshare.py b/youtube_dl/extractor/mooshare.py index 34a4bec3a..9f2853fa3 100644 --- a/youtube_dl/extractor/mooshare.py +++ b/youtube_dl/extractor/mooshare.py @@ -4,11 +4,13 @@ import re import time from .common import InfoExtractor -from ..utils import ( - ExtractorError, +from ..compat import ( compat_urllib_request, compat_urllib_parse, ) +from ..utils import ( + ExtractorError, +) class MooshareIE(InfoExtractor): @@ -43,9 +45,7 @@ class MooshareIE(InfoExtractor): ] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) page = self._download_webpage(url, video_id, 'Downloading page') if re.search(r'>Video Not Found or Deleted<', page) is not None: diff --git a/youtube_dl/extractor/motorsport.py b/youtube_dl/extractor/motorsport.py index 7c0ec6a12..f5ca74e97 100644 --- a/youtube_dl/extractor/motorsport.py +++ b/youtube_dl/extractor/motorsport.py @@ -3,13 +3,14 @@ from __future__ import unicode_literals import hashlib import json -import re import time from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_parse_qs, compat_str, +) +from ..utils import ( int_or_none, ) @@ -32,10 +33,9 @@ class MotorsportIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('id') - + display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) + flashvars_code = self._html_search_regex( r'[\da-z_-]+)' + _VALID_URL = r'https?://(?:www\.)?(?:nfb|onf)\.ca/film/(?P[\da-z_-]+)' _TEST = { 'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny', @@ -32,10 +30,10 @@ class NFBIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - - page = self._download_webpage('https://www.nfb.ca/film/%s' % video_id, video_id, 'Downloading film page') + video_id = self._match_id(url) + page = self._download_webpage( + 'https://www.nfb.ca/film/%s' % video_id, video_id, + 'Downloading film page') uploader_id = self._html_search_regex(r'[0-9]+)' + _VALID_URL = r'https?://(?:www|m)\.nuvid\.com/video/(?P[0-9]+)' _TEST = { 'url': 'http://m.nuvid.com/video/1310741/', 'md5': 'eab207b7ac4fccfb4e23c86201f11277', @@ -26,8 +28,7 @@ class NuvidIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) formats = [] diff --git a/youtube_dl/extractor/photobucket.py b/youtube_dl/extractor/photobucket.py index b4389e0b6..c66db3cdc 100644 --- a/youtube_dl/extractor/photobucket.py +++ b/youtube_dl/extractor/photobucket.py @@ -4,16 +4,17 @@ import json import re from .common import InfoExtractor -from ..utils import compat_urllib_parse +from ..compat import compat_urllib_parse class PhotobucketIE(InfoExtractor): _VALID_URL = r'http://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P.*)\.(?P(flv)|(mp4))' _TEST = { 'url': 'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0', - 'file': 'zpsc0c3b9fa.mp4', 'md5': '7dabfb92b0a31f6c16cebc0f8e60ff99', 'info_dict': { + 'id': 'zpsc0c3b9fa', + 'ext': 'mp4', 'timestamp': 1367669341, 'upload_date': '20130504', 'uploader': 'rachaneronas', diff --git a/youtube_dl/extractor/played.py b/youtube_dl/extractor/played.py index 17880471d..449d4836c 100644 --- a/youtube_dl/extractor/played.py +++ b/youtube_dl/extractor/played.py @@ -5,11 +5,13 @@ import re import os.path from .common import InfoExtractor -from ..utils import ( - ExtractorError, +from ..compat import ( compat_urllib_parse, compat_urllib_request, ) +from ..utils import ( + ExtractorError, +) class PlayedIE(InfoExtractor): @@ -28,7 +30,6 @@ class PlayedIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - orig_webpage = self._download_webpage(url, video_id) m_error = re.search( diff --git a/youtube_dl/extractor/playfm.py b/youtube_dl/extractor/playfm.py index ebc046804..9576aed0e 100644 --- a/youtube_dl/extractor/playfm.py +++ b/youtube_dl/extractor/playfm.py @@ -4,9 +4,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, compat_urllib_request, +) +from ..utils import ( ExtractorError, float_or_none, int_or_none, diff --git a/youtube_dl/extractor/playvid.py b/youtube_dl/extractor/playvid.py index cd3905acb..c3e667e9e 100644 --- a/youtube_dl/extractor/playvid.py +++ b/youtube_dl/extractor/playvid.py @@ -3,31 +3,31 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( - ExtractorError, - clean_html, +from ..compat import ( compat_urllib_parse, ) +from ..utils import ( + clean_html, + ExtractorError, +) class PlayvidIE(InfoExtractor): - _VALID_URL = r'^https?://www\.playvid\.com/watch(\?v=|/)(?P.+?)(?:#|$)' + _VALID_URL = r'https?://www\.playvid\.com/watch(\?v=|/)(?P.+?)(?:#|$)' _TEST = { - 'url': 'http://www.playvid.com/watch/agbDDi7WZTV', - 'md5': '44930f8afa616efdf9482daf4fe53e1e', + 'url': 'http://www.playvid.com/watch/RnmBNgtrrJu', + 'md5': 'ffa2f6b2119af359f544388d8c01eb6c', 'info_dict': { - 'id': 'agbDDi7WZTV', + 'id': 'RnmBNgtrrJu', 'ext': 'mp4', - 'title': 'Michelle Lewin in Miami Beach', - 'duration': 240, + 'title': 'md5:9256d01c6317e3f703848b5906880dc8', + 'duration': 82, 'age_limit': 18, } } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) m_error = re.search( diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 2ca15b717..634142d0d 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -4,10 +4,12 @@ import os import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( + compat_urllib_parse, compat_urllib_parse_urlparse, compat_urllib_request, - compat_urllib_parse, +) +from ..utils import ( str_to_int, ) from ..aes import ( @@ -16,7 +18,7 @@ from ..aes import ( class PornHubIE(InfoExtractor): - _VALID_URL = r'^https?://(?:www\.)?pornhub\.com/view_video\.php\?viewkey=(?P[0-9a-f]+)' + _VALID_URL = r'https?://(?:www\.)?pornhub\.com/view_video\.php\?viewkey=(?P[0-9a-f]+)' _TEST = { 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015', 'md5': '882f488fa1f0026f023f33576004a2ed', diff --git a/youtube_dl/extractor/promptfile.py b/youtube_dl/extractor/promptfile.py index 7fcde086c..f536e6e6c 100644 --- a/youtube_dl/extractor/promptfile.py +++ b/youtube_dl/extractor/promptfile.py @@ -4,12 +4,14 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( - ExtractorError, - determine_ext, +from ..compat import ( compat_urllib_parse, compat_urllib_request, ) +from ..utils import ( + determine_ext, + ExtractorError, +) class PromptFileIE(InfoExtractor): diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 1262793c8..385681d06 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -5,8 +5,10 @@ import re from hashlib import sha1 from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, +) +from ..utils import ( unified_strdate, ) diff --git a/youtube_dl/extractor/quickvid.py b/youtube_dl/extractor/quickvid.py index 3bc78060d..af7d76cf4 100644 --- a/youtube_dl/extractor/quickvid.py +++ b/youtube_dl/extractor/quickvid.py @@ -3,8 +3,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urlparse, +) +from ..utils import ( determine_ext, int_or_none, ) diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index 2d39ecfe4..aa26b7e0b 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -3,10 +3,12 @@ from __future__ import unicode_literals import re from .subtitles import SubtitlesInfoExtractor +from ..compat import ( + compat_urllib_parse, +) from ..utils import ( parse_duration, unified_strdate, - compat_urllib_parse, ) diff --git a/youtube_dl/extractor/rts.py b/youtube_dl/extractor/rts.py index dc59a5e5c..5e84c1098 100644 --- a/youtube_dl/extractor/rts.py +++ b/youtube_dl/extractor/rts.py @@ -4,12 +4,14 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import ( + compat_str, +) from ..utils import ( int_or_none, parse_duration, parse_iso8601, unescapeHTML, - compat_str, ) diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index 6941d96fb..b72b5a586 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -5,10 +5,12 @@ import re import itertools from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_str, - unified_strdate, +) +from ..utils import ( ExtractorError, + unified_strdate, ) @@ -36,9 +38,7 @@ class RutubeIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) video = self._download_json( 'http://rutube.ru/api/video/%s/?format=json' % video_id, video_id, 'Downloading video JSON') @@ -114,8 +114,7 @@ class RutubeMovieIE(RutubeChannelIE): _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json' def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - movie_id = mobj.group('id') + movie_id = self._match_id(url) movie = self._download_json( self._MOVIE_TEMPLATE % movie_id, movie_id, 'Downloading movie JSON') diff --git a/youtube_dl/extractor/screencast.py b/youtube_dl/extractor/screencast.py index c145f6fc7..dfd897ba3 100644 --- a/youtube_dl/extractor/screencast.py +++ b/youtube_dl/extractor/screencast.py @@ -1,14 +1,14 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..utils import ( - ExtractorError, +from ..compat import ( compat_parse_qs, compat_urllib_request, ) +from ..utils import ( + ExtractorError, +) class ScreencastIE(InfoExtractor): @@ -57,8 +57,7 @@ class ScreencastIE(InfoExtractor): ] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) video_url = self._html_search_regex( diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py index fdc31603a..26ced716e 100644 --- a/youtube_dl/extractor/shared.py +++ b/youtube_dl/extractor/shared.py @@ -4,10 +4,12 @@ import re import base64 from .common import InfoExtractor +from ..compat import ( + compat_urllib_parse, + compat_urllib_request, +) from ..utils import ( ExtractorError, - compat_urllib_request, - compat_urllib_parse, int_or_none, ) @@ -26,26 +28,30 @@ class SharedIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) - page = self._download_webpage(url, video_id) + if '>File does not exist<' in webpage: + raise ExtractorError( + 'Video %s does not exist' % video_id, expected=True) - if re.search(r'>File does not exist<', page) is not None: - raise ExtractorError('Video %s does not exist' % video_id, expected=True) - - download_form = dict(re.findall(r'[^/]+)' + _VALID_URL = r'http://(?:www\.)?vbox7\.com/play:(?P[^/]+)' _TEST = { 'url': 'http://vbox7.com/play:249bb972c2', 'md5': '99f65c0c9ef9b682b97313e052734c3f', @@ -25,8 +24,7 @@ class Vbox7IE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) redirect_page, urlh = self._download_webpage_handle(url, video_id) new_location = self._search_regex(r'window\.location = \'(.*)\';', diff --git a/youtube_dl/extractor/veehd.py b/youtube_dl/extractor/veehd.py index 94647d1c8..815f58468 100644 --- a/youtube_dl/extractor/veehd.py +++ b/youtube_dl/extractor/veehd.py @@ -4,10 +4,12 @@ import re import json from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urlparse, - get_element_by_id, +) +from ..utils import ( clean_html, + get_element_by_id, ) @@ -26,8 +28,7 @@ class VeeHDIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) # VeeHD seems to send garbage on the first request. # See https://github.com/rg3/youtube-dl/issues/2102 diff --git a/youtube_dl/extractor/veoh.py b/youtube_dl/extractor/veoh.py index a7953a7e7..01e258e32 100644 --- a/youtube_dl/extractor/veoh.py +++ b/youtube_dl/extractor/veoh.py @@ -4,8 +4,10 @@ import re import json from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_request, +) +from ..utils import ( int_or_none, ExtractorError, ) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index c912c3cbe..43f6b029d 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -4,8 +4,10 @@ import re import xml.etree.ElementTree from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_request, +) +from ..utils import ( ExtractorError, ) diff --git a/youtube_dl/extractor/videodetective.py b/youtube_dl/extractor/videodetective.py index ac6c25537..0ffc7ff7d 100644 --- a/youtube_dl/extractor/videodetective.py +++ b/youtube_dl/extractor/videodetective.py @@ -1,10 +1,8 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor +from ..compat import compat_urlparse from .internetvideoarchive import InternetVideoArchiveIE -from ..utils import compat_urlparse class VideoDetectiveIE(InfoExtractor): @@ -17,13 +15,12 @@ class VideoDetectiveIE(InfoExtractor): 'ext': 'mp4', 'title': 'KICK-ASS 2', 'description': 'md5:65ba37ad619165afac7d432eaded6013', - 'duration': 135, + 'duration': 138, }, } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) og_video = self._og_search_video_url(webpage) query = compat_urlparse.urlparse(og_video).query diff --git a/youtube_dl/extractor/videomega.py b/youtube_dl/extractor/videomega.py index 29c4e0101..7a78f0d26 100644 --- a/youtube_dl/extractor/videomega.py +++ b/youtube_dl/extractor/videomega.py @@ -1,11 +1,11 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, +) +from ..utils import ( remove_start, ) @@ -27,9 +27,7 @@ class VideoMegaIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) url = 'http://videomega.tv/iframe.php?ref={0:}'.format(video_id) webpage = self._download_webpage(url, video_id) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index ca6b0d5b3..542e9198a 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -5,14 +5,17 @@ import re import json from .common import InfoExtractor +from ..compat import ( + compat_str, + compat_urllib_parse, + compat_urllib_request, +) from ..utils import ( ExtractorError, - compat_urllib_request, - compat_urllib_parse, - compat_str, + orderedSet, unescapeHTML, unified_strdate, - orderedSet) +) class VKIE(InfoExtractor): diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py index affef6507..1c0966a79 100644 --- a/youtube_dl/extractor/vodlocker.py +++ b/youtube_dl/extractor/vodlocker.py @@ -2,8 +2,9 @@ from __future__ import unicode_literals import re + from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, compat_urllib_request, ) @@ -24,8 +25,7 @@ class VodlockerIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) fields = dict(re.findall(r'''(?x)]*>(.*?)', webpage, 'title') diff --git a/youtube_dl/extractor/xnxx.py b/youtube_dl/extractor/xnxx.py index 53ed7ef5a..79ed6c744 100644 --- a/youtube_dl/extractor/xnxx.py +++ b/youtube_dl/extractor/xnxx.py @@ -1,10 +1,8 @@ # encoding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, ) @@ -23,10 +21,7 @@ class XNXXIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - - # Get webpage content + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) video_url = self._search_regex(r'flv_url=(.*?)&', diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py index 38448e7c0..f9d98b83f 100644 --- a/youtube_dl/extractor/xtube.py +++ b/youtube_dl/extractor/xtube.py @@ -4,15 +4,17 @@ import re import json from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_request, +) +from ..utils import ( parse_duration, str_to_int, ) class XTubeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?Pxtube\.com/watch\.php\?v=(?P[^/?&]+))' + _VALID_URL = r'https?://(?:www\.)?(?Pxtube\.com/watch\.php\?v=(?P[^/?&]+))' _TEST = { 'url': 'http://www.xtube.com/watch.php?v=kVTUy_G222_', 'md5': '092fbdd3cbe292c920ef6fc6a8a9cdab', @@ -29,7 +31,7 @@ class XTubeIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('videoid') + video_id = mobj.group('id') url = 'http://www.' + mobj.group('url') req = compat_urllib_request.Request(url) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index 7e0044824..2a45dc574 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -3,15 +3,17 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, - ExtractorError, +) +from ..utils import ( clean_html, + ExtractorError, ) class XVideosIE(InfoExtractor): - _VALID_URL = r'^(?:https?://)?(?:www\.)?xvideos\.com/video([0-9]+)(?:.*)' + _VALID_URL = r'https?://(?:www\.)?xvideos\.com/video(?P[0-9]+)(?:.*)' _TEST = { 'url': 'http://www.xvideos.com/video4588838/biker_takes_his_girl', 'md5': '4b46ae6ea5e6e9086e714d883313c0c9', @@ -24,37 +26,25 @@ class XVideosIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group(1) - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - self.report_extraction(video_id) - mobj = re.search(r'

(.+?)

', webpage) if mobj: raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(mobj.group(1))), expected=True) - # Extract video URL video_url = compat_urllib_parse.unquote( self._search_regex(r'flv_url=(.+?)&', webpage, 'video URL')) - - # Extract title video_title = self._html_search_regex( r'(.*?)\s+-\s+XVID', webpage, 'title') - - # Extract video thumbnail video_thumbnail = self._search_regex( r'url_bigthumb=(.+?)&', webpage, 'thumbnail', fatal=False) return { 'id': video_id, 'url': video_url, - 'uploader': None, - 'upload_date': None, 'title': video_title, 'ext': 'flv', 'thumbnail': video_thumbnail, - 'description': None, 'age_limit': 18, } diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 0fdb12243..031226f27 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -6,11 +6,13 @@ import json import re from .common import InfoExtractor, SearchInfoExtractor -from ..utils import ( - ExtractorError, +from ..compat import ( compat_urllib_parse, compat_urlparse, +) +from ..utils import ( clean_html, + ExtractorError, int_or_none, ) diff --git a/youtube_dl/extractor/ynet.py b/youtube_dl/extractor/ynet.py index 7b621a9e3..894678a23 100644 --- a/youtube_dl/extractor/ynet.py +++ b/youtube_dl/extractor/ynet.py @@ -5,7 +5,7 @@ import re import json from .common import InfoExtractor -from ..utils import compat_urllib_parse +from ..compat import compat_urllib_parse class YnetIE(InfoExtractor): diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index d9c06a2ee..107c9ac36 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -6,10 +6,11 @@ import re import sys from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse_urlparse, compat_urllib_request, - +) +from ..utils import ( ExtractorError, unescapeHTML, unified_strdate, diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index 2bd264b30..e60505ace 100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -4,8 +4,8 @@ import collections import io import zlib +from .compat import compat_str from .utils import ( - compat_str, ExtractorError, struct_unpack, )