Rename compat_urllib_request_Request to sanitized_Request and move to utils

This commit is contained in:
Sergey M․ 2015-11-20 20:33:49 +06:00
parent e4c4bcf36f
commit 67dda51722
4 changed files with 16 additions and 17 deletions

View File

@ -39,7 +39,6 @@ from .compat import (
compat_urllib_error, compat_urllib_error,
compat_urllib_request, compat_urllib_request,
compat_urllib_request_DataHandler, compat_urllib_request_DataHandler,
compat_urllib_request_Request,
) )
from .utils import ( from .utils import (
ContentTooShortError, ContentTooShortError,
@ -65,6 +64,7 @@ from .utils import (
SameFileError, SameFileError,
sanitize_filename, sanitize_filename,
sanitize_path, sanitize_path,
sanitized_Request,
std_headers, std_headers,
subtitles_filename, subtitles_filename,
UnavailableVideoError, UnavailableVideoError,
@ -1874,7 +1874,7 @@ class YoutubeDL(object):
def urlopen(self, req): def urlopen(self, req):
""" Start an HTTP download """ """ Start an HTTP download """
if isinstance(req, compat_basestring): if isinstance(req, compat_basestring):
req = compat_urllib_request_Request(req) req = sanitized_Request(req)
return self._opener.open(req, timeout=self._socket_timeout) return self._opener.open(req, timeout=self._socket_timeout)
def print_debug_header(self): def print_debug_header(self):

View File

@ -198,14 +198,6 @@ except ImportError: # Python < 3.4
return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url) return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
# Prepend protocol-less URLs with `http:` scheme in order to reduce the number
# of unwanted failures due to a missing protocol
def compat_urllib_request_Request(url, *args, **kwargs):
    """Create a compat_urllib_request.Request, defaulting to the http: scheme.

    A URL that starts with '//' (i.e. one given without a protocol) gets
    'http:' prepended; any other URL is used unchanged. All remaining
    positional and keyword arguments are forwarded untouched to
    compat_urllib_request.Request, whose result is returned.
    """
    if url.startswith('//'):
        # Protocol-relative URL: give it an explicit scheme before building
        # the request.
        url = 'http:%s' % url
    return compat_urllib_request.Request(url, *args, **kwargs)
try: try:
compat_basestring = basestring # Python 2 compat_basestring = basestring # Python 2
except NameError: except NameError:

View File

@ -8,7 +8,6 @@ import itertools
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_HTTPError, compat_HTTPError,
compat_urllib_request_Request,
compat_urlparse, compat_urlparse,
) )
from ..utils import ( from ..utils import (
@ -17,6 +16,7 @@ from ..utils import (
InAdvancePagedList, InAdvancePagedList,
int_or_none, int_or_none,
RegexNotFoundError, RegexNotFoundError,
sanitized_Request,
smuggle_url, smuggle_url,
std_headers, std_headers,
unified_strdate, unified_strdate,
@ -47,7 +47,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
'service': 'vimeo', 'service': 'vimeo',
'token': token, 'token': token,
})) }))
login_request = compat_urllib_request_Request(self._LOGIN_URL, data) login_request = sanitized_Request(self._LOGIN_URL, data)
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded') login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
login_request.add_header('Referer', self._LOGIN_URL) login_request.add_header('Referer', self._LOGIN_URL)
self._set_vimeo_cookie('vuid', vuid) self._set_vimeo_cookie('vuid', vuid)
@ -222,7 +222,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
if url.startswith('http://'): if url.startswith('http://'):
# vimeo only supports https now, but the user can give an http url # vimeo only supports https now, but the user can give an http url
url = url.replace('http://', 'https://') url = url.replace('http://', 'https://')
password_request = compat_urllib_request_Request(url + '/password', data) password_request = sanitized_Request(url + '/password', data)
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded') password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
password_request.add_header('Referer', url) password_request.add_header('Referer', url)
self._set_vimeo_cookie('vuid', vuid) self._set_vimeo_cookie('vuid', vuid)
@ -236,7 +236,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
raise ExtractorError('This video is protected by a password, use the --video-password option') raise ExtractorError('This video is protected by a password, use the --video-password option')
data = urlencode_postdata(encode_dict({'password': password})) data = urlencode_postdata(encode_dict({'password': password}))
pass_url = url + '/check-password' pass_url = url + '/check-password'
password_request = compat_urllib_request_Request(pass_url, data) password_request = sanitized_Request(pass_url, data)
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded') password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
return self._download_json( return self._download_json(
password_request, video_id, password_request, video_id,
@ -265,7 +265,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
url = 'https://vimeo.com/' + video_id url = 'https://vimeo.com/' + video_id
# Retrieve video webpage to extract further information # Retrieve video webpage to extract further information
request = compat_urllib_request_Request(url, None, headers) request = sanitized_Request(url, None, headers)
try: try:
webpage = self._download_webpage(request, video_id) webpage = self._download_webpage(request, video_id)
except ExtractorError as ee: except ExtractorError as ee:
@ -481,7 +481,7 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
password_path = self._search_regex( password_path = self._search_regex(
r'action="([^"]+)"', login_form, 'password URL') r'action="([^"]+)"', login_form, 'password URL')
password_url = compat_urlparse.urljoin(page_url, password_path) password_url = compat_urlparse.urljoin(page_url, password_path)
password_request = compat_urllib_request_Request(password_url, post) password_request = sanitized_Request(password_url, post)
password_request.add_header('Content-type', 'application/x-www-form-urlencoded') password_request.add_header('Content-type', 'application/x-www-form-urlencoded')
self._set_vimeo_cookie('vuid', vuid) self._set_vimeo_cookie('vuid', vuid)
self._set_vimeo_cookie('xsrft', token) self._set_vimeo_cookie('xsrft', token)
@ -640,7 +640,7 @@ class VimeoWatchLaterIE(VimeoChannelIE):
def _page_url(self, base_url, pagenum): def _page_url(self, base_url, pagenum):
url = '%s/page:%d/' % (base_url, pagenum) url = '%s/page:%d/' % (base_url, pagenum)
request = compat_urllib_request_Request(url) request = sanitized_Request(url)
# Set the header to get a partial html page with the ids, # Set the header to get a partial html page with the ids,
# the normal page doesn't contain them. # the normal page doesn't contain them.
request.add_header('X-Requested-With', 'XMLHttpRequest') request.add_header('X-Requested-With', 'XMLHttpRequest')

View File

@ -373,6 +373,13 @@ def sanitize_path(s):
return os.path.join(*sanitized_path) return os.path.join(*sanitized_path)
# Prepend protocol-less URLs with `http:` scheme in order to reduce the number
# of unwanted failures due to a missing protocol
def sanitized_Request(url, *args, **kwargs):
    """Return a compat_urllib_request.Request for a possibly scheme-less URL.

    If `url` begins with '//' it is treated as protocol-less and 'http:' is
    prepended; otherwise it is passed through as given. Extra positional and
    keyword arguments are handed to compat_urllib_request.Request unchanged.
    """
    if url.startswith('//'):
        # No scheme was given; fall back to plain http.
        url = 'http:%s' % url
    return compat_urllib_request.Request(url, *args, **kwargs)
def orderedSet(iterable): def orderedSet(iterable):
""" Remove all duplicates from the input iterable """ """ Remove all duplicates from the input iterable """
res = [] res = []