[odnoklassniki] add support for schemeless embed extraction

This commit is contained in:
Remita Amine 2019-10-25 19:27:28 +01:00
parent 162bcc68dc
commit 416c3ca7f5
2 changed files with 13 additions and 3 deletions

View file

@ -118,6 +118,7 @@ from .foxnews import FoxNewsIE
from .viqeo import ViqeoIE from .viqeo import ViqeoIE
from .expressen import ExpressenIE from .expressen import ExpressenIE
from .zype import ZypeIE from .zype import ZypeIE
from .odnoklassniki import OdnoklassnikiIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -2627,9 +2628,9 @@ class GenericIE(InfoExtractor):
return self.url_result(mobj.group('url'), 'VK') return self.url_result(mobj.group('url'), 'VK')
# Look for embedded Odnoklassniki player # Look for embedded Odnoklassniki player
mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage) odnoklassniki_url = OdnoklassnikiIE._extract_url(webpage)
if mobj is not None: if odnoklassniki_url:
return self.url_result(mobj.group('url'), 'Odnoklassniki') return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
# Look for embedded ivi player # Look for embedded ivi player
mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage) mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)

View file

@ -1,6 +1,8 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_etree_fromstring, compat_etree_fromstring,
@ -121,6 +123,13 @@ class OdnoklassnikiIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
@staticmethod
def _extract_url(webpage):
    """Return the URL of the first Odnoklassniki embed iframe in *webpage*.

    Looks for an ``<iframe>`` tag whose quoted ``src`` attribute points at a
    ``/videoembed/`` path on ``odnoklassniki.ru`` or ``ok.ru``. The scheme is
    optional, so scheme-relative URLs (``//ok.ru/videoembed/...``) are
    accepted as well as ``http://`` and ``https://`` ones.

    The URL is returned exactly as written in the attribute; ``None`` is
    returned when no such iframe is present.
    """
    embed_match = re.search(
        r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1',
        webpage)
    # The back-reference \1 ends the URL at the same quote character that
    # opened the attribute value.
    return embed_match.group('url') if embed_match else None
def _real_extract(self, url): def _real_extract(self, url):
start_time = int_or_none(compat_parse_qs( start_time = int_or_none(compat_parse_qs(
compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0]) compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0])