[extractor/generic] Improve support for pornhub embeds (closes #11100)

2016-11-06 21:52:00 +07:00 · 2016-11-06 21:52:00 +07:00 · b52c9ef165
parent e28ed498e6
commit b52c9ef165
2 changed files with 11 additions and 12 deletions
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -1983,11 +1983,6 @@ class GenericIE(InfoExtractor):
        if sportbox_urls:
            return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
-        # Look for embedded PornHub player
-        pornhub_url = PornHubIE._extract_url(webpage)
-        if pornhub_url:
-            return self.url_result(pornhub_url, 'PornHub')
        # Look for embedded XHamster player
        xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
        if xhamster_urls:
@@ -1998,6 +1993,11 @@ class GenericIE(InfoExtractor):
        if tnaflix_urls:
            return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
+        # Look for embedded PornHub player
+        pornhub_urls = PornHubIE._extract_urls(webpage)
+        if pornhub_urls:
+            return _playlist_from_matches(pornhub_urls, ie=PornHubIE.ie_key())
        # Look for embedded DrTuber player
        drtuber_urls = DrTuberIE._extract_urls(webpage)
        if drtuber_urls:
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -33,7 +33,7 @@ class PornHubIE(InfoExtractor):
                            (?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)|
                            (?:www\.)?thumbzilla\.com/video/
                        )
-                        (?P<id>[0-9a-z]+)
+                        (?P<id>[\da-z]+)
                    '''
    _TESTS = [{
        'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
@@ -96,12 +96,11 @@ class PornHubIE(InfoExtractor):
        'only_matching': True,
    }]
-    @classmethod
+    @staticmethod
-    def _extract_url(cls, webpage):
+    def _extract_urls(webpage):
-        mobj = re.search(
+        return re.findall(
-            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/\d+)\1', webpage)
+            r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/[\da-z]+)',
-        if mobj:
+            webpage)
-            return mobj.group('url')
    def _extract_count(self, pattern, webpage, name):
        return str_to_int(self._search_regex(