[thisoldhouse] Add new extractor(closes #10837)

This commit is contained in:
Remita Amine 2016-10-03 15:27:09 +01:00
parent ee5de4e38e
commit c1084ddb0c
2 changed files with 33 additions and 0 deletions

View File

@ -892,6 +892,7 @@ from .thesixtyone import TheSixtyOneIE
from .thestar import TheStarIE
from .thisamericanlife import ThisAmericanLifeIE
from .thisav import ThisAVIE
from .thisoldhouse import ThisOldHouseIE
from .threeqsdn import ThreeQSDNIE
from .tinypic import TinyPicIE
from .tlc import TlcDeIE

View File

@ -0,0 +1,32 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class ThisOldHouseIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to)/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench',
'md5': '568acf9ca25a639f0c4ff905826b662f',
'info_dict': {
'id': '2REGtUDQ',
'ext': 'mp4',
'title': 'How to Build a Storage Bench',
'description': 'In the workshop, Tom Silva and Kevin O\'Connor build a storage bench for an entryway.',
'timestamp': 1442548800,
'upload_date': '20150918',
}
}, {
'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
drupal_settings = self._parse_json(self._search_regex(
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
webpage, 'drupal settings'), display_id)
video_id = drupal_settings['jwplatform']['video_id']
return self.url_result('jwplatform:' + video_id, 'JWPlatform', video_id)