[newgrounds] Fix metadata extraction (closes #15531)

This commit is contained in:
Sergey M․ 2018-02-09 21:17:02 +07:00
parent 5828489072
commit 9e167e1ee3
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
1 changed files with 9 additions and 7 deletions

View File

@ -87,19 +87,21 @@ class NewgroundsIE(InfoExtractor):
self._check_formats(formats, media_id)
self._sort_formats(formats)
uploader = self._search_regex(
r'(?:Author|Writer)\s*<a[^>]+>([^<]+)', webpage, 'uploader',
uploader = self._html_search_regex(
(r'(?s)<h4[^>]*>(.+?)</h4>.*?<em>\s*Author\s*</em>',
r'(?:Author|Writer)\s*<a[^>]+>([^<]+)'), webpage, 'uploader',
fatal=False)
timestamp = unified_timestamp(self._search_regex(
r'<dt>Uploaded</dt>\s*<dd>([^<]+)', webpage, 'timestamp',
timestamp = unified_timestamp(self._html_search_regex(
(r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+</dd>\s*<dd>[^<]+)',
r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+)'), webpage, 'timestamp',
default=None))
duration = parse_duration(self._search_regex(
r'<dd>Song\s*</dd><dd>.+?</dd><dd>([^<]+)', webpage, 'duration',
default=None))
r'(?s)<dd>\s*Song\s*</dd>\s*<dd>.+?</dd>\s*<dd>([^<]+)', webpage,
'duration', default=None))
filesize_approx = parse_filesize(self._html_search_regex(
r'<dd>Song\s*</dd><dd>(.+?)</dd>', webpage, 'filesize',
r'(?s)<dd>\s*Song\s*</dd>\s*<dd>(.+?)</dd>', webpage, 'filesize',
default=None))
if len(formats) == 1:
formats[0]['filesize_approx'] = filesize_approx