Improve URL extraction

This commit is contained in:
Sergey M․ 2018-07-21 19:08:28 +07:00
parent 4ecf300d13
commit 3052a30d42
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D
47 changed files with 166 additions and 139 deletions

View file

@@ -3,11 +3,12 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_iso8601,
mimetype2ext,
determine_ext,
ExtractorError,
int_or_none,
mimetype2ext,
parse_iso8601,
url_or_none,
)
@@ -35,7 +36,7 @@ class AMPIE(InfoExtractor):
media_thumbnail = [media_thumbnail]
for thumbnail_data in media_thumbnail:
thumbnail = thumbnail_data.get('@attributes', {})
thumbnail_url = thumbnail.get('url')
thumbnail_url = url_or_none(thumbnail.get('url'))
if not thumbnail_url:
continue
thumbnails.append({
@@ -51,7 +52,7 @@ class AMPIE(InfoExtractor):
media_subtitle = [media_subtitle]
for subtitle_data in media_subtitle:
subtitle = subtitle_data.get('@attributes', {})
subtitle_href = subtitle.get('href')
subtitle_href = url_or_none(subtitle.get('href'))
if not subtitle_href:
continue
subtitles.setdefault(subtitle.get('lang') or 'en', []).append({
@@ -65,7 +66,7 @@ class AMPIE(InfoExtractor):
media_content = [media_content]
for media_data in media_content:
media = media_data.get('@attributes', {})
media_url = media.get('url')
media_url = url_or_none(media.get('url'))
if not media_url:
continue
ext = mimetype2ext(media.get('type')) or determine_ext(media_url)
@@ -79,7 +80,7 @@ class AMPIE(InfoExtractor):
else:
formats.append({
'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'),
'url': media['url'],
'url': media_url,
'tbr': int_or_none(media.get('bitrate')),
'filesize': int_or_none(media.get('fileSize')),
'ext': ext,