Merge branch 'subtitles-rework'

(Closes PR #4964)
This commit is contained in:
Jaime Marquínez Ferrándiz 2015-02-23 17:13:03 +01:00
commit bfc993cc91
29 changed files with 532 additions and 554 deletions

View file

@ -154,7 +154,7 @@ class YoutubeDL(object):
allsubtitles: Downloads all the subtitles of the video
(requires writesubtitles or writeautomaticsub)
listsubtitles: Lists all available subtitles for the video
subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
subtitlesformat: The format code for subtitles
subtitleslangs: List of languages of the subtitles to download
keepvideo: Keep the video file after post-processing
daterange: A DateRange object, download only if the upload_date is in the range.
@ -1008,6 +1008,15 @@ class YoutubeDL(object):
info_dict['timestamp'])
info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
if self.params.get('listsubtitles', False):
if 'automatic_captions' in info_dict:
self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
return
info_dict['requested_subtitles'] = self.process_subtitles(
info_dict['id'], info_dict.get('subtitles'),
info_dict.get('automatic_captions'))
# These extractors handle format selection themselves
if info_dict['extractor'] in ['Youku']:
if download:
@ -1136,6 +1145,55 @@ class YoutubeDL(object):
info_dict.update(formats_to_download[-1])
return info_dict
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format.

    Arguments:
    video_id           -- id of the video; only used in warning messages
    normal_subtitles   -- dict mapping a language code to a list of format
                          dicts (each one has at least an 'ext' key), or
                          a falsy value if there are none
    automatic_captions -- same structure as normal_subtitles

    Normal subtitles are only considered when the 'writesubtitles' param
    is set and automatic captions only when 'writeautomaticsub' is set;
    for a language present in both, the normal subtitles win.
    The last entry of a format list is the one picked for 'best'.

    Returns a dict mapping each selected language to the chosen format
    dict, or None if nothing was requested or nothing is available.
    """
    available_subs = {}
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
    if automatic_captions and self.params.get('writeautomaticsub'):
        for lang, cap_info in automatic_captions.items():
            # Never let an automatic caption replace a real subtitle
            available_subs.setdefault(lang, cap_info)

    # available_subs can only be filled when one of the two write options
    # is enabled, so a single emptiness check covers both conditions.
    if not available_subs:
        return None

    if self.params.get('allsubtitles', False):
        requested_langs = list(available_subs)
    elif self.params.get('subtitleslangs', False):
        requested_langs = self.params.get('subtitleslangs')
    elif 'en' in available_subs:
        requested_langs = ['en']
    else:
        # No explicit request and no English: take the first language
        requested_langs = [next(iter(available_subs))]

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        # An empty format list is as useless as a missing language;
        # indexing it with [-1] below would raise IndexError.
        if not formats:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in formats_preference:
            if ext == 'best':
                f = formats[-1]
                break
            matches = [fmt for fmt in formats if fmt['ext'] == ext]
            if matches:
                f = matches[-1]
                break
        else:
            # None of the preferred extensions exists for this language:
            # fall back to the last format and tell the user about it.
            f = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, f['ext']))
        subs[lang] = f
    return subs
def process_info(self, info_dict):
"""Process a single resolved IE result."""
@ -1238,15 +1296,22 @@ class YoutubeDL(object):
subtitles_are_requested = any([self.params.get('writesubtitles', False),
self.params.get('writeautomaticsub')])
if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
if subtitles_are_requested and info_dict.get('requested_subtitles'):
# subtitles download errors are already managed as troubles in relevant IE
# that way it will silently go on when used with unsupporting IE
subtitles = info_dict['subtitles']
sub_format = self.params.get('subtitlesformat', 'srt')
for sub_lang in subtitles.keys():
sub = subtitles[sub_lang]
if sub is None:
continue
subtitles = info_dict['requested_subtitles']
for sub_lang, sub_info in subtitles.items():
sub_format = sub_info['ext']
if sub_info.get('data') is not None:
sub_data = sub_info['data']
else:
try:
uf = self.urlopen(sub_info['url'])
sub_data = uf.read().decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_warning('Unable to download subtitle for "%s": %s' %
(sub_lang, compat_str(err)))
continue
try:
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
@ -1254,7 +1319,7 @@ class YoutubeDL(object):
else:
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
subfile.write(sub)
subfile.write(sub_data)
except (OSError, IOError):
self.report_error('Cannot write subtitles file ' + sub_filename)
return
@ -1564,6 +1629,17 @@ class YoutubeDL(object):
['ID', 'width', 'height', 'URL'],
[[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the formats available for each subtitle language.

    subtitles maps a language code to a list of format dicts carrying an
    'ext' key; name is the label used in the messages (for instance
    'subtitles' or 'automatic captions').  When subtitles is empty or
    None a single "has no ..." line is printed instead of the table.
    """
    if subtitles:
        self.to_screen('Available %s for %s:' % (name, video_id))
        rows = []
        for language, fmts in subtitles.items():
            # Show the extensions in reverse list order
            extensions = [fmt['ext'] for fmt in reversed(fmts)]
            rows.append([language, ', '.join(extensions)])
        self.to_screen(render_table(['Language', 'formats'], rows))
    else:
        self.to_screen('%s has no %s' % (video_id, name))
def urlopen(self, req):
""" Start an HTTP download """