[youtube] improve m3u8 format extraction
This commit is contained in:
		
							parent
							
								
									e109f1ff43
								
							
						
					
					
						commit
						89beedd31f
					
				
					 1 changed file with 15 additions and 32 deletions
				
			
		|  | @ -1253,21 +1253,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | |||
|         video_id = mobj.group(2) | ||||
|         return video_id | ||||
| 
 | ||||
|     def _extract_from_m3u8(self, manifest_url, video_id): | ||||
|         url_map = {} | ||||
| 
 | ||||
|         def _get_urls(_manifest): | ||||
|             lines = _manifest.split('\n') | ||||
|             urls = filter(lambda l: l and not l.startswith('#'), | ||||
|                           lines) | ||||
|             return urls | ||||
|         manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest') | ||||
|         formats_urls = _get_urls(manifest) | ||||
|         for format_url in formats_urls: | ||||
|             itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag') | ||||
|             url_map[itag] = format_url | ||||
|         return url_map | ||||
| 
 | ||||
|     def _extract_annotations(self, video_id): | ||||
|         url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id | ||||
|         return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.') | ||||
|  | @ -1573,19 +1558,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | |||
|         if self._downloader.params.get('writeannotations', False): | ||||
|             video_annotations = self._extract_annotations(video_id) | ||||
| 
 | ||||
|         def _map_to_format_list(urlmap): | ||||
|             formats = [] | ||||
|             for itag, video_real_url in urlmap.items(): | ||||
|                 dct = { | ||||
|                     'format_id': itag, | ||||
|                     'url': video_real_url, | ||||
|                     'player_url': player_url, | ||||
|                 } | ||||
|                 if itag in self._formats: | ||||
|                     dct.update(self._formats[itag]) | ||||
|                 formats.append(dct) | ||||
|             return formats | ||||
| 
 | ||||
|         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): | ||||
|             self.report_rtmp_download() | ||||
|             formats = [{ | ||||
|  | @ -1718,11 +1690,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | |||
|                 formats.append(dct) | ||||
|         elif video_info.get('hlsvp'): | ||||
|             manifest_url = video_info['hlsvp'][0] | ||||
|             url_map = self._extract_from_m3u8(manifest_url, video_id) | ||||
|             formats = _map_to_format_list(url_map) | ||||
|             # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming | ||||
|             for a_format in formats: | ||||
|             formats = [] | ||||
|             m3u8_formats = self._extract_m3u8_formats( | ||||
|                 manifest_url, video_id, 'mp4', fatal=False) | ||||
|             for a_format in m3u8_formats: | ||||
|                 itag = self._search_regex( | ||||
|                     r'/itag/(\d+)/', a_format['url'], 'itag', default=None) | ||||
|                 if itag: | ||||
|                     a_format['format_id'] = itag | ||||
|                     if itag in self._formats: | ||||
|                         dct = self._formats[itag].copy() | ||||
|                         dct.update(a_format) | ||||
|                         a_format = dct | ||||
|                 a_format['player_url'] = player_url | ||||
|                 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming | ||||
|                 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True' | ||||
|                 formats.append(a_format) | ||||
|         else: | ||||
|             unavailable_message = self._html_search_regex( | ||||
|                 r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>', | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Remita Amine
						Remita Amine