Merge remote-tracking branch 'upstream/master'
This commit is contained in:
		
						commit
						85f03346eb
					
				
					 12 changed files with 101 additions and 28 deletions
				
			
		|  | @ -59,6 +59,7 @@ from .myvideo import MyVideoIE | |||
| from .nba import NBAIE | ||||
| from .nbc import NBCNewsIE | ||||
| from .ooyala import OoyalaIE | ||||
| from .orf import ORFIE | ||||
| from .pbs import PBSIE | ||||
| from .photobucket import PhotobucketIE | ||||
| from .pornotube import PornotubeIE | ||||
|  |  | |||
|  | @ -150,7 +150,7 @@ class InfoExtractor(object): | |||
|         if m: | ||||
|             encoding = m.group(1) | ||||
|         else: | ||||
|             m = re.search(br'<meta[^>]+charset="?([^"]+)[ /">]', | ||||
|             m = re.search(br'<meta[^>]+charset=[\'"]?([^\'")]+)[ /\'">]', | ||||
|                           webpage_bytes[:1024]) | ||||
|             if m: | ||||
|                 encoding = m.group(1).decode('ascii') | ||||
|  |  | |||
|  | @ -13,7 +13,7 @@ class IGNIE(InfoExtractor): | |||
|     Some videos of it.ign.com are also supported | ||||
|     """ | ||||
| 
 | ||||
|     _VALID_URL = r'https?://.+?\.ign\.com/(?:videos|show_videos)(/.+)?/(?P<name_or_id>.+)' | ||||
|     _VALID_URL = r'https?://.+?\.ign\.com/(?P<type>videos|show_videos|articles)(/.+)?/(?P<name_or_id>.+)' | ||||
|     IE_NAME = u'ign.com' | ||||
| 
 | ||||
|     _CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config' | ||||
|  | @ -41,7 +41,11 @@ class IGNIE(InfoExtractor): | |||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         name_or_id = mobj.group('name_or_id') | ||||
|         page_type = mobj.group('type') | ||||
|         webpage = self._download_webpage(url, name_or_id) | ||||
|         if page_type == 'articles': | ||||
|             video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, u'video url') | ||||
|             return self.url_result(video_url, ie='IGN') | ||||
|         video_id = self._find_video_id(webpage) | ||||
|         result = self._get_video_info(video_id) | ||||
|         description = self._html_search_regex(self._DESCRIPTION_RE, | ||||
|  | @ -68,7 +72,7 @@ class IGNIE(InfoExtractor): | |||
| class OneUPIE(IGNIE): | ||||
|     """Extractor for 1up.com, it uses the ign videos system.""" | ||||
| 
 | ||||
|     _VALID_URL = r'https?://gamevideos.1up.com/video/id/(?P<name_or_id>.+)' | ||||
|     _VALID_URL = r'https?://gamevideos.1up.com/(?P<type>video)/id/(?P<name_or_id>.+)' | ||||
|     IE_NAME = '1up.com' | ||||
| 
 | ||||
|     _DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>' | ||||
|  |  | |||
|  | @ -25,23 +25,21 @@ class TechTVMITIE(InfoExtractor): | |||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         webpage = self._download_webpage( | ||||
|         raw_page = self._download_webpage( | ||||
|             'http://techtv.mit.edu/videos/%s' % video_id, video_id) | ||||
|         embed_page = self._download_webpage( | ||||
|             'http://techtv.mit.edu/embeds/%s/' % video_id, video_id, | ||||
|             note=u'Downloading embed page') | ||||
|         clean_page = re.compile(u'<!--.*?-->', re.S).sub(u'', raw_page) | ||||
| 
 | ||||
|         base_url = self._search_regex(r'ipadUrl: \'(.+?cloudfront.net/)', | ||||
|             embed_page, u'base url') | ||||
|         formats_json = self._search_regex(r'bitrates: (\[.+?\])', embed_page, | ||||
|             raw_page, u'base url') | ||||
|         formats_json = self._search_regex(r'bitrates: (\[.+?\])', raw_page, | ||||
|             u'video formats') | ||||
|         formats = json.loads(formats_json) | ||||
|         formats = sorted(formats, key=lambda f: f['bitrate']) | ||||
| 
 | ||||
|         title = get_element_by_id('edit-title', webpage) | ||||
|         description = clean_html(get_element_by_id('edit-description', webpage)) | ||||
|         title = get_element_by_id('edit-title', clean_page) | ||||
|         description = clean_html(get_element_by_id('edit-description', clean_page)) | ||||
|         thumbnail = self._search_regex(r'playlist:.*?url: \'(.+?)\'', | ||||
|             embed_page, u'thumbnail', flags=re.DOTALL) | ||||
|             raw_page, u'thumbnail', flags=re.DOTALL) | ||||
| 
 | ||||
|         return {'id': video_id, | ||||
|                 'title': title, | ||||
|  |  | |||
							
								
								
									
										67
									
								
								youtube_dl/extractor/orf.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										67
									
								
								youtube_dl/extractor/orf.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,67 @@ | |||
| # coding: utf-8 | ||||
| 
 | ||||
| import re | ||||
| import xml.etree.ElementTree | ||||
| import json | ||||
| 
 | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urlparse, | ||||
|     ExtractorError, | ||||
|     find_xpath_attr, | ||||
| ) | ||||
| 
 | ||||
| class ORFIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://tvthek.orf.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)' | ||||
| 
 | ||||
|     _TEST = { | ||||
|         u'url': u'http://tvthek.orf.at/programs/1171769-Wetter-ZIB/episodes/6557323-Wetter', | ||||
|         u'file': u'6566957.flv', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Wetter', | ||||
|             u'description': u'Christa Kummer, Marcus Wadsak und Kollegen  präsentieren abwechselnd ihre täglichen Wetterprognosen für Österreich.\r \r Mehr Wetter unter wetter.ORF.at', | ||||
|         }, | ||||
|         u'params': { | ||||
|             # It uses rtmp | ||||
|             u'skip_download': True, | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         playlist_id = mobj.group('id') | ||||
|         webpage = self._download_webpage(url, playlist_id) | ||||
| 
 | ||||
|         flash_xml = self._search_regex('ORF.flashXML = \'(.+?)\'', webpage, u'flash xml') | ||||
|         flash_xml = compat_urlparse.parse_qs('xml='+flash_xml)['xml'][0] | ||||
|         flash_config = xml.etree.ElementTree.fromstring(flash_xml.encode('utf-8')) | ||||
|         playlist_json = self._search_regex(r'playlist\': \'(\[.*?\])\'', webpage, u'playlist').replace(r'\"','"') | ||||
|         playlist = json.loads(playlist_json) | ||||
| 
 | ||||
|         videos = [] | ||||
|         ns = '{http://tempuri.org/XMLSchema.xsd}' | ||||
|         xpath = '%(ns)sPlaylist/%(ns)sItems/%(ns)sItem' % {'ns': ns} | ||||
|         webpage_description = self._og_search_description(webpage) | ||||
|         for (i, (item, info)) in enumerate(zip(flash_config.findall(xpath), playlist), 1): | ||||
|             # Get best quality url | ||||
|             rtmp_url = None | ||||
|             for q in ['Q6A', 'Q4A', 'Q1A']: | ||||
|                 video_url = find_xpath_attr(item, '%sVideoUrl' % ns, 'quality', q) | ||||
|                 if video_url is not None: | ||||
|                     rtmp_url = video_url.text | ||||
|                     break | ||||
|             if rtmp_url is None: | ||||
|                 raise ExtractorError(u'Couldn\'t get video url: %s' % info['id']) | ||||
|             description = self._html_search_regex( | ||||
|                 r'id="playlist_entry_%s".*?<p>(.*?)</p>' % i, webpage, | ||||
|                 u'description', default=webpage_description, flags=re.DOTALL) | ||||
|             videos.append({ | ||||
|                 '_type': 'video', | ||||
|                 'id': info['id'], | ||||
|                 'title': info['title'], | ||||
|                 'url': rtmp_url, | ||||
|                 'ext': 'flv', | ||||
|                 'description': description, | ||||
|                 }) | ||||
| 
 | ||||
|         return videos | ||||
|  | @ -11,7 +11,7 @@ class UnistraIE(InfoExtractor): | |||
|         u'md5': u'736f605cfdc96724d55bb543ab3ced24', | ||||
|         u'info_dict': { | ||||
|             u'title': u'M!ss Yella', | ||||
|             u'description': u'md5:75e8439a3e2981cd5d4b6db232e8fdfc', | ||||
|             u'description': u'md5:104892c71bd48e55d70b902736b81bbf', | ||||
|         }, | ||||
|     } | ||||
| 
 | ||||
|  |  | |||
|  | @ -346,7 +346,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | |||
|             u"info_dict": { | ||||
|                 u"upload_date": u"20120506", | ||||
|                 u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]", | ||||
|                 u"description": u"md5:b085c9804f5ab69f4adea963a2dceb3c", | ||||
|                 u"description": u"md5:3e2666e0a55044490499ea45fe9037b7", | ||||
|                 u"uploader": u"Icona Pop", | ||||
|                 u"uploader_id": u"IconaPop" | ||||
|             } | ||||
|  | @ -434,11 +434,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | |||
|         elif len(s) == 87: | ||||
|             return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:] | ||||
|         elif len(s) == 86: | ||||
|             return s[5:40] + s[3] + s[41:48] + s[0] + s[49:86] | ||||
|             return s[81:73:-1] + s[84] + s[72:58:-1] + s[0] + s[57:35:-1] + s[85] + s[34:0:-1] | ||||
|         elif len(s) == 85: | ||||
|             return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27] | ||||
|         elif len(s) == 84: | ||||
|             return s[5:40] + s[3] + s[41:48] + s[0] + s[49:84] | ||||
|             return s[81:36:-1] + s[0] + s[35:2:-1] | ||||
|         elif len(s) == 83: | ||||
|             return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0] | ||||
|         elif len(s) == 82: | ||||
|  | @ -1184,7 +1184,7 @@ class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor): | |||
| class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): | ||||
|     IE_NAME = u'youtube:favorites' | ||||
|     IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)' | ||||
|     _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:o?rites)?' | ||||
|     _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?' | ||||
|     _LOGIN_REQUIRED = True | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|  |  | |||
|  | @ -213,7 +213,7 @@ if sys.version_info >= (2,7): | |||
|     def find_xpath_attr(node, xpath, key, val): | ||||
|         """ Find the xpath xpath[@key=val] """ | ||||
|         assert re.match(r'^[a-zA-Z]+$', key) | ||||
|         assert re.match(r'^[a-zA-Z@\s]*$', val) | ||||
|         assert re.match(r'^[a-zA-Z0-9@\s]*$', val) | ||||
|         expr = xpath + u"[@%s='%s']" % (key, val) | ||||
|         return node.find(expr) | ||||
| else: | ||||
|  |  | |||
|  | @ -1,2 +1,2 @@ | |||
| 
 | ||||
| __version__ = '2013.08.28.1' | ||||
| __version__ = '2013.08.30' | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Allan Zhou
						Allan Zhou