[Yahoo/NBCSports] Generalize NBC sports info extractor

This commit is contained in:
Yen Chi Hsuan 2015-03-31 02:47:18 +08:00
parent a28ccbabc6
commit a2a4d5fa31
3 changed files with 37 additions and 5 deletions

View file

@ -50,7 +50,7 @@ class NBCIE(InfoExtractor):
return self.url_result(theplatform_url)
class NBCSportsIE(InfoExtractor):
class NBCSportsVPlayerIE(InfoExtractor):
_VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z]+)'
_TEST = {
@ -64,6 +64,13 @@ class NBCSportsIE(InfoExtractor):
}
}
@staticmethod
def _extract_url(webpage):
iframe_m = re.search(
r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage)
if iframe_m:
return iframe_m.group('url')
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
@ -71,6 +78,28 @@ class NBCSportsIE(InfoExtractor):
return self.url_result(theplatform_url, 'ThePlatform')
class NBCSportsIE(InfoExtractor):
    """Extractor for article pages on www.nbcsports.com.

    The page itself is only scanned for an embedded vplayer iframe; the
    iframe URL is then passed on under the 'NBCSportsVPlayer' key.
    """

    # Only plain http is matched: the site's https certificate is invalid.
    _VALID_URL = r'http://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)'

    _TEST = {
        'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
        'md5': 'ba6c93f96b67bf05344f78bd523dac0f',
        'info_dict': {
            'id': 'PHJSaFWbrTY9',
            'ext': 'flv',
            'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
            'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
        }
    }

    def _real_extract(self, url):
        """Download the page at ``url`` and delegate to the vplayer iframe."""
        page_id = self._match_id(url)
        html = self._download_webpage(url, page_id)
        # NOTE: _extract_url yields None when the page has no vplayer
        # iframe; that value is forwarded to url_result unchanged.
        player_url = NBCSportsVPlayerIE._extract_url(html)
        return self.url_result(player_url, 'NBCSportsVPlayer')
class NBCNewsIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
(?:video/.+?/(?P<id>\d+)|