[youtube] Separate methods for embeds extraction
This commit is contained in:
parent
c5c9bf0c12
commit
66c9fa36c1
2 changed files with 41 additions and 29 deletions
|
@ -1374,6 +1374,43 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
playback_url, video_id, 'Marking watched',
|
||||
'Unable to mark watched', fatal=False)
|
||||
|
||||
@staticmethod
def _extract_urls(webpage):
    """Collect YouTube embeds found in the HTML of an arbitrary page.

    Three kinds of markup are scanned, in this order:

    1. Embedded players: a quoted youtube.com / youtube-nocookie.com
       ``/embed/``, ``/v/`` or ``/p/`` URL that directly follows an
       ``<iframe src=``, ``data-video-url=``, ``<embed src=``,
       ``embedSWF(``, ``<object data=`` or ``new SWFObject(`` prefix.
    2. lazyYT elements: the value of ``data-youtube-id`` on elements
       with ``class="lazyYT"``.
    3. Wordpress "YouTube Video Importer" plugin: the value of
       ``data-video_id`` on ``<div>`` elements whose class contains
       ``yvii_single_video_player``.

    webpage -- the page HTML as a string.

    Returns a list of strings in the order found (empty when nothing
    matches).  Items from (1) are URLs; items from (2) and (3) are the
    raw attribute values (presumably bare video ids -- callers must
    accept both forms).  Items from (1) and (2) are HTML-unescaped,
    items from (3) are returned as-is.
    """
    # Embedded YouTube player
    #
    # NOTE: the embedSWF alternative used to be ``embedSWF\(?:\s*``, which
    # requires a literal colon and therefore never matched the regular
    # ``swfobject.embedSWF("<url>", ...)`` call.  The colon is optional
    # now, so everything the old pattern accepted still matches.
    player_re = r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:?\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/.+?)
        \1'''
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(player_re, webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(lazy_id)
        for lazy_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall yields (q1, q2, id) tuples; only the trailing id is wanted.
    entries.extend(m[-1] for m in matches)

    return entries
|
||||
|
||||
@staticmethod
def _extract_url(webpage):
    """Return the first embed reported by YoutubeIE._extract_urls.

    webpage -- the page HTML as a string.

    Returns the first item of the list produced by _extract_urls for
    this page, or None when that list is empty.
    """
    found = YoutubeIE._extract_urls(webpage)
    # next() with a default gives "first item or None" without indexing.
    return next(iter(found), None)
|
||||
|
||||
@classmethod
|
||||
def extract_id(cls, url):
|
||||
mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue