[extractor/generic] Add support for mediaset embeds

This commit is contained in:
Sergey M․ 2017-05-14 06:29:16 +07:00
parent ca04de463d
commit 5d29af3d15
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D
2 changed files with 15 additions and 0 deletions

View file

@ -89,6 +89,7 @@ from .limelight import LimelightBaseIE
from .anvato import AnvatoIE
from .washingtonpost import WashingtonPostIE
from .wistia import WistiaIE
from .mediaset import MediasetIE
class GenericIE(InfoExtractor):
@ -2648,6 +2649,12 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches(
wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key())
# Look for Mediaset embeds
mediaset_urls = MediasetIE._extract_urls(webpage)
if mediaset_urls:
return self.playlist_from_matches(
mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
# Looking for http://schema.org/VideoObject
json_ld = self._search_json_ld(
webpage, video_id, default={}, expected_type='VideoObject')

View file

@ -59,6 +59,14 @@ class MediasetIE(InfoExtractor):
'only_matching': True,
}]
@staticmethod
def _extract_urls(webpage):
return [
mobj.group('url')
for mobj in re.finditer(
r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>https?://(?:www\.)?video\.mediaset\.it/player/playerIFrame(?:Twitter)?\.shtml\?.*?\bid=\d+.*?)\1',
webpage)]
def _real_extract(self, url):
video_id = self._match_id(url)