From 5d29af3d15eaec8958a4fad00103e18112f0c172 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 May 2017 06:29:16 +0700 Subject: [PATCH] [extractor/generic] Add support for mediaset embeds --- youtube_dl/extractor/generic.py | 7 +++++++ youtube_dl/extractor/mediaset.py | 8 ++++++++ 2 files changed, 15 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index dde65fa96..32f38c401 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -89,6 +89,7 @@ from .limelight import LimelightBaseIE from .anvato import AnvatoIE from .washingtonpost import WashingtonPostIE from .wistia import WistiaIE +from .mediaset import MediasetIE class GenericIE(InfoExtractor): @@ -2648,6 +2649,12 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key()) + # Look for Mediaset embeds + mediaset_urls = MediasetIE._extract_urls(webpage) + if mediaset_urls: + return self.playlist_from_matches( + mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key()) + # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld( webpage, video_id, default={}, expected_type='VideoObject') diff --git a/youtube_dl/extractor/mediaset.py b/youtube_dl/extractor/mediaset.py index 3ebc7f2e1..9a0e41663 100644 --- a/youtube_dl/extractor/mediaset.py +++ b/youtube_dl/extractor/mediaset.py @@ -59,6 +59,14 @@ class MediasetIE(InfoExtractor): 'only_matching': True, }] + @staticmethod + def _extract_urls(webpage): + return [ + mobj.group('url') + for mobj in re.finditer( + r']+\bsrc=(["\'])(?Phttps?://(?:www\.)?video\.mediaset\.it/player/playerIFrame(?:Twitter)?\.shtml\?.*?\bid=\d+.*?)\1', + webpage)] + def _real_extract(self, url): video_id = self._match_id(url)