From a01825a5416174de56625a6ef202311e2d6c8cb1 Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Sat, 8 Apr 2017 05:40:57 -0400 Subject: [PATCH] [kaltura] Add support for iframe embeds --- youtube_dl/extractor/generic.py | 15 +++++++++++++++ youtube_dl/extractor/kaltura.py | 13 ++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 73911940c..658533cf6 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1080,6 +1080,21 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Kaltura'], }, + { + # Kaltura iframe embed + 'url': 'http://www.gsd.harvard.edu/event/i-m-pei-a-centennial-celebration/', + 'md5': 'ae5ace8eb09dc1a35d03b579a9c2cc44', + 'info_dict': { + 'id': '0_f2cfbpwy', + 'ext': 'mp4', + 'title': 'I. M. Pei: A Centennial Celebration', + 'description': 'md5:1db8f40c69edc46ca180ba30c567f37c', + 'upload_date': '20170403', + 'uploader_id': 'batchUser', + 'timestamp': 1491232186, + }, + 'add_ie': ['Kaltura'], + }, # Eagle.Platform embed (generic URL) { 'url': 'http://lenta.ru/news/2015/03/06/navalny/', diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 54374ea76..c6efdea93 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -91,6 +91,7 @@ class KalturaIE(InfoExtractor): }], }, }, + 'skip': 'Gone. 
Maybe https://www.safaribooksonline.com/library/tutorials/introduction-to-python-anon/3469/', 'params': { 'skip_download': True, }, @@ -108,6 +109,7 @@ class KalturaIE(InfoExtractor): @staticmethod def _extract_url(webpage): mobj = ( + # Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site re.search( r"""(?xs) kWidget\.(?:thumb)?[Ee]mbed\( @@ -127,7 +129,16 @@ class KalturaIE(InfoExtractor): (?P<q2>["\'])entry_?[Ii]d(?P=q2) )\s*:\s* (?P<q3>["\'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3) - ''', webpage)) + ''', webpage) or + re.search( + r'''(?xs) + <iframe[^>]+src=(?P<q1>["\']) + (?:https?:)?//(?:www\.)?kaltura\.com/p/(?P<partner_id>\d+)/ + (?:(?!(?P=q1)).)* + [\?&]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+) + (?P=q1) + ''', webpage) + ) if mobj: embed_info = mobj.groupdict() url = 'kaltura:%(partner_id)s:%(id)s' % embed_info