From 5742c18bc1ea3da5b0fd480e75fcdf099220e52f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?=
Date: Sun, 25 Sep 2016 22:19:00 +0700
Subject: [PATCH] [npo] Add support for anderetijden.nl (Closes #10754)

---
 youtube_dl/extractor/npo.py | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index ff02d0309..66035a77c 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -5,6 +5,7 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     fix_xml_ampersands,
+    orderedSet,
     parse_duration,
     qualities,
     strip_jsonp,
@@ -446,7 +447,7 @@ class NPOPlaylistBaseIE(NPOIE):
 
         entries = [
             self.url_result('npo:%s' % video_id if not video_id.startswith('http') else video_id)
-            for video_id in re.findall(self._PLAYLIST_ENTRY_RE, webpage)
+            for video_id in orderedSet(re.findall(self._PLAYLIST_ENTRY_RE, webpage))
         ]
 
         playlist_title = self._html_search_regex(
@@ -508,3 +509,18 @@ class WNLIE(NPOPlaylistBaseIE):
         },
         'playlist_count': 4,
     }]
+
+
+class AndereTijdenIE(NPOPlaylistBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?anderetijden\.nl/programma/(?:[^/]+/)+(?P<id>[^/?#&]+)'
+    _PLAYLIST_TITLE_RE = r'(?s)<h1[^>]+class=["\'].*?\bpage-title\b.*?["\'][^>]*>(.+?)</h1>'
+    _PLAYLIST_ENTRY_RE = r'<figure[^>]+class=["\']episode-container episode-page["\'][^>]+data-prid=["\'](.+?)["\']'
+
+    _TESTS = [{
+        'url': 'http://anderetijden.nl/programma/1/Andere-Tijden/aflevering/676/Duitse-soldaten-over-de-Slag-bij-Arnhem',
+        'info_dict': {
+            'id': 'Duitse-soldaten-over-de-Slag-bij-Arnhem',
+            'title': 'Duitse soldaten over de Slag bij Arnhem',
+        },
+        'playlist_count': 3,
+    }]