From 6d2749aac407df1e039f5b61a294991c1e810cff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 25 Jan 2015 18:56:04 +0100 Subject: [PATCH 1/5] [drtv] Prefer the version without spoken subtitles (fixes #4779) For example for http://www.dr.dk/tv/se/moderne-klassikere/moderne-klassikere-one-republic-apologize#!/, there's a version where every time someone speaks in English a computer voice translates it. --- youtube_dl/extractor/drtv.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index c44adb109..510ef04b0 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -48,14 +48,20 @@ class DRTVIE(SubtitlesInfoExtractor): elif asset['Kind'] == 'VideoResource': duration = asset['DurationInMilliseconds'] / 1000.0 restricted_to_denmark = asset['RestrictedToDenmark'] + spoken_subtitles = asset['Target'] == 'SpokenSubtitles' for link in asset['Links']: target = link['Target'] uri = link['Uri'] + format_id = target + preference = -1 if target == 'HDS' else -2 + if spoken_subtitles: + preference -= 2 + format_id += '-spoken-subtitles' formats.append({ 'url': uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43' if target == 'HDS' else uri, - 'format_id': target, + 'format_id': format_id, 'ext': link['FileFormat'], - 'preference': -1 if target == 'HDS' else -2, + 'preference': preference, }) subtitles_list = asset.get('SubtitlesList') if isinstance(subtitles_list, list): From 96a53167fa64293506f446d0c2bf3e0db6c8df31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 26 Jan 2015 00:32:31 +0600 Subject: [PATCH 2/5] [common] Generalize URLs' HTTP errors pre-testing --- youtube_dl/extractor/common.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 388c55e99..478232682 100644 --- a/youtube_dl/extractor/common.py +++ 
b/youtube_dl/extractor/common.py @@ -14,6 +14,7 @@ import xml.etree.ElementTree from ..compat import ( compat_cookiejar, + compat_HTTPError, compat_http_client, compat_urllib_error, compat_urllib_parse_urlparse, @@ -26,6 +27,7 @@ from ..utils import ( compiled_regex_type, ExtractorError, float_or_none, + HEADRequest, int_or_none, RegexNotFoundError, sanitize_filename, @@ -716,6 +718,27 @@ class InfoExtractor(object): ) formats.sort(key=_formats_key) + def _check_formats(self, formats, video_id): + if formats: + formats[:] = filter( + lambda f: self._is_valid_url( + f['url'], video_id, + item='%s video format' % f.get('format_id') if f.get('format_id') else 'video'), + formats) + + def _is_valid_url(self, url, video_id, item='video'): + try: + self._request_webpage( + HEADRequest(url), video_id, + 'Checking %s URL' % item) + return True + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError): + self.report_warning( + '%s URL is invalid, skipping' % item, video_id) + return False + raise + def http_scheme(self): """ Either "http:" or "https:", depending on the user's preferences """ return ( From a57e8ce6580202c179c38a15abc31f84ca471521 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 26 Jan 2015 00:33:42 +0600 Subject: [PATCH 3/5] [lynda] Pre-test video URLs for HTTP errors (Closes #2185, closes #4782) --- youtube_dl/extractor/lynda.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index 26e84970d..762cefa34 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -85,6 +85,7 @@ class LyndaIE(SubtitlesInfoExtractor): } for format_id, video_url in prioritized_streams['0'].items() ]) + self._check_formats(formats, video_id) self._sort_formats(formats) if self._downloader.params.get('listsubtitles', False): From d862a4f94fac46e5c6de790a9b66d78d463d666d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 26 Jan 2015 
00:34:31 +0600 Subject: [PATCH 4/5] [spiegel] Use generalized formats pre-testing --- youtube_dl/extractor/spiegel.py | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py index f345883c7..b868241d5 100644 --- a/youtube_dl/extractor/spiegel.py +++ b/youtube_dl/extractor/spiegel.py @@ -4,14 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import ( - compat_urlparse, - compat_HTTPError, -) -from ..utils import ( - HEADRequest, - ExtractorError, -) +from ..compat import compat_urlparse from .spiegeltv import SpiegeltvIE @@ -72,16 +65,6 @@ class SpiegelIE(InfoExtractor): if n.tag.startswith('type') and n.tag != 'type6': format_id = n.tag.rpartition('type')[2] video_url = base_url + n.find('./filename').text - # Test video URLs beforehand as some of them are invalid - try: - self._request_webpage( - HEADRequest(video_url), video_id, - 'Checking %s video URL' % format_id) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: - self.report_warning( - '%s video URL is invalid, skipping' % format_id, video_id) - continue formats.append({ 'format_id': format_id, 'url': video_url, @@ -94,6 +77,7 @@ class SpiegelIE(InfoExtractor): }) duration = float(idoc[0].findall('./duration')[0].text) + self._check_formats(formats, video_id) self._sort_formats(formats) return { From 80a49d3d7bcd235ba15bd491cc62a0345c9abce1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 26 Jan 2015 02:08:29 +0600 Subject: [PATCH 5/5] Credit @David-Development for rtl2 (#4780) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 8362b6d8a..1596a7548 100644 --- a/AUTHORS +++ b/AUTHORS @@ -105,3 +105,4 @@ Dinesh S Johan K. Jensen Yen Chi Hsuan Enam Mijbah Noor +David Luhmer