From cc1db7f9b7aaea745e147153b1acc3013ab6dc72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 8 Mar 2014 19:43:18 +0100 Subject: [PATCH 01/12] [mtv] Improve detection of geoblocked videos --- youtube_dl/extractor/mtv.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 5447b6c0c..3a33cc9b6 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -40,8 +40,9 @@ class MTVServicesInfoExtractor(InfoExtractor): return thumb_node.attrib['url'] def _extract_video_formats(self, mdoc): - if re.match(r'.*/error_country_block\.swf$', mdoc.find('.//src').text) is not None: - raise ExtractorError('This video is not available from your country.', expected=True) + if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4)$', mdoc.find('.//src').text) is not None: + raise ExtractorError('This video is not available from your country.', + expected=True) formats = [] for rendition in mdoc.findall('.//rendition'): From 340b046876f0b188527be169b4b1c7141e6ed8aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 8 Mar 2014 20:06:20 +0100 Subject: [PATCH 02/12] [spike] Add support for downloading the mobile version if the normal version is geoblocked --- youtube_dl/extractor/mtv.py | 28 ++++++++++++++++++++++++++-- youtube_dl/extractor/spike.py | 1 + 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 3a33cc9b6..652054b63 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -5,9 +5,11 @@ import re from .common import InfoExtractor from ..utils import ( compat_urllib_parse, + compat_urllib_request, ExtractorError, find_xpath_attr, fix_xml_ampersands, + unescapeHTML, url_basename, RegexNotFoundError, ) @@ -18,6 +20,7 @@ def _media_xml_tag(tag): class MTVServicesInfoExtractor(InfoExtractor): + _MOBILE_TEMPLATE = None @staticmethod def _id_from_uri(uri): return uri.split(':')[-1] @@ -39,8 +42,22 @@ class MTVServicesInfoExtractor(InfoExtractor): else: return thumb_node.attrib['url'] - def _extract_video_formats(self, mdoc): + def _extract_mobile_video_formats(self, mtvn_id): + webpage_url = self._MOBILE_TEMPLATE % mtvn_id + req = compat_urllib_request.Request(webpage_url) + # Otherwise we get a webpage that would execute some javascript + req.add_header('Youtubedl-user-agent', 'curl/7') + webpage = self._download_webpage(req, mtvn_id, + 'Downloading mobile page') + url = unescapeHTML(self._search_regex(r' Date: Sat, 8 Mar 2014 21:06:12 +0100 Subject: [PATCH 03/12] [spike] Add support for mobile urls --- youtube_dl/extractor/spike.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py index dbae9e15d..a3adf54e3 100644 --- a/youtube_dl/extractor/spike.py +++ b/youtube_dl/extractor/spike.py @@ -1,10 +1,15 @@ from __future__ import unicode_literals +import re + from .mtv import MTVServicesInfoExtractor class SpikeIE(MTVServicesInfoExtractor): - _VALID_URL = r'https?://www\.spike\.com/(video-clips|episodes)/.+' + _VALID_URL = r'''(?x)https?:// + (www\.spike\.com/(video-clips|episodes)/.+| + m\.spike\.com/videos/video.rbml\?id=(?P[^&]+)) + ''' _TEST = { 'url': 'http://www.spike.com/video-clips/lhtu8m/auction-hunters-can-allen-ride-a-hundred-year-old-motorcycle', 'md5': '1a9265f32b0c375793d6c4ce45255256', @@ -18,3 +23,10 @@ class SpikeIE(MTVServicesInfoExtractor): _FEED_URL = 'http://www.spike.com/feeds/mrss/' _MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s' + + def _real_extract(self, url): + mobj = re.search(self._VALID_URL, url) + mobile_id = mobj.group('mobile_id') + if mobile_id is not None: + url = 'http://www.spike.com/video-clips/%s' % mobile_id + return super(SpikeIE, self)._real_extract(url) From a496524db27a7c8ea901a5fe9b55c142870a54c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Mar 2014 03:21:26 +0700 Subject: [PATCH 04/12] [collegehumor] Replace youtube test --- youtube_dl/extractor/collegehumor.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/collegehumor.py b/youtube_dl/extractor/collegehumor.py index 6b9fa4209..45d81aae5 100644 --- a/youtube_dl/extractor/collegehumor.py +++ b/youtube_dl/extractor/collegehumor.py @@ -35,15 +35,15 @@ class CollegeHumorIE(InfoExtractor): }, # embedded youtube video { - 'url': 'http://www.collegehumor.com/embed/6950457', + 'url': 'http://www.collegehumor.com/embed/6950306', 'info_dict': { - 'id': 'W5gMp3ZjYg4', + 'id': 'Z-bao9fg6Yc', 'ext': 'mp4', - 'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]', - 'uploader': 'FunnyPlox TV', - 'uploader_id': 'funnyploxtv', - 'description': 'md5:7ded37421526d54afdf005e25bc2b7a3', - 'upload_date': '20140128', + 'title': 'Young Americans Think President John F. Kennedy Died THIS MORNING IN A CAR ACCIDENT!!!', + 'uploader': 'Mark Dice', + 'uploader_id': 'MarkDice', + 'description': 'md5:62c3dab9351fac7bb44b53b69511d87f', + 'upload_date': '20140127', }, 'params': { 'skip_download': True, From 0ef68e04d9f612ab354724f18e833201fb8bab2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 8 Mar 2014 22:06:28 +0100 Subject: [PATCH 05/12] [mtv] Transform the urls from the mobile version to get the best quality And don't report a warning, just log a message, it allows to pass the test from Europe. --- youtube_dl/extractor/mtv.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 652054b63..d75241d3f 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -9,6 +9,7 @@ from ..utils import ( ExtractorError, find_xpath_attr, fix_xml_ampersands, + HEADRequest, unescapeHTML, url_basename, RegexNotFoundError, @@ -49,14 +50,19 @@ class MTVServicesInfoExtractor(InfoExtractor): req.add_header('Youtubedl-user-agent', 'curl/7') webpage = self._download_webpage(req, mtvn_id, 'Downloading mobile page') - url = unescapeHTML(self._search_regex(r' Date: Sun, 9 Mar 2014 11:57:30 +0100 Subject: [PATCH 06/12] [gamekings] Modernize and update the test's description field --- youtube_dl/extractor/gamekings.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/gamekings.py b/youtube_dl/extractor/gamekings.py index a3a5251fe..233398966 100644 --- a/youtube_dl/extractor/gamekings.py +++ b/youtube_dl/extractor/gamekings.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import re from .common import InfoExtractor @@ -6,13 +8,14 @@ from .common import InfoExtractor class GamekingsIE(InfoExtractor): _VALID_URL = r'http://www\.gamekings\.tv/videos/(?P[0-9a-z\-]+)' _TEST = { - u"url": u"http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/", - u'file': u'20130811.mp4', + 'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/', # MD5 is flaky, seems to change regularly - #u'md5': u'2f32b1f7b80fdc5cb616efb4f387f8a3', + # 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3', u'info_dict': { - u"title": u"Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review", - u"description": u"Melle en Steven hebben voor de review een week in de rechtbank doorbracht met Phoenix Wright: Ace Attorney - Dual Destinies.", + 'id': '20130811', + 'ext': 'mp4', + 'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review', + 'description': 'md5:632e61a9f97d700e83f43d77ddafb6a4', } } From 957688cee673472ca58c83465faa365104d0c480 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 9 Mar 2014 12:03:49 +0100 Subject: [PATCH 07/12] [ustream:channel] Update test's number of entries --- test/test_playlists.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_playlists.py b/test/test_playlists.py index 4bd815a0e..4785161f2 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -99,7 +99,7 @@ class TestPlaylists(unittest.TestCase): result = ie.extract('http://www.ustream.tv/channel/young-americans-for-liberty') self.assertIsPlaylist(result) self.assertEqual(result['id'], '5124905') - self.assertTrue(len(result['entries']) >= 11) + self.assertTrue(len(result['entries']) >= 6) def test_soundcloud_set(self): dl = FakeYDL() From 63ad031583db30f9e5b5b4ca913b9190f20d3af4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 9 Mar 2014 12:20:34 +0100 Subject: [PATCH 08/12] [soundcloud] Add the description field to the second test --- youtube_dl/extractor/soundcloud.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 1cc0dcb15..2f254f023 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -54,6 +54,7 @@ class SoundcloudIE(InfoExtractor): 'id': '47127627', 'ext': 'mp3', 'title': 'Goldrushed', + 'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com', 'uploader': 'The Royal Concept', 'upload_date': '20120521', }, From 31f77343f29ea805d80979d44309e33c1ca41eb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 9 Mar 2014 12:27:38 +0100 Subject: [PATCH 09/12] [vube] Update the test's checksum --- youtube_dl/extractor/vube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vube.py b/youtube_dl/extractor/vube.py index fbdff471a..935c97ae9 100644 --- a/youtube_dl/extractor/vube.py +++ b/youtube_dl/extractor/vube.py @@ -13,7 +13,7 @@ class VubeIE(InfoExtractor): _TEST = { 'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon', - 'md5': 'f81dcf6d0448e3291f54380181695821', + 'md5': 'db7aba89d4603dadd627e9d1973946fe', 'info_dict': { 'id': 'YL2qNPkqon', 'ext': 'mp4', @@ -77,4 +77,4 @@ class VubeIE(InfoExtractor): 'like_count': like_count, 'dislike_count': dislike_count, 'comment_count': comment_count, - } \ No newline at end of file + } From edb7fc5435ef522753aa0a17a018edc1dbea2ade Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Mar 2014 18:39:07 +0700 Subject: [PATCH 10/12] [videodetective] Modernize --- youtube_dl/extractor/videodetective.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/videodetective.py b/youtube_dl/extractor/videodetective.py index 265dd5b91..ac6c25537 100644 --- a/youtube_dl/extractor/videodetective.py +++ b/youtube_dl/extractor/videodetective.py @@ -1,22 +1,23 @@ +from __future__ import unicode_literals + import re from .common import InfoExtractor from .internetvideoarchive import InternetVideoArchiveIE -from ..utils import ( - compat_urlparse, -) +from ..utils import compat_urlparse class VideoDetectiveIE(InfoExtractor): _VALID_URL = r'https?://www\.videodetective\.com/[^/]+/[^/]+/(?P\d+)' _TEST = { - u'url': u'http://www.videodetective.com/movies/kick-ass-2/194487', - u'file': u'194487.mp4', - u'info_dict': { - u'title': u'KICK-ASS 2', - u'description': u'md5:65ba37ad619165afac7d432eaded6013', - u'duration': 135, + 'url': 'http://www.videodetective.com/movies/kick-ass-2/194487', + 'info_dict': { + 'id': '194487', + 'ext': 'mp4', + 'title': 'KICK-ASS 2', + 'description': 'md5:65ba37ad619165afac7d432eaded6013', + 'duration': 135, }, } @@ -26,5 +27,4 @@ class VideoDetectiveIE(InfoExtractor): webpage = self._download_webpage(url, video_id) og_video = self._og_search_video_url(webpage) query = compat_urlparse.urlparse(og_video).query - return self.url_result(InternetVideoArchiveIE._build_url(query), - ie=InternetVideoArchiveIE.ie_key()) + return self.url_result(InternetVideoArchiveIE._build_url(query), ie=InternetVideoArchiveIE.ie_key()) From 6d07ce0162d5a930aaf7c91b5c685a5c4335f306 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 9 Mar 2014 14:53:07 +0100 Subject: [PATCH 11/12] YoutubeDL: If the logger is set call its `warning` method in `report_warning` --- youtube_dl/YoutubeDL.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 6fd0969b5..fcb8dd19c 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -370,12 +370,15 @@ class YoutubeDL(object): Print the message to stderr, it will be prefixed with 'WARNING:' If stderr is a tty file the 'WARNING:' will be colored ''' - if self._err_file.isatty() and os.name != 'nt': - _msg_header = '\033[0;33mWARNING:\033[0m' + if self.params.get('logger') is not None: + self.params['logger'].warning(message) else: - _msg_header = 'WARNING:' - warning_message = '%s %s' % (_msg_header, message) - self.to_stderr(warning_message) + if self._err_file.isatty() and os.name != 'nt': + _msg_header = '\033[0;33mWARNING:\033[0m' + else: + _msg_header = 'WARNING:' + warning_message = '%s %s' % (_msg_header, message) + self.to_stderr(warning_message) def report_error(self, message, tb=None): ''' From 14719565733c114320e99228cf2dc570f8c3b6dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 9 Mar 2014 15:15:46 +0100 Subject: [PATCH 12/12] Add a basic test suite for the InfoExtractor class --- test/test_InfoExtractor.py | 44 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 test/test_InfoExtractor.py diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py new file mode 100644 index 000000000..13c18ed95 --- /dev/null +++ b/test/test_InfoExtractor.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python + +from __future__ import unicode_literals + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import FakeYDL +from youtube_dl.extractor.common import InfoExtractor +from youtube_dl.extractor import YoutubeIE, get_info_extractor + + +class TestIE(InfoExtractor): + pass + + +class TestInfoExtractor(unittest.TestCase): + def setUp(self): + self.ie = TestIE(FakeYDL()) + + def test_ie_key(self): + self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE) + + def test_html_search_regex(self): + html = '

Watch this video

' + search = lambda re, *args: self.ie._html_search_regex(re, html, *args) + self.assertEqual(search(r'

(.+?)

', 'foo'), 'Watch this video') + + def test_opengraph(self): + ie = self.ie + html = ''' + + + + ''' + self.assertEqual(ie._og_search_title(html), 'Foo') + self.assertEqual(ie._og_search_description(html), 'Some video\'s description ') + self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2') + +if __name__ == '__main__': + unittest.main()