From 04ee53eca1345df36a8c4a251c22a29f797ab798 Mon Sep 17 00:00:00 2001 From: AGSPhoenix <lx45803@gmail.com> Date: Fri, 4 Apr 2014 13:42:34 -0400 Subject: [PATCH 1/4] Support TeamCoco URLs with video_id in the title If the URL has the video_id in it, use that since the current method of finding the id breaks on those pages. Fixes 2698. --- youtube_dl/extractor/teamcoco.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index 9dcffead0..e0fc3e60f 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -9,7 +9,7 @@ from ..utils import ( class TeamcocoIE(InfoExtractor): - _VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)' + _VALID_URL = r'http://teamcoco\.com/video/(?P<video_id>\d*)?/?(?P<url_title>.*)' _TEST = { 'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush', 'file': '19705.mp4', @@ -26,11 +26,13 @@ class TeamcocoIE(InfoExtractor): raise ExtractorError('Invalid URL: %s' % url) url_title = mobj.group('url_title') webpage = self._download_webpage(url, url_title) - - video_id = self._html_search_regex( - r'<article class="video" data-id="(\d+?)"', - webpage, 'video id') - + + video_id = mobj.group("video_id") + if video_id == '': + video_id = self._html_search_regex( + r'<article class="video" data-id="(\d+?)"', + webpage, 'video id') + self.report_extraction(video_id) data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id From bb799e811b3f824f6a468336ec21f518bd55eba7 Mon Sep 17 00:00:00 2001 From: AGSPhoenix <lx45803@gmail.com> Date: Fri, 4 Apr 2014 13:52:35 -0400 Subject: [PATCH 2/4] Add a test for the new URL pages Add a test for the pages with the video_id in the URL. --- youtube_dl/extractor/teamcoco.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index e0fc3e60f..fdaf62cd0 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -10,7 +10,17 @@ from ..utils import ( class TeamcocoIE(InfoExtractor): _VALID_URL = r'http://teamcoco\.com/video/(?P<video_id>\d*)?/?(?P<url_title>.*)' - _TEST = { + _TESTS = [ + { + 'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant', + 'file': '80187.mp4', + 'md5': '3f7746aa0dc86de18df7539903d399ea', + 'info_dict': { + 'title': 'Conan Becomes A Mary Kay Beauty Consultant', + 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.' + } + }, + { 'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush', 'file': '19705.mp4', 'md5': 'cde9ba0fa3506f5f017ce11ead928f9a', @@ -19,6 +29,7 @@ class TeamcocoIE(InfoExtractor): "title": "Louis C.K. Interview Pt. 1 11/3/11" } } + ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From 6d0d573ecaf763ce2b043ef7f83a743170eab16d Mon Sep 17 00:00:00 2001 From: AGSPhoenix <lx45803@gmail.com> Date: Fri, 4 Apr 2014 15:25:28 -0400 Subject: [PATCH 3/4] Workaround for regex engine limitation --- youtube_dl/extractor/teamcoco.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index fdaf62cd0..66c9bd761 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -9,7 +9,7 @@ from ..utils import ( class TeamcocoIE(InfoExtractor): - _VALID_URL = r'http://teamcoco\.com/video/(?P<video_id>\d*)?/?(?P<url_title>.*)' + _VALID_URL = r'http://teamcoco\.com/video/([^/]*)?/?(.*)' _TESTS = [ { 'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant', @@ -35,11 +35,13 @@ class TeamcocoIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) if mobj is None: raise ExtractorError('Invalid URL: %s' % url) - url_title = mobj.group('url_title') + url_title = mobj.group(2) + if url_title == '': + url_title = mobj.group(1) webpage = self._download_webpage(url, url_title) - video_id = mobj.group("video_id") - if video_id == '': + video_id = mobj.group(1) + if mobj.group(2) == '': video_id = self._html_search_regex( r'<article class="video" data-id="(\d+?)"', webpage, 'video id') From fa387d2d99b837d827a9a8b8996d245dd3d191c4 Mon Sep 17 00:00:00 2001 From: AGSPhoenix <lx45803@gmail.com> Date: Fri, 4 Apr 2014 15:37:49 -0400 Subject: [PATCH 4/4] Revert "Workaround for regex engine limitation" This reverts commit 6d0d573ecaf763ce2b043ef7f83a743170eab16d. --- youtube_dl/extractor/teamcoco.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index 66c9bd761..fdaf62cd0 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -9,7 +9,7 @@ from ..utils import ( class TeamcocoIE(InfoExtractor): - _VALID_URL = r'http://teamcoco\.com/video/([^/]*)?/?(.*)' + _VALID_URL = r'http://teamcoco\.com/video/(?P<video_id>\d*)?/?(?P<url_title>.*)' _TESTS = [ { 'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant', @@ -35,13 +35,11 @@ class TeamcocoIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) if mobj is None: raise ExtractorError('Invalid URL: %s' % url) - url_title = mobj.group(2) - if url_title == '': - url_title = mobj.group(1) + url_title = mobj.group('url_title') webpage = self._download_webpage(url, url_title) - video_id = mobj.group(1) - if mobj.group(2) == '': + video_id = mobj.group("video_id") + if video_id == '': video_id = self._html_search_regex( r'<article class="video" data-id="(\d+?)"', webpage, 'video id')