Match --download-archive during playlist processing (Fixes #1745)

This commit is contained in:
Philipp Hagemeister 2013-11-22 22:46:46 +01:00
parent 50123be421
commit 7012b23c94
4 changed files with 52 additions and 27 deletions

View file

@ -84,16 +84,16 @@ class TestYoutubeLists(unittest.TestCase):
dl = FakeYDL() dl = FakeYDL()
ie = YoutubeChannelIE(dl) ie = YoutubeChannelIE(dl)
#test paginated channel #test paginated channel
result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')[0] result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')
self.assertTrue(len(result['entries']) > 90) self.assertTrue(len(result['entries']) > 90)
#test autogenerated channel #test autogenerated channel
result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')[0] result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
self.assertTrue(len(result['entries']) >= 18) self.assertTrue(len(result['entries']) >= 18)
def test_youtube_user(self): def test_youtube_user(self):
dl = FakeYDL() dl = FakeYDL()
ie = YoutubeUserIE(dl) ie = YoutubeUserIE(dl)
result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')[0] result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')
self.assertTrue(len(result['entries']) >= 320) self.assertTrue(len(result['entries']) >= 320)
def test_youtube_safe_search(self): def test_youtube_safe_search(self):

View file

@ -355,6 +355,8 @@ class YoutubeDL(object):
def _match_entry(self, info_dict): def _match_entry(self, info_dict):
""" Returns None iff the file should be downloaded """ """ Returns None iff the file should be downloaded """
if 'title' in info_dict:
# The title can be missing when we're just evaluating the playlist
title = info_dict['title'] title = info_dict['title']
matchtitle = self.params.get('matchtitle', False) matchtitle = self.params.get('matchtitle', False)
if matchtitle: if matchtitle:
@ -374,8 +376,8 @@ class YoutubeDL(object):
if age_limit < info_dict.get('age_limit', 0): if age_limit < info_dict.get('age_limit', 0):
return u'Skipping "' + title + '" because it is age restricted' return u'Skipping "' + title + '" because it is age restricted'
if self.in_download_archive(info_dict): if self.in_download_archive(info_dict):
return (u'%(title)s has already been recorded in archive' return (u'%s has already been recorded in archive'
% info_dict) % info_dict.get('title', info_dict.get('id', u'video')))
return None return None
@staticmethod @staticmethod
@ -454,7 +456,7 @@ class YoutubeDL(object):
ie_key=ie_result.get('ie_key'), ie_key=ie_result.get('ie_key'),
extra_info=extra_info) extra_info=extra_info)
elif result_type == 'playlist': elif result_type == 'playlist':
self.add_extra_info(ie_result, extra_info)
# We process each entry in the playlist # We process each entry in the playlist
playlist = ie_result.get('title', None) or ie_result.get('id', None) playlist = ie_result.get('title', None) or ie_result.get('id', None)
self.to_screen(u'[download] Downloading playlist: %s' % playlist) self.to_screen(u'[download] Downloading playlist: %s' % playlist)
@ -484,6 +486,12 @@ class YoutubeDL(object):
'webpage_url': ie_result['webpage_url'], 'webpage_url': ie_result['webpage_url'],
'extractor_key': ie_result['extractor_key'], 'extractor_key': ie_result['extractor_key'],
} }
reason = self._match_entry(entry)
if reason is not None:
self.to_screen(u'[download] ' + reason)
continue
entry_result = self.process_ie_result(entry, entry_result = self.process_ie_result(entry,
download=download, download=download,
extra_info=extra) extra_info=extra)
@ -810,7 +818,16 @@ class YoutubeDL(object):
fn = self.params.get('download_archive') fn = self.params.get('download_archive')
if fn is None: if fn is None:
return False return False
vid_id = info_dict['extractor'] + u' ' + info_dict['id'] extractor = info_dict.get('extractor_id')
if extractor is None:
if 'id' in info_dict:
extractor = info_dict.get('ie_key') # key in a playlist
if extractor is None:
return False # Incomplete video information
# Future-proof against any change in case
# and backwards compatibility with prior versions
extractor = extractor.lower()
vid_id = extractor + u' ' + info_dict['id']
try: try:
with locked_file(fn, 'r', encoding='utf-8') as archive_file: with locked_file(fn, 'r', encoding='utf-8') as archive_file:
for line in archive_file: for line in archive_file:

View file

@ -229,12 +229,14 @@ class InfoExtractor(object):
self.to_screen(u'Logging in') self.to_screen(u'Logging in')
#Methods for following #608 #Methods for following #608
def url_result(self, url, ie=None): def url_result(self, url, ie=None, video_id=None):
"""Returns a url that points to a page that should be processed""" """Returns a url that points to a page that should be processed"""
#TODO: ie should be the class used for getting the info #TODO: ie should be the class used for getting the info
video_info = {'_type': 'url', video_info = {'_type': 'url',
'url': url, 'url': url,
'ie_key': ie} 'ie_key': ie}
if video_id is not None:
video_info['id'] = video_id
return video_info return video_info
def playlist_result(self, entries, playlist_id=None, playlist_title=None): def playlist_result(self, entries, playlist_id=None, playlist_title=None):
"""Returns a playlist""" """Returns a playlist"""

View file

@ -1552,7 +1552,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
video_id = query_dict['v'][0] video_id = query_dict['v'][0]
if self._downloader.params.get('noplaylist'): if self._downloader.params.get('noplaylist'):
self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id) self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
return self.url_result('https://www.youtube.com/watch?v=' + video_id, 'Youtube') return self.url_result(video_id, 'Youtube', video_id=video_id)
else: else:
self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id)) self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
@ -1571,7 +1571,8 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
playlist_title = self._og_search_title(page) playlist_title = self._og_search_title(page)
url_results = [self.url_result(vid, 'Youtube') for vid in ids] url_results = [self.url_result(video_id, 'Youtube', video_id=video_id)
for video_id in ids]
return self.playlist_result(url_results, playlist_id, playlist_title) return self.playlist_result(url_results, playlist_id, playlist_title)
@ -1626,9 +1627,9 @@ class YoutubeChannelIE(InfoExtractor):
self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids))) self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids] url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls] for video_id in video_ids]
return [self.playlist_result(url_entries, channel_id)] return self.playlist_result(url_entries, channel_id)
class YoutubeUserIE(InfoExtractor): class YoutubeUserIE(InfoExtractor):
@ -1692,9 +1693,11 @@ class YoutubeUserIE(InfoExtractor):
if len(ids_in_page) < self._GDATA_PAGE_SIZE: if len(ids_in_page) < self._GDATA_PAGE_SIZE:
break break
urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids] url_results = [
url_results = [self.url_result(rurl, 'Youtube') for rurl in urls] self.url_result(video_id, 'Youtube', video_id=video_id)
return [self.playlist_result(url_results, playlist_title = username)] for video_id in video_ids]
return self.playlist_result(url_results, playlist_title=username)
class YoutubeSearchIE(SearchInfoExtractor): class YoutubeSearchIE(SearchInfoExtractor):
IE_DESC = u'YouTube.com searches' IE_DESC = u'YouTube.com searches'
@ -1735,7 +1738,8 @@ class YoutubeSearchIE(SearchInfoExtractor):
if len(video_ids) > n: if len(video_ids) > n:
video_ids = video_ids[:n] video_ids = video_ids[:n]
videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids] videos = [self.url_result(video_id, 'Youtube', video_id=video_id)
for video_id in video_ids]
return self.playlist_result(videos, query) return self.playlist_result(videos, query)
class YoutubeSearchDateIE(YoutubeSearchIE): class YoutubeSearchDateIE(YoutubeSearchIE):
@ -1795,7 +1799,9 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
feed_html = info['feed_html'] feed_html = info['feed_html']
m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html) m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
ids = orderedSet(m.group(1) for m in m_ids) ids = orderedSet(m.group(1) for m in m_ids)
feed_entries.extend(self.url_result(id, 'Youtube') for id in ids) feed_entries.extend(
self.url_result(video_id, 'Youtube', video_id=video_id)
for video_id in ids)
if info['paging'] is None: if info['paging'] is None:
break break
return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE) return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)