Add basic --download-archive option

Often, users want to be able to download only videos they haven't seen before, despite the video files having been deleted or moved in the mean time.
When --download-archive FILE is given, the extractor and ID of every download is recorded in the specified file. If it is already present, the video in question is skipped.
This commit is contained in:
Philipp Hagemeister 2013-10-06 04:27:09 +02:00
parent 226113c880
commit c1c9a79c49
3 changed files with 132 additions and 0 deletions

View file

@ -3,6 +3,7 @@
from __future__ import absolute_import from __future__ import absolute_import
import errno
import io import io
import os import os
import re import re
@ -84,6 +85,9 @@ class YoutubeDL(object):
cachedir: Location of the cache files in the filesystem. cachedir: Location of the cache files in the filesystem.
None to disable filesystem cache. None to disable filesystem cache.
noplaylist: Download single video instead of a playlist if in doubt. noplaylist: Download single video instead of a playlist if in doubt.
downloadarchive: File name of a file where all downloads are recorded.
Videos already present in the file are not downloaded
again.
The following parameters are not used by YoutubeDL itself, they are used by The following parameters are not used by YoutubeDL itself, they are used by
the FileDownloader: the FileDownloader:
@ -309,6 +313,9 @@ class YoutubeDL(object):
dateRange = self.params.get('daterange', DateRange()) dateRange = self.params.get('daterange', DateRange())
if date not in dateRange: if date not in dateRange:
return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
if self.in_download_archive(info_dict):
return (u'%(title)s) has already been recorded in archive'
% info_dict)
return None return None
def extract_info(self, url, download=True, ie_key=None, extra_info={}): def extract_info(self, url, download=True, ie_key=None, extra_info={}):
@ -578,6 +585,8 @@ class YoutubeDL(object):
self.report_error(u'postprocessing: %s' % str(err)) self.report_error(u'postprocessing: %s' % str(err))
return return
self.record_download_archive(info_dict)
def download(self, url_list): def download(self, url_list):
"""Download a given list of URLs.""" """Download a given list of URLs."""
if len(url_list) > 1 and self.fixed_template(): if len(url_list) > 1 and self.fixed_template():
@ -617,3 +626,26 @@ class YoutubeDL(object):
os.remove(encodeFilename(filename)) os.remove(encodeFilename(filename))
except (IOError, OSError): except (IOError, OSError):
self.report_warning(u'Unable to remove downloaded video file') self.report_warning(u'Unable to remove downloaded video file')
def in_download_archive(self, info_dict):
fn = self.params.get('download_archive')
if fn is None:
return False
vid_id = info_dict['extractor'] + u' ' + info_dict['id']
try:
with locked_file(fn, 'r', encoding='utf-8') as archive_file:
for line in archive_file:
if line.strip() == vid_id:
return True
except IOError as ioe:
if ioe.errno != errno.ENOENT:
raise
return False
def record_download_archive(self, info_dict):
fn = self.params.get('download_archive')
if fn is None:
return
vid_id = info_dict['extractor'] + u' ' + info_dict['id']
with locked_file(fn, 'a', encoding='utf-8') as archive_file:
archive_file.write(vid_id + u'\n')

View file

@ -188,6 +188,9 @@ def parseOpts(overrideArguments=None):
selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None) selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None)
selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None) selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None)
selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False) selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False)
selection.add_option('--download-archive', metavar='FILE',
dest='download_archive',
help='Download only videos not present in the archive file. Record all downloaded videos in it.')
authentication.add_option('-u', '--username', authentication.add_option('-u', '--username',
@ -631,6 +634,7 @@ def _real_main(argv=None):
'daterange': date, 'daterange': date,
'cachedir': opts.cachedir, 'cachedir': opts.cachedir,
'youtube_print_sig_code': opts.youtube_print_sig_code, 'youtube_print_sig_code': opts.youtube_print_sig_code,
'download_archive': opts.download_archive,
}) })
if opts.verbose: if opts.verbose:

View file

@ -830,3 +830,99 @@ def get_cachedir(params={}):
cache_root = os.environ.get('XDG_CACHE_HOME', cache_root = os.environ.get('XDG_CACHE_HOME',
os.path.expanduser('~/.cache')) os.path.expanduser('~/.cache'))
return params.get('cachedir', os.path.join(cache_root, 'youtube-dl')) return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
# Cross-platform file locking
if sys.platform == 'win32':
import ctypes.wintypes
import msvcrt
class OVERLAPPED(ctypes.Structure):
_fields_ = [
('Internal', ctypes.wintypes.LPVOID),
('InternalHigh', ctypes.wintypes.LPVOID),
('Offset', ctypes.wintypes.DWORD),
('OffsetHigh', ctypes.wintypes.DWORD),
('hEvent', ctypes.wintypes.HANDLE),
]
kernel32 = ctypes.windll.kernel32
LockFileEx = kernel32.LockFileEx
LockFileEx.argtypes = [
ctypes.wintypes.HANDLE, # hFile
ctypes.wintypes.DWORD, # dwFlags
ctypes.wintypes.DWORD, # dwReserved
ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
ctypes.POINTER(OVERLAPPED) # Overlapped
]
LockFileEx.restype = ctypes.wintypes.BOOL
UnlockFileEx = kernel32.UnlockFileEx
UnlockFileEx.argtypes = [
ctypes.wintypes.HANDLE, # hFile
ctypes.wintypes.DWORD, # dwReserved
ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
ctypes.POINTER(OVERLAPPED) # Overlapped
]
UnlockFileEx.restype = ctypes.wintypes.BOOL
whole_low = 0xffffffff
whole_high = 0x7fffffff
def _lock_file(f, exclusive):
overlapped = OVERLAPPED()
overlapped.Offset = 0
overlapped.OffsetHigh = 0
overlapped.hEvent = 0
f._lock_file_overlapped_p = ctypes.pointer(overlapped)
handle = msvcrt.get_osfhandle(f.fileno())
if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
whole_low, whole_high, f._lock_file_overlapped_p):
raise OSError('Locking file failed: %r' % ctypes.FormatError())
def _unlock_file(f):
assert f._lock_file_overlapped_p
handle = msvcrt.get_osfhandle(f.fileno())
if not UnlockFileEx(handle, 0,
whole_low, whole_high, f._lock_file_overlapped_p):
raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
else:
import fcntl
def _lock_file(f, exclusive):
fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
def _unlock_file(f):
fcntl.lockf(f, fcntl.LOCK_UN)
class locked_file(object):
def __init__(self, filename, mode, encoding=None):
assert mode in ['r', 'a', 'w']
self.f = io.open(filename, mode, encoding=encoding)
self.mode = mode
def __enter__(self):
exclusive = self.mode != 'r'
try:
_lock_file(self.f, exclusive)
except IOError:
self.f.close()
raise
return self
def __exit__(self, etype, value, traceback):
try:
_unlock_file(self.f)
finally:
self.f.close()
def __iter__(self):
return iter(self.f)
def write(self, *args):
return self.f.write(*args)
def read(self, *args):
return self.f.read(*args)