Add initial version of postprocessing framework

This commit is contained in:
Ricardo Garcia 2008-07-27 12:13:49 +02:00
parent 5352678576
commit 65cd34c5d7

View file

@ -42,6 +42,14 @@ class SameFileError(Exception):
""" """
pass pass
class PostProcessingError(Exception):
    """Signal a failure inside a postprocessing step.

    A PostProcessor's run() method may raise this exception to tell the
    downloader that the postprocessing task could not be completed.
    """
class FileDownloader(object): class FileDownloader(object):
"""File Downloader class. """File Downloader class.
@ -83,10 +91,12 @@ class FileDownloader(object):
_params = None _params = None
_ies = [] _ies = []
_pps = []
def __init__(self, params): def __init__(self, params):
"""Create a FileDownloader object with the given options.""" """Create a FileDownloader object with the given options."""
self._ies = [] self._ies = []
self._pps = []
self.set_params(params) self.set_params(params)
@staticmethod @staticmethod
@ -176,6 +186,11 @@ class FileDownloader(object):
self._ies.append(ie) self._ies.append(ie)
ie.set_downloader(self) ie.set_downloader(self)
def add_post_processor(self, pp):
    """Append a PostProcessor to the chain and register with it.

    The object is first placed at the end of this downloader's chain,
    and then told which downloader owns it through its
    set_downloader() method.
    """
    chain = self._pps
    chain.append(pp)
    # Second half of the mutual registration: let the PP know about us.
    pp.set_downloader(self)
def to_stdout(self, message, skip_eol=False): def to_stdout(self, message, skip_eol=False):
"""Print message to stdout if not in quiet mode.""" """Print message to stdout if not in quiet mode."""
if not self._params.get('quiet', False): if not self._params.get('quiet', False):
@ -288,11 +303,26 @@ class FileDownloader(object):
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
retcode = self.trouble('ERROR: unable to download video data: %s' % str(err)) retcode = self.trouble('ERROR: unable to download video data: %s' % str(err))
continue continue
try:
self.post_process(filename, result)
except (PostProcessingError), err:
retcode = self.trouble('ERROR: postprocessing: %s' % str(err))
continue
break break
if not suitable_found: if not suitable_found:
retcode = self.trouble('ERROR: no suitable InfoExtractor: %s' % url) retcode = self.trouble('ERROR: no suitable InfoExtractor: %s' % url)
return retcode return retcode
def post_process(self, filename, ie_info):
    """Feed a downloaded file through every registered PostProcessor.

    A copy of ie_info, extended with a 'filepath' entry holding
    filename, is handed to the first PostProcessor. Each following one
    receives whatever the previous run() call returned. The chain ends
    early as soon as a run() call returns None.
    """
    # Work on a copy so the caller's dictionary is never rebound here.
    current = dict(ie_info, filepath=filename)
    for processor in self._pps:
        current = processor.run(current)
        if current is None:
            return
def _do_download(self, stream, url): def _do_download(self, stream, url):
request = urllib2.Request(url, None, std_headers) request = urllib2.Request(url, None, std_headers)
@ -736,6 +766,62 @@ class YoutubePlaylistIE(InfoExtractor):
information.extend(self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)) information.extend(self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id))
return information return information
class PostProcessor(object):
    """Base class for postprocessing steps.

    A PostProcessor is attached to a downloader through the
    downloader's add_post_processor() method. After a successful
    download, the downloader walks its chain of PostProcessors and
    calls run() on each of them: the first one gets an initial
    argument, and every later one gets the value returned by the one
    before it.

    The walk ends when a run() call returns None or when there are no
    more PostProcessors in the chain.

    As with InfoExtractor objects, registration is mutual: the
    downloader keeps the PostProcessor and the PostProcessor keeps a
    reference to the downloader.
    """

    # Downloader this object is registered with, or None if unattached.
    _downloader = None

    def __init__(self, downloader=None):
        self._downloader = downloader

    def to_stdout(self, message):
        """Print message to stdout unless the downloader is in quiet mode."""
        owner = self._downloader
        # Stay silent only when there is a downloader and it asks for quiet.
        if owner is not None and owner.get_params().get('quiet', False):
            return
        print(message)

    def to_stderr(self, message):
        """Print message to stderr."""
        print >>sys.stderr, message

    def set_downloader(self, downloader):
        """Set the downloader this PostProcessor belongs to."""
        self._downloader = downloader

    def run(self, information):
        """Execute this postprocessing step.

        "information" is a dictionary like the ones InfoExtractors
        return, with one extra field, "filepath", that points to the
        downloaded file.

        Returning None stops the postprocessing chain. Returning an
        information dictionary (possibly the received one with some
        fields changed) passes it on to the next PostProcessor in the
        chain.

        A PostProcessingError may also be raised; the downloader that
        called this method will take it into account.
        """
        # The base implementation is a pass-through.
        return information
### MAIN PROGRAM ###
if __name__ == '__main__': if __name__ == '__main__':
try: try:
# Modules needed only when running the main program # Modules needed only when running the main program