Add support for direct links to a video (#1973)

This commit is contained in:
Philipp Hagemeister 2013-12-17 12:33:55 +01:00
parent d6c7a367e8
commit 42393ce234
2 changed files with 46 additions and 11 deletions

View file

@ -13,6 +13,8 @@ from ..utils import (
ExtractorError, ExtractorError,
smuggle_url, smuggle_url,
unescapeHTML, unescapeHTML,
unified_strdate,
url_basename,
) )
from .brightcove import BrightcoveIE from .brightcove import BrightcoveIE
@ -71,6 +73,17 @@ class GenericIE(InfoExtractor):
u'skip_download': True, u'skip_download': True,
}, },
}, },
# Direct link to a video
{
u'url': u'http://media.w3.org/2010/05/sintel/trailer.mp4',
u'file': u'trailer.mp4',
u'md5': u'67d406c2bcb6af27fa886f31aa934bbe',
u'info_dict': {
u'id': u'trailer',
u'title': u'trailer',
u'upload_date': u'20100513',
}
}
] ]
def report_download_webpage(self, video_id): def report_download_webpage(self, video_id):
@ -83,7 +96,7 @@ class GenericIE(InfoExtractor):
"""Report information extraction.""" """Report information extraction."""
self._downloader.to_screen(u'[redirect] Following redirect to %s' % new_url) self._downloader.to_screen(u'[redirect] Following redirect to %s' % new_url)
def _test_redirect(self, url): def _send_head(self, url):
"""Check if it is a redirect, like url shorteners, in case return the new url.""" """Check if it is a redirect, like url shorteners, in case return the new url."""
class HeadRequest(compat_urllib_request.Request): class HeadRequest(compat_urllib_request.Request):
def get_method(self): def get_method(self):
@ -131,29 +144,46 @@ class GenericIE(InfoExtractor):
response = opener.open(HeadRequest(url)) response = opener.open(HeadRequest(url))
if response is None: if response is None:
raise ExtractorError(u'Invalid URL protocol') raise ExtractorError(u'Invalid URL protocol')
new_url = response.geturl() return response
if url == new_url:
return False
self.report_following_redirect(new_url)
return new_url
def _real_extract(self, url): def _real_extract(self, url):
parsed_url = compat_urlparse.urlparse(url) parsed_url = compat_urlparse.urlparse(url)
if not parsed_url.scheme: if not parsed_url.scheme:
self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http') self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
return self.url_result('http://' + url) return self.url_result('http://' + url)
video_id = os.path.splitext(url.split('/')[-1])[0]
try: try:
new_url = self._test_redirect(url) response = self._send_head(url)
if new_url:
# Check for redirect
new_url = response.geturl()
if url != new_url:
self.report_following_redirect(new_url)
return self.url_result(new_url) return self.url_result(new_url)
# Check for direct link to a video
content_type = response.headers.get('Content-Type', '')
m = re.match(r'^(?:audio|video)/(?P<format_id>.+)$', content_type)
if m:
upload_date = response.headers.get('Last-Modified')
if upload_date:
upload_date = unified_strdate(upload_date)
assert (url_basename(url) == 'trailer.mp4')
return {
'id': video_id,
'title': os.path.splitext(url_basename(url))[0],
'formats': [{
'format_id': m.group('format_id'),
'url': url,
}],
'upload_date': upload_date,
}
except compat_urllib_error.HTTPError: except compat_urllib_error.HTTPError:
# This may be a stupid server that doesn't like HEAD, our UA, or so # This may be a stupid server that doesn't like HEAD, our UA, or so
pass pass
video_id = url.split('/')[-1]
try: try:
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
except ValueError: except ValueError:

View file

@ -761,12 +761,17 @@ def unified_strdate(date_str):
'%Y-%m-%dT%H:%M:%S.%fZ', '%Y-%m-%dT%H:%M:%S.%fZ',
'%Y-%m-%dT%H:%M:%S.%f0Z', '%Y-%m-%dT%H:%M:%S.%f0Z',
'%Y-%m-%dT%H:%M:%S', '%Y-%m-%dT%H:%M:%S',
'%Y-%m-%dT%H:%M:%S',
] ]
for expression in format_expressions: for expression in format_expressions:
try: try:
upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
except: except:
pass pass
if upload_date is None:
timetuple = email.utils.parsedate_tz(date_str)
if timetuple:
upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
return upload_date return upload_date
def determine_ext(url, default_ext=u'unknown_video'): def determine_ext(url, default_ext=u'unknown_video'):