[generic] Support embedded vimeo videos (#1602)
This commit is contained in:
parent
cd054fc491
commit
9d4660cab1
4 changed files with 63 additions and 2 deletions
|
@ -1,4 +1,5 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
# Allow direct execution
|
# Allow direct execution
|
||||||
import os
|
import os
|
||||||
|
@ -21,6 +22,8 @@ from youtube_dl.utils import (
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
get_meta_content,
|
get_meta_content,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
|
smuggle_url,
|
||||||
|
unsmuggle_url,
|
||||||
)
|
)
|
||||||
|
|
||||||
if sys.version_info < (3, 0):
|
if sys.version_info < (3, 0):
|
||||||
|
@ -155,5 +158,18 @@ class TestUtil(unittest.TestCase):
|
||||||
self.assertEqual(find('media:song/media:author').text, u'The Author')
|
self.assertEqual(find('media:song/media:author').text, u'The Author')
|
||||||
self.assertEqual(find('media:song/url').text, u'http://server.com/download.mp3')
|
self.assertEqual(find('media:song/url').text, u'http://server.com/download.mp3')
|
||||||
|
|
||||||
|
def test_smuggle_url(self):
    """Check that smuggle_url/unsmuggle_url round-trip data and URL."""
    original_url = 'https://foo.bar/baz?x=y#a'
    payload = {u"ö": u"ö", u"abc": [3]}

    # Smuggled data must survive the round trip and the URL must come
    # back untouched, even though it already contains a fragment.
    recovered_url, recovered_payload = unsmuggle_url(
        smuggle_url(original_url, payload))
    self.assertEqual(original_url, recovered_url)
    self.assertEqual(payload, recovered_payload)

    # A URL that was never smuggled passes through with no data.
    plain_url, plain_payload = unsmuggle_url(original_url)
    self.assertEqual(plain_url, original_url)
    self.assertEqual(plain_payload, None)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -11,6 +11,8 @@ from ..utils import (
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
smuggle_url,
|
||||||
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
from .brightcove import BrightcoveIE
|
from .brightcove import BrightcoveIE
|
||||||
|
|
||||||
|
@ -29,6 +31,17 @@ class GenericIE(InfoExtractor):
|
||||||
u"title": u"R\u00e9gis plante sa Jeep"
|
u"title": u"R\u00e9gis plante sa Jeep"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
# embedded vimeo video
|
||||||
|
{
|
||||||
|
u'url': u'http://skillsmatter.com/podcast/home/move-semanticsperfect-forwarding-and-rvalue-references',
|
||||||
|
u'file': u'22444065.mp4',
|
||||||
|
u'md5': u'2903896e23df39722c33f015af0666e2',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'ACCU 2011: Move Semantics,Perfect Forwarding, and Rvalue references- Scott Meyers- 13/04/2011',
|
||||||
|
u"uploader_id": u"skillsmatter",
|
||||||
|
u"uploader": u"Skills Matter",
|
||||||
|
}
|
||||||
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def report_download_webpage(self, video_id):
|
def report_download_webpage(self, video_id):
|
||||||
|
@ -127,6 +140,14 @@ class GenericIE(InfoExtractor):
|
||||||
bc_url = BrightcoveIE._build_brighcove_url(m_brightcove.group())
|
bc_url = BrightcoveIE._build_brighcove_url(m_brightcove.group())
|
||||||
return self.url_result(bc_url, 'Brightcove')
|
return self.url_result(bc_url, 'Brightcove')
|
||||||
|
|
||||||
|
# Look for embedded Vimeo player
|
||||||
|
mobj = re.search(
|
||||||
|
r'<iframe\s+src="(https?://player.vimeo.com/video/.*?)"', webpage)
|
||||||
|
if mobj:
|
||||||
|
player_url = unescapeHTML(mobj.group(1))
|
||||||
|
surl = smuggle_url(player_url, {'Referer': url})
|
||||||
|
return self.url_result(surl, 'Vimeo')
|
||||||
|
|
||||||
# Start with something easy: JW Player in SWFObject
|
# Start with something easy: JW Player in SWFObject
|
||||||
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
|
|
|
@ -11,6 +11,7 @@ from ..utils import (
|
||||||
get_element_by_attribute,
|
get_element_by_attribute,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
std_headers,
|
std_headers,
|
||||||
|
unsmuggle_url,
|
||||||
)
|
)
|
||||||
|
|
||||||
class VimeoIE(InfoExtractor):
|
class VimeoIE(InfoExtractor):
|
||||||
|
@ -53,7 +54,7 @@ class VimeoIE(InfoExtractor):
|
||||||
u'title': u'Kathy Sierra: Building the minimum Badass User, Business of Software',
|
u'title': u'Kathy Sierra: Building the minimum Badass User, Business of Software',
|
||||||
u'uploader': u'The BLN & Business of Software',
|
u'uploader': u'The BLN & Business of Software',
|
||||||
},
|
},
|
||||||
},
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
|
@ -98,6 +99,12 @@ class VimeoIE(InfoExtractor):
|
||||||
self._login()
|
self._login()
|
||||||
|
|
||||||
def _real_extract(self, url, new_video=True):
|
def _real_extract(self, url, new_video=True):
|
||||||
|
url, data = unsmuggle_url(url)
|
||||||
|
headers = std_headers
|
||||||
|
if data is not None:
|
||||||
|
headers = headers.copy()
|
||||||
|
headers.update(data)
|
||||||
|
|
||||||
# Extract ID from URL
|
# Extract ID from URL
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
|
@ -112,7 +119,7 @@ class VimeoIE(InfoExtractor):
|
||||||
url = 'https://vimeo.com/' + video_id
|
url = 'https://vimeo.com/' + video_id
|
||||||
|
|
||||||
# Retrieve video webpage to extract further information
|
# Retrieve video webpage to extract further information
|
||||||
request = compat_urllib_request.Request(url, None, std_headers)
|
request = compat_urllib_request.Request(url, None, headers)
|
||||||
webpage = self._download_webpage(request, video_id)
|
webpage = self._download_webpage(request, video_id)
|
||||||
|
|
||||||
# Now we begin extracting as much information as we can from what we
|
# Now we begin extracting as much information as we can from what we
|
||||||
|
|
|
@ -945,3 +945,20 @@ class locked_file(object):
|
||||||
|
|
||||||
def shell_quote(args):
    """Return the items of *args* shell-quoted and joined by single spaces."""
    quoted_args = [pipes.quote(arg) for arg in args]
    return ' '.join(quoted_args)
|
||||||
|
|
||||||
|
|
||||||
|
def smuggle_url(url, data):
    """Attach *data* to *url* so it can be passed along internally.

    The data is serialized to JSON, URL-encoded under the
    ``__youtubedl_smuggle`` key and appended to the URL after a ``#``.
    Returns the resulting URL string.
    """
    payload = {u'__youtubedl_smuggle': json.dumps(data)}
    encoded_payload = compat_urllib_parse.urlencode(payload)
    return u'#'.join((url, encoded_payload))
|
||||||
|
|
||||||
|
|
||||||
|
def unsmuggle_url(smug_url, default=None):
    """Split a URL built by ``smuggle_url`` into the URL and its data.

    Returns a ``(url, data)`` tuple. When *smug_url* carries no smuggled
    payload, it is returned unchanged together with *default*, which is
    ``None`` unless the caller passes another fallback value.
    """
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    # The payload sits after the last '#'; it is URL-encoded by
    # smuggle_url, so it cannot itself contain a literal '#'.
    url, _, sdata = smug_url.rpartition(u'#')
    jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0]
    data = json.loads(jsond)
    return url, data
|
||||||
|
|
Loading…
Reference in a new issue