[brightcove:legacy] Improve embeds detection (closes #11523)

This commit is contained in:
Sergey M․ 2016-12-24 22:46:27 +07:00
parent 264e77c406
commit 53a664edf4
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D
2 changed files with 29 additions and 8 deletions

View file

@ -232,13 +232,16 @@ class BrightcoveLegacyIE(InfoExtractor):
"""Return a list of all Brightcove URLs from the webpage """
url_m = re.search(
r'<meta\s+property=[\'"]og:video[\'"]\s+content=[\'"](https?://(?:secure|c)\.brightcove.com/[^\'"]+)[\'"]',
webpage)
r'''(?x)
<meta\s+
(?:property|itemprop)=([\'"])(?:og:video|embedURL)\1[^>]+
content=([\'"])(?P<url>https?://(?:secure|c)\.brightcove.com/(?:(?!\2).)+)\2
''', webpage)
if url_m:
url = unescapeHTML(url_m.group(1))
url = unescapeHTML(url_m.group('url'))
# Some sites don't add it, we can't download with this url, for example:
# http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/
if 'playerKey' in url or 'videoId' in url:
if 'playerKey' in url or 'videoId' in url or 'idVideo' in url:
return [url]
matches = re.findall(
@ -259,7 +262,7 @@ class BrightcoveLegacyIE(InfoExtractor):
url, smuggled_data = unsmuggle_url(url, {})
# Change the 'videoId' and others field to '@videoPlayer'
url = re.sub(r'(?<=[?&])(videoI(d|D)|bctid)', '%40videoPlayer', url)
url = re.sub(r'(?<=[?&])(videoI(d|D)|idVideo|bctid)', '%40videoPlayer', url)
# Change bckey (used by bcove.me urls) to playerKey
url = re.sub(r'(?<=[?&])bckey', 'playerKey', url)
mobj = re.match(self._VALID_URL, url)

View file

@ -344,10 +344,10 @@ class GenericIE(InfoExtractor):
},
'skip': 'There is a limit of 200 free downloads / month for the test song',
},
# embedded brightcove video
# it also tests brightcove videos that need to set the 'Referer' in the
# http requests
{
# embedded brightcove video
# it also tests brightcove videos that need to set the 'Referer'
# in the http requests
'add_ie': ['BrightcoveLegacy'],
'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
'info_dict': {
@ -361,6 +361,24 @@ class GenericIE(InfoExtractor):
'skip_download': True,
},
},
{
# embedded with itemprop embedURL and video id spelled as `idVideo`
'add_id': ['BrightcoveLegacy'],
'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/',
'info_dict': {
'id': '5255628253001',
'ext': 'mp4',
'title': 'md5:37c519b1128915607601e75a87995fc0',
'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26',
'uploader': 'BFM BUSINESS',
'uploader_id': '876450612001',
'timestamp': 1482255315,
'upload_date': '20161220',
},
'params': {
'skip_download': True,
},
},
{
# https://github.com/rg3/youtube-dl/issues/2253
'url': 'http://bcove.me/i6nfkrc3',