[videopress] Add extractor

This commit is contained in:
Sergey M․ 2017-02-05 13:37:27 +07:00
parent 6fd138bed8
commit 6ef3e65a7b
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D
3 changed files with 122 additions and 0 deletions

View file

@ -1095,6 +1095,7 @@ from .videomore import (
VideomoreSeasonIE, VideomoreSeasonIE,
) )
from .videopremium import VideoPremiumIE from .videopremium import VideoPremiumIE
from .videopress import VideoPressIE
from .vidio import VidioIE from .vidio import VidioIE
from .vidme import ( from .vidme import (
VidmeIE, VidmeIE,

View file

@ -81,6 +81,7 @@ from .videa import VideaIE
from .twentymin import TwentyMinutenIE from .twentymin import TwentyMinutenIE
from .ustream import UstreamIE from .ustream import UstreamIE
from .openload import OpenloadIE from .openload import OpenloadIE
from .videopress import VideoPressIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -1473,6 +1474,21 @@ class GenericIE(InfoExtractor):
'skip_download': True, 'skip_download': True,
}, },
'add_ie': [TwentyMinutenIE.ie_key()], 'add_ie': [TwentyMinutenIE.ie_key()],
},
{
# VideoPress embed
'url': 'https://en.support.wordpress.com/videopress/',
'info_dict': {
'id': 'OcobLTqC',
'ext': 'm4v',
'title': 'IMG_5786',
'timestamp': 1435711927,
'upload_date': '20150701',
},
'params': {
'skip_download': True,
},
'add_ie': [VideoPressIE.ie_key()],
} }
# { # {
# # TODO: find another test # # TODO: find another test
@ -2438,6 +2454,12 @@ class GenericIE(InfoExtractor):
return _playlist_from_matches( return _playlist_from_matches(
openload_urls, ie=OpenloadIE.ie_key()) openload_urls, ie=OpenloadIE.ie_key())
# Look for VideoPress embeds
videopress_urls = VideoPressIE._extract_urls(webpage)
if videopress_urls:
return _playlist_from_matches(
videopress_urls, ie=VideoPressIE.ie_key())
# Looking for http://schema.org/VideoObject # Looking for http://schema.org/VideoObject
json_ld = self._search_json_ld( json_ld = self._search_json_ld(
webpage, video_id, default={}, expected_type='VideoObject') webpage, video_id, default={}, expected_type='VideoObject')

View file

@ -0,0 +1,99 @@
# coding: utf-8
from __future__ import unicode_literals
import random
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
determine_ext,
float_or_none,
parse_age_limit,
qualities,
try_get,
unified_timestamp,
urljoin,
)
class VideoPressIE(InfoExtractor):
_VALID_URL = r'https?://videopress\.com/embed/(?P<id>[\da-zA-Z]+)'
_TESTS = [{
'url': 'https://videopress.com/embed/kUJmAcSf',
'md5': '706956a6c875873d51010921310e4bc6',
'info_dict': {
'id': 'kUJmAcSf',
'ext': 'mp4',
'title': 'VideoPress Demo',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 634.6,
'timestamp': 1434983935,
'upload_date': '20150622',
'age_limit': 0,
},
}, {
# 17+, requires birth_* params
'url': 'https://videopress.com/embed/iH3gstfZ',
'only_matching': True,
}]
@staticmethod
def _extract_urls(webpage):
return re.findall(
r'<iframe[^>]+src=["\']((?:https?://)?videopress\.com/embed/[\da-zA-Z]+)',
webpage)
def _real_extract(self, url):
video_id = self._match_id(url)
video = self._download_json(
'https://public-api.wordpress.com/rest/v1.1/videos/%s' % video_id,
video_id, query={
'birth_month': random.randint(1, 12),
'birth_day': random.randint(1, 31),
'birth_year': random.randint(1950, 1995),
})
title = video['title']
def base_url(scheme):
return try_get(
video, lambda x: x['file_url_base'][scheme], compat_str)
base_url = base_url('https') or base_url('http')
QUALITIES = ('std', 'dvd', 'hd')
quality = qualities(QUALITIES)
formats = []
for format_id, f in video['files'].items():
if not isinstance(f, dict):
continue
for ext, path in f.items():
if ext in ('mp4', 'ogg'):
formats.append({
'url': urljoin(base_url, path),
'format_id': '%s-%s' % (format_id, ext),
'ext': determine_ext(path, ext),
'quality': quality(format_id),
})
original_url = try_get(video, lambda x: x['original'], compat_str)
if original_url:
formats.append({
'url': original_url,
'format_id': 'original',
'quality': len(QUALITIES),
})
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'description': video.get('description'),
'thumbnail': video.get('poster'),
'duration': float_or_none(video.get('duration'), 1000),
'timestamp': unified_timestamp(video.get('upload_date')),
'age_limit': parse_age_limit(video.get('rating')),
'formats': formats,
}