[tvp] Telewizja Polska: new extractor for tvp.pl, fixes #1719

Thanks-To: mplonski

https://github.com/mplonski/linux/blob/master/tvp-dl.py
This commit is contained in:
Marcin Cieślak 2013-11-05 23:30:25 +01:00
parent 7f34001d57
commit 5137ebac0b
2 changed files with 61 additions and 0 deletions

View file

@ -132,6 +132,7 @@ from .tube8 import Tube8IE
from .tudou import TudouIE from .tudou import TudouIE
from .tumblr import TumblrIE from .tumblr import TumblrIE
from .tutv import TutvIE from .tutv import TutvIE
from .tvp import TvpIE
from .unistra import UnistraIE from .unistra import UnistraIE
from .ustream import UstreamIE, UstreamChannelIE from .ustream import UstreamIE, UstreamChannelIE
from .vbox7 import Vbox7IE from .vbox7 import Vbox7IE

View file

@ -0,0 +1,60 @@
# encoding: utf-8
import re
import json
from .common import InfoExtractor
from ..utils import (
determine_ext,
ExtractorError,
RegexNotFoundError,
)
class TvpIE(InfoExtractor):
    """Information extractor for videos published on tvp.pl (Telewizja Polska)."""

    IE_NAME = u'tvp.pl'
    # Captures the date path segment and the numeric video id.
    _VALID_URL = r'https?://www\.tvp\.pl/.*?wideo/(?P<date>\d+)/(?P<id>\d+)'
    # Metadata endpoint; %s is substituted with the video id.
    _INFO_URL = 'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s'

    _TEST = {
        u'url': u'http://www.tvp.pl/warszawa/magazyny/campusnews/wideo/31102013/12878238',
        u'file': u'31.10.2013-12878238.wmv',
        u'info_dict': {
            u'title': u'31.10.2013',
            u'description': u'31.10.2013',
        },
    }

    def _real_extract(self, url):
        """Extract video information for a tvp.pl video page.

        Downloads the video web page and the JSON metadata document at
        ``_INFO_URL`` and builds the info dictionary from them.

        Args:
            url: A page URL matching ``_VALID_URL``.

        Returns:
            A dict with the keys ``id``, ``title``, ``ext`` and ``url``, plus
            ``description`` and ``thumbnail`` when they can be read from the
            web page.

        Raises:
            ExtractorError: If the metadata is not valid JSON or does not
                provide a ``video_url`` entry.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(
            url, video_id, "Downloading video webpage")
        json_params = self._download_webpage(
            self._INFO_URL % video_id, video_id, "Downloading video metadata")

        # json.loads signals malformed input with ValueError; catching only
        # that keeps KeyboardInterrupt/SystemExit from being swallowed.
        try:
            params = json.loads(json_params)
        except ValueError:
            raise ExtractorError(u'Invalid JSON')

        self.report_extraction(video_id)

        # TypeError covers a payload that is valid JSON but not an object
        # (e.g. a list or null), which cannot be indexed by key.
        try:
            video_url = params['video_url']
        except (KeyError, TypeError):
            raise ExtractorError(u'Missing JSON parameter: video_url')

        # Fall back to the video id when the page has no usable title.
        try:
            title = self._og_search_title(webpage)
        except RegexNotFoundError:
            title = video_id

        info = {
            'id': video_id,
            'title': title,
            'ext': 'wmv',
            'url': video_url,
        }

        # Optional fields are looked up independently so that a missing
        # description does not prevent the thumbnail from being recorded.
        try:
            info['description'] = self._og_search_description(webpage)
        except RegexNotFoundError:
            pass
        try:
            info['thumbnail'] = self._og_search_thumbnail(webpage)
        except RegexNotFoundError:
            pass

        return info