Add an extractor for rottentomatoes.com and improve InternetVideoArchiveIE to get the best quality

This commit is contained in:
Jaime Marquínez Ferrándiz 2013-10-12 22:21:23 +02:00
parent 3d60d33773
commit 4b7b839f24
4 changed files with 34 additions and 1 deletions

View file

@ -94,6 +94,7 @@ from .rbmaradio import RBMARadioIE
from .redtube import RedTubeIE from .redtube import RedTubeIE
from .ringtv import RingTVIE from .ringtv import RingTVIE
from .ro220 import Ro220IE from .ro220 import Ro220IE
from .rottentomatoes import RottenTomatoesIE
from .roxwel import RoxwelIE from .roxwel import RoxwelIE
from .rtlnow import RTLnowIE from .rtlnow import RTLnowIE
from .sina import SinaIE from .sina import SinaIE

View file

@ -4,6 +4,7 @@ import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_urlparse, compat_urlparse,
compat_urllib_parse,
xpath_with_ns, xpath_with_ns,
determine_ext, determine_ext,
) )
@ -26,6 +27,16 @@ class InternetVideoArchiveIE(InfoExtractor):
def _build_url(query): def _build_url(query):
return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query
@staticmethod
def _clean_query(query):
    """Return a reduced, url-encoded version of the query string *query*.

    Only the 'publishedid' and 'customerid' arguments are kept (using the
    first value of each); 'playerid' and 'videokbrate' are then set to
    fixed values before the result is encoded again.
    """
    kept_args = ['publishedid', 'customerid']
    params = {}
    for name, values in compat_urlparse.parse_qs(query).items():
        if name in kept_args:
            # parse_qs maps every name to a list; only the first value is used
            params[name] = values[0]
    # Other player ids return m3u8 urls
    params['playerid'] = '247'
    # NOTE(review): presumably a deliberately huge bitrate so that the best
    # available quality is served -- confirm
    params['videokbrate'] = '100000'
    return compat_urllib_parse.urlencode(params)
def _real_extract(self, url): def _real_extract(self, url):
query = compat_urlparse.urlparse(url).query query = compat_urlparse.urlparse(url).query
query_dic = compat_urlparse.parse_qs(query) query_dic = compat_urlparse.parse_qs(query)
@ -37,6 +48,11 @@ class InternetVideoArchiveIE(InfoExtractor):
flashconfiguration = xml.etree.ElementTree.fromstring(flashconfiguration_xml.encode('utf-8')) flashconfiguration = xml.etree.ElementTree.fromstring(flashconfiguration_xml.encode('utf-8'))
file_url = flashconfiguration.find('file').text file_url = flashconfiguration.find('file').text
file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx') file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
# Replace some of the parameters in the query to get the best quality
# and http links (no m3u8 manifests)
file_url = re.sub(r'(?<=\?)(.+)$',
lambda m: self._clean_query(m.group()),
file_url)
info_xml = self._download_webpage(file_url, video_id, info_xml = self._download_webpage(file_url, video_id,
u'Downloading video info') u'Downloading video info')
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8')) info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))

View file

@ -0,0 +1,16 @@
from .videodetective import VideoDetectiveIE
# Extractor for rottentomatoes.com trailer pages.
# It uses exactly the same method as videodetective.com (hence the
# VideoDetectiveIE base class): the internetvideoarchive.com URL is
# extracted from the og:video property.
class RottenTomatoesIE(VideoDetectiveIE):
# Matches http(s)://www.rottentomatoes.com/m/<movie>/trailers/<digits>;
# the digits are captured as the 'id' group.
_VALID_URL = r'https?://www\.rottentomatoes\.com/m/[^/]+/trailers/(?P<id>\d+)'
# Sample trailer URL together with the expected file name and metadata
# (presumably consumed by the project's download tests -- confirm).
_TEST = {
u'url': u'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/',
# NOTE(review): the expected file name differs from the id in the URL;
# presumably it is derived from the internetvideoarchive.com id -- confirm.
u'file': '613340.mp4',
u'info_dict': {
u'title': u'TOY STORY 3',
u'description': u'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.',
},
}

View file

@ -16,7 +16,7 @@ class VideoDetectiveIE(InfoExtractor):
u'info_dict': { u'info_dict': {
u'title': u'KICK-ASS 2', u'title': u'KICK-ASS 2',
u'description': u'md5:65ba37ad619165afac7d432eaded6013', u'description': u'md5:65ba37ad619165afac7d432eaded6013',
u'duration': 135, u'duration': 138,
}, },
} }