Merge branch 'peugeot-vporn'

This commit is contained in:
Sergey M․ 2014-08-31 06:44:14 +07:00
commit 6a400a6339
4 changed files with 106 additions and 1 deletions

View file

@ -211,6 +211,9 @@ class TestUtil(unittest.TestCase):
self.assertEqual(parse_duration('00:01:01'), 61) self.assertEqual(parse_duration('00:01:01'), 61)
self.assertEqual(parse_duration('x:y'), None) self.assertEqual(parse_duration('x:y'), None)
self.assertEqual(parse_duration('3h11m53s'), 11513) self.assertEqual(parse_duration('3h11m53s'), 11513)
self.assertEqual(parse_duration('3h 11m 53s'), 11513)
self.assertEqual(parse_duration('3 hours 11 minutes 53 seconds'), 11513)
self.assertEqual(parse_duration('3 hours 11 mins 53 secs'), 11513)
self.assertEqual(parse_duration('62m45s'), 3765) self.assertEqual(parse_duration('62m45s'), 3765)
self.assertEqual(parse_duration('6m59s'), 419) self.assertEqual(parse_duration('6m59s'), 419)
self.assertEqual(parse_duration('49s'), 49) self.assertEqual(parse_duration('49s'), 49)

View file

@ -393,6 +393,7 @@ from .vine import (
from .viki import VikiIE from .viki import VikiIE
from .vk import VKIE from .vk import VKIE
from .vodlocker import VodlockerIE from .vodlocker import VodlockerIE
from .vporn import VpornIE
from .vube import VubeIE from .vube import VubeIE
from .vuclip import VuClipIE from .vuclip import VuClipIE
from .vulture import VultureIE from .vulture import VultureIE

View file

@ -0,0 +1,99 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
parse_duration,
str_to_int,
)
class VpornIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?vporn\.com/[^/]+/(?P<display_id>[^/]+)/(?P<id>\d+)'
_TEST = {
'url': 'http://www.vporn.com/masturbation/violet-on-her-th-birthday/497944/',
'md5': 'facf37c1b86546fa0208058546842c55',
'info_dict': {
'id': '497944',
'display_id': 'violet-on-her-th-birthday',
'ext': 'mp4',
'title': 'Violet on her 19th birthday',
'description': 'Violet dances in front of the camera which is sure to get you horny.',
'thumbnail': 're:^https?://.*\.jpg$',
'uploader': 'kileyGrope',
'categories': ['Masturbation', 'Teen'],
'duration': 393,
'age_limit': 18,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
display_id = mobj.group('display_id')
webpage = self._download_webpage(url, display_id)
title = self._html_search_regex(
r'videoname\s*=\s*\'([^\']+)\'', webpage, 'title').strip()
description = self._html_search_regex(
r'<div class="description_txt">(.*?)</div>', webpage, 'description', fatal=False)
thumbnail = self._html_search_regex(
r'flashvars\.imageUrl\s*=\s*"([^"]+)"', webpage, 'description', fatal=False, default=None)
if thumbnail:
thumbnail = 'http://www.vporn.com' + thumbnail
uploader = self._html_search_regex(
r'(?s)UPLOADED BY.*?<a href="/user/[^"]+">([^<]+)</a>',
webpage, 'uploader', fatal=False)
categories = re.findall(r'<a href="/cat/[^"]+">([^<]+)</a>', webpage)
duration = parse_duration(self._search_regex(
r'duration (\d+ min \d+ sec)', webpage, 'duration', fatal=False))
view_count = str_to_int(self._html_search_regex(
r'<span>([\d,\.]+) VIEWS</span>', webpage, 'view count', fatal=False))
like_count = str_to_int(self._html_search_regex(
r'<span id="like" class="n">([\d,\.]+)</span>', webpage, 'like count', fatal=False))
dislike_count = str_to_int(self._html_search_regex(
r'<span id="dislike" class="n">([\d,\.]+)</span>', webpage, 'dislike count', fatal=False))
comment_count = str_to_int(self._html_search_regex(
r'<h4>Comments \(<b>([\d,\.]+)</b>\)</h4>', webpage, 'comment count', fatal=False))
formats = []
for video in re.findall(r'flashvars\.videoUrl([^=]+?)\s*=\s*"([^"]+)"', webpage):
video_url = video[1]
fmt = {
'url': video_url,
'format_id': video[0],
}
m = re.search(r'_(?P<width>\d+)x(?P<height>\d+)_(?P<vbr>\d+)k\.mp4$', video_url)
if m:
fmt.update({
'width': int(m.group('width')),
'height': int(m.group('height')),
'vbr': int(m.group('vbr')),
})
formats.append(fmt)
self._sort_formats(formats)
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'uploader': uploader,
'categories': categories,
'duration': duration,
'view_count': view_count,
'like_count': like_count,
'dislike_count': dislike_count,
'comment_count': comment_count,
'age_limit': 18,
'formats': formats,
}

View file

@ -1332,8 +1332,10 @@ def parse_duration(s):
if s is None: if s is None:
return None return None
s = s.strip()
m = re.match( m = re.match(
r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?(?::[0-9]+)?(?P<ms>\.[0-9]+)?$', s) r'(?:(?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?(?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$', s)
if not m: if not m:
return None return None
res = int(m.group('secs')) res = int(m.group('secs'))