[Motherless] Add new extractor

2014-06-27 18:03:20 +02:00 · 2014-06-27 18:03:20 +02:00 · a69969ee05
commit a69969ee05
parent f2b8db57eb
2 changed files with 94 additions and 0 deletions
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -169,6 +169,7 @@ from .mpora import MporaIE
 from .mofosex import MofosexIE
 from .mooshare import MooshareIE
 from .morningstar import MorningstarIE
 from .motherless import MotherlessIE
 from .motorsport import MotorsportIE
 from .moviezine import MoviezineIE
 from .movshare import MovShareIE
--- a/youtube_dl/extractor/motherless.py
+++ b/youtube_dl/extractor/motherless.py
@ -0,0 +1,93 @@
 from __future__ import unicode_literals
 import datetime
 import re
 from .common import InfoExtractor
 from ..utils import str_to_int
 class MotherlessIE(InfoExtractor):
    """Information Extractor for Motherless"""
    _VALID_URL = r'http://(?:www\.)?motherless\.com/(?P<id>[A-Z0-9]+)'
    _TESTS = [
        {
            'url': 'http://motherless.com/AC3FFE1',
            'md5': '5527fef81d2e529215dad3c2d744a7d9',
            'info_dict': {
                'id': 'AC3FFE1',
                'ext': 'flv',
                'title': 'Fucked in the ass while playing PS3',
                'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],
                'upload_date': '20100913',
                'uploader_id': 'famouslyfuckedup',
                'thumbnail': 'http://thumbs.motherlessmedia.com/thumbs/AC3FFE1.jpg',
                'age_limit': 18,
            }
        },
        {
            'url': 'http://motherless.com/532291B',
            'md5': 'bc59a6b47d1f958e61fbd38a4d31b131',
            'info_dict': {
                'id': '532291B',
                'ext': 'mp4',
                'title': 'Amazing girl playing the omegle game, PERFECT!',
                'categories': ['Amateur', 'webcam', 'omegle', 'pink', 'young', 'masturbate', 'teen', 'game', 'hairy'],
                'upload_date': '20140622',
                'uploader_id': 'Sulivana7x',
                'thumbnail': 'http://thumbs.motherlessmedia.com/thumbs/532291B.jpg',
                'age_limit': 18,
            }
        }
    ]
    def _real_extract(self,url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        title = self._html_search_regex(r'<title>(?P<title>.+?) - MOTHERLESS.COM</title>', webpage, 'title')
        video_url = self._search_regex(r"__fileurl = '(?P<video_url>[^']+)'", webpage, 'video_url')
        thumbnail = self._og_search_thumbnail(webpage)
        age_limit = self._rta_search(webpage)  # Hint: it's 18 ;)
        view_count = str_to_int(self._html_search_regex(r'<strong>Views</strong>(.+?)</h2>', webpage,
                                                        'view_count', flags=re.DOTALL))
        like_count = str_to_int(self._html_search_regex(r'<strong>Favorited</strong>(.+?)</h2>', webpage,
                                                        'like_count', flags=re.DOTALL))
        comment_count = webpage.count('class="media-comment-contents"')
        uploader_id = self._html_search_regex(r'<div class="thumb-member-username">.*?<a [^>]*>(.+?)</a>',
                                              webpage, 'uploader_id', flags=re.DOTALL)
        categories = self._html_search_meta('keywords', webpage)
        if categories is not None:
            categories = [cat.strip() for cat in categories.split(',')]
        upload_date = self._html_search_regex(r'<strong>Uploaded</strong>(.+?)</h2>', webpage,
                                              'upload_date', flags=re.DOTALL)
        mobj = re.search(r'(\d+) days? ago', upload_date, re.I)
        if mobj is not None:
            upload_date = datetime.datetime.now() - datetime.timedelta(days=int(mobj.group(1)))
        else:
            mobj = re.search(r'(\w+) (\d+)\w* (\d+)', upload_date, re.I)
            if mobj is not None:
                upload_date = datetime.datetime.strptime('%s %s %s' % mobj.groups(), '%b %d %Y').date()
            else:
                upload_date = None
        if upload_date is not None:
            upload_date = upload_date.strftime('%Y%m%d')
        return {
            'id': video_id,
            'title': title,
            'upload_date': upload_date,
            'uploader_id': uploader_id,
            'thumbnail': thumbnail,
            'categories': categories,
            'view_count': view_count,
            'like_count': like_count,
            'comment_count': comment_count,
            'age_limit': age_limit,
            'url': video_url,
        }