[pornhub:playlist] Improve extraction (closes #11594)

This commit is contained in:
Sergey M․ 2017-01-04 05:32:18 +07:00
parent 1911d77d28
commit 96d315c2be
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D

View file

@@ -229,7 +229,14 @@ class PornHubPlaylistBaseIE(InfoExtractor):
webpage = self._download_webpage(url, playlist_id)
entries = self._extract_entries(webpage)
# Only process container div with main playlist content skipping
# drop-down menu that uses similar pattern for videos (see
# https://github.com/rg3/youtube-dl/issues/11594).
container = self._search_regex(
r'(?s)(<div[^>]+class=["\']container.+)', webpage,
'container', default=webpage)
entries = self._extract_entries(container)
playlist = self._parse_json(
self._search_regex(
@@ -243,12 +250,12 @@ class PornHubPlaylistBaseIE(InfoExtractor):
class PornHubPlaylistIE(PornHubPlaylistBaseIE):
_VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.pornhub.com/playlist/6201671',
'url': 'http://www.pornhub.com/playlist/4667351',
'info_dict': {
'id': '6201671',
'title': 'P0p4',
'id': '4667351',
'title': 'Nataly Hot',
},
'playlist_mincount': 35,
'playlist_mincount': 2,
}]