Compare commits
11 commits
2020.09.20
...
master
Author | SHA1 | Date | |
---|---|---|---|
416da574ec | |||
48c5663c5f | |||
7d740e7dc7 | |||
4eda10499e | |||
605535776a | |||
1050e0d09f | |||
d65d89183f | |||
0c92f1e96b | |||
adae9e844b | |||
c5764b3f89 | |||
0837992a22 |
|
@ -545,7 +545,7 @@ The basic usage is not to set any template arguments when downloading a single f
|
||||||
- `extractor` (string): Name of the extractor
|
- `extractor` (string): Name of the extractor
|
||||||
- `extractor_key` (string): Key name of the extractor
|
- `extractor_key` (string): Key name of the extractor
|
||||||
- `epoch` (numeric): Unix epoch when creating the file
|
- `epoch` (numeric): Unix epoch when creating the file
|
||||||
- `autonumber` (numeric): Five-digit number that will be increased with each download, starting at zero
|
- `autonumber` (numeric): Number that will be increased with each download, starting at `--autonumber-start`
|
||||||
- `playlist` (string): Name or id of the playlist that contains the video
|
- `playlist` (string): Name or id of the playlist that contains the video
|
||||||
- `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the total length of the playlist
|
- `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the total length of the playlist
|
||||||
- `playlist_id` (string): Playlist identifier
|
- `playlist_id` (string): Playlist identifier
|
||||||
|
|
|
@ -994,6 +994,12 @@ class TestUtil(unittest.TestCase):
|
||||||
on = js_to_json('{42:4.2e1}')
|
on = js_to_json('{42:4.2e1}')
|
||||||
self.assertEqual(json.loads(on), {'42': 42.0})
|
self.assertEqual(json.loads(on), {'42': 42.0})
|
||||||
|
|
||||||
|
on = js_to_json('{ "0x40": "0x40" }')
|
||||||
|
self.assertEqual(json.loads(on), {'0x40': '0x40'})
|
||||||
|
|
||||||
|
on = js_to_json('{ "040": "040" }')
|
||||||
|
self.assertEqual(json.loads(on), {'040': '040'})
|
||||||
|
|
||||||
def test_js_to_json_malformed(self):
|
def test_js_to_json_malformed(self):
|
||||||
self.assertEqual(js_to_json('42a1'), '42"a1"')
|
self.assertEqual(js_to_json('42a1'), '42"a1"')
|
||||||
self.assertEqual(js_to_json('42a-1'), '42"a"-1')
|
self.assertEqual(js_to_json('42a-1'), '42"a"-1')
|
||||||
|
|
|
@ -223,9 +223,10 @@ class HttpFD(FileDownloader):
|
||||||
|
|
||||||
def retry(e):
|
def retry(e):
|
||||||
to_stdout = ctx.tmpfilename == '-'
|
to_stdout = ctx.tmpfilename == '-'
|
||||||
if not to_stdout:
|
if ctx.stream is not None:
|
||||||
ctx.stream.close()
|
if not to_stdout:
|
||||||
ctx.stream = None
|
ctx.stream.close()
|
||||||
|
ctx.stream = None
|
||||||
ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename))
|
ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename))
|
||||||
raise RetryDownload(e)
|
raise RetryDownload(e)
|
||||||
|
|
||||||
|
@ -240,7 +241,7 @@ class HttpFD(FileDownloader):
|
||||||
except socket.error as e:
|
except socket.error as e:
|
||||||
# SSLError on python 2 (inherits socket.error) may have
|
# SSLError on python 2 (inherits socket.error) may have
|
||||||
# no errno set but this error message
|
# no errno set but this error message
|
||||||
if e.errno in (errno.ECONNRESET, errno.ETIMEDOUT) or getattr(e, 'message') == 'The read operation timed out':
|
if e.errno in (errno.ECONNRESET, errno.ETIMEDOUT) or getattr(e, 'message', None) == 'The read operation timed out':
|
||||||
retry(e)
|
retry(e)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
|
@ -275,7 +275,7 @@ class AfreecaTVIE(InfoExtractor):
|
||||||
video_element = video_xml.findall(compat_xpath('./track/video'))[-1]
|
video_element = video_xml.findall(compat_xpath('./track/video'))[-1]
|
||||||
if video_element is None or video_element.text is None:
|
if video_element is None or video_element.text is None:
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'Video %s video does not exist' % video_id, expected=True)
|
'Video %s does not exist' % video_id, expected=True)
|
||||||
|
|
||||||
video_url = video_element.text.strip()
|
video_url = video_element.text.strip()
|
||||||
|
|
||||||
|
|
|
@ -15,7 +15,7 @@ from ..utils import (
|
||||||
class ExpressenIE(InfoExtractor):
|
class ExpressenIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:www\.)?expressen\.se/
|
(?:www\.)?(?:expressen|di)\.se/
|
||||||
(?:(?:tvspelare/video|videoplayer/embed)/)?
|
(?:(?:tvspelare/video|videoplayer/embed)/)?
|
||||||
tv/(?:[^/]+/)*
|
tv/(?:[^/]+/)*
|
||||||
(?P<id>[^/?#&]+)
|
(?P<id>[^/?#&]+)
|
||||||
|
@ -42,13 +42,16 @@ class ExpressenIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.expressen.se/videoplayer/embed/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
|
'url': 'https://www.expressen.se/videoplayer/embed/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.di.se/videoplayer/embed/tv/ditv/borsmorgon/implantica-rusar-70--under-borspremiaren-hor-styrelsemedlemmen/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_urls(webpage):
|
def _extract_urls(webpage):
|
||||||
return [
|
return [
|
||||||
mobj.group('url') for mobj in re.finditer(
|
mobj.group('url') for mobj in re.finditer(
|
||||||
r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?expressen\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1',
|
r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?(?:expressen|di)\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1',
|
||||||
webpage)]
|
webpage)]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
|
@ -86,7 +86,8 @@ class IPrimaIE(InfoExtractor):
|
||||||
(r'<iframe[^>]+\bsrc=["\'](?:https?:)?//(?:api\.play-backend\.iprima\.cz/prehravac/embedded|prima\.iprima\.cz/[^/]+/[^/]+)\?.*?\bid=(p\d+)',
|
(r'<iframe[^>]+\bsrc=["\'](?:https?:)?//(?:api\.play-backend\.iprima\.cz/prehravac/embedded|prima\.iprima\.cz/[^/]+/[^/]+)\?.*?\bid=(p\d+)',
|
||||||
r'data-product="([^"]+)">',
|
r'data-product="([^"]+)">',
|
||||||
r'id=["\']player-(p\d+)"',
|
r'id=["\']player-(p\d+)"',
|
||||||
r'playerId\s*:\s*["\']player-(p\d+)'),
|
r'playerId\s*:\s*["\']player-(p\d+)',
|
||||||
|
r'\bvideos\s*=\s*["\'](p\d+)'),
|
||||||
webpage, 'real id')
|
webpage, 'real id')
|
||||||
|
|
||||||
playerpage = self._download_webpage(
|
playerpage = self._download_webpage(
|
||||||
|
|
|
@ -150,7 +150,7 @@ class IqiyiSDKInterpreter(object):
|
||||||
elif function in other_functions:
|
elif function in other_functions:
|
||||||
other_functions[function]()
|
other_functions[function]()
|
||||||
else:
|
else:
|
||||||
raise ExtractorError('Unknown funcion %s' % function)
|
raise ExtractorError('Unknown function %s' % function)
|
||||||
|
|
||||||
return sdk.target
|
return sdk.target
|
||||||
|
|
||||||
|
|
|
@ -8,8 +8,8 @@ from ..utils import int_or_none
|
||||||
|
|
||||||
class TwentyThreeVideoIE(InfoExtractor):
|
class TwentyThreeVideoIE(InfoExtractor):
|
||||||
IE_NAME = '23video'
|
IE_NAME = '23video'
|
||||||
_VALID_URL = r'https?://video\.(?P<domain>twentythree\.net|23video\.com|filmweb\.no)/v\.ihtml/player\.html\?(?P<query>.*?\bphoto(?:_|%5f)id=(?P<id>\d+).*)'
|
_VALID_URL = r'https?://(?P<domain>[^.]+\.(?:twentythree\.net|23video\.com|filmweb\.no))/v\.ihtml/player\.html\?(?P<query>.*?\bphoto(?:_|%5f)id=(?P<id>\d+).*)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'https://video.twentythree.net/v.ihtml/player.html?showDescriptions=0&source=site&photo%5fid=20448876&autoPlay=1',
|
'url': 'https://video.twentythree.net/v.ihtml/player.html?showDescriptions=0&source=site&photo%5fid=20448876&autoPlay=1',
|
||||||
'md5': '75fcf216303eb1dae9920d651f85ced4',
|
'md5': '75fcf216303eb1dae9920d651f85ced4',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -21,11 +21,14 @@ class TwentyThreeVideoIE(InfoExtractor):
|
||||||
'uploader_id': '12258964',
|
'uploader_id': '12258964',
|
||||||
'uploader': 'Rasmus Bysted',
|
'uploader': 'Rasmus Bysted',
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://bonnier-publications-danmark.23video.com/v.ihtml/player.html?token=f0dc46476e06e13afd5a1f84a29e31e8&source=embed&photo%5fid=36137620',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
domain, query, photo_id = re.match(self._VALID_URL, url).groups()
|
domain, query, photo_id = re.match(self._VALID_URL, url).groups()
|
||||||
base_url = 'https://video.%s' % domain
|
base_url = 'https://%s' % domain
|
||||||
photo_data = self._download_json(
|
photo_data = self._download_json(
|
||||||
base_url + '/api/photo/list?' + query, photo_id, query={
|
base_url + '/api/photo/list?' + query, photo_id, query={
|
||||||
'format': 'json',
|
'format': 'json',
|
||||||
|
|
|
@ -19,7 +19,7 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class UstreamIE(InfoExtractor):
|
class UstreamIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?ustream\.tv/(?P<type>recorded|embed|embed/recorded)/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?(?:ustream\.tv|video\.ibm\.com)/(?P<type>recorded|embed|embed/recorded)/(?P<id>\d+)'
|
||||||
IE_NAME = 'ustream'
|
IE_NAME = 'ustream'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.ustream.tv/recorded/20274954',
|
'url': 'http://www.ustream.tv/recorded/20274954',
|
||||||
|
@ -67,12 +67,15 @@ class UstreamIE(InfoExtractor):
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True, # m3u8 download
|
'skip_download': True, # m3u8 download
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://video.ibm.com/embed/recorded/128240221?&autoplay=true&controls=true&volume=100',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_url(webpage):
|
def _extract_url(webpage):
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
|
r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:www\.)?(?:ustream\.tv|video\.ibm\.com)/embed/.+?)\1', webpage)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return mobj.group('url')
|
return mobj.group('url')
|
||||||
|
|
||||||
|
|
|
@ -3181,54 +3181,94 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
|
||||||
_MAX_RESULTS = float('inf')
|
_MAX_RESULTS = float('inf')
|
||||||
IE_NAME = 'youtube:search'
|
IE_NAME = 'youtube:search'
|
||||||
_SEARCH_KEY = 'ytsearch'
|
_SEARCH_KEY = 'ytsearch'
|
||||||
_EXTRA_QUERY_ARGS = {}
|
_SEARCH_PARAMS = None
|
||||||
_TESTS = []
|
_TESTS = []
|
||||||
|
|
||||||
|
def _entries(self, query, n):
|
||||||
|
data = {
|
||||||
|
'context': {
|
||||||
|
'client': {
|
||||||
|
'clientName': 'WEB',
|
||||||
|
'clientVersion': '2.20201021.03.00',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
'query': query,
|
||||||
|
}
|
||||||
|
if self._SEARCH_PARAMS:
|
||||||
|
data['params'] = self._SEARCH_PARAMS
|
||||||
|
total = 0
|
||||||
|
for page_num in itertools.count(1):
|
||||||
|
search = self._download_json(
|
||||||
|
'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
|
||||||
|
video_id='query "%s"' % query,
|
||||||
|
note='Downloading page %s' % page_num,
|
||||||
|
errnote='Unable to download API page', fatal=False,
|
||||||
|
data=json.dumps(data).encode('utf8'),
|
||||||
|
headers={'content-type': 'application/json'})
|
||||||
|
if not search:
|
||||||
|
break
|
||||||
|
slr_contents = try_get(
|
||||||
|
search,
|
||||||
|
(lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
|
||||||
|
lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
|
||||||
|
list)
|
||||||
|
if not slr_contents:
|
||||||
|
break
|
||||||
|
isr_contents = try_get(
|
||||||
|
slr_contents,
|
||||||
|
lambda x: x[0]['itemSectionRenderer']['contents'],
|
||||||
|
list)
|
||||||
|
if not isr_contents:
|
||||||
|
break
|
||||||
|
for content in isr_contents:
|
||||||
|
if not isinstance(content, dict):
|
||||||
|
continue
|
||||||
|
video = content.get('videoRenderer')
|
||||||
|
if not isinstance(video, dict):
|
||||||
|
continue
|
||||||
|
video_id = video.get('videoId')
|
||||||
|
if not video_id:
|
||||||
|
continue
|
||||||
|
title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
|
||||||
|
description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
|
||||||
|
duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
|
||||||
|
view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
|
||||||
|
view_count = int_or_none(self._search_regex(
|
||||||
|
r'^(\d+)', re.sub(r'\s', '', view_count_text),
|
||||||
|
'view count', default=None))
|
||||||
|
uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
|
||||||
|
total += 1
|
||||||
|
yield {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'ie_key': YoutubeIE.ie_key(),
|
||||||
|
'id': video_id,
|
||||||
|
'url': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'duration': duration,
|
||||||
|
'view_count': view_count,
|
||||||
|
'uploader': uploader,
|
||||||
|
}
|
||||||
|
if total == n:
|
||||||
|
return
|
||||||
|
token = try_get(
|
||||||
|
slr_contents,
|
||||||
|
lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
|
||||||
|
compat_str)
|
||||||
|
if not token:
|
||||||
|
break
|
||||||
|
data['continuation'] = token
|
||||||
|
|
||||||
def _get_n_results(self, query, n):
|
def _get_n_results(self, query, n):
|
||||||
"""Get a specified number of results for a query"""
|
"""Get a specified number of results for a query"""
|
||||||
|
return self.playlist_result(self._entries(query, n), query)
|
||||||
videos = []
|
|
||||||
limit = n
|
|
||||||
|
|
||||||
url_query = {
|
|
||||||
'search_query': query.encode('utf-8'),
|
|
||||||
}
|
|
||||||
url_query.update(self._EXTRA_QUERY_ARGS)
|
|
||||||
result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
|
|
||||||
|
|
||||||
for pagenum in itertools.count(1):
|
|
||||||
data = self._download_json(
|
|
||||||
result_url, video_id='query "%s"' % query,
|
|
||||||
note='Downloading page %s' % pagenum,
|
|
||||||
errnote='Unable to download API page',
|
|
||||||
query={'spf': 'navigate'})
|
|
||||||
html_content = data[1]['body']['content']
|
|
||||||
|
|
||||||
if 'class="search-message' in html_content:
|
|
||||||
raise ExtractorError(
|
|
||||||
'[youtube] No video results', expected=True)
|
|
||||||
|
|
||||||
new_videos = list(self._process_page(html_content))
|
|
||||||
videos += new_videos
|
|
||||||
if not new_videos or len(videos) > limit:
|
|
||||||
break
|
|
||||||
next_link = self._html_search_regex(
|
|
||||||
r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
|
|
||||||
html_content, 'next link', default=None)
|
|
||||||
if next_link is None:
|
|
||||||
break
|
|
||||||
result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)
|
|
||||||
|
|
||||||
if len(videos) > n:
|
|
||||||
videos = videos[:n]
|
|
||||||
return self.playlist_result(videos, query)
|
|
||||||
|
|
||||||
|
|
||||||
class YoutubeSearchDateIE(YoutubeSearchIE):
|
class YoutubeSearchDateIE(YoutubeSearchIE):
|
||||||
IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
|
IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
|
||||||
_SEARCH_KEY = 'ytsearchdate'
|
_SEARCH_KEY = 'ytsearchdate'
|
||||||
IE_DESC = 'YouTube.com searches, newest videos first'
|
IE_DESC = 'YouTube.com searches, newest videos first'
|
||||||
_EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
|
_SEARCH_PARAMS = 'CAI%3D'
|
||||||
|
|
||||||
|
|
||||||
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
|
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
|
||||||
|
|
|
@ -4088,12 +4088,12 @@ def js_to_json(code):
|
||||||
'\\\n': '',
|
'\\\n': '',
|
||||||
'\\x': '\\u00',
|
'\\x': '\\u00',
|
||||||
}.get(m.group(0), m.group(0)), v[1:-1])
|
}.get(m.group(0), m.group(0)), v[1:-1])
|
||||||
|
else:
|
||||||
for regex, base in INTEGER_TABLE:
|
for regex, base in INTEGER_TABLE:
|
||||||
im = re.match(regex, v)
|
im = re.match(regex, v)
|
||||||
if im:
|
if im:
|
||||||
i = int(im.group(1), base)
|
i = int(im.group(1), base)
|
||||||
return '"%d":' % i if v.endswith(':') else '%d' % i
|
return '"%d":' % i if v.endswith(':') else '%d' % i
|
||||||
|
|
||||||
return '"%s"' % v
|
return '"%s"' % v
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue