[YoutubeDL] Sanitize more fields (#13313)
This commit is contained in:
parent
76e6378358
commit
c996943418
1 changed files with 37 additions and 15 deletions
|
@ -58,6 +58,7 @@ from .utils import (
|
||||||
format_bytes,
|
format_bytes,
|
||||||
formatSeconds,
|
formatSeconds,
|
||||||
GeoRestrictedError,
|
GeoRestrictedError,
|
||||||
|
int_or_none,
|
||||||
ISO3166Utils,
|
ISO3166Utils,
|
||||||
locked_file,
|
locked_file,
|
||||||
make_HTTPS_handler,
|
make_HTTPS_handler,
|
||||||
|
@ -302,6 +303,17 @@ class YoutubeDL(object):
|
||||||
postprocessor.
|
postprocessor.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
_NUMERIC_FIELDS = set((
|
||||||
|
'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
|
||||||
|
'timestamp', 'upload_year', 'upload_month', 'upload_day',
|
||||||
|
'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
|
||||||
|
'average_rating', 'comment_count', 'age_limit',
|
||||||
|
'start_time', 'end_time',
|
||||||
|
'chapter_number', 'season_number', 'episode_number',
|
||||||
|
'track_number', 'disc_number', 'release_year',
|
||||||
|
'playlist_index',
|
||||||
|
))
|
||||||
|
|
||||||
params = None
|
params = None
|
||||||
_ies = []
|
_ies = []
|
||||||
_pps = []
|
_pps = []
|
||||||
|
@ -639,22 +651,11 @@ class YoutubeDL(object):
|
||||||
r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
|
r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
|
||||||
outtmpl)
|
outtmpl)
|
||||||
|
|
||||||
NUMERIC_FIELDS = set((
|
|
||||||
'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
|
|
||||||
'timestamp', 'upload_year', 'upload_month', 'upload_day',
|
|
||||||
'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
|
|
||||||
'average_rating', 'comment_count', 'age_limit',
|
|
||||||
'start_time', 'end_time',
|
|
||||||
'chapter_number', 'season_number', 'episode_number',
|
|
||||||
'track_number', 'disc_number', 'release_year',
|
|
||||||
'playlist_index',
|
|
||||||
))
|
|
||||||
|
|
||||||
# Missing numeric fields used together with integer presentation types
|
# Missing numeric fields used together with integer presentation types
|
||||||
# in format specification will break the argument substitution since
|
# in format specification will break the argument substitution since
|
||||||
# string 'NA' is returned for missing fields. We will patch output
|
# string 'NA' is returned for missing fields. We will patch output
|
||||||
# template for missing fields to meet string presentation type.
|
# template for missing fields to meet string presentation type.
|
||||||
for numeric_field in NUMERIC_FIELDS:
|
for numeric_field in self._NUMERIC_FIELDS:
|
||||||
if numeric_field not in template_dict:
|
if numeric_field not in template_dict:
|
||||||
# As of [1] format syntax is:
|
# As of [1] format syntax is:
|
||||||
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
|
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
|
||||||
|
@ -1345,9 +1346,28 @@ class YoutubeDL(object):
|
||||||
if 'title' not in info_dict:
|
if 'title' not in info_dict:
|
||||||
raise ExtractorError('Missing "title" field in extractor result')
|
raise ExtractorError('Missing "title" field in extractor result')
|
||||||
|
|
||||||
if not isinstance(info_dict['id'], compat_str):
|
def report_force_conversion(field, field_not, conversion):
|
||||||
self.report_warning('"id" field is not a string - forcing string conversion')
|
self.report_warning(
|
||||||
info_dict['id'] = compat_str(info_dict['id'])
|
'"%s" field is not %s - forcing %s conversion, there is an error in extractor'
|
||||||
|
% (field, field_not, conversion))
|
||||||
|
|
||||||
|
def sanitize_string_field(info, string_field):
|
||||||
|
field = info.get(string_field)
|
||||||
|
if field is None or isinstance(field, compat_str):
|
||||||
|
return
|
||||||
|
report_force_conversion(string_field, 'a string', 'string')
|
||||||
|
info[string_field] = compat_str(field)
|
||||||
|
|
||||||
|
def sanitize_numeric_fields(info):
|
||||||
|
for numeric_field in self._NUMERIC_FIELDS:
|
||||||
|
field = info.get(numeric_field)
|
||||||
|
if field is None or isinstance(field, compat_numeric_types):
|
||||||
|
continue
|
||||||
|
report_force_conversion(numeric_field, 'numeric', 'int')
|
||||||
|
info[numeric_field] = int_or_none(field)
|
||||||
|
|
||||||
|
sanitize_string_field(info_dict, 'id')
|
||||||
|
sanitize_numeric_fields(info_dict)
|
||||||
|
|
||||||
if 'playlist' not in info_dict:
|
if 'playlist' not in info_dict:
|
||||||
# It isn't part of a playlist
|
# It isn't part of a playlist
|
||||||
|
@ -1435,6 +1455,8 @@ class YoutubeDL(object):
|
||||||
if 'url' not in format:
|
if 'url' not in format:
|
||||||
raise ExtractorError('Missing "url" key in result (index %d)' % i)
|
raise ExtractorError('Missing "url" key in result (index %d)' % i)
|
||||||
|
|
||||||
|
sanitize_string_field(format, 'format_id')
|
||||||
|
sanitize_numeric_fields(format)
|
||||||
format['url'] = sanitize_url(format['url'])
|
format['url'] = sanitize_url(format['url'])
|
||||||
|
|
||||||
if format.get('format_id') is None:
|
if format.get('format_id') is None:
|
||||||
|
|
Loading…
Reference in a new issue