Merge pull request #2939 from codesparkle/upload-date-fix

Stop erroneously calculating upload_date in some extractors; return the raw timestamp instead
This commit is contained in:
Sergey M. 2014-05-20 19:53:28 +07:00
commit 1a1826c1af
3 changed files with 20 additions and 30 deletions

View file

@ -1,7 +1,6 @@
# encoding: utf-8 # encoding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import datetime
import re import re
from .common import InfoExtractor from .common import InfoExtractor
@ -16,6 +15,7 @@ class AftonbladetIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna', 'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna',
'description': 'Jupiters måne mest aktiv av alla himlakroppar', 'description': 'Jupiters måne mest aktiv av alla himlakroppar',
'timestamp': 1394142732,
'upload_date': '20140306', 'upload_date': '20140306',
}, },
} }
@ -27,17 +27,17 @@ class AftonbladetIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
# find internal video meta data # find internal video meta data
META_URL = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json' meta_url = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json'
internal_meta_id = self._html_search_regex( internal_meta_id = self._html_search_regex(
r'data-aptomaId="([\w\d]+)"', webpage, 'internal_meta_id') r'data-aptomaId="([\w\d]+)"', webpage, 'internal_meta_id')
internal_meta_url = META_URL % internal_meta_id internal_meta_url = meta_url % internal_meta_id
internal_meta_json = self._download_json( internal_meta_json = self._download_json(
internal_meta_url, video_id, 'Downloading video meta data') internal_meta_url, video_id, 'Downloading video meta data')
# find internal video formats # find internal video formats
FORMATS_URL = 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s' format_url = 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s'
internal_video_id = internal_meta_json['videoId'] internal_video_id = internal_meta_json['videoId']
internal_formats_url = FORMATS_URL % internal_video_id internal_formats_url = format_url % internal_video_id
internal_formats_json = self._download_json( internal_formats_json = self._download_json(
internal_formats_url, video_id, 'Downloading video formats') internal_formats_url, video_id, 'Downloading video formats')
@ -54,16 +54,13 @@ class AftonbladetIE(InfoExtractor):
}) })
self._sort_formats(formats) self._sort_formats(formats)
timestamp = datetime.datetime.fromtimestamp(internal_meta_json['timePublished'])
upload_date = timestamp.strftime('%Y%m%d')
return { return {
'id': video_id, 'id': video_id,
'title': internal_meta_json['title'], 'title': internal_meta_json['title'],
'formats': formats, 'formats': formats,
'thumbnail': internal_meta_json['imageUrl'], 'thumbnail': internal_meta_json['imageUrl'],
'description': internal_meta_json['shortPreamble'], 'description': internal_meta_json['shortPreamble'],
'upload_date': upload_date, 'timestamp': internal_meta_json['timePublished'],
'duration': internal_meta_json['duration'], 'duration': internal_meta_json['duration'],
'view_count': internal_meta_json['views'], 'view_count': internal_meta_json['views'],
} }

View file

@ -1,6 +1,5 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import datetime
import json import json
import re import re
@ -19,15 +18,16 @@ class BlinkxIE(InfoExtractor):
'file': '8aQUy7GV.mp4', 'file': '8aQUy7GV.mp4',
'md5': '2e9a07364af40163a908edbf10bb2492', 'md5': '2e9a07364af40163a908edbf10bb2492',
'info_dict': { 'info_dict': {
"title": "Police Car Rolls Away", 'title': 'Police Car Rolls Away',
"uploader": "stupidvideos.com", 'uploader': 'stupidvideos.com',
"upload_date": "20131215", 'upload_date': '20131215',
"description": "A police car gently rolls away from a fight. Maybe it felt weird being around a confrontation and just had to get out of there!", 'timestamp': 1387068000,
"duration": 14.886, 'description': 'A police car gently rolls away from a fight. Maybe it felt weird being around a confrontation and just had to get out of there!',
"thumbnails": [{ 'duration': 14.886,
"width": 100, 'thumbnails': [{
"height": 76, 'width': 100,
"url": "http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg", 'height': 76,
'url': 'http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg',
}], }],
}, },
} }
@ -41,9 +41,6 @@ class BlinkxIE(InfoExtractor):
'video=%s' % video_id) 'video=%s' % video_id)
data_json = self._download_webpage(api_url, display_id) data_json = self._download_webpage(api_url, display_id)
data = json.loads(data_json)['api']['results'][0] data = json.loads(data_json)['api']['results'][0]
dt = datetime.datetime.fromtimestamp(data['pubdate_epoch'])
pload_date = dt.strftime('%Y%m%d')
duration = None duration = None
thumbnails = [] thumbnails = []
formats = [] formats = []
@ -64,10 +61,7 @@ class BlinkxIE(InfoExtractor):
vcodec = remove_start(m['vcodec'], 'ff') vcodec = remove_start(m['vcodec'], 'ff')
acodec = remove_start(m['acodec'], 'ff') acodec = remove_start(m['acodec'], 'ff')
tbr = (int(m['vbr']) + int(m['abr'])) // 1000 tbr = (int(m['vbr']) + int(m['abr'])) // 1000
format_id = (u'%s-%sk-%s' % format_id = u'%s-%sk-%s' % (vcodec, tbr, m['w'])
(vcodec,
tbr,
m['w']))
formats.append({ formats.append({
'format_id': format_id, 'format_id': format_id,
'url': m['link'], 'url': m['link'],
@ -88,7 +82,7 @@ class BlinkxIE(InfoExtractor):
'title': data['title'], 'title': data['title'],
'formats': formats, 'formats': formats,
'uploader': data['channel_name'], 'uploader': data['channel_name'],
'upload_date': pload_date, 'timestamp': data['pubdate_epoch'],
'description': data.get('description'), 'description': data.get('description'),
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'duration': duration, 'duration': duration,

View file

@ -2,7 +2,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
import datetime
from .common import InfoExtractor from .common import InfoExtractor
@ -19,6 +18,7 @@ class MailRuIE(InfoExtractor):
'id': '46301138', 'id': '46301138',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро', 'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро',
'timestamp': 1393232740,
'upload_date': '20140224', 'upload_date': '20140224',
'uploader': 'sonypicturesrus', 'uploader': 'sonypicturesrus',
'uploader_id': 'sonypicturesrus@mail.ru', 'uploader_id': 'sonypicturesrus@mail.ru',
@ -43,7 +43,6 @@ class MailRuIE(InfoExtractor):
thumbnail = movie['poster'] thumbnail = movie['poster']
duration = movie['duration'] duration = movie['duration']
upload_date = datetime.datetime.fromtimestamp(video_data['timestamp']).strftime('%Y%m%d')
view_count = video_data['views_count'] view_count = video_data['views_count']
formats = [ formats = [
@ -57,7 +56,7 @@ class MailRuIE(InfoExtractor):
'id': content_id, 'id': content_id,
'title': title, 'title': title,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'upload_date': upload_date, 'timestamp': video_data['timestamp'],
'uploader': uploader, 'uploader': uploader,
'uploader_id': uploader_id, 'uploader_id': uploader_id,
'duration': duration, 'duration': duration,