Merge remote-tracking branch 'anisse/fix-content-encoding-charset'

This commit is contained in:
Philipp Hagemeister 2014-04-07 23:27:03 +02:00
commit aafddb2b0a

View file

@@ -251,7 +251,10 @@ class InfoExtractor(object):
with open(filename, 'wb') as outf: with open(filename, 'wb') as outf:
outf.write(webpage_bytes) outf.write(webpage_bytes)
try:
content = webpage_bytes.decode(encoding, 'replace') content = webpage_bytes.decode(encoding, 'replace')
except LookupError:
content = webpage_bytes.decode('utf-8', 'replace')
if (u'<title>Access to this site is blocked</title>' in content and if (u'<title>Access to this site is blocked</title>' in content and
u'Websense' in content[:512]): u'Websense' in content[:512]):