Merge remote-tracking branch 'anisse/fix-content-encoding-charset'
This commit is contained in:
commit
aafddb2b0a
1 changed file with 4 additions and 1 deletion
|
@@ -251,7 +251,10 @@ class InfoExtractor(object):
|
||||||
with open(filename, 'wb') as outf:
|
with open(filename, 'wb') as outf:
|
||||||
outf.write(webpage_bytes)
|
outf.write(webpage_bytes)
|
||||||
|
|
||||||
|
try:
|
||||||
content = webpage_bytes.decode(encoding, 'replace')
|
content = webpage_bytes.decode(encoding, 'replace')
|
||||||
|
except LookupError:
|
||||||
|
content = webpage_bytes.decode('utf-8', 'replace')
|
||||||
|
|
||||||
if (u'<title>Access to this site is blocked</title>' in content and
|
if (u'<title>Access to this site is blocked</title>' in content and
|
||||||
u'Websense' in content[:512]):
|
u'Websense' in content[:512]):
|
||||||
|
|
Loading…
Reference in a new issue