[extractor/common] Skip html comment tags (Closes #6822)

This commit is contained in:
Sergey M․ 2015-09-11 21:07:32 +06:00
parent 73eb13dfc7
commit 586f1cc532

View file

@@ -731,6 +731,7 @@ class InfoExtractor(object):
@staticmethod
def _hidden_inputs(html):
html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
hidden_inputs = {}
for input in re.findall(r'(?i)<input([^>]+)>', html):
if not re.search(r'type=(["\'])(?:hidden|submit)\1', input):