Fix detection of the webpage charset when it is declared with single quotes (') instead of double quotes (")
For example: <meta charset='utf-8'/>
This commit is contained in:
parent
2891932bf0
commit
0d75ae2ce3
@@ -150,7 +150,7 @@ class InfoExtractor(object):
|
||||
if m:
|
||||
encoding = m.group(1)
|
||||
else:
|
||||
m = re.search(br'<meta[^>]+charset="?([^"]+)[ /">]',
|
||||
m = re.search(br'<meta[^>]+charset=[\'"]?([^\'")]+)[ /\'">]',
|
||||
webpage_bytes[:1024])
|
||||
if m:
|
||||
encoding = m.group(1).decode('ascii')
|
||||
|
Loading…
Reference in New Issue
Block a user