Hash :
149c04c0
Author :
Date :
2025-08-02T14:59:02
html: Escape < and > when serializing attributes. This reverts the change in cdaf657f. Coincidentally, the HTML spec just changed to mandate the old escaping behavior: https://github.com/whatwg/html/issues/6235 Fixes #957.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
#!/usr/bin/env python3
# Escape table: each record is [source character, replacement text].
#
# The replacement strings must be spelled as character references. If they
# are stored as the already-decoded characters, the table maps every
# character to itself and the generated C char literals for tab, newline and
# U+FFFD are not valid single-byte literals.
#
# The first record has no source character. gen_content() therefore never
# registers it in `offset`; it simply occupies position 0 of the generated
# content array, which is the value gen_tab() assigns to NUL and to the
# control characters it rejects in XML mode.
entities = [
    [ '',   '&#xFFFD;' ],
    [ '\t', '&#9;' ],
    [ '\n', '&#10;' ],
    [ '\r', '&#13;' ],
    [ '"',  '&quot;' ],
    [ '&',  '&amp;' ],
    [ '<',  '&lt;' ],
    [ '>',  '&gt;' ],
]

# offset[code] is the position of that ASCII character's record inside the
# generated content array. Entries stay None until gen_content() fills them.
offset = [ None ] * 128
def gen_content(out):
    """Write the ``xmlEscapeContent`` C array to *out*.

    Every record of the module-level ``entities`` table is laid out as one
    length cell (the length of the replacement text) followed by one quoted
    cell per replacement character. While laying the records out, the
    position of each record's length cell is stored in the module-level
    ``offset`` table under the code of its source character; a record whose
    source character is empty is emitted but not registered.

    Cells are written twelve per line.
    """
    cells = []
    for char, repl in entities:
        if char:
            # The record starts at the next free cell.
            offset[ord(char)] = len(cells)
        cells.append('%3d,' % len(repl))
        cells.extend("'%s'," % c for c in repl)

    # A cell whose index is a multiple of 12 starts a new line; every other
    # cell is separated from its predecessor by a single space.
    body = ''.join(
        ('\n ' if index % 12 == 0 else ' ') + cell
        for index, cell in enumerate(cells)
    )
    out.write('static const char xmlEscapeContent[] = {%s\n};\n\n' % body)
def gen_tab(out, name, escape, is_xml):
    """Write a 128-entry ``signed char`` lookup table named *name* to *out*.

    For each ASCII code the table holds:

    * the value stored in the module-level ``offset`` table, when the
      character occurs in *escape*;
    * ``0`` for code 0, and - when *is_xml* is true - for every code below 32
      other than 9 (tab) and 10 (newline);
    * ``-1`` otherwise (presumably "emit unchanged" for the C consumer;
      confirm against the code that reads the table).

    ``offset`` must already have been filled in for every character in
    *escape*. Entries are written sixteen per line.
    """
    def classify(code):
        # Explicitly escaped characters take precedence over the defaults.
        if chr(code) in escape:
            return offset[code]
        if code == 0:
            return 0
        if is_xml and code < 32 and code not in (9, 10):
            return 0
        return -1

    cells = []
    for code in range(0x80):
        lead = '\n ' if code % 16 == 0 else ' '
        cells.append(lead + '%2d,' % classify(code))

    out.write('static const signed char %s[128] = {%s\n};\n\n'
              % (name, ''.join(cells)))
# Generate codegen/escape.inc: the content array first (which also fills in
# the offsets the lookup tables depend on), then one lookup table per
# escaping mode.
with open('codegen/escape.inc', 'w') as out:
    gen_content(out)

    # Tables generated in XML mode.
    for tab_name, chars in (
        ('xmlEscapeTab', '\r&<>'),
        ('xmlEscapeTabQuot', '\r"&<>'),
        ('xmlEscapeTabAttr', '\t\n\r"&<>'),
    ):
        gen_tab(out, tab_name, chars, True)

    # The HTML tables are only compiled when LIBXML_HTML_ENABLED is defined.
    out.write('#ifdef LIBXML_HTML_ENABLED\n\n')
    for tab_name, chars in (
        ('htmlEscapeTab', '&<>'),
        ('htmlEscapeTabAttr', '"&<>'),
    ):
        gen_tab(out, tab_name, chars, False)
    out.write('#endif /* LIBXML_HTML_ENABLED */\n')