lezer-parser / html

An HTML parser for Lezer
MIT License
13 stars 10 forks source link

UnquotedAttributeValue consumes slash of EndTag #14

Closed: milahu closed this issue 6 months ago

milahu commented 6 months ago
<hr singlequoted='val' doublequoted="val" unquoted=val noval= bool/>

actual

node 10 = StartTag                  : "<"                            : "<"
node 22 = TagName                   : "hr"                           : "hr"
node 24 = AttributeName             : "singlequoted"                 : " singlequoted"
node 25 = Is                        : "="                            : "="
node 26 = AttributeValue            : "'val'"                        : "'val'"
node 24 = AttributeName             : "doublequoted"                 : " doublequoted"
node 25 = Is                        : "="                            : "="
node 26 = AttributeValue            : "\"val\""                      : "\"val\""
node 24 = AttributeName             : "unquoted"                     : " unquoted"
node 25 = Is                        : "="                            : "="
node 27 = UnquotedAttributeValue    : "val"                          : "val"
node 24 = AttributeName             : "noval"                        : " noval"
node 25 = Is                        : "="                            : "="
node 27 = UnquotedAttributeValue    : "bool/"                        : " bool/"
node  4 = EndTag                    : ">"                            : ">"
node 16 = Text                      : "\n"                           : "\n"

expected

node 27 = UnquotedAttributeValue    : "bool"                         : " bool"
node  4 = EndTag                    : "/>"                           : "/>"
marijnh commented 6 months ago

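The current output seems to match how browsers parse this tag, so it looks correct: inside an unquoted attribute value a `/` is an ordinary character and becomes part of the value (only whitespace or `>` ends the value). For example: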

document.body.innerHTML = "<div a=foo/></div>" 
document.body.firstChild.getAttribute("a")
→ "foo/"