{"tests": [ {"description":"", "input":"", "output":["ParseError",["DOCTYPE", null, null, null, false]]}, {"description":"", "input":"", "output":[["DOCTYPE", "h", null, null, true]]}, {"description":"", "input":"", "output":[["DOCTYPE", "H", null, null, true]]}, {"description":"", "input":"", "output":[["DOCTYPE", "ht", null, null, true]]}, {"description":"", "input":"", "output":[["DOCTYPE", "Ht", null, null, true]]}, {"description":"", "input":"", "output":[["DOCTYPE", "hT", null, null, true]]}, {"description":"", "input":"", "output":[["DOCTYPE", "hT", null, null, true]]}, {"description":"", "input":"", "output":[["DOCTYPE", "htm", null, null, true]]}, {"description":"", "input":"", "output":[["DOCTYPE", "Htm", null, null, true]]}, {"description":"", "input":"", "output":[["DOCTYPE", "hTM", null, null, true]]}, {"description":"", "input":"", "output":[["DOCTYPE", "html5", null, null, true]]}, {"description":"PUBLIC\u0022\u0022", "input":"", "output":[["DOCTYPE", "html", "", null, true]]}, {"description":"PUBLIC''", "input":"", "output":[["DOCTYPE", "html", "", null, true]]}, {"description":"PUBLICbogus", "input":"", "output":["ParseError",["DOCTYPE", "html", null, null, false]]}, {"description":"PUBLIC bogus", "input":"", "output":["ParseError",["DOCTYPE", "html", null, null, false]]}, {"description":"PUBLIC \u0022\u0022bogus", "input":"", "output":["ParseError",["DOCTYPE", "html", "", null, false]]}, {"description":"PUBLIC \u0022\u0022 bogus", "input":"", "output":["ParseError",["DOCTYPE", "html", "", null, false]]}, {"description":"PUBLIC \u0022\u0022 \u0022\u0022bogus", "input":"", "output":["ParseError",["DOCTYPE", "html", "", "", true]]}, {"description":"PUBLIC \u0022\u0022 \u0022\u0022 bogus", "input":"", "output":["ParseError",["DOCTYPE", "html", "", "", true]]}, {"description":"PUBLIC \u0022\u0022\u0022\u0022>", "input":"", "output":[["DOCTYPE", "html", "", "", true]]}, {"description":"PUBLIC \u0022\u0022''>", "input":"", "output":[["DOCTYPE", "html", "", "", true]]}, {"description":"SYSTEM a", "input":"", "output":["ParseError", ["DOCTYPE", "html", null, null, false]]}, {"description": "garbage after SYSTEM literal", "input": "", "output": ["ParseError", ["DOCTYPE", "html", null, "a", true]]}, {"description": "garbage after PUBLIC and SYSTEM literal", "input": "", "output": ["ParseError", ["DOCTYPE", "html", "c", "a", true]]}, {"description": "garbage immediately after SYSTEM literal", "input": "", "output": ["ParseError", ["DOCTYPE", "html", null, "a", true]]}, {"description": "garbage immediately after PUBLIC and SYSTEM literal", "input": "", "output": ["ParseError", ["DOCTYPE", "html", "c", "a", true]]}, {"description": "", "input": "", "output": ["ParseError", ["Comment", "D"]]}, {"description": "", "input": "", "output": ["ParseError", ["Comment", "DO"]]}, {"description": "", "input": "", "output": ["ParseError", ["Comment", "DOc"]]}, {"description": "", "input": "", "output": ["ParseError", ["Comment", "dOcT"]]}, {"description": "", "input": "", "output": ["ParseError", ["Comment", "doctyp!"]]}, {"description": "", "input": "", "output": ["ParseError", ["Comment", "Doc?"]]}, {"description": ">", "input": ">", "output": ["ParseError", ["Comment", "Doc"]]}, {"description": "", "input": "", "output": ["ParseError", ["DOCTYPE", "HTML", null, null, false]]}, {"description": "", "input": "", "output": ["ParseError", ["DOCTYPE", "HTML", null, null, false]]}, {"description": "", "input": "", "output": ["ParseError", ["DOCTYPE", "HTML", null, null, false]]}, {"description": "", "input": "", "output": ["ParseError", ["DOCTYPE", "HTML", null, null, false]]}, {"description": "", "input": "", "output": ["ParseError", ["DOCTYPE", "HTML", null, null, false]]}, {"description": "", "input": "", "output": ["ParseError", ["DOCTYPE", "HTML", null, null, false]]}, {"description": "", "input": "", "output": ["ParseError", ["DOCTYPE", "HTML", null, null, false]]}, {"description": "", "input": "", "output": ["ParseError", ["DOCTYPE", "HTML", null, null, false]]}, {"description": "", "input": "", "output": ["ParseError", ["DOCTYPE", "HTML", null, null, false]]}, {"description": "", "input": "", "output": ["ParseError", ["DOCTYPE", "HTML", null, null, false]]}, {"description": "", "input": "", "output": ["ParseError", ["DOCTYPE", "HTML", null, null, false]]}, {"description": "", "input": "", "output": ["ParseError", ["DOCTYPE", "HTML", null, null, false]]}, {"description": "", "input": "", "output": ["ParseError", ["DOCTYPE", "HTML", null, null, false]]}, {"description": "", "input": "", "output": ["ParseError", ["DOCTYPE", "HTML", null, null, false]]}, {"description":"", "input":"", "output":[["Comment", "-x"]]}, {"description":"", "input":"", "output":["ParseError", ["Comment", "--x"]]}, {"description":"non-PCDATA content model flag, start tag-like string", "contentModelFlags":["PLAINTEXT","RCDATA","CDATA"], "input":"
", "output":[["Character", "
"]]}, {"description":"end tag followed by character data", "contentModelFlags":["RCDATA","CDATA"], "lastStartTag":"xxx", "input":"ABCD", "output":[["Character", "AB"],["EndTag", "xxx"],["Character", "CD"]]}, {"description":"NULL character reference", "input":"�", "output":["ParseError", ["Character", "\uFFFD"]]}, {"description":"NULL character reference", "input":"�", "output":["ParseError", ["Character", "\uFFFD"]]}, {"description":"character entity ", "input":" ", "output":["ParseError", ["Character", "\u000A"]]}, {"description":"character entity ", "input":" ", "output":["ParseError", ["Character", "\u000A"]]}, {"description":"character entities ", "input":" ", "output":["ParseError", ["Character", "\u000A\u000A"]]}, {"description":"character entity followed by a LF", "input":" \u000A", "output":["ParseError", ["Character", "\u000A\u000A"]]}, {"description":"c1 character references", "input":"€‚ƒ„…†‡", "output":[ "ParseError", ["Character", "\u20AC"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\u201A"], "ParseError", ["Character", "\u0192"], "ParseError", ["Character", "\u201E"], "ParseError", ["Character", "\u2026"], "ParseError", ["Character", "\u2020"], "ParseError", ["Character", "\u2021"] ]}, {"description":"c1 character references", "input":"ˆ‰Š‹ŒŽ", "output":[ "ParseError", ["Character", "\u02C6"], "ParseError", ["Character", "\u2030"], "ParseError", ["Character", "\u0160"], "ParseError", ["Character", "\u2039"], "ParseError", ["Character", "\u0152"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\u017D"], "ParseError", ["Character", "\uFFFD"] ]}, {"description":"c1 character references", "input":"‘’“”•–—", "output":[ "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\u2018"], "ParseError", ["Character", "\u2019"], "ParseError", ["Character", "\u201C"], "ParseError", ["Character", "\u201D"], "ParseError", ["Character", "\u2022"], "ParseError", ["Character", "\u2013"], "ParseError", ["Character", "\u2014"] ]}, {"description":"c1 character references", "input":"˜™š›œžŸ", "output":[ "ParseError", ["Character", "\u02DC"], "ParseError", ["Character", "\u2122"], "ParseError", ["Character", "\u0161"], "ParseError", ["Character", "\u203A"], "ParseError", ["Character", "\u0153"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\u017E"], "ParseError", ["Character", "\u0178"] ]}, {"description":"c1 character references", "input":"€‚ƒ„…†‡", "output":[ "ParseError", ["Character", "\u20AC"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\u201A"], "ParseError", ["Character", "\u0192"], "ParseError", ["Character", "\u201E"], "ParseError", ["Character", "\u2026"], "ParseError", ["Character", "\u2020"], "ParseError", ["Character", "\u2021"] ]}, {"description":"c1 character references", "input":"ˆ‰Š‹ŒŽ", "output":[ "ParseError", ["Character", "\u02C6"], "ParseError", ["Character", "\u2030"], "ParseError", ["Character", "\u0160"], "ParseError", ["Character", "\u2039"], "ParseError", ["Character", "\u0152"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\u017D"], "ParseError", ["Character", "\uFFFD"] ]}, {"description":"c1 character references", "input":"‘’“”•–—", "output":[ "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\u2018"], "ParseError", ["Character", "\u2019"], "ParseError", ["Character", "\u201C"], "ParseError", ["Character", "\u201D"], "ParseError", ["Character", "\u2022"], "ParseError", ["Character", "\u2013"], "ParseError", ["Character", "\u2014"] ]}, {"description":"c1 character references", "input":"˜™š›œžŸ", "output":[ "ParseError", ["Character", "\u02DC"], "ParseError", ["Character", "\u2122"], "ParseError", ["Character", "\u0161"], "ParseError", ["Character", "\u203A"], "ParseError", ["Character", "\u0153"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\u017E"], "ParseError", ["Character", "\u0178"] ]}, {"description":"Numeric entity representing a Windows-1252 'codepoint'", "input":"‰", "output":["ParseError", ["Character", "\u2030"]]}, {"description":"Hexadecimal entity representing a Windows-1252 'codepoint'", "input":"‰", "output":["ParseError", ["Character", "\u2030"]]}, {"description":"surrogate character reference", "input":"�", "output":["ParseError", ["Character", "\uFFFD"]]}, {"description":"surrogate character references", "input":"��", "output":["ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"]]}, {"description":"surrogate character reference", "input":"\uD800�", "output":[["Character", "\uD800"], "ParseError", ["Character", "\uFFFD"]]}, {"description":"surrogate character reference", "input":"�\uDFFF", "output":["ParseError", ["Character", "\uFFFD\uDFFF"]]}, {"description":"surrogate character reference", "input":"�", "output":["ParseError", ["Character", "\uFFFD"]]}, {"description":"surrogate character reference", "input":"�", "output":["ParseError", ["Character", "\uFFFD"]]}, {"description":"non-Unicode character reference", "input":"�", "output":["ParseError", ["Character", "\uFFFD"]]}, {"description":"non-Unicode character reference", "input":"�", "output":["ParseError", ["Character", "\uFFFD"]]}, {"description":"Broken hcro (&#x)", "input":"&#x", "output":["ParseError", ["Character", "&#x"]]}, {"description":"Broken hcro (&#X)", "input":"&#X", "output":["ParseError", ["Character", "&#X"]]}, {"description":"Broken hcro (&#x;)", "input":"&#x;", "output":["ParseError", ["Character", "&#x;"]]}, {"description":"Broken hcro (&#X;)", "input":"&#X;", "output":["ParseError", ["Character", "&#X;"]]}, {"description":"Broken hcro (&#xg)", "input":"&#xg", "output":["ParseError", ["Character", "&#xg"]]}, {"description":"Broken hcro (&#Xg)", "input":"&#Xg", "output":["ParseError", ["Character", "&#Xg"]]}, {"description":"Broken hcro (&#xg;)", "input":"&#xg;", "output":["ParseError", ["Character", "&#xg;"]]}, {"description":"Broken hcro (&#Xg;)", "input":"&#Xg;", "output":["ParseError", ["Character", "&#Xg;"]]}, {"description":"Broken hcro (&#x!)", "input":"&#x!", "output":["ParseError", ["Character", "&#x!"]]}, {"description":"Broken hcro (&#X!)", "input":"&#X!", "output":["ParseError", ["Character", "&#X!"]]}, {"description":"NULL character", "input":"\u0000", "output":["ParseError",["Character", "\uFFFD"]]}, {"description":"™", "input":"™", "output":[["Character", "\u2122"]]}, {"description":"⟨", "input":"⟨", "output":[["Character", "\u27E8"]]}, {"description":"⟩", "input":"⟩", "output":[["Character", "\u27E9"]]}, {"description":"&;", "input":"&;", "output":[["Character", "&;"]]}, {"description":"&HT", "input":"&\u0009", "output":[["Character", "&\u0009"]]}, {"description":"&LF", "input":"&\u000A", "output":[["Character", "&\u000A"]]}, {"description":"&VT", "input":"&\u000B", "output":[["Character", "&\u000B"]]}, {"description":"&FF", "input":"&\u000C", "output":[["Character", "&\u000C"]]}, {"description":"&SP", "input":"& ", "output":[["Character", "& "]]}, {"description":"&<", "input":"&<", "output":[["Character", "&"], "ParseError", ["Character", "<"]]}, {"description":"&

", "input":"&

", "output":[["Character", "&"], ["StartTag", "p", {}]]}, {"description":"&&", "input":"&&", "output":[["Character", "&&"]]}, {"description":"&&", "input":"&&", "output":[["Character", "&&"]]}, {"description":"&", "input":"&", "output":[["Character", "&"]]}, {"description":"named character reference w/o refc", "input":"&abc", "output":["ParseError", ["Character", "&abc"]]}, {"description":"named character reference", "input":"&abc", "output":[["Character", "&abc"]]}, {"description":"named character reference w/o refc", "input":"&abc", "output":["ParseError", ["Character", "&abc"]]}, {"description":"unknown named character reference", "input":"&abc;", "output":["ParseError", ["Character", "&abc;"]]}, {"description":"named character reference w/o refc in attr", "input":"", "output":["ParseError", ["StartTag", "a", {"href": "&abc"}]]}, {"description":"named character reference in attr", "input":"", "output":[["StartTag", "a", {"href": "&abc"}]]}, {"description":"named character reference w/o refc in attr", "input":"", "output":["ParseError", ["StartTag", "a", {"href": "&abc"}]]}, {"description":"unknown named character reference in attr", "input":"", "output":["ParseError", ["StartTag", "a", {"href": "&abc;"}]]}, {"description":"entity w/o refc at the end of unterminated attribute value", "input":"", "output":["ParseError", ["StartTag", "a", {"href": "\u00A9"}]]}, {"description":"entity w/refc at the end of attribute value", "input":"", "output":[["StartTag", "a", {"href": "\u00A9"}]]}, {"description":"entity w/o refc href='©!'", "input":"", "output":["ParseError", ["StartTag", "a", {"href": "\u00A9!"}]]}, {"description":"entity w/o refc href='©=ok'", "input":"", "output":["ParseError", ["StartTag", "a", {"href": "\u00A9=ok"}]]}, {"description":"entity w/o refc href='©right'", "input":"", "output":["ParseError", ["StartTag", "a", {"href": "©right"}]]}, {"description":"entity w/o refc href='©right;'", "input":"", "output":["ParseError", ["StartTag", "a", {"href": "©right;"}]]}, {"description":"entity w/o refc href='©rightc'", "input":"", "output":["ParseError", ["StartTag", "a", {"href": "©rightc"}]]}, {"description":"entity w/o refc href='©rightcd'", "input":"", "output":["ParseError", ["StartTag", "a", {"href": "©rightcd"}]]}, {"description":"cdata end tag containing <", "contentModelFlags":["RCDATA","CDATA"], "lastStartTag":"xxx", "input":"", "output":[["Character", ""]]}, {"description":"< in tag name state", "input":"", "output":[["StartTag", "p", "output":[["StartTag", "p", {"", "output":[["StartTag", "p", {"align", "output":[["StartTag", "p", {"align":"","", "output":[["StartTag", "p", {"align":"", "output":[["StartTag", "p", {"align":"left", "output": ["ParseError", "ParseError", "ParseError", ["StartTag", "a", {"''b": ""}]]}, {"description":"/\"\"b=\"\"", "input": "", "output": ["ParseError", "ParseError", "ParseError", ["StartTag", "a", {"\"\"b": ""}]]}, {"description":"\"b=\"\"", "input": "", "output": ["ParseError", ["StartTag", "a", {"\"b": ""}]]}, {"description":"\"\"b=\"\"", "input": "", "output": ["ParseError", "ParseError", ["StartTag", "a", {"\"\"b": ""}]]}, {"description":"'b=\"\"", "input": "", "output": ["ParseError", ["StartTag", "a", {"'b": ""}]]}, {"description":"''b=\"\"", "input": "", "output": ["ParseError", "ParseError", ["StartTag", "a", {"''b": ""}]]}, {"description":"/b=\"\"", "input": "", "output": ["ParseError", ["StartTag", "a", {"b": ""}]]}, {"description":"b/=\"\"", "input": "", "output": ["ParseError", "ParseError", "ParseError", "ParseError", ["StartTag", "a", {"b": "", "=\"\"": ""}]]}, {"description":"b/c=\"\"", "input": "", "output": ["ParseError", ["StartTag", "a", {"b": "", "c": ""}]]}, {"description":"bc[NULL]=\"\"", "input": "", "output": ["ParseError", ["StartTag", "a", {"bc\uFFFD": ""}]]}, {"description":"b[NULL]c=\"\"", "input": "", "output": ["ParseError", ["StartTag", "a", {"b\uFFFDc": ""}]]}, {"description":"[NULL]bc=\"\"", "input": "", "output": ["ParseError", ["StartTag", "a", {"\uFFFDbc": ""}]]}, {"description": "", "input": "", "output": ["ParseError", "ParseError", ["StartTag", "span", {"=": "="}]]}, {"description": "", "input": "", "output": ["ParseError", ["StartTag", "span", {"a": "="}]]}, {"description": "", "input": "", "output": ["ParseError", ["StartTag", "span", {"=": "a"}]]}, {"description": "", "input": "", "output": ["ParseError", ["StartTag", "span", {"a": "b="}]]}, {"description": "", "input": "", "output": ["ParseError", "ParseError", "ParseError", ["StartTag", "span", {"=x\"": "x>"}]]}, {"description": "", "input": "", "output": ["ParseError", "ParseError", ["StartTag", "span", {"=x\"": "x"}]]}, {"description":"&\"", "input": "&\"", "output": ["ParseError", ["Character", "&\""]]}, {"description":"&'", "input": "&'", "output": ["ParseError", ["Character", "&'"]]}, {"description":"", "input": "", "output": [["StartTag", "a", {"href": "&"}]]}, {"description":"", "input": "", "output": ["ParseError", ["StartTag", "a", {"href": "&'"}]]}, {"description":"", "input": "", "output": [["StartTag", "a", {"href": "&"}]]}, {"description":"", "input": "", "output": ["ParseError", ["StartTag", "a", {"href": "&\""}]]}, {"description":"", "input": "", "output": ["ParseError", "ParseError", ["StartTag", "a", {"href": "&\""}]]}, {"description":"", "input": "", "output": ["ParseError", "ParseError", ["StartTag", "a", {"href": "&'"}]]}, {"description": "-->", "input": "-->", "output": ["ParseError", ["Comment", "-"], ["Character", "-->"]]}, {"description": "-->", "input": "-->", "output": ["ParseError", ["Comment", "!---"], ["Character", "-->"]]}, {"description": "", "input": "", "output": ["ParseError", ["StartTag", "a", {"href": "", "rel": ""}]]}, {"description": "", "input": "", "output": ["ParseError", ["StartTag", "a", {"href": "", "rel": ""}]]}, {"description": "", "input": "", "output": ["ParseError", ["StartTag", "a", {"href": "", "rel": ""}]]}, {"description": "", "input": "", "output": ["ParseError", "ParseError", ["StartTag", "a", {"href": "", "rel": ""}]]}, {"description": "", "input": "", "output": ["ParseError", ["StartTag", "link", {"href": "", "rel": ""}]]}, {"description": "permitted slash after a space", "input": "
", "output": [["StartTag", "br", {}]]}, {"description": "slash in tag after a space", "input": "
", "output": ["ParseError", ["StartTag", "br", {}]]}, {"description": "non-permitted slash after a space", "input": "", "output": ["ParseError", ["StartTag", "b", {}]]}, {"description": "end tag (non-)permitted slash after a space", "input": "
", "output": ["ParseError", ["EndTag", "br"]]}, {"description": "permitted slash after attribute name", "input": "
", "output": [["StartTag", "br", {"class": ""}]]}, {"description": "slash in tag after attribute name", "input": "
", "output": ["ParseError", ["StartTag", "br", {"class": ""}]]}, {"description": "non permitted slash after attribute name", "input": "", "output": ["ParseError", ["StartTag", "b", {"class": ""}]]}, {"description": "end tag (non-)permitted slash after attribute name", "input": "
", "output": ["ParseError", "ParseError", ["EndTag", "br"]]}, {"description": "permitted slash in after attribute name state", "input": "
", "output": [["StartTag", "br", {"class": ""}]]}, {"description": "non permitted slash in after attribute name state", "input": "", "output": ["ParseError", ["StartTag", "b", {"class": ""}]]}, {"description": "end tag (non-)permitted slash in after attribute name state", "input": "
", "output": ["ParseError", "ParseError", ["EndTag", "br"]]}, {"description": "
", "input": "", "output": ["ParseError", ["StartTag", "a", {"\"": ""}]]}, {"description": "", "input": "", "output": ["ParseError", "ParseError", ["StartTag", "a", {"\"\"": ""}]]}, {"description": "", "input": "", "output": ["ParseError", ["StartTag", "a", {"'": ""}]]}, {"description": "", "input": "", "output": ["ParseError", "ParseError", ["StartTag", "a", {"''": ""}]]}, {"description": "", "input": "", "output": ["ParseError", ["StartTag", "a", {"a\"": ""}]]}, {"description": "", "input": "", "output": ["ParseError", ["StartTag", "a", {"a'": ""}]]}, {"description": "", "input": "", "output": ["ParseError", "ParseError", ["StartTag", "a", {"a\"'": ""}]]}, {"description": "", "input": "", "output": ["ParseError", ["StartTag", "a", {"a": ">"}]]}, {"description": "", "output": ["ParseError", ["StartTag", "a", {"a": ">"}]]}, {"description": "", "input": "", "output": ["ParseError", ["StartTag", "a", {"a": "a\""}]]}, {"description": "", "input": "", "output": ["ParseError", ["StartTag", "a", {"a": "a'"}]]}, {"description": "", "input": "", "output": [["StartTag", "a", {"a": ""}]]}, {"description": "end tag attribute (before attribute, >)", "input": "

", "output": ["ParseError", ["EndTag", "p"]]}, {"description": "end tag attribute (before attribute, EOF)", "input": "

", "output": [["EndTag", "p"]]}, {"description": "end tag not closed (attribute name)", "input": "

", "output": ["ParseError", ["EndTag", "p"]]}, {"description": "end tag attributes uc (after attribute name)", "input": "

", "output": ["ParseError", ["EndTag", "p"]]}, {"description": "end tag attributes lc (after attribute name)", "input": "

", "output": ["ParseError", ["EndTag", "p"]]}, {"description": "end tag not closed (after attribute name)", "input": "

", "output": ["ParseError", ["EndTag", "p"]]}, {"description": "end tag not closed (before attribute value)", "input": "

", "output": ["ParseError", ["EndTag", "p"]]}, {"description": "end tag not closed (after attribute value double quoted)", "input": "