t/tokenizer/test1.test

{"tests": [

{"description":"Correct Doctype lowercase",
"input":"<!DOCTYPE html>",
"output":[["DOCTYPE", "html", null, null, true]]},

{"description":"Correct Doctype uppercase",
"input":"<!DOCTYPE HTML>",
"output":[["DOCTYPE", "HTML", null, null, true]]},

{"description":"Correct Doctype mixed case",
"input":"<!DOCTYPE HtMl>", 
"output":[["DOCTYPE", "HtMl", null, null, true]]},

{"description":"Truncated doctype start",
"input":"<!DOC>", 
"output":["ParseError", ["Comment", "DOC"]]},

{"description":"Doctype in error",
"input":"<!DOCTYPE foo>", 
"output":[["DOCTYPE", "foo", null, null, true]]},

{"description":"Single Start Tag",
"input":"<h>",
"output":[["StartTag", "h", {}]]},

{"description":"Empty end tag",
"input":"</>",
"output":["ParseError"]},

{"description":"Empty start tag",
"input":"<>",
"output":["ParseError", ["Character", "<>"]]},

{"description":"Start Tag w/attribute",
"input":"<h a='b'>",
"output":[["StartTag", "h", {"a":"b"}]]},

{"description":"Start Tag w/attribute no quotes",
"input":"<h a=b>",
"output":[["StartTag", "h", {"a":"b"}]]},

{"description":"Start/End Tag",
"input":"<h></h>",
"output":[["StartTag", "h", {}], ["EndTag", "h"]]},

{"description":"Two unclosed start tags",
"input":"<p>One<p>Two",
"output":[["StartTag", "p", {}], ["Character", "One"], ["StartTag", "p", {}], ["Character", "Two"]]},

{"description":"End Tag w/attribute",
"input":"<h></h a='b'>",
"output":[["StartTag", "h", {}], "ParseError", ["EndTag", "h"]]},

{"description":"Multiple atts",
"input":"<h a='b' c='d'>",
"output":[["StartTag", "h", {"a":"b", "c":"d"}]]},

{"description":"Multiple atts no space",
"input":"<h a='b'c='d'>",
"output":["ParseError", ["StartTag", "h", {"a":"b", "c":"d"}]]},

{"description":"Repeated attr",
 "input":"<h a='b' a='d'>",
 "output":["ParseError", ["StartTag", "h", {"a":"b"}]]},

{"description":"Simple comment",
 "input":"<!--comment-->",
 "output":[["Comment", "comment"]]},

{"description":"Comment, Central dash no space",
 "input":"<!----->",
 "output":["ParseError", ["Comment", "-"]]},

{"description":"Comment, two central dashes",
"input":"<!-- --comment -->",
"output":["ParseError", ["Comment", " --comment "]]},

{"description":"Unfinished comment",
"input":"<!--comment",
"output":["ParseError", ["Comment", "comment"]]},

{"description":"Start of a comment",
"input":"<!-",
"output":["ParseError", ["Comment", "-"]]},

{"description":"Short comment",
 "input":"<!-->",
 "output":["ParseError", ["Comment", ""]]},

{"description":"Short comment two",
 "input":"<!--->",
 "output":["ParseError", ["Comment", ""]]},

{"description":"Short comment three",
 "input":"<!---->",
 "output":[["Comment", ""]]},


{"description":"Ampersand EOF",
"input":"&",
"output":[["Character", "&"]]},

{"description":"Ampersand ampersand EOF",
"input":"&&",
"output":[["Character", "&&"]]},

{"description":"Ampersand space EOF",
"input":"& ",
"output":[["Character", "& "]]},

{"description":"Unfinished entity",
"input":"&f",
"output":["ParseError", ["Character", "&f"]]},

{"description":"Ampersand, number sign",
"input":"&#",
"output":["ParseError", ["Character", "&#"]]},

{"description":"Unfinished numeric entity",
"input":"&#x",
"output":["ParseError", ["Character", "&#x"]]},

{"description":"Entity with trailing semicolon (1)",
"input":"I'm &not;it",
"output":[["Character","I'm ¬it"]]},

{"description":"Entity with trailing semicolon (2)",
"input":"I'm &notin;",
"output":[["Character","I'm ∉"]]},

{"description":"Entity without trailing semicolon (1)",
"input":"I'm &notit",
"output":[["Character","I'm "], "ParseError", ["Character", "¬it"]]},

{"description":"Entity without trailing semicolon (2)",
"input":"I'm &notin",
"output":[["Character","I'm "], "ParseError", ["Character", "¬in"]]},

{"description":"Partial entity match at end of file",
"input":"I'm &no",
"output":[["Character","I'm "], "ParseError", ["Character", "&no"]]},

{"description":"ASCII decimal entity",
"input":"&#0036;",
"output":[["Character","$"]]},

{"description":"ASCII hexadecimal entity",
"input":"&#x3f;",
"output":[["Character","?"]]},

{"description":"Hexadecimal entity in attribute",
"input":"<h a='&#x3f;'></h>",
"output":[["StartTag", "h", {"a":"?"}], ["EndTag", "h"]]},

{"description":"Entity in attribute without semicolon ending in x",
"input":"<h a='&notx'>",
"output":["ParseError", ["StartTag", "h", {"a":"&notx"}]]},

{"description":"Entity in attribute without semicolon ending in 1",
"input":"<h a='&not1'>",
"output":["ParseError", ["StartTag", "h", {"a":"&not1"}]]},

{"description":"Entity in attribute without semicolon ending in i",
"input":"<h a='&noti'>",
"output":["ParseError", ["StartTag", "h", {"a":"&noti"}]]},

{"description":"Entity in attribute without semicolon",
"input":"<h a='&COPY'>",
"output":["ParseError", ["StartTag", "h", {"a":"©"}]]}

]}
1	wakaba	1.1	{"tests": [
2
3			{"description":"Correct Doctype lowercase",
4			"input":"<!DOCTYPE html>",
5	wakaba	1.2	"output":[["DOCTYPE", "html", null, null, true]]},
6	wakaba	1.1
7			{"description":"Correct Doctype uppercase",
8	wakaba	1.2	"input":"<!DOCTYPE HTML>",
9			"output":[["DOCTYPE", "HTML", null, null, true]]},
10	wakaba	1.1
11			{"description":"Correct Doctype mixed case",
12			"input":"<!DOCTYPE HtMl>",
13	wakaba	1.2	"output":[["DOCTYPE", "HtMl", null, null, true]]},
14	wakaba	1.1
15			{"description":"Truncated doctype start",
16			"input":"<!DOC>",
17			"output":["ParseError", ["Comment", "DOC"]]},
18
19			{"description":"Doctype in error",
20			"input":"<!DOCTYPE foo>",
21	wakaba	1.2	"output":[["DOCTYPE", "foo", null, null, true]]},
22	wakaba	1.1
23			{"description":"Single Start Tag",
24			"input":"<h>",
25			"output":[["StartTag", "h", {}]]},
26
27			{"description":"Empty end tag",
28			"input":"</>",
29			"output":["ParseError"]},
30
31			{"description":"Empty start tag",
32			"input":"<>",
33			"output":["ParseError", ["Character", "<>"]]},
34
35			{"description":"Start Tag w/attribute",
36			"input":"<h a='b'>",
37			"output":[["StartTag", "h", {"a":"b"}]]},
38
39			{"description":"Start Tag w/attribute no quotes",
40			"input":"<h a=b>",
41			"output":[["StartTag", "h", {"a":"b"}]]},
42
43			{"description":"Start/End Tag",
44			"input":"<h></h>",
45			"output":[["StartTag", "h", {}], ["EndTag", "h"]]},
46
47			{"description":"Two unclosed start tags",
48			"input":"<p>One<p>Two",
49			"output":[["StartTag", "p", {}], ["Character", "One"], ["StartTag", "p", {}], ["Character", "Two"]]},
50
51			{"description":"End Tag w/attribute",
52			"input":"<h></h a='b'>",
53			"output":[["StartTag", "h", {}], "ParseError", ["EndTag", "h"]]},
54
55			{"description":"Multiple atts",
56			"input":"<h a='b' c='d'>",
57			"output":[["StartTag", "h", {"a":"b", "c":"d"}]]},
58
59			{"description":"Multiple atts no space",
60			"input":"<h a='b'c='d'>",
61	wakaba	1.4	"output":["ParseError", ["StartTag", "h", {"a":"b", "c":"d"}]]},
62	wakaba	1.1
63			{"description":"Repeated attr",
64			"input":"<h a='b' a='d'>",
65			"output":["ParseError", ["StartTag", "h", {"a":"b"}]]},
66
67			{"description":"Simple comment",
68			"input":"<!--comment-->",
69			"output":[["Comment", "comment"]]},
70
71			{"description":"Comment, Central dash no space",
72			"input":"<!----->",
73			"output":["ParseError", ["Comment", "-"]]},
74
75			{"description":"Comment, two central dashes",
76			"input":"<!-- --comment -->",
77			"output":["ParseError", ["Comment", " --comment "]]},
78
79			{"description":"Unfinished comment",
80			"input":"<!--comment",
81			"output":["ParseError", ["Comment", "comment"]]},
82
83			{"description":"Start of a comment",
84			"input":"<!-",
85			"output":["ParseError", ["Comment", "-"]]},
86
87	wakaba	1.2	{"description":"Short comment",
88			"input":"<!-->",
89			"output":["ParseError", ["Comment", ""]]},
90
91			{"description":"Short comment two",
92			"input":"<!--->",
93			"output":["ParseError", ["Comment", ""]]},
94
95			{"description":"Short comment three",
96			"input":"<!---->",
97			"output":[["Comment", ""]]},
98
99
100			{"description":"Ampersand EOF",
101	wakaba	1.1	"input":"&",
102	wakaba	1.2	"output":[["Character", "&"]]},
103
104			{"description":"Ampersand ampersand EOF",
105			"input":"&&",
106			"output":[["Character", "&&"]]},
107
108			{"description":"Ampersand space EOF",
109			"input":"& ",
110			"output":[["Character", "& "]]},
111	wakaba	1.1
112			{"description":"Unfinished entity",
113			"input":"&f",
114	wakaba	1.2	"output":["ParseError", ["Character", "&f"]]},
115	wakaba	1.1
116			{"description":"Ampersand, number sign",
117			"input":"&#",
118	wakaba	1.2	"output":["ParseError", ["Character", "&#"]]},
119	wakaba	1.1
120			{"description":"Unfinished numeric entity",
121			"input":"&#x",
122			"output":["ParseError", ["Character", "&#x"]]},
123
124			{"description":"Entity with trailing semicolon (1)",
125			"input":"I'm &not;it",
126			"output":[["Character","I'm ¬it"]]},
127
128			{"description":"Entity with trailing semicolon (2)",
129			"input":"I'm &notin;",
130			"output":[["Character","I'm ∉"]]},
131
132			{"description":"Entity without trailing semicolon (1)",
133			"input":"I'm &notit",
134	wakaba	1.2	"output":[["Character","I'm "], "ParseError", ["Character", "¬it"]]},
135	wakaba	1.1
136			{"description":"Entity without trailing semicolon (2)",
137			"input":"I'm &notin",
138	wakaba	1.2	"output":[["Character","I'm "], "ParseError", ["Character", "¬in"]]},
139	wakaba	1.1
140			{"description":"Partial entity match at end of file",
141			"input":"I'm &no",
142			"output":[["Character","I'm "], "ParseError", ["Character", "&no"]]},
143
144			{"description":"ASCII decimal entity",
145			"input":"&#0036;",
146			"output":[["Character","$"]]},
147
148			{"description":"ASCII hexadecimal entity",
149			"input":"&#x3f;",
150			"output":[["Character","?"]]},
151
152			{"description":"Hexadecimal entity in attribute",
153			"input":"<h a='&#x3f;'></h>",
154	wakaba	1.2	"output":[["StartTag", "h", {"a":"?"}], ["EndTag", "h"]]},
155
156			{"description":"Entity in attribute without semicolon ending in x",
157			"input":"<h a='&notx'>",
158			"output":["ParseError", ["StartTag", "h", {"a":"&notx"}]]},
159
160			{"description":"Entity in attribute without semicolon ending in 1",
161			"input":"<h a='&not1'>",
162			"output":["ParseError", ["StartTag", "h", {"a":"&not1"}]]},
163
164	wakaba	1.3	{"description":"Entity in attribute without semicolon ending in i",
165			"input":"<h a='&noti'>",
166			"output":["ParseError", ["StartTag", "h", {"a":"&noti"}]]},
167
168	wakaba	1.2	{"description":"Entity in attribute without semicolon",
169			"input":"<h a='&COPY'>",
170			"output":["ParseError", ["StartTag", "h", {"a":"©"}]]}
171	wakaba	1.1
172			]}