/[suikacvs]/markup/html/whatpm/t/tokenizer/test1.test
Suika

Contents of /markup/html/whatpm/t/tokenizer/test1.test

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (show annotations) (download)
Mon Jul 16 07:03:09 2007 UTC (18 years, 9 months ago) by wakaba
Branch: MAIN
Changes since 1.2: +4 -0 lines
++ whatpm/t/ChangeLog	16 Jul 2007 07:03:04 -0000
	* HTML-tokenizer.t: It now requires a newer version
	of JSON.pm because older versions mishandle utf8
	strings.  Test data files |test3.test|
	and |test4.test| are added.  Convert surrogate
	pair |\u| escapes in JSON file before it is input
	into JSON.pm parser since it does not decode
	them correctly.  U+000D conversion was incorrect.

	* HTML-tree.t: Support for new format for fragment
	parsing tests.

	* Makefile (tokenizer-files): New test
	data files |test3.test| and |test4.test| are added.

	* tokenizer-test-1.test: Test data for bare |hcro| and
	entity references in attribute values are added.

	* tokenizer-test-2.dat: Test data for U+000D are added.

2007-07-16  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	16 Jul 2007 06:56:42 -0000
	* HTML.pm.src: The character immediately following
	a bare |hcro| was discarded.  Fix handling of
	entity references in attribute values.

2007-07-16  Wakaba  <wakaba@suika.fam.cx>

1 {"tests": [
2
3 {"description":"Correct Doctype lowercase",
4 "input":"<!DOCTYPE html>",
5 "output":[["DOCTYPE", "html", null, null, true]]},
6
7 {"description":"Correct Doctype uppercase",
8 "input":"<!DOCTYPE HTML>",
9 "output":[["DOCTYPE", "HTML", null, null, true]]},
10
11 {"description":"Correct Doctype mixed case",
12 "input":"<!DOCTYPE HtMl>",
13 "output":[["DOCTYPE", "HtMl", null, null, true]]},
14
15 {"description":"Truncated doctype start",
16 "input":"<!DOC>",
17 "output":["ParseError", ["Comment", "DOC"]]},
18
19 {"description":"Doctype in error",
20 "input":"<!DOCTYPE foo>",
21 "output":[["DOCTYPE", "foo", null, null, true]]},
22
23 {"description":"Single Start Tag",
24 "input":"<h>",
25 "output":[["StartTag", "h", {}]]},
26
27 {"description":"Empty end tag",
28 "input":"</>",
29 "output":["ParseError"]},
30
31 {"description":"Empty start tag",
32 "input":"<>",
33 "output":["ParseError", ["Character", "<>"]]},
34
35 {"description":"Start Tag w/attribute",
36 "input":"<h a='b'>",
37 "output":[["StartTag", "h", {"a":"b"}]]},
38
39 {"description":"Start Tag w/attribute no quotes",
40 "input":"<h a=b>",
41 "output":[["StartTag", "h", {"a":"b"}]]},
42
43 {"description":"Start/End Tag",
44 "input":"<h></h>",
45 "output":[["StartTag", "h", {}], ["EndTag", "h"]]},
46
47 {"description":"Two unclosed start tags",
48 "input":"<p>One<p>Two",
49 "output":[["StartTag", "p", {}], ["Character", "One"], ["StartTag", "p", {}], ["Character", "Two"]]},
50
51 {"description":"End Tag w/attribute",
52 "input":"<h></h a='b'>",
53 "output":[["StartTag", "h", {}], "ParseError", ["EndTag", "h"]]},
54
55 {"description":"Multiple atts",
56 "input":"<h a='b' c='d'>",
57 "output":[["StartTag", "h", {"a":"b", "c":"d"}]]},
58
59 {"description":"Multiple atts no space",
60 "input":"<h a='b'c='d'>",
61 "output":[["StartTag", "h", {"a":"b", "c":"d"}]]},
62
63 {"description":"Repeated attr",
64 "input":"<h a='b' a='d'>",
65 "output":["ParseError", ["StartTag", "h", {"a":"b"}]]},
66
67 {"description":"Simple comment",
68 "input":"<!--comment-->",
69 "output":[["Comment", "comment"]]},
70
71 {"description":"Comment, Central dash no space",
72 "input":"<!----->",
73 "output":["ParseError", ["Comment", "-"]]},
74
75 {"description":"Comment, two central dashes",
76 "input":"<!-- --comment -->",
77 "output":["ParseError", ["Comment", " --comment "]]},
78
79 {"description":"Unfinished comment",
80 "input":"<!--comment",
81 "output":["ParseError", ["Comment", "comment"]]},
82
83 {"description":"Start of a comment",
84 "input":"<!-",
85 "output":["ParseError", ["Comment", "-"]]},
86
87 {"description":"Short comment",
88 "input":"<!-->",
89 "output":["ParseError", ["Comment", ""]]},
90
91 {"description":"Short comment two",
92 "input":"<!--->",
93 "output":["ParseError", ["Comment", ""]]},
94
95 {"description":"Short comment three",
96 "input":"<!---->",
97 "output":[["Comment", ""]]},
98
99
100 {"description":"Ampersand EOF",
101 "input":"&",
102 "output":[["Character", "&"]]},
103
104 {"description":"Ampersand ampersand EOF",
105 "input":"&&",
106 "output":[["Character", "&&"]]},
107
108 {"description":"Ampersand space EOF",
109 "input":"& ",
110 "output":[["Character", "& "]]},
111
112 {"description":"Unfinished entity",
113 "input":"&f",
114 "output":["ParseError", ["Character", "&f"]]},
115
116 {"description":"Ampersand, number sign",
117 "input":"&#",
118 "output":["ParseError", ["Character", "&#"]]},
119
120 {"description":"Unfinished numeric entity",
121 "input":"&#x",
122 "output":["ParseError", ["Character", "&#x"]]},
123
124 {"description":"Entity with trailing semicolon (1)",
125 "input":"I'm &not;it",
126 "output":[["Character","I'm ¬it"]]},
127
128 {"description":"Entity with trailing semicolon (2)",
129 "input":"I'm &notin;",
130 "output":[["Character","I'm ∉"]]},
131
132 {"description":"Entity without trailing semicolon (1)",
133 "input":"I'm &notit",
134 "output":[["Character","I'm "], "ParseError", ["Character", "¬it"]]},
135
136 {"description":"Entity without trailing semicolon (2)",
137 "input":"I'm &notin",
138 "output":[["Character","I'm "], "ParseError", ["Character", "¬in"]]},
139
140 {"description":"Partial entity match at end of file",
141 "input":"I'm &no",
142 "output":[["Character","I'm "], "ParseError", ["Character", "&no"]]},
143
144 {"description":"ASCII decimal entity",
145 "input":"&#0036;",
146 "output":[["Character","$"]]},
147
148 {"description":"ASCII hexadecimal entity",
149 "input":"&#x3f;",
150 "output":[["Character","?"]]},
151
152 {"description":"Hexadecimal entity in attribute",
153 "input":"<h a='&#x3f;'></h>",
154 "output":[["StartTag", "h", {"a":"?"}], ["EndTag", "h"]]},
155
156 {"description":"Entity in attribute without semicolon ending in x",
157 "input":"<h a='&notx'>",
158 "output":["ParseError", ["StartTag", "h", {"a":"&notx"}]]},
159
160 {"description":"Entity in attribute without semicolon ending in 1",
161 "input":"<h a='&not1'>",
162 "output":["ParseError", ["StartTag", "h", {"a":"&not1"}]]},
163
164 {"description":"Entity in attribute without semicolon ending in i",
165 "input":"<h a='&noti'>",
166 "output":["ParseError", ["StartTag", "h", {"a":"&noti"}]]},
167
168 {"description":"Entity in attribute without semicolon",
169 "input":"<h a='&COPY'>",
170 "output":["ParseError", ["StartTag", "h", {"a":"©"}]]}
171
172 ]}

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24