/[suikacvs]/markup/html/whatpm/t/tokenizer/test4.test
Suika

Contents of /markup/html/whatpm/t/tokenizer/test4.test

Parent Directory | Revision Log


Revision 1.1 - (hide annotations) (download)
Mon Jul 16 07:03:09 2007 UTC (17 years, 3 months ago) by wakaba
Branch: MAIN
++ whatpm/t/ChangeLog	16 Jul 2007 07:03:04 -0000
	* HTML-tokenizer.t: It now requires a newer version
	of JSON.pm due to the broken handling of utf8 strings
	in older versions.  Test data files |test3.test|
	and |test4.test| are added.  Convert surrogate
	pair |\u| escapes in the JSON file before it is input
	into the JSON.pm parser since it does not decode
	them correctly.  U+000D conversion was incorrect.

	* HTML-tree.t: Support for new format for fragment
	parsing tests.

	* Makefile (tokenizer-files): New test
	data files |test3.test| and |test4.test| are added.

	* tokenizer-test-1.test: Test data for bare |hcro| and
	entity references in attribute values are added.

	* tokenizer-test-2.dat: Test data for U+000D are added.

2007-07-16  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	16 Jul 2007 06:56:42 -0000
	* HTML.pm.src: The character immediately following
	a bare |hcro| was discarded.  Fix handling of
	entity references in attribute values.

2007-07-16  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 {"tests": [
2    
3     {"description":"< in attribute name",
4     "input":"<z/0 <",
5     "output":["ParseError", "ParseError", ["StartTag", "z", {"0": "", "<": ""}]]},
6    
7     {"description":"< in attribute value",
8     "input":"<z x=<",
9     "output":["ParseError", ["StartTag", "z", {"x": "<"}]]},
10    
11     {"description":"CR EOF after doctype name",
12     "input":"<!doctype html \r",
13     "output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
14    
15     {"description":"CR EOF in tag name",
16     "input":"<z\r",
17     "output":["ParseError", ["StartTag", "z", {}]]},
18    
19     {"description":"Zero hex numeric entity",
20     "input":"&#x0",
21     "output":["ParseError", "ParseError", ["Character", "\uFFFD"]]},
22    
23     {"description":"Zero decimal numeric entity",
24     "input":"&#0",
25     "output":["ParseError", "ParseError", ["Character", "\uFFFD"]]},
26    
27     {"description":"Zero-prefixed hex numeric entity",
28     "input":"&#x000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000041;",
29     "output":[["Character", "A"]]},
30    
31     {"description":"Zero-prefixed decimal numeric entity",
32     "input":"&#000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000065;",
33     "output":[["Character", "A"]]},
34    
35     {"description":"Empty hex numeric entities",
36     "input":"&#x &#X ",
37     "output":["ParseError", ["Character", "&#x "], "ParseError", ["Character", "&#X "]]},
38    
39     {"description":"Empty decimal numeric entities",
40     "input":"&# &#; ",
41     "output":["ParseError", ["Character", "&# "], "ParseError", ["Character", "&#; "]]},
42    
43     {"description":"Non-BMP numeric entity",
44     "input":"&#x10000;",
45     "output":[["Character", "\uD800\uDC00"]]},
46    
47     {"description":"Maximum non-BMP numeric entity",
48     "input":"&#X10FFFF;",
49     "output":[["Character", "\uDBFF\uDFFF"]]},
50    
51     {"description":"Above maximum numeric entity",
52     "input":"&#x110000;",
53     "output":["ParseError", ["Character", "\uFFFD"]]},
54    
55     {"description":"32-bit hex numeric entity",
56     "input":"&#x80000041;",
57     "output":["ParseError", ["Character", "\uFFFD"]]},
58    
59     {"description":"33-bit hex numeric entity",
60     "input":"&#x100000041;",
61     "output":["ParseError", ["Character", "\uFFFD"]]},
62    
63     {"description":"33-bit decimal numeric entity",
64     "input":"&#4294967361;",
65     "output":["ParseError", ["Character", "\uFFFD"]]},
66    
67     {"description":"65-bit hex numeric entity",
68     "input":"&#x10000000000000041;",
69     "output":["ParseError", ["Character", "\uFFFD"]]},
70    
71     {"description":"65-bit decimal numeric entity",
72     "input":"&#18446744073709551681;",
73     "output":["ParseError", ["Character", "\uFFFD"]]},
74    
75     {"description":"Surrogate code point edge cases",
76     "input":"&#xD7FF;&#xD800;&#xD801;&#xDFFE;&#xDFFF;&#xE000;",
77     "output":[["Character", "\uD7FF"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD\uE000"]]},
78    
79     {"description":"Uppercase start tag name",
80     "input":"<X>",
81     "output":[["StartTag", "x", {}]]},
82    
83     {"description":"Uppercase end tag name",
84     "input":"</X>",
85     "output":[["EndTag", "x"]]},
86    
87     {"description":"Uppercase attribute name",
88     "input":"<x X>",
89     "output":[["StartTag", "x", { "x":"" }]]},
90    
91     {"description":"Tag/attribute name case edge values",
92     "input":"<x@AZ[`az{ @AZ[`az{>",
93     "output":[["StartTag", "x@az[`az{", { "@az[`az{":"" }]]},
94    
95     {"description":"Duplicate different-case attributes",
96     "input":"<x x=1 x=2 X=3>",
97     "output":["ParseError", "ParseError", ["StartTag", "x", { "x":"1" }]]},
98    
99     {"description":"Uppercase close tag attributes",
100     "input":"</x X>",
101     "output":["ParseError", ["EndTag", "x"]]},
102    
103     {"description":"Duplicate close tag attributes",
104     "input":"</x x x>",
105     "output":["ParseError", "ParseError", ["EndTag", "x"]]},
106    
107     {"description":"Permitted slash",
108     "input":"<br/>",
109     "output":[["StartTag", "br", {}]]},
110    
111     {"description":"Non-permitted slash",
112     "input":"<xr/>",
113     "output":["ParseError", ["StartTag", "xr", {}]]},
114    
115     {"description":"Permitted slash but in close tag",
116     "input":"</br/>",
117     "output":["ParseError", ["EndTag", "br"]]},
118    
119     {"description":"Doctype public case-sensitivity (1)",
120     "input":"<!DoCtYpE HtMl PuBlIc \"AbC\" \"XyZ\">",
121     "output":[["DOCTYPE", "HtMl", "AbC", "XyZ", true]]},
122    
123     {"description":"Doctype public case-sensitivity (2)",
124     "input":"<!dOcTyPe hTmL pUbLiC \"aBc\" \"xYz\">",
125     "output":[["DOCTYPE", "hTmL", "aBc", "xYz", true]]},
126    
127     {"description":"Doctype system case-sensitivity (1)",
128     "input":"<!DoCtYpE HtMl SyStEm \"XyZ\">",
129     "output":[["DOCTYPE", "HtMl", null, "XyZ", true]]},
130    
131     {"description":"Doctype system case-sensitivity (2)",
132     "input":"<!dOcTyPe hTmL sYsTeM \"xYz\">",
133     "output":[["DOCTYPE", "hTmL", null, "xYz", true]]},
134    
135     {"description":"U+0000 in lookahead region after non-matching character",
136     "input":"<!doc>\u0000",
137     "output":["ParseError", ["Comment", "doc"], "ParseError", ["Character", "\uFFFD"]],
138     "ignoreErrorOrder":true},
139    
140     {"description":"U+0000 in lookahead region",
141     "input":"<!doc\u0000",
142     "output":["ParseError", "ParseError", ["Comment", "doc\uFFFD"]],
143     "ignoreErrorOrder":true},
144    
145     {"description":"CR followed by U+0000",
146     "input":"\r\u0000",
147     "output":["ParseError", ["Character", "\n\uFFFD"]],
148     "ignoreErrorOrder":true},
149    
150     {"description":"CR followed by non-LF",
151     "input":"\r?",
152     "output":[["Character", "\n?"]]},
153    
154     {"description":"CR at EOF",
155     "input":"\r",
156     "output":[["Character", "\n"]]},
157    
158     {"description":"LF at EOF",
159     "input":"\n",
160     "output":[["Character", "\n"]]},
161    
162     {"description":"CR LF",
163     "input":"\r\n",
164     "output":[["Character", "\n"]]},
165    
166     {"description":"CR CR",
167     "input":"\r\r",
168     "output":[["Character", "\n\n"]]},
169    
170     {"description":"LF LF",
171     "input":"\n\n",
172     "output":[["Character", "\n\n"]]},
173    
174     {"description":"LF CR",
175     "input":"\n\r",
176     "output":[["Character", "\n\n"]]}
177    
178     ]}

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24