/[suikacvs]/markup/html/whatpm/t/tokenizer/test2.test
Suika

Contents of /markup/html/whatpm/t/tokenizer/test2.test

Parent Directory | Revision Log


Revision 1.4 - (hide annotations) (download)
Mon Jul 16 07:03:09 2007 UTC (18 years ago) by wakaba
Branch: MAIN
Changes since 1.3: +5 -1 lines
++ whatpm/t/ChangeLog	16 Jul 2007 07:03:04 -0000
	* HTML-tokenizer.t: It now requires a newer version
	of JSON.pm because older versions handle utf8 strings
	incorrectly.  Test data files |test3.test|
	and |test4.test| are added.  Convert surrogate
	pair |\u| escapes in JSON file before it is input
	into JSON.pm parser since it does not decode
	them correctly.  U+000D conversion was incorrect.

	* HTML-tree.t: Support for new format for fragment
	parsing tests.

	* Makefile (tokenizer-files): New test
	data files |test3.test| and |test4.test| are added.

	* tokenizer-test-1.test: Test data for bare |hcro| and
	entity references in attribute values are added.

	* tokenizer-test-2.dat: Test data for U+000D are added.

2007-07-16  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	16 Jul 2007 06:56:42 -0000
	* HTML.pm.src: The character immediately following
	a bare |hcro| was discarded.  Fix handling of
	entity references in attribute values.

2007-07-16  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 {"tests": [
2    
3 wakaba 1.2 {"description":"DOCTYPE without name",
4 wakaba 1.1 "input":"<!DOCTYPE>",
5 wakaba 1.2 "output":["ParseError", "ParseError", ["DOCTYPE", "", null, null, false]]},
6 wakaba 1.1
7 wakaba 1.2 {"description":"DOCTYPE without space before name",
8 wakaba 1.1 "input":"<!DOCTYPEhtml>",
9 wakaba 1.2 "output":["ParseError", ["DOCTYPE", "html", null, null, true]]},
10 wakaba 1.1
11 wakaba 1.2 {"description":"Incorrect DOCTYPE without a space before name",
12 wakaba 1.1 "input":"<!DOCTYPEfoo>",
13 wakaba 1.2 "output":["ParseError", ["DOCTYPE", "foo", null, null, true]]},
14 wakaba 1.1
15 wakaba 1.2 {"description":"DOCTYPE with publicId",
16 wakaba 1.1 "input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\">",
17 wakaba 1.2 "output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", null, true]]},
18    
19     {"description":"DOCTYPE with EOF after PUBLIC",
20     "input":"<!DOCTYPE html PUBLIC",
21     "output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
22    
23     {"description":"DOCTYPE with EOF after PUBLIC '",
24     "input":"<!DOCTYPE html PUBLIC '",
25     "output":["ParseError", ["DOCTYPE", "html", "", null, false]]},
26    
27     {"description":"DOCTYPE with EOF after PUBLIC 'x",
28     "input":"<!DOCTYPE html PUBLIC 'x",
29     "output":["ParseError", ["DOCTYPE", "html", "x", null, false]]},
30    
31     {"description":"DOCTYPE with systemId",
32     "input":"<!DOCTYPE html SYSTEM \"-//W3C//DTD HTML Transitional 4.01//EN\">",
33     "output":[["DOCTYPE", "html", null, "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
34    
35     {"description":"DOCTYPE with publicId and systemId",
36     "input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\" \"-//W3C//DTD HTML Transitional 4.01//EN\">",
37     "output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
38 wakaba 1.1
39     {"description":"Incomplete doctype",
40     "input":"<!DOCTYPE html ",
41 wakaba 1.2 "output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
42 wakaba 1.1
43     {"description":"Numeric entity representing the NUL character",
44     "input":"&#0000;",
45 wakaba 1.3 "output":["ParseError", ["Character", "\uFFFD"]]},
46 wakaba 1.1
47     {"description":"Hexadecimal entity representing the NUL character",
48     "input":"&#x0000;",
49 wakaba 1.3 "output":["ParseError", ["Character", "\uFFFD"]]},
50 wakaba 1.1
51     {"description":"Numeric entity representing a codepoint after 1114111 (U+10FFFF)",
52     "input":"&#2225222;",
53 wakaba 1.3 "output":["ParseError", ["Character", "\uFFFD"]]},
54 wakaba 1.1
55     {"description":"Hexadecimal entity representing a codepoint after 1114111 (U+10FFFF)",
56     "input":"&#x1010FFFF;",
57 wakaba 1.3 "output":["ParseError", ["Character", "\uFFFD"]]},
58    
59     {"description":"Hexadecimal entity pair representing a surrogate pair",
60     "input":"&#xD869;&#xDED6;",
61     "output":["ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"]]},
62 wakaba 1.1
63     {"description":"Numeric entity representing a Windows-1252 'codepoint'",
64     "input":"&#137;",
65 wakaba 1.2 "output":["ParseError", ["Character", "\u2030"]]},
66 wakaba 1.1
67     {"description":"Hexadecimal entity representing a Windows-1252 'codepoint'",
68     "input":"&#x89;",
69 wakaba 1.2 "output":["ParseError", ["Character", "\u2030"]]},
70 wakaba 1.1
71     {"description":"Hexadecimal entity with mixed uppercase and lowercase",
72     "input":"&#xaBcD;",
73     "output":[["Character", "\uABCD"]]},
74    
75     {"description":"Entity without a name",
76     "input":"&;",
77     "output":["ParseError", ["Character", "&;"]]},
78    
79     {"description":"Unescaped ampersand in attribute value",
80     "input":"<h a='&'>",
81     "output":["ParseError", ["StartTag", "h", { "a":"&" }]]},
82    
83     {"description":"StartTag containing <",
84     "input":"<a<b>",
85 wakaba 1.2 "output":[["StartTag", "a<b", { }]]},
86 wakaba 1.1
87     {"description":"Non-void element containing trailing /",
88     "input":"<h/>",
89     "output":["ParseError", ["StartTag", "h", { }]]},
90    
91     {"description":"Void element with permitted slash",
92     "input":"<br/>",
93     "output":[["StartTag", "br", { }]]},
94    
95     {"description":"StartTag containing /",
96     "input":"<h/a='b'>",
97     "output":["ParseError", ["StartTag", "h", { "a":"b" }]]},
98    
99     {"description":"Double-quoted attribute value",
100     "input":"<h a=\"b\">",
101     "output":[["StartTag", "h", { "a":"b" }]]},
102    
103     {"description":"Unescaped </",
104     "input":"</",
105     "output":["ParseError", ["Character", "</"]]},
106    
107     {"description":"Illegal end tag name",
108     "input":"</1>",
109     "output":["ParseError", ["Comment", "1"]]},
110    
111     {"description":"Simili processing instruction",
112     "input":"<?namespace>",
113     "output":["ParseError", ["Comment", "?namespace"]]},
114    
115     {"description":"A bogus comment stops at >, even if preceeded by two dashes",
116     "input":"<?foo-->",
117     "output":["ParseError", ["Comment", "?foo--"]]},
118    
119     {"description":"Unescaped <",
120     "input":"foo < bar",
121     "output":[["Character", "foo "], "ParseError", ["Character", "< bar"]]},
122    
123     {"description":"Null Byte Replacement",
124     "input":"\u0000",
125 wakaba 1.4 "output":["ParseError", ["Character", "\ufffd"]]},
126    
127     {"description":"Comment with dash",
128     "input":"<!---x",
129     "output":["ParseError", ["Comment", "-x"]]}
130 wakaba 1.1
131     ]}
132    
133    

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24