656 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
657 |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
658 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
659 |
if ($self->{current_token}->{attributes}) { |
#if ($self->{current_token}->{attributes}) { |
660 |
!!!cp (36); |
# ## NOTE: This should never be reached. |
661 |
!!!parse-error (type => 'end tag attribute'); |
# !!! cp (36); |
662 |
} else { |
# !!! parse-error (type => 'end tag attribute'); |
663 |
|
#} else { |
664 |
!!!cp (37); |
!!!cp (37); |
665 |
} |
#} |
666 |
} else { |
} else { |
667 |
die "$0: $self->{current_token}->{type}: Unknown token type"; |
die "$0: $self->{current_token}->{type}: Unknown token type"; |
668 |
} |
} |
689 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
690 |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
691 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
692 |
if ($self->{current_token}->{attributes}) { |
#if ($self->{current_token}->{attributes}) { |
693 |
!!!cp (40); |
# ## NOTE: This state should never be reached. |
694 |
!!!parse-error (type => 'end tag attribute'); |
# !!! cp (40); |
695 |
} else { |
# !!! parse-error (type => 'end tag attribute'); |
696 |
|
#} else { |
697 |
!!!cp (41); |
!!!cp (41); |
698 |
} |
#} |
699 |
} else { |
} else { |
700 |
die "$0: $self->{current_token}->{type}: Unknown token type"; |
die "$0: $self->{current_token}->{type}: Unknown token type"; |
701 |
} |
} |
914 |
!!!cp (67); |
!!!cp (67); |
915 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
916 |
} else { |
} else { |
917 |
|
## NOTE: This state should never be reached. |
918 |
!!!cp (68); |
!!!cp (68); |
919 |
} |
} |
920 |
} else { |
} else { |
966 |
!!!cp (74); |
!!!cp (74); |
967 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
968 |
} else { |
} else { |
969 |
|
## NOTE: This state should never be reached. |
970 |
!!!cp (75); |
!!!cp (75); |
971 |
} |
} |
972 |
} else { |
} else { |
1015 |
!!!cp (80); |
!!!cp (80); |
1016 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
1017 |
} else { |
} else { |
1018 |
|
## NOTE: This state should never be reached. |
1019 |
!!!cp (81); |
!!!cp (81); |
1020 |
} |
} |
1021 |
} else { |
} else { |
1072 |
!!!cp (88); |
!!!cp (88); |
1073 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
1074 |
} else { |
} else { |
1075 |
|
## NOTE: This state should never be reached. |
1076 |
!!!cp (89); |
!!!cp (89); |
1077 |
} |
} |
1078 |
} else { |
} else { |
1097 |
!!!cp (91); |
!!!cp (91); |
1098 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
1099 |
} else { |
} else { |
1100 |
|
## NOTE: This state should never be reached. |
1101 |
!!!cp (92); |
!!!cp (92); |
1102 |
} |
} |
1103 |
} else { |
} else { |
1146 |
!!!cp (98); |
!!!cp (98); |
1147 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
1148 |
} else { |
} else { |
1149 |
|
## NOTE: This state should never be reached. |
1150 |
!!!cp (99); |
!!!cp (99); |
1151 |
} |
} |
1152 |
} else { |
} else { |
1190 |
!!!cp (104); |
!!!cp (104); |
1191 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
1192 |
} else { |
} else { |
1193 |
|
## NOTE: This state should never be reached. |
1194 |
!!!cp (105); |
!!!cp (105); |
1195 |
} |
} |
1196 |
} else { |
} else { |
1237 |
!!!cp (110); |
!!!cp (110); |
1238 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
1239 |
} else { |
} else { |
1240 |
|
## NOTE: This state should never be reached. |
1241 |
!!!cp (111); |
!!!cp (111); |
1242 |
} |
} |
1243 |
} else { |
} else { |
1262 |
!!!cp (113); |
!!!cp (113); |
1263 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
1264 |
} else { |
} else { |
1265 |
|
## NOTE: This state should never be reached. |
1266 |
!!!cp (114); |
!!!cp (114); |
1267 |
} |
} |
1268 |
} else { |
} else { |
1334 |
!!!cp (120); |
!!!cp (120); |
1335 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
1336 |
} else { |
} else { |
1337 |
|
## NOTE: This state should never be reached. |
1338 |
!!!cp (121); |
!!!cp (121); |
1339 |
} |
} |
1340 |
} else { |
} else { |
2202 |
0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, & # 0x000D # CR |
0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, & # 0x000D # CR |
2203 |
$additional => 1, |
$additional => 1, |
2204 |
}->{$self->{next_char}}) { |
}->{$self->{next_char}}) { |
2205 |
|
!!!cp (1001); |
2206 |
## Don't consume |
## Don't consume |
2207 |
## No error |
## No error |
2208 |
return undef; |
return undef; |
2216 |
!!!next-input-character; |
!!!next-input-character; |
2217 |
if (0x0030 <= $self->{next_char} and |
if (0x0030 <= $self->{next_char} and |
2218 |
$self->{next_char} <= 0x0039) { # 0..9 |
$self->{next_char} <= 0x0039) { # 0..9 |
2219 |
|
!!!cp (1002); |
2220 |
$code ||= 0; |
$code ||= 0; |
2221 |
$code *= 0x10; |
$code *= 0x10; |
2222 |
$code += $self->{next_char} - 0x0030; |
$code += $self->{next_char} - 0x0030; |
2223 |
redo X; |
redo X; |
2224 |
} elsif (0x0061 <= $self->{next_char} and |
} elsif (0x0061 <= $self->{next_char} and |
2225 |
$self->{next_char} <= 0x0066) { # a..f |
$self->{next_char} <= 0x0066) { # a..f |
2226 |
|
!!!cp (1003); |
2227 |
$code ||= 0; |
$code ||= 0; |
2228 |
$code *= 0x10; |
$code *= 0x10; |
2229 |
$code += $self->{next_char} - 0x0060 + 9; |
$code += $self->{next_char} - 0x0060 + 9; |
2230 |
redo X; |
redo X; |
2231 |
} elsif (0x0041 <= $self->{next_char} and |
} elsif (0x0041 <= $self->{next_char} and |
2232 |
$self->{next_char} <= 0x0046) { # A..F |
$self->{next_char} <= 0x0046) { # A..F |
2233 |
|
!!!cp (1004); |
2234 |
$code ||= 0; |
$code ||= 0; |
2235 |
$code *= 0x10; |
$code *= 0x10; |
2236 |
$code += $self->{next_char} - 0x0040 + 9; |
$code += $self->{next_char} - 0x0040 + 9; |
2237 |
redo X; |
redo X; |
2238 |
} elsif (not defined $code) { # no hexadecimal digit |
} elsif (not defined $code) { # no hexadecimal digit |
2239 |
|
!!!cp (1005); |
2240 |
!!!parse-error (type => 'bare hcro'); |
!!!parse-error (type => 'bare hcro'); |
2241 |
!!!back-next-input-character ($x_char, $self->{next_char}); |
!!!back-next-input-character ($x_char, $self->{next_char}); |
2242 |
$self->{next_char} = 0x0023; # # |
$self->{next_char} = 0x0023; # # |
2243 |
return undef; |
return undef; |
2244 |
} elsif ($self->{next_char} == 0x003B) { # ; |
} elsif ($self->{next_char} == 0x003B) { # ; |
2245 |
|
!!!cp (1006); |
2246 |
!!!next-input-character; |
!!!next-input-character; |
2247 |
} else { |
} else { |
2248 |
|
!!!cp (1007); |
2249 |
!!!parse-error (type => 'no refc'); |
!!!parse-error (type => 'no refc'); |
2250 |
} |
} |
2251 |
|
|
2252 |
if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) { |
if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) { |
2253 |
|
!!!cp (1008); |
2254 |
!!!parse-error (type => sprintf 'invalid character reference:U+%04X', $code); |
!!!parse-error (type => sprintf 'invalid character reference:U+%04X', $code); |
2255 |
$code = 0xFFFD; |
$code = 0xFFFD; |
2256 |
} elsif ($code > 0x10FFFF) { |
} elsif ($code > 0x10FFFF) { |
2257 |
|
!!!cp (1009); |
2258 |
!!!parse-error (type => sprintf 'invalid character reference:U-%08X', $code); |
!!!parse-error (type => sprintf 'invalid character reference:U-%08X', $code); |
2259 |
$code = 0xFFFD; |
$code = 0xFFFD; |
2260 |
} elsif ($code == 0x000D) { |
} elsif ($code == 0x000D) { |
2261 |
|
!!!cp (1010); |
2262 |
!!!parse-error (type => 'CR character reference'); |
!!!parse-error (type => 'CR character reference'); |
2263 |
$code = 0x000A; |
$code = 0x000A; |
2264 |
} elsif (0x80 <= $code and $code <= 0x9F) { |
} elsif (0x80 <= $code and $code <= 0x9F) { |
2265 |
|
!!!cp (1011); |
2266 |
!!!parse-error (type => sprintf 'C1 character reference:U+%04X', $code); |
!!!parse-error (type => sprintf 'C1 character reference:U+%04X', $code); |
2267 |
$code = $c1_entity_char->{$code}; |
$code = $c1_entity_char->{$code}; |
2268 |
} |
} |
2277 |
|
|
2278 |
while (0x0030 <= $self->{next_char} and |
while (0x0030 <= $self->{next_char} and |
2279 |
$self->{next_char} <= 0x0039) { # 0..9 |
$self->{next_char} <= 0x0039) { # 0..9 |
2280 |
|
!!!cp (1012); |
2281 |
$code *= 10; |
$code *= 10; |
2282 |
$code += $self->{next_char} - 0x0030; |
$code += $self->{next_char} - 0x0030; |
2283 |
|
|
2285 |
} |
} |
2286 |
|
|
2287 |
if ($self->{next_char} == 0x003B) { # ; |
if ($self->{next_char} == 0x003B) { # ; |
2288 |
|
!!!cp (1013); |
2289 |
!!!next-input-character; |
!!!next-input-character; |
2290 |
} else { |
} else { |
2291 |
|
!!!cp (1014); |
2292 |
!!!parse-error (type => 'no refc'); |
!!!parse-error (type => 'no refc'); |
2293 |
} |
} |
2294 |
|
|
2295 |
if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) { |
if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) { |
2296 |
|
!!!cp (1015); |
2297 |
!!!parse-error (type => sprintf 'invalid character reference:U+%04X', $code); |
!!!parse-error (type => sprintf 'invalid character reference:U+%04X', $code); |
2298 |
$code = 0xFFFD; |
$code = 0xFFFD; |
2299 |
} elsif ($code > 0x10FFFF) { |
} elsif ($code > 0x10FFFF) { |
2300 |
|
!!!cp (1016); |
2301 |
!!!parse-error (type => sprintf 'invalid character reference:U-%08X', $code); |
!!!parse-error (type => sprintf 'invalid character reference:U-%08X', $code); |
2302 |
$code = 0xFFFD; |
$code = 0xFFFD; |
2303 |
} elsif ($code == 0x000D) { |
} elsif ($code == 0x000D) { |
2304 |
|
!!!cp (1017); |
2305 |
!!!parse-error (type => 'CR character reference'); |
!!!parse-error (type => 'CR character reference'); |
2306 |
$code = 0x000A; |
$code = 0x000A; |
2307 |
} elsif (0x80 <= $code and $code <= 0x9F) { |
} elsif (0x80 <= $code and $code <= 0x9F) { |
2308 |
|
!!!cp (1018); |
2309 |
!!!parse-error (type => sprintf 'C1 character reference:U+%04X', $code); |
!!!parse-error (type => sprintf 'C1 character reference:U+%04X', $code); |
2310 |
$code = $c1_entity_char->{$code}; |
$code = $c1_entity_char->{$code}; |
2311 |
} |
} |
2312 |
|
|
2313 |
return {type => CHARACTER_TOKEN, data => chr $code, has_reference => 1}; |
return {type => CHARACTER_TOKEN, data => chr $code, has_reference => 1}; |
2314 |
} else { |
} else { |
2315 |
|
!!!cp (1019); |
2316 |
!!!parse-error (type => 'bare nero'); |
!!!parse-error (type => 'bare nero'); |
2317 |
!!!back-next-input-character ($self->{next_char}); |
!!!back-next-input-character ($self->{next_char}); |
2318 |
$self->{next_char} = 0x0023; # # |
$self->{next_char} = 0x0023; # # |
2342 |
$entity_name .= chr $self->{next_char}; |
$entity_name .= chr $self->{next_char}; |
2343 |
if (defined $EntityChar->{$entity_name}) { |
if (defined $EntityChar->{$entity_name}) { |
2344 |
if ($self->{next_char} == 0x003B) { # ; |
if ($self->{next_char} == 0x003B) { # ; |
2345 |
|
!!!cp (1020); |
2346 |
$value = $EntityChar->{$entity_name}; |
$value = $EntityChar->{$entity_name}; |
2347 |
$match = 1; |
$match = 1; |
2348 |
!!!next-input-character; |
!!!next-input-character; |
2349 |
last; |
last; |
2350 |
} else { |
} else { |
2351 |
|
!!!cp (1021); |
2352 |
$value = $EntityChar->{$entity_name}; |
$value = $EntityChar->{$entity_name}; |
2353 |
$match = -1; |
$match = -1; |
2354 |
!!!next-input-character; |
!!!next-input-character; |
2355 |
} |
} |
2356 |
} else { |
} else { |
2357 |
|
!!!cp (1022); |
2358 |
$value .= chr $self->{next_char}; |
$value .= chr $self->{next_char}; |
2359 |
$match *= 2; |
$match *= 2; |
2360 |
!!!next-input-character; |
!!!next-input-character; |
2362 |
} |
} |
2363 |
|
|
2364 |
if ($match > 0) { |
if ($match > 0) { |
2365 |
|
!!!cp (1023); |
2366 |
return {type => CHARACTER_TOKEN, data => $value, has_reference => 1}; |
return {type => CHARACTER_TOKEN, data => $value, has_reference => 1}; |
2367 |
} elsif ($match < 0) { |
} elsif ($match < 0) { |
2368 |
!!!parse-error (type => 'no refc'); |
!!!parse-error (type => 'no refc'); |
2369 |
if ($in_attr and $match < -1) { |
if ($in_attr and $match < -1) { |
2370 |
|
!!!cp (1024); |
2371 |
return {type => CHARACTER_TOKEN, data => '&'.$entity_name}; |
return {type => CHARACTER_TOKEN, data => '&'.$entity_name}; |
2372 |
} else { |
} else { |
2373 |
|
!!!cp (1025); |
2374 |
return {type => CHARACTER_TOKEN, data => $value, has_reference => 1}; |
return {type => CHARACTER_TOKEN, data => $value, has_reference => 1}; |
2375 |
} |
} |
2376 |
} else { |
} else { |
2377 |
|
!!!cp (1026); |
2378 |
!!!parse-error (type => 'bare ero'); |
!!!parse-error (type => 'bare ero'); |
2379 |
## NOTE: "No characters are consumed" in the spec. |
## NOTE: "No characters are consumed" in the spec. |
2380 |
return {type => CHARACTER_TOKEN, data => '&'.$value}; |
return {type => CHARACTER_TOKEN, data => '&'.$value}; |
2381 |
} |
} |
2382 |
} else { |
} else { |
2383 |
|
!!!cp (1027); |
2384 |
## no characters are consumed |
## no characters are consumed |
2385 |
!!!parse-error (type => 'bare ero'); |
!!!parse-error (type => 'bare ero'); |
2386 |
return undef; |
return undef; |