| 656 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 657 |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
| 658 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 659 |
if ($self->{current_token}->{attributes}) { |
#if ($self->{current_token}->{attributes}) { |
| 660 |
!!!cp (36); |
# ## NOTE: This should never be reached. |
| 661 |
!!!parse-error (type => 'end tag attribute'); |
# !!! cp (36); |
| 662 |
} else { |
# !!! parse-error (type => 'end tag attribute'); |
| 663 |
|
#} else { |
| 664 |
!!!cp (37); |
!!!cp (37); |
| 665 |
} |
#} |
| 666 |
} else { |
} else { |
| 667 |
die "$0: $self->{current_token}->{type}: Unknown token type"; |
die "$0: $self->{current_token}->{type}: Unknown token type"; |
| 668 |
} |
} |
| 689 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 690 |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
| 691 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 692 |
if ($self->{current_token}->{attributes}) { |
#if ($self->{current_token}->{attributes}) { |
| 693 |
!!!cp (40); |
# ## NOTE: This state should never be reached. |
| 694 |
!!!parse-error (type => 'end tag attribute'); |
# !!! cp (40); |
| 695 |
} else { |
# !!! parse-error (type => 'end tag attribute'); |
| 696 |
|
#} else { |
| 697 |
!!!cp (41); |
!!!cp (41); |
| 698 |
} |
#} |
| 699 |
} else { |
} else { |
| 700 |
die "$0: $self->{current_token}->{type}: Unknown token type"; |
die "$0: $self->{current_token}->{type}: Unknown token type"; |
| 701 |
} |
} |
| 914 |
!!!cp (67); |
!!!cp (67); |
| 915 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 916 |
} else { |
} else { |
| 917 |
|
## NOTE: This state should never be reached. |
| 918 |
!!!cp (68); |
!!!cp (68); |
| 919 |
} |
} |
| 920 |
} else { |
} else { |
| 966 |
!!!cp (74); |
!!!cp (74); |
| 967 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 968 |
} else { |
} else { |
| 969 |
|
## NOTE: This state should never be reached. |
| 970 |
!!!cp (75); |
!!!cp (75); |
| 971 |
} |
} |
| 972 |
} else { |
} else { |
| 1015 |
!!!cp (80); |
!!!cp (80); |
| 1016 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 1017 |
} else { |
} else { |
| 1018 |
|
## NOTE: This state should never be reached. |
| 1019 |
!!!cp (81); |
!!!cp (81); |
| 1020 |
} |
} |
| 1021 |
} else { |
} else { |
| 1072 |
!!!cp (88); |
!!!cp (88); |
| 1073 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 1074 |
} else { |
} else { |
| 1075 |
|
## NOTE: This state should never be reached. |
| 1076 |
!!!cp (89); |
!!!cp (89); |
| 1077 |
} |
} |
| 1078 |
} else { |
} else { |
| 1097 |
!!!cp (91); |
!!!cp (91); |
| 1098 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 1099 |
} else { |
} else { |
| 1100 |
|
## NOTE: This state should never be reached. |
| 1101 |
!!!cp (92); |
!!!cp (92); |
| 1102 |
} |
} |
| 1103 |
} else { |
} else { |
| 1146 |
!!!cp (98); |
!!!cp (98); |
| 1147 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 1148 |
} else { |
} else { |
| 1149 |
|
## NOTE: This state should never be reached. |
| 1150 |
!!!cp (99); |
!!!cp (99); |
| 1151 |
} |
} |
| 1152 |
} else { |
} else { |
| 1190 |
!!!cp (104); |
!!!cp (104); |
| 1191 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 1192 |
} else { |
} else { |
| 1193 |
|
## NOTE: This state should never be reached. |
| 1194 |
!!!cp (105); |
!!!cp (105); |
| 1195 |
} |
} |
| 1196 |
} else { |
} else { |
| 1237 |
!!!cp (110); |
!!!cp (110); |
| 1238 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 1239 |
} else { |
} else { |
| 1240 |
|
## NOTE: This state should never be reached. |
| 1241 |
!!!cp (111); |
!!!cp (111); |
| 1242 |
} |
} |
| 1243 |
} else { |
} else { |
| 1262 |
!!!cp (113); |
!!!cp (113); |
| 1263 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 1264 |
} else { |
} else { |
| 1265 |
|
## NOTE: This state should never be reached. |
| 1266 |
!!!cp (114); |
!!!cp (114); |
| 1267 |
} |
} |
| 1268 |
} else { |
} else { |
| 1334 |
!!!cp (120); |
!!!cp (120); |
| 1335 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 1336 |
} else { |
} else { |
| 1337 |
|
## NOTE: This state should never be reached. |
| 1338 |
!!!cp (121); |
!!!cp (121); |
| 1339 |
} |
} |
| 1340 |
} else { |
} else { |
| 2202 |
0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, & # 0x000D # CR |
0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, & # 0x000D # CR |
| 2203 |
$additional => 1, |
$additional => 1, |
| 2204 |
}->{$self->{next_char}}) { |
}->{$self->{next_char}}) { |
| 2205 |
|
!!!cp (1001); |
| 2206 |
## Don't consume |
## Don't consume |
| 2207 |
## No error |
## No error |
| 2208 |
return undef; |
return undef; |
| 2216 |
!!!next-input-character; |
!!!next-input-character; |
| 2217 |
if (0x0030 <= $self->{next_char} and |
if (0x0030 <= $self->{next_char} and |
| 2218 |
$self->{next_char} <= 0x0039) { # 0..9 |
$self->{next_char} <= 0x0039) { # 0..9 |
| 2219 |
|
!!!cp (1002); |
| 2220 |
$code ||= 0; |
$code ||= 0; |
| 2221 |
$code *= 0x10; |
$code *= 0x10; |
| 2222 |
$code += $self->{next_char} - 0x0030; |
$code += $self->{next_char} - 0x0030; |
| 2223 |
redo X; |
redo X; |
| 2224 |
} elsif (0x0061 <= $self->{next_char} and |
} elsif (0x0061 <= $self->{next_char} and |
| 2225 |
$self->{next_char} <= 0x0066) { # a..f |
$self->{next_char} <= 0x0066) { # a..f |
| 2226 |
|
!!!cp (1003); |
| 2227 |
$code ||= 0; |
$code ||= 0; |
| 2228 |
$code *= 0x10; |
$code *= 0x10; |
| 2229 |
$code += $self->{next_char} - 0x0060 + 9; |
$code += $self->{next_char} - 0x0060 + 9; |
| 2230 |
redo X; |
redo X; |
| 2231 |
} elsif (0x0041 <= $self->{next_char} and |
} elsif (0x0041 <= $self->{next_char} and |
| 2232 |
$self->{next_char} <= 0x0046) { # A..F |
$self->{next_char} <= 0x0046) { # A..F |
| 2233 |
|
!!!cp (1004); |
| 2234 |
$code ||= 0; |
$code ||= 0; |
| 2235 |
$code *= 0x10; |
$code *= 0x10; |
| 2236 |
$code += $self->{next_char} - 0x0040 + 9; |
$code += $self->{next_char} - 0x0040 + 9; |
| 2237 |
redo X; |
redo X; |
| 2238 |
} elsif (not defined $code) { # no hexadecimal digit |
} elsif (not defined $code) { # no hexadecimal digit |
| 2239 |
|
!!!cp (1005); |
| 2240 |
!!!parse-error (type => 'bare hcro'); |
!!!parse-error (type => 'bare hcro'); |
| 2241 |
!!!back-next-input-character ($x_char, $self->{next_char}); |
!!!back-next-input-character ($x_char, $self->{next_char}); |
| 2242 |
$self->{next_char} = 0x0023; # # |
$self->{next_char} = 0x0023; # # |
| 2243 |
return undef; |
return undef; |
| 2244 |
} elsif ($self->{next_char} == 0x003B) { # ; |
} elsif ($self->{next_char} == 0x003B) { # ; |
| 2245 |
|
!!!cp (1006); |
| 2246 |
!!!next-input-character; |
!!!next-input-character; |
| 2247 |
} else { |
} else { |
| 2248 |
|
!!!cp (1007); |
| 2249 |
!!!parse-error (type => 'no refc'); |
!!!parse-error (type => 'no refc'); |
| 2250 |
} |
} |
| 2251 |
|
|
| 2252 |
if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) { |
if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) { |
| 2253 |
|
!!!cp (1008); |
| 2254 |
!!!parse-error (type => sprintf 'invalid character reference:U+%04X', $code); |
!!!parse-error (type => sprintf 'invalid character reference:U+%04X', $code); |
| 2255 |
$code = 0xFFFD; |
$code = 0xFFFD; |
| 2256 |
} elsif ($code > 0x10FFFF) { |
} elsif ($code > 0x10FFFF) { |
| 2257 |
|
!!!cp (1009); |
| 2258 |
!!!parse-error (type => sprintf 'invalid character reference:U-%08X', $code); |
!!!parse-error (type => sprintf 'invalid character reference:U-%08X', $code); |
| 2259 |
$code = 0xFFFD; |
$code = 0xFFFD; |
| 2260 |
} elsif ($code == 0x000D) { |
} elsif ($code == 0x000D) { |
| 2261 |
|
!!!cp (1010); |
| 2262 |
!!!parse-error (type => 'CR character reference'); |
!!!parse-error (type => 'CR character reference'); |
| 2263 |
$code = 0x000A; |
$code = 0x000A; |
| 2264 |
} elsif (0x80 <= $code and $code <= 0x9F) { |
} elsif (0x80 <= $code and $code <= 0x9F) { |
| 2265 |
|
!!!cp (1011); |
| 2266 |
!!!parse-error (type => sprintf 'C1 character reference:U+%04X', $code); |
!!!parse-error (type => sprintf 'C1 character reference:U+%04X', $code); |
| 2267 |
$code = $c1_entity_char->{$code}; |
$code = $c1_entity_char->{$code}; |
| 2268 |
} |
} |
| 2277 |
|
|
| 2278 |
while (0x0030 <= $self->{next_char} and |
while (0x0030 <= $self->{next_char} and |
| 2279 |
$self->{next_char} <= 0x0039) { # 0..9 |
$self->{next_char} <= 0x0039) { # 0..9 |
| 2280 |
|
!!!cp (1012); |
| 2281 |
$code *= 10; |
$code *= 10; |
| 2282 |
$code += $self->{next_char} - 0x0030; |
$code += $self->{next_char} - 0x0030; |
| 2283 |
|
|
| 2285 |
} |
} |
| 2286 |
|
|
| 2287 |
if ($self->{next_char} == 0x003B) { # ; |
if ($self->{next_char} == 0x003B) { # ; |
| 2288 |
|
!!!cp (1013); |
| 2289 |
!!!next-input-character; |
!!!next-input-character; |
| 2290 |
} else { |
} else { |
| 2291 |
|
!!!cp (1014); |
| 2292 |
!!!parse-error (type => 'no refc'); |
!!!parse-error (type => 'no refc'); |
| 2293 |
} |
} |
| 2294 |
|
|
| 2295 |
if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) { |
if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) { |
| 2296 |
|
!!!cp (1015); |
| 2297 |
!!!parse-error (type => sprintf 'invalid character reference:U+%04X', $code); |
!!!parse-error (type => sprintf 'invalid character reference:U+%04X', $code); |
| 2298 |
$code = 0xFFFD; |
$code = 0xFFFD; |
| 2299 |
} elsif ($code > 0x10FFFF) { |
} elsif ($code > 0x10FFFF) { |
| 2300 |
|
!!!cp (1016); |
| 2301 |
!!!parse-error (type => sprintf 'invalid character reference:U-%08X', $code); |
!!!parse-error (type => sprintf 'invalid character reference:U-%08X', $code); |
| 2302 |
$code = 0xFFFD; |
$code = 0xFFFD; |
| 2303 |
} elsif ($code == 0x000D) { |
} elsif ($code == 0x000D) { |
| 2304 |
|
!!!cp (1017); |
| 2305 |
!!!parse-error (type => 'CR character reference'); |
!!!parse-error (type => 'CR character reference'); |
| 2306 |
$code = 0x000A; |
$code = 0x000A; |
| 2307 |
} elsif (0x80 <= $code and $code <= 0x9F) { |
} elsif (0x80 <= $code and $code <= 0x9F) { |
| 2308 |
|
!!!cp (1018); |
| 2309 |
!!!parse-error (type => sprintf 'C1 character reference:U+%04X', $code); |
!!!parse-error (type => sprintf 'C1 character reference:U+%04X', $code); |
| 2310 |
$code = $c1_entity_char->{$code}; |
$code = $c1_entity_char->{$code}; |
| 2311 |
} |
} |
| 2312 |
|
|
| 2313 |
return {type => CHARACTER_TOKEN, data => chr $code, has_reference => 1}; |
return {type => CHARACTER_TOKEN, data => chr $code, has_reference => 1}; |
| 2314 |
} else { |
} else { |
| 2315 |
|
!!!cp (1019); |
| 2316 |
!!!parse-error (type => 'bare nero'); |
!!!parse-error (type => 'bare nero'); |
| 2317 |
!!!back-next-input-character ($self->{next_char}); |
!!!back-next-input-character ($self->{next_char}); |
| 2318 |
$self->{next_char} = 0x0023; # # |
$self->{next_char} = 0x0023; # # |
| 2342 |
$entity_name .= chr $self->{next_char}; |
$entity_name .= chr $self->{next_char}; |
| 2343 |
if (defined $EntityChar->{$entity_name}) { |
if (defined $EntityChar->{$entity_name}) { |
| 2344 |
if ($self->{next_char} == 0x003B) { # ; |
if ($self->{next_char} == 0x003B) { # ; |
| 2345 |
|
!!!cp (1020); |
| 2346 |
$value = $EntityChar->{$entity_name}; |
$value = $EntityChar->{$entity_name}; |
| 2347 |
$match = 1; |
$match = 1; |
| 2348 |
!!!next-input-character; |
!!!next-input-character; |
| 2349 |
last; |
last; |
| 2350 |
} else { |
} else { |
| 2351 |
|
!!!cp (1021); |
| 2352 |
$value = $EntityChar->{$entity_name}; |
$value = $EntityChar->{$entity_name}; |
| 2353 |
$match = -1; |
$match = -1; |
| 2354 |
!!!next-input-character; |
!!!next-input-character; |
| 2355 |
} |
} |
| 2356 |
} else { |
} else { |
| 2357 |
|
!!!cp (1022); |
| 2358 |
$value .= chr $self->{next_char}; |
$value .= chr $self->{next_char}; |
| 2359 |
$match *= 2; |
$match *= 2; |
| 2360 |
!!!next-input-character; |
!!!next-input-character; |
| 2362 |
} |
} |
| 2363 |
|
|
| 2364 |
if ($match > 0) { |
if ($match > 0) { |
| 2365 |
|
!!!cp (1023); |
| 2366 |
return {type => CHARACTER_TOKEN, data => $value, has_reference => 1}; |
return {type => CHARACTER_TOKEN, data => $value, has_reference => 1}; |
| 2367 |
} elsif ($match < 0) { |
} elsif ($match < 0) { |
| 2368 |
!!!parse-error (type => 'no refc'); |
!!!parse-error (type => 'no refc'); |
| 2369 |
if ($in_attr and $match < -1) { |
if ($in_attr and $match < -1) { |
| 2370 |
|
!!!cp (1024); |
| 2371 |
return {type => CHARACTER_TOKEN, data => '&'.$entity_name}; |
return {type => CHARACTER_TOKEN, data => '&'.$entity_name}; |
| 2372 |
} else { |
} else { |
| 2373 |
|
!!!cp (1025); |
| 2374 |
return {type => CHARACTER_TOKEN, data => $value, has_reference => 1}; |
return {type => CHARACTER_TOKEN, data => $value, has_reference => 1}; |
| 2375 |
} |
} |
| 2376 |
} else { |
} else { |
| 2377 |
|
!!!cp (1026); |
| 2378 |
!!!parse-error (type => 'bare ero'); |
!!!parse-error (type => 'bare ero'); |
| 2379 |
## NOTE: "No characters are consumed" in the spec. |
## NOTE: "No characters are consumed" in the spec. |
| 2380 |
return {type => CHARACTER_TOKEN, data => '&'.$value}; |
return {type => CHARACTER_TOKEN, data => '&'.$value}; |
| 2381 |
} |
} |
| 2382 |
} else { |
} else { |
| 2383 |
|
!!!cp (1027); |
| 2384 |
## no characters are consumed |
## no characters are consumed |
| 2385 |
!!!parse-error (type => 'bare ero'); |
!!!parse-error (type => 'bare ero'); |
| 2386 |
return undef; |
return undef; |