| 164 |
sub MD_NAME_STATE () { 69 } |
sub MD_NAME_STATE () { 69 } |
| 165 |
sub DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE () { 70 } |
sub DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE () { 70 } |
| 166 |
sub DOCTYPE_ATTLIST_NAME_AFTER_STATE () { 71 } |
sub DOCTYPE_ATTLIST_NAME_AFTER_STATE () { 71 } |
| 167 |
|
sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE () { 72 } |
| 168 |
|
sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE () { 73 } |
| 169 |
|
sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE () { 74 } |
| 170 |
|
sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE () { 75 } |
| 171 |
|
sub BEFORE_ALLOWED_TOKEN_STATE () { 76 } |
| 172 |
|
sub ALLOWED_TOKEN_STATE () { 77 } |
| 173 |
|
sub AFTER_ALLOWED_TOKEN_STATE () { 78 } |
| 174 |
|
sub AFTER_ALLOWED_TOKENS_STATE () { 79 } |
| 175 |
|
sub BEFORE_ATTR_DEFAULT_STATE () { 80 } |
| 176 |
|
sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE () { 81 } |
| 177 |
|
sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE () { 82 } |
| 178 |
|
sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE () { 83 } |
| 179 |
|
sub AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE () { 84 } |
| 180 |
|
sub BOGUS_MD_STATE () { 85 } |
| 181 |
|
|
| 182 |
## Tree constructor state constants (see Whatpm::HTML for the full |
## Tree constructor state constants (see Whatpm::HTML for the full |
| 183 |
## list and descriptions) |
## list and descriptions) |
| 1271 |
redo A; |
redo A; |
| 1272 |
} |
} |
| 1273 |
} elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) { |
} elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) { |
| 1274 |
## XML5: "Tag attribute value double quoted state". |
## XML5: "Tag attribute value double quoted state" and "DOCTYPE |
| 1275 |
|
## ATTLIST attribute value double quoted state". |
| 1276 |
|
|
| 1277 |
if ($self->{nc} == 0x0022) { # " |
if ($self->{nc} == 0x0022) { # " |
| 1278 |
!!!cp (95); |
if ($self->{ct}->{type} == ATTLIST_TOKEN) { |
| 1279 |
## XML5: "Tag attribute name before state". |
!!!cp (95.1); |
| 1280 |
$self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE; |
## XML5: "DOCTYPE ATTLIST name after state". |
| 1281 |
|
push @{$self->{ct}->{attrdefs}}, $self->{ca}; |
| 1282 |
|
$self->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE; |
| 1283 |
|
} else { |
| 1284 |
|
!!!cp (95); |
| 1285 |
|
## XML5: "Tag attribute name before state". |
| 1286 |
|
$self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE; |
| 1287 |
|
} |
| 1288 |
!!!next-input-character; |
!!!next-input-character; |
| 1289 |
redo A; |
redo A; |
| 1290 |
} elsif ($self->{nc} == 0x0026) { # & |
} elsif ($self->{nc} == 0x0026) { # & |
| 1305 |
if ($self->{ct}->{type} == START_TAG_TOKEN) { |
if ($self->{ct}->{type} == START_TAG_TOKEN) { |
| 1306 |
!!!cp (97); |
!!!cp (97); |
| 1307 |
$self->{last_stag_name} = $self->{ct}->{tag_name}; |
$self->{last_stag_name} = $self->{ct}->{tag_name}; |
| 1308 |
|
|
| 1309 |
|
$self->{state} = DATA_STATE; |
| 1310 |
|
$self->{s_kwd} = ''; |
| 1311 |
|
## reconsume |
| 1312 |
|
!!!emit ($self->{ct}); # start tag |
| 1313 |
|
redo A; |
| 1314 |
} elsif ($self->{ct}->{type} == END_TAG_TOKEN) { |
} elsif ($self->{ct}->{type} == END_TAG_TOKEN) { |
| 1315 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 1316 |
if ($self->{ct}->{attributes}) { |
if ($self->{ct}->{attributes}) { |
| 1320 |
## NOTE: This state should never be reached. |
## NOTE: This state should never be reached. |
| 1321 |
!!!cp (99); |
!!!cp (99); |
| 1322 |
} |
} |
| 1323 |
|
|
| 1324 |
|
$self->{state} = DATA_STATE; |
| 1325 |
|
$self->{s_kwd} = ''; |
| 1326 |
|
## reconsume |
| 1327 |
|
!!!emit ($self->{ct}); # end tag |
| 1328 |
|
redo A; |
| 1329 |
|
} elsif ($self->{ct}->{type} == ATTLIST_TOKEN) { |
| 1330 |
|
## XML5: No parse error above; not defined yet. |
| 1331 |
|
push @{$self->{ct}->{attrdefs}}, $self->{ca}; |
| 1332 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 1333 |
|
## Reconsume. |
| 1334 |
|
!!!emit ($self->{ct}); # ATTLIST |
| 1335 |
|
redo A; |
| 1336 |
} else { |
} else { |
| 1337 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1338 |
} |
} |
|
$self->{state} = DATA_STATE; |
|
|
$self->{s_kwd} = ''; |
|
|
## reconsume |
|
|
|
|
|
!!!emit ($self->{ct}); # start tag or end tag |
|
|
|
|
|
redo A; |
|
| 1339 |
} else { |
} else { |
| 1340 |
|
## XML5 [ATTLIST]: Not defined yet. |
| 1341 |
if ($self->{is_xml} and $self->{nc} == 0x003C) { # < |
if ($self->{is_xml} and $self->{nc} == 0x003C) { # < |
| 1342 |
!!!cp (100); |
!!!cp (100); |
| 1343 |
## XML5: Not a parse error. |
## XML5: Not a parse error. |
| 1355 |
redo A; |
redo A; |
| 1356 |
} |
} |
| 1357 |
} elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) { |
} elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) { |
| 1358 |
## XML5: "Tag attribute value single quoted state". |
## XML5: "Tag attribute value single quoted state" and "DOCTYPE |
| 1359 |
|
## ATTLIST attribute value single quoted state". |
| 1360 |
|
|
| 1361 |
if ($self->{nc} == 0x0027) { # ' |
if ($self->{nc} == 0x0027) { # ' |
| 1362 |
!!!cp (101); |
if ($self->{ct}->{type} == ATTLIST_TOKEN) { |
| 1363 |
## XML5: "Before attribute name state" (sic). |
!!!cp (101.1); |
| 1364 |
$self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE; |
## XML5: "DOCTYPE ATTLIST name after state". |
| 1365 |
|
push @{$self->{ct}->{attrdefs}}, $self->{ca}; |
| 1366 |
|
$self->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE; |
| 1367 |
|
} else { |
| 1368 |
|
!!!cp (101); |
| 1369 |
|
## XML5: "Before attribute name state" (sic). |
| 1370 |
|
$self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE; |
| 1371 |
|
} |
| 1372 |
!!!next-input-character; |
!!!next-input-character; |
| 1373 |
redo A; |
redo A; |
| 1374 |
} elsif ($self->{nc} == 0x0026) { # & |
} elsif ($self->{nc} == 0x0026) { # & |
| 1389 |
if ($self->{ct}->{type} == START_TAG_TOKEN) { |
if ($self->{ct}->{type} == START_TAG_TOKEN) { |
| 1390 |
!!!cp (103); |
!!!cp (103); |
| 1391 |
$self->{last_stag_name} = $self->{ct}->{tag_name}; |
$self->{last_stag_name} = $self->{ct}->{tag_name}; |
| 1392 |
|
|
| 1393 |
|
$self->{state} = DATA_STATE; |
| 1394 |
|
$self->{s_kwd} = ''; |
| 1395 |
|
## reconsume |
| 1396 |
|
!!!emit ($self->{ct}); # start tag |
| 1397 |
|
redo A; |
| 1398 |
} elsif ($self->{ct}->{type} == END_TAG_TOKEN) { |
} elsif ($self->{ct}->{type} == END_TAG_TOKEN) { |
| 1399 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 1400 |
if ($self->{ct}->{attributes}) { |
if ($self->{ct}->{attributes}) { |
| 1404 |
## NOTE: This state should never be reached. |
## NOTE: This state should never be reached. |
| 1405 |
!!!cp (105); |
!!!cp (105); |
| 1406 |
} |
} |
| 1407 |
|
|
| 1408 |
|
$self->{state} = DATA_STATE; |
| 1409 |
|
$self->{s_kwd} = ''; |
| 1410 |
|
## reconsume |
| 1411 |
|
!!!emit ($self->{ct}); # end tag |
| 1412 |
|
redo A; |
| 1413 |
|
} elsif ($self->{ct}->{type} == ATTLIST_TOKEN) { |
| 1414 |
|
## XML5: No parse error above; not defined yet. |
| 1415 |
|
push @{$self->{ct}->{attrdefs}}, $self->{ca}; |
| 1416 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 1417 |
|
## Reconsume. |
| 1418 |
|
!!!emit ($self->{ct}); # ATTLIST |
| 1419 |
|
redo A; |
| 1420 |
} else { |
} else { |
| 1421 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1422 |
} |
} |
|
$self->{state} = DATA_STATE; |
|
|
$self->{s_kwd} = ''; |
|
|
## reconsume |
|
|
|
|
|
!!!emit ($self->{ct}); # start tag or end tag |
|
|
|
|
|
redo A; |
|
| 1423 |
} else { |
} else { |
| 1424 |
|
## XML5 [ATTLIST]: Not defined yet. |
| 1425 |
if ($self->{is_xml} and $self->{nc} == 0x003C) { # < |
if ($self->{is_xml} and $self->{nc} == 0x003C) { # < |
| 1426 |
!!!cp (106); |
!!!cp (106); |
| 1427 |
## XML5: Not a parse error. |
## XML5: Not a parse error. |
| 1442 |
## XML5: "Tag attribute value unquoted state". |
## XML5: "Tag attribute value unquoted state". |
| 1443 |
|
|
| 1444 |
if ($is_space->{$self->{nc}}) { |
if ($is_space->{$self->{nc}}) { |
| 1445 |
!!!cp (107); |
if ($self->{ct}->{type} == ATTLIST_TOKEN) { |
| 1446 |
## XML5: "Tag attribute name before state". |
!!!cp (107.1); |
| 1447 |
$self->{state} = BEFORE_ATTRIBUTE_NAME_STATE; |
push @{$self->{ct}->{attrdefs}}, $self->{ca}; |
| 1448 |
|
$self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE; |
| 1449 |
|
} else { |
| 1450 |
|
!!!cp (107); |
| 1451 |
|
## XML5: "Tag attribute name before state". |
| 1452 |
|
$self->{state} = BEFORE_ATTRIBUTE_NAME_STATE; |
| 1453 |
|
} |
| 1454 |
!!!next-input-character; |
!!!next-input-character; |
| 1455 |
redo A; |
redo A; |
| 1456 |
} elsif ($self->{nc} == 0x0026) { # & |
} elsif ($self->{nc} == 0x0026) { # & |
| 1471 |
if ($self->{ct}->{type} == START_TAG_TOKEN) { |
if ($self->{ct}->{type} == START_TAG_TOKEN) { |
| 1472 |
!!!cp (109); |
!!!cp (109); |
| 1473 |
$self->{last_stag_name} = $self->{ct}->{tag_name}; |
$self->{last_stag_name} = $self->{ct}->{tag_name}; |
| 1474 |
|
|
| 1475 |
|
$self->{state} = DATA_STATE; |
| 1476 |
|
$self->{s_kwd} = ''; |
| 1477 |
|
!!!next-input-character; |
| 1478 |
|
!!!emit ($self->{ct}); # start tag |
| 1479 |
|
redo A; |
| 1480 |
} elsif ($self->{ct}->{type} == END_TAG_TOKEN) { |
} elsif ($self->{ct}->{type} == END_TAG_TOKEN) { |
| 1481 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 1482 |
if ($self->{ct}->{attributes}) { |
if ($self->{ct}->{attributes}) { |
| 1486 |
## NOTE: This state should never be reached. |
## NOTE: This state should never be reached. |
| 1487 |
!!!cp (111); |
!!!cp (111); |
| 1488 |
} |
} |
| 1489 |
|
|
| 1490 |
|
$self->{state} = DATA_STATE; |
| 1491 |
|
$self->{s_kwd} = ''; |
| 1492 |
|
!!!next-input-character; |
| 1493 |
|
!!!emit ($self->{ct}); # end tag |
| 1494 |
|
redo A; |
| 1495 |
|
} elsif ($self->{ct}->{type} == ATTLIST_TOKEN) { |
| 1496 |
|
push @{$self->{ct}->{attrdefs}}, $self->{ca}; |
| 1497 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 1498 |
|
!!!next-input-character; |
| 1499 |
|
!!!emit ($self->{ct}); # ATTLIST |
| 1500 |
|
redo A; |
| 1501 |
} else { |
} else { |
| 1502 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1503 |
} |
} |
|
$self->{state} = DATA_STATE; |
|
|
$self->{s_kwd} = ''; |
|
|
!!!next-input-character; |
|
|
|
|
|
!!!emit ($self->{ct}); # start tag or end tag |
|
|
|
|
|
redo A; |
|
| 1504 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
|
!!!parse-error (type => 'unclosed tag'); |
|
| 1505 |
if ($self->{ct}->{type} == START_TAG_TOKEN) { |
if ($self->{ct}->{type} == START_TAG_TOKEN) { |
| 1506 |
!!!cp (112); |
!!!cp (112); |
| 1507 |
|
!!!parse-error (type => 'unclosed tag'); |
| 1508 |
$self->{last_stag_name} = $self->{ct}->{tag_name}; |
$self->{last_stag_name} = $self->{ct}->{tag_name}; |
| 1509 |
|
|
| 1510 |
|
$self->{state} = DATA_STATE; |
| 1511 |
|
$self->{s_kwd} = ''; |
| 1512 |
|
## reconsume |
| 1513 |
|
!!!emit ($self->{ct}); # start tag |
| 1514 |
|
redo A; |
| 1515 |
} elsif ($self->{ct}->{type} == END_TAG_TOKEN) { |
} elsif ($self->{ct}->{type} == END_TAG_TOKEN) { |
| 1516 |
|
!!!parse-error (type => 'unclosed tag'); |
| 1517 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 1518 |
if ($self->{ct}->{attributes}) { |
if ($self->{ct}->{attributes}) { |
| 1519 |
!!!cp (113); |
!!!cp (113); |
| 1522 |
## NOTE: This state should never be reached. |
## NOTE: This state should never be reached. |
| 1523 |
!!!cp (114); |
!!!cp (114); |
| 1524 |
} |
} |
| 1525 |
|
|
| 1526 |
|
$self->{state} = DATA_STATE; |
| 1527 |
|
$self->{s_kwd} = ''; |
| 1528 |
|
## reconsume |
| 1529 |
|
!!!emit ($self->{ct}); # end tag |
| 1530 |
|
redo A; |
| 1531 |
|
} elsif ($self->{ct}->{type} == ATTLIST_TOKEN) { |
| 1532 |
|
!!!parse-error (type => 'unclosed md'); ## TODO: type |
| 1533 |
|
push @{$self->{ct}->{attrdefs}}, $self->{ca}; |
| 1534 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 1535 |
|
## Reconsume. |
| 1536 |
|
!!!emit ($self->{ct}); # ATTLIST |
| 1537 |
|
redo A; |
| 1538 |
} else { |
} else { |
| 1539 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1540 |
} |
} |
|
$self->{state} = DATA_STATE; |
|
|
$self->{s_kwd} = ''; |
|
|
## reconsume |
|
|
|
|
|
!!!emit ($self->{ct}); # start tag or end tag |
|
|
|
|
|
redo A; |
|
| 1541 |
} else { |
} else { |
| 1542 |
if ({ |
if ({ |
| 1543 |
0x0022 => 1, # " |
0x0022 => 1, # " |
| 2213 |
!!!next-input-character; |
!!!next-input-character; |
| 2214 |
redo A; |
redo A; |
| 2215 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
| 2216 |
!!!cp (166); |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
| 2217 |
$self->{state} = DATA_STATE; |
!!!cp (166); |
| 2218 |
$self->{s_kwd} = ''; |
$self->{state} = DATA_STATE; |
| 2219 |
|
$self->{s_kwd} = ''; |
| 2220 |
|
} else { |
| 2221 |
|
!!!cp (166.1); |
| 2222 |
|
!!!parse-error (type => 'no md def'); ## TODO: type |
| 2223 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 2224 |
|
} |
| 2225 |
|
|
| 2226 |
!!!next-input-character; |
!!!next-input-character; |
| 2227 |
|
!!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION |
|
!!!emit ($self->{ct}); # DOCTYPE |
|
|
|
|
| 2228 |
redo A; |
redo A; |
| 2229 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 2230 |
!!!cp (167); |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
| 2231 |
!!!parse-error (type => 'unclosed DOCTYPE'); |
!!!cp (167); |
| 2232 |
$self->{state} = DATA_STATE; |
!!!parse-error (type => 'unclosed DOCTYPE'); |
| 2233 |
$self->{s_kwd} = ''; |
$self->{state} = DATA_STATE; |
| 2234 |
## reconsume |
$self->{s_kwd} = ''; |
| 2235 |
|
$self->{ct}->{quirks} = 1; |
| 2236 |
$self->{ct}->{quirks} = 1; |
} else { |
| 2237 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!cp (167.12); |
| 2238 |
|
!!!parse-error (type => 'unclosed md'); ## TODO: type |
| 2239 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 2240 |
|
} |
| 2241 |
|
|
| 2242 |
|
## Reconsume. |
| 2243 |
|
!!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION |
| 2244 |
redo A; |
redo A; |
| 2245 |
} elsif ($self->{nc} == 0x0050 or # P |
} elsif ($self->{nc} == 0x0050 or # P |
| 2246 |
$self->{nc} == 0x0070) { # p |
$self->{nc} == 0x0070) { # p |
| 2256 |
$self->{kwd} = chr $self->{nc}; |
$self->{kwd} = chr $self->{nc}; |
| 2257 |
!!!next-input-character; |
!!!next-input-character; |
| 2258 |
redo A; |
redo A; |
| 2259 |
} elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [ |
## TODO: " and ' for ENTITY |
| 2260 |
|
} elsif ($self->{is_xml} and |
| 2261 |
|
$self->{ct}->{type} == DOCTYPE_TOKEN and |
| 2262 |
|
$self->{nc} == 0x005B) { # [ |
| 2263 |
!!!cp (167.3); |
!!!cp (167.3); |
| 2264 |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 2265 |
$self->{ct}->{has_internal_subset} = 1; # DOCTYPE |
$self->{ct}->{has_internal_subset} = 1; # DOCTYPE |
| 2268 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
| 2269 |
redo A; |
redo A; |
| 2270 |
} else { |
} else { |
| 2271 |
!!!cp (180); |
!!!parse-error (type => 'string after DOCTYPE name'); ## TODO: type |
| 2272 |
!!!parse-error (type => 'string after DOCTYPE name'); |
|
| 2273 |
$self->{ct}->{quirks} = 1; |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
| 2274 |
|
!!!cp (180); |
| 2275 |
|
$self->{ct}->{quirks} = 1; |
| 2276 |
|
$self->{state} = BOGUS_DOCTYPE_STATE; |
| 2277 |
|
} else { |
| 2278 |
|
!!!cp (180.1); |
| 2279 |
|
$self->{state} = BOGUS_MD_STATE; |
| 2280 |
|
} |
| 2281 |
|
|
|
$self->{state} = BOGUS_DOCTYPE_STATE; |
|
| 2282 |
!!!next-input-character; |
!!!next-input-character; |
| 2283 |
redo A; |
redo A; |
| 2284 |
} |
} |
| 2320 |
!!!next-input-character; |
!!!next-input-character; |
| 2321 |
redo A; |
redo A; |
| 2322 |
} else { |
} else { |
| 2323 |
!!!cp (169); |
!!!parse-error (type => 'string after DOCTYPE name', ## TODO: type |
|
!!!parse-error (type => 'string after DOCTYPE name', |
|
| 2324 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 2325 |
column => $self->{column_prev} + 1 - length $self->{kwd}); |
column => $self->{column_prev} + 1 - length $self->{kwd}); |
| 2326 |
$self->{ct}->{quirks} = 1; |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
| 2327 |
|
!!!cp (169); |
| 2328 |
$self->{state} = BOGUS_DOCTYPE_STATE; |
$self->{ct}->{quirks} = 1; |
| 2329 |
|
$self->{state} = BOGUS_DOCTYPE_STATE; |
| 2330 |
|
} else { |
| 2331 |
|
!!!cp (169.1); |
| 2332 |
|
$self->{state} = BOGUS_MD_STATE; |
| 2333 |
|
} |
| 2334 |
## Reconsume. |
## Reconsume. |
| 2335 |
redo A; |
redo A; |
| 2336 |
} |
} |
| 2372 |
!!!next-input-character; |
!!!next-input-character; |
| 2373 |
redo A; |
redo A; |
| 2374 |
} else { |
} else { |
| 2375 |
!!!cp (172); |
!!!parse-error (type => 'string after DOCTYPE name', ## TODO: type |
|
!!!parse-error (type => 'string after DOCTYPE name', |
|
| 2376 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 2377 |
column => $self->{column_prev} + 1 - length $self->{kwd}); |
column => $self->{column_prev} + 1 - length $self->{kwd}); |
| 2378 |
$self->{ct}->{quirks} = 1; |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
| 2379 |
|
!!!cp (172); |
| 2380 |
$self->{state} = BOGUS_DOCTYPE_STATE; |
$self->{ct}->{quirks} = 1; |
| 2381 |
|
$self->{state} = BOGUS_DOCTYPE_STATE; |
| 2382 |
|
} else { |
| 2383 |
|
!!!cp (172.1); |
| 2384 |
|
$self->{state} = BOGUS_MD_STATE; |
| 2385 |
|
} |
| 2386 |
## Reconsume. |
## Reconsume. |
| 2387 |
redo A; |
redo A; |
| 2388 |
} |
} |
| 2405 |
!!!next-input-character; |
!!!next-input-character; |
| 2406 |
redo A; |
redo A; |
| 2407 |
} elsif ($self->{nc} eq 0x003E) { # > |
} elsif ($self->{nc} eq 0x003E) { # > |
|
!!!cp (184); |
|
| 2408 |
!!!parse-error (type => 'no PUBLIC literal'); |
!!!parse-error (type => 'no PUBLIC literal'); |
| 2409 |
|
|
| 2410 |
$self->{state} = DATA_STATE; |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
| 2411 |
$self->{s_kwd} = ''; |
!!!cp (184); |
| 2412 |
|
$self->{state} = DATA_STATE; |
| 2413 |
|
$self->{s_kwd} = ''; |
| 2414 |
|
$self->{ct}->{quirks} = 1; |
| 2415 |
|
} else { |
| 2416 |
|
!!!cp (184.1); |
| 2417 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 2418 |
|
} |
| 2419 |
|
|
| 2420 |
!!!next-input-character; |
!!!next-input-character; |
| 2421 |
|
!!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION |
|
$self->{ct}->{quirks} = 1; |
|
|
!!!emit ($self->{ct}); # DOCTYPE |
|
|
|
|
| 2422 |
redo A; |
redo A; |
| 2423 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 2424 |
!!!cp (185); |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
| 2425 |
!!!parse-error (type => 'unclosed DOCTYPE'); |
!!!cp (185); |
| 2426 |
|
!!!parse-error (type => 'unclosed DOCTYPE'); |
| 2427 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2428 |
$self->{s_kwd} = ''; |
$self->{s_kwd} = ''; |
| 2429 |
|
$self->{ct}->{quirks} = 1; |
| 2430 |
|
} else { |
| 2431 |
|
!!!cp (185.1); |
| 2432 |
|
!!!parse-error (type => 'unclosed md'); ## TODO: type |
| 2433 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 2434 |
|
} |
| 2435 |
|
|
| 2436 |
## reconsume |
## reconsume |
|
|
|
|
$self->{ct}->{quirks} = 1; |
|
| 2437 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
|
|
|
| 2438 |
redo A; |
redo A; |
| 2439 |
} elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [ |
} elsif ($self->{is_xml} and |
| 2440 |
|
$self->{ct}->{type} == DOCTYPE_TOKEN and |
| 2441 |
|
$self->{nc} == 0x005B) { # [ |
| 2442 |
!!!cp (186.1); |
!!!cp (186.1); |
| 2443 |
!!!parse-error (type => 'no PUBLIC literal'); |
!!!parse-error (type => 'no PUBLIC literal'); |
| 2444 |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 2448 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
| 2449 |
redo A; |
redo A; |
| 2450 |
} else { |
} else { |
|
!!!cp (186); |
|
| 2451 |
!!!parse-error (type => 'string after PUBLIC'); |
!!!parse-error (type => 'string after PUBLIC'); |
|
$self->{ct}->{quirks} = 1; |
|
| 2452 |
|
|
| 2453 |
$self->{state} = BOGUS_DOCTYPE_STATE; |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
| 2454 |
|
!!!cp (186); |
| 2455 |
|
$self->{ct}->{quirks} = 1; |
| 2456 |
|
$self->{state} = BOGUS_DOCTYPE_STATE; |
| 2457 |
|
} else { |
| 2458 |
|
!!!cp (186.2); |
| 2459 |
|
$self->{state} = BOGUS_MD_STATE; |
| 2460 |
|
} |
| 2461 |
|
|
| 2462 |
!!!next-input-character; |
!!!next-input-character; |
| 2463 |
redo A; |
redo A; |
| 2464 |
} |
} |
| 2469 |
!!!next-input-character; |
!!!next-input-character; |
| 2470 |
redo A; |
redo A; |
| 2471 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
|
!!!cp (188); |
|
| 2472 |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
| 2473 |
|
|
| 2474 |
$self->{state} = DATA_STATE; |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
| 2475 |
$self->{s_kwd} = ''; |
!!!cp (188); |
| 2476 |
!!!next-input-character; |
$self->{state} = DATA_STATE; |
| 2477 |
|
$self->{s_kwd} = ''; |
| 2478 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 2479 |
!!!emit ($self->{ct}); # DOCTYPE |
} else { |
| 2480 |
|
!!!cp (188.1); |
| 2481 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 2482 |
|
} |
| 2483 |
|
|
| 2484 |
|
!!!next-input-character; |
| 2485 |
|
!!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION |
| 2486 |
redo A; |
redo A; |
| 2487 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
|
!!!cp (189); |
|
| 2488 |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
| 2489 |
|
|
| 2490 |
$self->{state} = DATA_STATE; |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
| 2491 |
$self->{s_kwd} = ''; |
!!!cp (189); |
| 2492 |
## reconsume |
$self->{state} = DATA_STATE; |
| 2493 |
|
$self->{s_kwd} = ''; |
| 2494 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 2495 |
|
} else { |
| 2496 |
|
!!!cp (189.1); |
| 2497 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 2498 |
|
} |
| 2499 |
|
|
| 2500 |
|
## Reconsume. |
| 2501 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
|
|
|
| 2502 |
redo A; |
redo A; |
| 2503 |
} else { |
} else { |
| 2504 |
!!!cp (190); |
!!!cp (190); |
| 2505 |
$self->{ct}->{pubid} # DOCTYPE |
$self->{ct}->{pubid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION |
|
.= chr $self->{nc}; |
|
| 2506 |
$self->{read_until}->($self->{ct}->{pubid}, q[">], |
$self->{read_until}->($self->{ct}->{pubid}, q[">], |
| 2507 |
length $self->{ct}->{pubid}); |
length $self->{ct}->{pubid}); |
| 2508 |
|
|
| 2517 |
!!!next-input-character; |
!!!next-input-character; |
| 2518 |
redo A; |
redo A; |
| 2519 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
|
!!!cp (192); |
|
| 2520 |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
| 2521 |
|
|
| 2522 |
$self->{state} = DATA_STATE; |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
| 2523 |
$self->{s_kwd} = ''; |
!!!cp (192); |
| 2524 |
!!!next-input-character; |
$self->{state} = DATA_STATE; |
| 2525 |
|
$self->{s_kwd} = ''; |
| 2526 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 2527 |
!!!emit ($self->{ct}); # DOCTYPE |
} else { |
| 2528 |
|
!!!cp (192.1); |
| 2529 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 2530 |
|
} |
| 2531 |
|
|
| 2532 |
|
!!!next-input-character; |
| 2533 |
|
!!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION |
| 2534 |
redo A; |
redo A; |
| 2535 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
|
!!!cp (193); |
|
| 2536 |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
| 2537 |
|
|
| 2538 |
$self->{state} = DATA_STATE; |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
| 2539 |
$self->{s_kwd} = ''; |
!!!cp (193); |
| 2540 |
|
$self->{state} = DATA_STATE; |
| 2541 |
|
$self->{s_kwd} = ''; |
| 2542 |
|
$self->{ct}->{quirks} = 1; |
| 2543 |
|
} else { |
| 2544 |
|
!!!cp (193.1); |
| 2545 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 2546 |
|
} |
| 2547 |
|
|
| 2548 |
## reconsume |
## reconsume |
| 2549 |
|
!!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION |
|
$self->{ct}->{quirks} = 1; |
|
|
!!!emit ($self->{ct}); # DOCTYPE |
|
|
|
|
| 2550 |
redo A; |
redo A; |
| 2551 |
} else { |
} else { |
| 2552 |
!!!cp (194); |
!!!cp (194); |
| 2553 |
$self->{ct}->{pubid} # DOCTYPE |
$self->{ct}->{pubid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION |
|
.= chr $self->{nc}; |
|
| 2554 |
$self->{read_until}->($self->{ct}->{pubid}, q['>], |
$self->{read_until}->($self->{ct}->{pubid}, q['>], |
| 2555 |
length $self->{ct}->{pubid}); |
length $self->{ct}->{pubid}); |
| 2556 |
|
|
| 2566 |
redo A; |
redo A; |
| 2567 |
} elsif ($self->{nc} == 0x0022) { # " |
} elsif ($self->{nc} == 0x0022) { # " |
| 2568 |
!!!cp (196); |
!!!cp (196); |
| 2569 |
$self->{ct}->{sysid} = ''; # DOCTYPE |
$self->{ct}->{sysid} = ''; # DOCTYPE/ENTITY/NOTATION |
| 2570 |
$self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE; |
$self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE; |
| 2571 |
!!!next-input-character; |
!!!next-input-character; |
| 2572 |
redo A; |
redo A; |
| 2573 |
} elsif ($self->{nc} == 0x0027) { # ' |
} elsif ($self->{nc} == 0x0027) { # ' |
| 2574 |
!!!cp (197); |
!!!cp (197); |
| 2575 |
$self->{ct}->{sysid} = ''; # DOCTYPE |
$self->{ct}->{sysid} = ''; # DOCTYPE/ENTITY/NOTATION |
| 2576 |
$self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE; |
$self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE; |
| 2577 |
!!!next-input-character; |
!!!next-input-character; |
| 2578 |
redo A; |
redo A; |
| 2579 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
| 2580 |
if ($self->{is_xml}) { |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
| 2581 |
!!!cp (198.1); |
if ($self->{is_xml}) { |
| 2582 |
!!!parse-error (type => 'no SYSTEM literal'); |
!!!cp (198.1); |
| 2583 |
|
!!!parse-error (type => 'no SYSTEM literal'); |
| 2584 |
|
} else { |
| 2585 |
|
!!!cp (198); |
| 2586 |
|
} |
| 2587 |
|
$self->{state} = DATA_STATE; |
| 2588 |
|
$self->{s_kwd} = ''; |
| 2589 |
} else { |
} else { |
| 2590 |
!!!cp (198); |
if ($self->{ct}->{type} == NOTATION_TOKEN) { |
| 2591 |
|
!!!cp (198.2); |
| 2592 |
|
} else { |
| 2593 |
|
!!!cp (198.3); |
| 2594 |
|
!!!parse-error (type => 'no SYSTEM literal'); |
| 2595 |
|
} |
| 2596 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 2597 |
} |
} |
| 2598 |
$self->{state} = DATA_STATE; |
|
|
$self->{s_kwd} = ''; |
|
| 2599 |
!!!next-input-character; |
!!!next-input-character; |
| 2600 |
|
!!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION |
|
!!!emit ($self->{ct}); # DOCTYPE |
|
|
|
|
| 2601 |
redo A; |
redo A; |
| 2602 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 2603 |
!!!cp (199); |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
| 2604 |
!!!parse-error (type => 'unclosed DOCTYPE'); |
!!!cp (199); |
| 2605 |
|
!!!parse-error (type => 'unclosed DOCTYPE'); |
| 2606 |
$self->{state} = DATA_STATE; |
|
| 2607 |
$self->{s_kwd} = ''; |
$self->{state} = DATA_STATE; |
| 2608 |
|
$self->{s_kwd} = ''; |
| 2609 |
|
$self->{ct}->{quirks} = 1; |
| 2610 |
|
} else { |
| 2611 |
|
!!!parse-error (type => 'unclosed md'); ## TODO: type |
| 2612 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 2613 |
|
} |
| 2614 |
|
|
| 2615 |
## reconsume |
## reconsume |
| 2616 |
|
!!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION |
|
$self->{ct}->{quirks} = 1; |
|
|
!!!emit ($self->{ct}); # DOCTYPE |
|
|
|
|
| 2617 |
redo A; |
redo A; |
| 2618 |
} elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [ |
} elsif ($self->{is_xml} and |
| 2619 |
|
$self->{ct}->{type} == DOCTYPE_TOKEN and |
| 2620 |
|
$self->{nc} == 0x005B) { # [ |
| 2621 |
!!!cp (200.1); |
!!!cp (200.1); |
| 2622 |
!!!parse-error (type => 'no SYSTEM literal'); |
!!!parse-error (type => 'no SYSTEM literal'); |
| 2623 |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 2627 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
| 2628 |
redo A; |
redo A; |
| 2629 |
} else { |
} else { |
|
!!!cp (200); |
|
| 2630 |
!!!parse-error (type => 'string after PUBLIC literal'); |
!!!parse-error (type => 'string after PUBLIC literal'); |
|
$self->{ct}->{quirks} = 1; |
|
| 2631 |
|
|
| 2632 |
$self->{state} = BOGUS_DOCTYPE_STATE; |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
| 2633 |
|
!!!cp (200); |
| 2634 |
|
$self->{ct}->{quirks} = 1; |
| 2635 |
|
$self->{state} = BOGUS_DOCTYPE_STATE; |
| 2636 |
|
} else { |
| 2637 |
|
!!!cp (200.2); |
| 2638 |
|
$self->{state} = BOGUS_MD_STATE; |
| 2639 |
|
} |
| 2640 |
|
|
| 2641 |
!!!next-input-character; |
!!!next-input-character; |
| 2642 |
redo A; |
redo A; |
| 2643 |
} |
} |
| 2660 |
!!!next-input-character; |
!!!next-input-character; |
| 2661 |
redo A; |
redo A; |
| 2662 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
|
!!!cp (204); |
|
| 2663 |
!!!parse-error (type => 'no SYSTEM literal'); |
!!!parse-error (type => 'no SYSTEM literal'); |
|
$self->{state} = DATA_STATE; |
|
|
$self->{s_kwd} = ''; |
|
| 2664 |
!!!next-input-character; |
!!!next-input-character; |
| 2665 |
|
|
| 2666 |
$self->{ct}->{quirks} = 1; |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
| 2667 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!cp (204); |
| 2668 |
|
$self->{state} = DATA_STATE; |
| 2669 |
|
$self->{s_kwd} = ''; |
| 2670 |
|
$self->{ct}->{quirks} = 1; |
| 2671 |
|
} else { |
| 2672 |
|
!!!cp (204.1); |
| 2673 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 2674 |
|
} |
| 2675 |
|
|
| 2676 |
|
!!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION |
| 2677 |
redo A; |
redo A; |
| 2678 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 2679 |
!!!cp (205); |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
| 2680 |
!!!parse-error (type => 'unclosed DOCTYPE'); |
!!!cp (205); |
| 2681 |
|
!!!parse-error (type => 'unclosed DOCTYPE'); |
| 2682 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2683 |
$self->{s_kwd} = ''; |
$self->{s_kwd} = ''; |
| 2684 |
|
$self->{ct}->{quirks} = 1; |
| 2685 |
|
} else { |
| 2686 |
|
!!!cp (205.1); |
| 2687 |
|
!!!parse-error (type => 'unclosed md'); ## TODO: type |
| 2688 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 2689 |
|
} |
| 2690 |
|
|
| 2691 |
## reconsume |
## reconsume |
| 2692 |
|
!!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION |
|
$self->{ct}->{quirks} = 1; |
|
|
!!!emit ($self->{ct}); # DOCTYPE |
|
|
|
|
| 2693 |
redo A; |
redo A; |
| 2694 |
} elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [ |
} elsif ($self->{is_xml} and |
| 2695 |
|
$self->{ct}->{type} == DOCTYPE_TOKEN and |
| 2696 |
|
$self->{nc} == 0x005B) { # [ |
| 2697 |
!!!cp (206.1); |
!!!cp (206.1); |
| 2698 |
!!!parse-error (type => 'no SYSTEM literal'); |
!!!parse-error (type => 'no SYSTEM literal'); |
| 2699 |
|
|
| 2704 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
| 2705 |
redo A; |
redo A; |
| 2706 |
} else { |
} else { |
|
!!!cp (206); |
|
| 2707 |
!!!parse-error (type => 'string after SYSTEM'); |
!!!parse-error (type => 'string after SYSTEM'); |
|
$self->{ct}->{quirks} = 1; |
|
| 2708 |
|
|
| 2709 |
$self->{state} = BOGUS_DOCTYPE_STATE; |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
| 2710 |
|
!!!cp (206); |
| 2711 |
|
$self->{ct}->{quirks} = 1; |
| 2712 |
|
$self->{state} = BOGUS_DOCTYPE_STATE; |
| 2713 |
|
} else { |
| 2714 |
|
!!!cp (206.2); |
| 2715 |
|
$self->{state} = BOGUS_MD_STATE; |
| 2716 |
|
} |
| 2717 |
|
|
| 2718 |
!!!next-input-character; |
!!!next-input-character; |
| 2719 |
redo A; |
redo A; |
| 2720 |
} |
} |
| 2725 |
!!!next-input-character; |
!!!next-input-character; |
| 2726 |
redo A; |
redo A; |
| 2727 |
} elsif (not $self->{is_xml} and $self->{nc} == 0x003E) { # > |
} elsif (not $self->{is_xml} and $self->{nc} == 0x003E) { # > |
|
!!!cp (208); |
|
| 2728 |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
| 2729 |
|
|
| 2730 |
$self->{state} = DATA_STATE; |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
| 2731 |
$self->{s_kwd} = ''; |
!!!cp (208); |
| 2732 |
|
$self->{state} = DATA_STATE; |
| 2733 |
|
$self->{s_kwd} = ''; |
| 2734 |
|
$self->{ct}->{quirks} = 1; |
| 2735 |
|
} else { |
| 2736 |
|
!!!cp (208.1); |
| 2737 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 2738 |
|
} |
| 2739 |
|
|
| 2740 |
!!!next-input-character; |
!!!next-input-character; |
| 2741 |
|
!!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION |
|
$self->{ct}->{quirks} = 1; |
|
|
!!!emit ($self->{ct}); # DOCTYPE |
|
|
|
|
| 2742 |
redo A; |
redo A; |
| 2743 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
|
!!!cp (209); |
|
| 2744 |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
| 2745 |
|
|
| 2746 |
$self->{state} = DATA_STATE; |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
| 2747 |
$self->{s_kwd} = ''; |
!!!cp (209); |
| 2748 |
|
$self->{state} = DATA_STATE; |
| 2749 |
|
$self->{s_kwd} = ''; |
| 2750 |
|
$self->{ct}->{quirks} = 1; |
| 2751 |
|
} else { |
| 2752 |
|
!!!cp (209.1); |
| 2753 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 2754 |
|
} |
| 2755 |
|
|
| 2756 |
## reconsume |
## reconsume |
| 2757 |
|
!!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION |
|
$self->{ct}->{quirks} = 1; |
|
|
!!!emit ($self->{ct}); # DOCTYPE |
|
|
|
|
| 2758 |
redo A; |
redo A; |
| 2759 |
} else { |
} else { |
| 2760 |
!!!cp (210); |
!!!cp (210); |
| 2761 |
$self->{ct}->{sysid} # DOCTYPE |
$self->{ct}->{sysid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION |
|
.= chr $self->{nc}; |
|
| 2762 |
$self->{read_until}->($self->{ct}->{sysid}, q[">], |
$self->{read_until}->($self->{ct}->{sysid}, q[">], |
| 2763 |
length $self->{ct}->{sysid}); |
length $self->{ct}->{sysid}); |
| 2764 |
|
|
| 2785 |
|
|
| 2786 |
redo A; |
redo A; |
| 2787 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
|
!!!cp (213); |
|
| 2788 |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
| 2789 |
|
|
| 2790 |
$self->{state} = DATA_STATE; |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
| 2791 |
$self->{s_kwd} = ''; |
!!!cp (213); |
| 2792 |
## reconsume |
$self->{state} = DATA_STATE; |
| 2793 |
|
$self->{s_kwd} = ''; |
| 2794 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 2795 |
!!!emit ($self->{ct}); # DOCTYPE |
} else { |
| 2796 |
|
!!!cp (213.1); |
| 2797 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 2798 |
|
} |
| 2799 |
|
|
| 2800 |
|
## reconsume |
| 2801 |
|
!!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION |
| 2802 |
redo A; |
redo A; |
| 2803 |
} else { |
} else { |
| 2804 |
!!!cp (214); |
!!!cp (214); |
| 2805 |
$self->{ct}->{sysid} # DOCTYPE |
$self->{ct}->{sysid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION |
|
.= chr $self->{nc}; |
|
| 2806 |
$self->{read_until}->($self->{ct}->{sysid}, q['>], |
$self->{read_until}->($self->{ct}->{sysid}, q['>], |
| 2807 |
length $self->{ct}->{sysid}); |
length $self->{ct}->{sysid}); |
| 2808 |
|
|
| 2817 |
!!!next-input-character; |
!!!next-input-character; |
| 2818 |
redo A; |
redo A; |
| 2819 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
| 2820 |
!!!cp (216); |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
| 2821 |
$self->{state} = DATA_STATE; |
!!!cp (216); |
| 2822 |
$self->{s_kwd} = ''; |
$self->{state} = DATA_STATE; |
| 2823 |
!!!next-input-character; |
$self->{s_kwd} = ''; |
| 2824 |
|
} else { |
| 2825 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!cp (216.1); |
| 2826 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 2827 |
|
} |
| 2828 |
|
|
| 2829 |
|
!!!next-input-character; |
| 2830 |
|
!!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION |
| 2831 |
redo A; |
redo A; |
| 2832 |
|
## TODO: "NDATA" |
| 2833 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 2834 |
!!!cp (217); |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
| 2835 |
!!!parse-error (type => 'unclosed DOCTYPE'); |
!!!cp (217); |
| 2836 |
$self->{state} = DATA_STATE; |
!!!parse-error (type => 'unclosed DOCTYPE'); |
| 2837 |
$self->{s_kwd} = ''; |
$self->{state} = DATA_STATE; |
| 2838 |
## reconsume |
$self->{s_kwd} = ''; |
| 2839 |
|
$self->{ct}->{quirks} = 1; |
| 2840 |
$self->{ct}->{quirks} = 1; |
} else { |
| 2841 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!cp (217.1); |
| 2842 |
|
!!!parse-error (type => 'unclosed md'); ## TODO: type |
| 2843 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 2844 |
|
} |
| 2845 |
|
|
| 2846 |
|
## reconsume |
| 2847 |
|
!!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION |
| 2848 |
redo A; |
redo A; |
| 2849 |
} elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [ |
} elsif ($self->{is_xml} and |
| 2850 |
|
$self->{ct}->{type} == DOCTYPE_TOKEN and |
| 2851 |
|
$self->{nc} == 0x005B) { # [ |
| 2852 |
!!!cp (218.1); |
!!!cp (218.1); |
| 2853 |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 2854 |
$self->{ct}->{has_internal_subset} = 1; # DOCTYPE |
$self->{ct}->{has_internal_subset} = 1; # DOCTYPE |
| 2857 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
| 2858 |
redo A; |
redo A; |
| 2859 |
} else { |
} else { |
|
!!!cp (218); |
|
| 2860 |
!!!parse-error (type => 'string after SYSTEM literal'); |
!!!parse-error (type => 'string after SYSTEM literal'); |
|
#$self->{ct}->{quirks} = 1; |
|
| 2861 |
|
|
| 2862 |
$self->{state} = BOGUS_DOCTYPE_STATE; |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
| 2863 |
|
!!!cp (218); |
| 2864 |
|
#$self->{ct}->{quirks} = 1; |
| 2865 |
|
$self->{state} = BOGUS_DOCTYPE_STATE; |
| 2866 |
|
} else { |
| 2867 |
|
!!!cp (218.2); |
| 2868 |
|
$self->{state} = BOGUS_MD_STATE; |
| 2869 |
|
} |
| 2870 |
|
|
| 2871 |
!!!next-input-character; |
!!!next-input-character; |
| 2872 |
redo A; |
redo A; |
| 2873 |
} |
} |
| 3756 |
} elsif ($self->{kwd} eq 'ATTLIS' and |
} elsif ($self->{kwd} eq 'ATTLIS' and |
| 3757 |
$self->{nc} == 0x0054) { # T |
$self->{nc} == 0x0054) { # T |
| 3758 |
$self->{ct} = {type => ATTLIST_TOKEN, name => '', |
$self->{ct} = {type => ATTLIST_TOKEN, name => '', |
| 3759 |
|
attrdefs => [], |
| 3760 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 3761 |
column => $self->{column_prev} - 6}; |
column => $self->{column_prev} - 6}; |
| 3762 |
$self->{state} = DOCTYPE_MD_STATE; |
$self->{state} = DOCTYPE_MD_STATE; |
| 3899 |
## XML5: "DOCTYPE ENTITY name state" and "DOCTYPE ATTLIST name state". |
## XML5: "DOCTYPE ENTITY name state" and "DOCTYPE ATTLIST name state". |
| 3900 |
|
|
| 3901 |
if ($is_space->{$self->{nc}}) { |
if ($is_space->{$self->{nc}}) { |
| 3902 |
## TODO: |
if ($self->{ct}->{type} == ATTLIST_TOKEN) { |
| 3903 |
$self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE; |
$self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE; |
| 3904 |
|
} elsif ($self->{ct}->{type} == ELEMENT_TOKEN) { |
| 3905 |
|
## TODO: ... |
| 3906 |
|
$self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE; |
| 3907 |
|
} else { # ENTITY/NOTATION |
| 3908 |
|
$self->{state} = AFTER_DOCTYPE_NAME_STATE; |
| 3909 |
|
} |
| 3910 |
!!!next-input-character; |
!!!next-input-character; |
| 3911 |
redo A; |
redo A; |
| 3912 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
| 3913 |
if ($self->{ct}->{type} == ATTLIST_TOKEN) { |
if ($self->{ct}->{type} == ATTLIST_TOKEN) { |
| 3914 |
# |
# |
| 3915 |
} else { |
} else { |
| 3916 |
!!!parse-error (type => 'no md body'); ## TODO: type |
!!!parse-error (type => 'no md def'); ## TODO: type |
| 3917 |
} |
} |
| 3918 |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 3919 |
!!!next-input-character; |
!!!next-input-character; |
| 3947 |
## XML5: No parse error. |
## XML5: No parse error. |
| 3948 |
!!!parse-error (type => 'unclosed md'); ## TODO: type |
!!!parse-error (type => 'unclosed md'); ## TODO: type |
| 3949 |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state". |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state". |
| 3950 |
|
!!!emit ($self->{ct}); |
| 3951 |
redo A; |
redo A; |
| 3952 |
} else { |
} else { |
| 3953 |
## XML5: Not defined yet. |
## XML5: Not defined yet. |
| 3954 |
|
$self->{ca} = {name => chr ($self->{nc}), # attrdef |
| 3955 |
## TODO: ... |
tokens => [], |
| 3956 |
|
line => $self->{line}, column => $self->{column}}; |
| 3957 |
$self->{state} = BOGUS_COMMENT_STATE; |
$self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE; |
| 3958 |
$self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded |
!!!next-input-character; |
| 3959 |
|
redo A; |
| 3960 |
|
} |
| 3961 |
|
} elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE) { |
| 3962 |
|
if ($is_space->{$self->{nc}}) { |
| 3963 |
|
$self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE; |
| 3964 |
|
!!!next-input-character; |
| 3965 |
|
redo A; |
| 3966 |
|
} elsif ($self->{nc} == 0x003E) { # > |
| 3967 |
|
## XML5: Same as "anything else". |
| 3968 |
|
!!!parse-error (type => 'no attr type'); ## TODO: type |
| 3969 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 3970 |
|
!!!next-input-character; |
| 3971 |
|
!!!emit ($self->{ct}); # ATTLIST |
| 3972 |
|
redo A; |
| 3973 |
|
} elsif ($self->{nc} == 0x0028) { # ( |
| 3974 |
|
## XML5: Same as "anything else". |
| 3975 |
|
!!!parse-error (type => 'no space before paren'); ## TODO: type |
| 3976 |
|
$self->{state} = BEFORE_ALLOWED_TOKEN_STATE; |
| 3977 |
|
!!!next-input-character; |
| 3978 |
|
redo A; |
| 3979 |
|
} elsif ($self->{nc} == -1) { |
| 3980 |
|
## XML5: No parse error. |
| 3981 |
|
!!!parse-error (type => 'unclosed md'); ## TODO: type |
| 3982 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state". |
| 3983 |
|
!!!next-input-character; |
| 3984 |
|
!!!emit ($self->{ct}); # ATTLIST |
| 3985 |
|
redo A; |
| 3986 |
|
} else { |
| 3987 |
|
## XML5: Not defined yet. |
| 3988 |
|
$self->{ca}->{name} .= chr $self->{nc}; |
| 3989 |
|
## Stay in the state. |
| 3990 |
|
!!!next-input-character; |
| 3991 |
|
redo A; |
| 3992 |
|
} |
| 3993 |
|
} elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE) { |
| 3994 |
|
if ($is_space->{$self->{nc}}) { |
| 3995 |
|
## Stay in the state. |
| 3996 |
|
!!!next-input-character; |
| 3997 |
|
redo A; |
| 3998 |
|
} elsif ($self->{nc} == 0x003E) { # > |
| 3999 |
|
## XML5: Same as "anything else". |
| 4000 |
|
!!!parse-error (type => 'no attr type'); ## TODO: type |
| 4001 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 4002 |
|
!!!next-input-character; |
| 4003 |
|
!!!emit ($self->{ct}); # ATTLIST |
| 4004 |
|
redo A; |
| 4005 |
|
} elsif ($self->{nc} == 0x0028) { # ( |
| 4006 |
|
## XML5: Same as "anything else". |
| 4007 |
|
$self->{state} = BEFORE_ALLOWED_TOKEN_STATE; |
| 4008 |
|
!!!next-input-character; |
| 4009 |
|
redo A; |
| 4010 |
|
} elsif ($self->{nc} == -1) { |
| 4011 |
|
## XML5: No parse error. |
| 4012 |
|
!!!parse-error (type => 'unclosed md'); ## TODO: type |
| 4013 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state". |
| 4014 |
|
!!!next-input-character; |
| 4015 |
|
!!!emit ($self->{ct}); |
| 4016 |
|
redo A; |
| 4017 |
|
} else { |
| 4018 |
|
## XML5: Not defined yet. |
| 4019 |
|
$self->{ca}->{type} = chr $self->{nc}; |
| 4020 |
|
$self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE; |
| 4021 |
|
!!!next-input-character; |
| 4022 |
|
redo A; |
| 4023 |
|
} |
| 4024 |
|
} elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE) { |
| 4025 |
|
if ($is_space->{$self->{nc}}) { |
| 4026 |
|
$self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE; |
| 4027 |
|
!!!next-input-character; |
| 4028 |
|
redo A; |
| 4029 |
|
} elsif ($self->{nc} == 0x0023) { # # |
| 4030 |
|
## XML5: Same as "anything else". |
| 4031 |
|
!!!parse-error (type => 'no space before default value'); ## TODO: type |
| 4032 |
|
$self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE; |
| 4033 |
|
!!!next-input-character; |
| 4034 |
|
redo A; |
| 4035 |
|
} elsif ($self->{nc} == 0x0022) { # " |
| 4036 |
|
## XML5: Same as "anything else". |
| 4037 |
|
!!!parse-error (type => 'no space before default value'); ## TODO: type |
| 4038 |
|
$self->{ca}->{value} = ''; |
| 4039 |
|
$self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE; |
| 4040 |
|
!!!next-input-character; |
| 4041 |
|
redo A; |
| 4042 |
|
} elsif ($self->{nc} == 0x0027) { # ' |
| 4043 |
|
## XML5: Same as "anything else". |
| 4044 |
|
!!!parse-error (type => 'no space before default value'); ## TODO: type |
| 4045 |
|
$self->{ca}->{value} = ''; |
| 4046 |
|
$self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE; |
| 4047 |
|
!!!next-input-character; |
| 4048 |
|
redo A; |
| 4049 |
|
} elsif ($self->{nc} == 0x003E) { # > |
| 4050 |
|
## XML5: Same as "anything else". |
| 4051 |
|
!!!parse-error (type => 'no attr default'); ## TODO: type |
| 4052 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 4053 |
|
!!!next-input-character; |
| 4054 |
|
!!!emit ($self->{ct}); # ATTLIST |
| 4055 |
|
redo A; |
| 4056 |
|
} elsif ($self->{nc} == 0x0028) { # ( |
| 4057 |
|
## XML5: Same as "anything else". |
| 4058 |
|
!!!parse-error (type => 'no space before paren'); ## TODO: type |
| 4059 |
|
$self->{state} = BEFORE_ALLOWED_TOKEN_STATE; |
| 4060 |
|
!!!next-input-character; |
| 4061 |
|
redo A; |
| 4062 |
|
} elsif ($self->{nc} == -1) { |
| 4063 |
|
## XML5: No parse error. |
| 4064 |
|
!!!parse-error (type => 'unclosed md'); ## TODO: type |
| 4065 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state". |
| 4066 |
|
!!!next-input-character; |
| 4067 |
|
!!!emit ($self->{ct}); |
| 4068 |
|
redo A; |
| 4069 |
|
} else { |
| 4070 |
|
## XML5: Not defined yet. |
| 4071 |
|
$self->{ca}->{type} .= chr $self->{nc}; |
| 4072 |
|
## Stay in the state. |
| 4073 |
|
!!!next-input-character; |
| 4074 |
|
redo A; |
| 4075 |
|
} |
| 4076 |
|
} elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE) { |
| 4077 |
|
if ($is_space->{$self->{nc}}) { |
| 4078 |
|
## Stay in the state. |
| 4079 |
|
!!!next-input-character; |
| 4080 |
|
redo A; |
| 4081 |
|
} elsif ($self->{nc} == 0x0028) { # ( |
| 4082 |
|
## XML5: Same as "anything else". |
| 4083 |
|
$self->{state} = BEFORE_ALLOWED_TOKEN_STATE; |
| 4084 |
|
!!!next-input-character; |
| 4085 |
|
redo A; |
| 4086 |
|
} elsif ($self->{nc} == 0x0023) { # # |
| 4087 |
|
$self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE; |
| 4088 |
|
!!!next-input-character; |
| 4089 |
|
redo A; |
| 4090 |
|
} elsif ($self->{nc} == 0x0022) { # " |
| 4091 |
|
## XML5: Same as "anything else". |
| 4092 |
|
$self->{ca}->{value} = ''; |
| 4093 |
|
$self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE; |
| 4094 |
|
!!!next-input-character; |
| 4095 |
|
redo A; |
| 4096 |
|
} elsif ($self->{nc} == 0x0027) { # ' |
| 4097 |
|
## XML5: Same as "anything else". |
| 4098 |
|
$self->{ca}->{value} = ''; |
| 4099 |
|
$self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE; |
| 4100 |
|
!!!next-input-character; |
| 4101 |
|
redo A; |
| 4102 |
|
} elsif ($self->{nc} == 0x003E) { # > |
| 4103 |
|
## XML5: Same as "anything else". |
| 4104 |
|
!!!parse-error (type => 'no attr default'); ## TODO: type |
| 4105 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 4106 |
|
!!!next-input-character; |
| 4107 |
|
!!!emit ($self->{ct}); # ATTLIST |
| 4108 |
|
redo A; |
| 4109 |
|
} elsif ($self->{nc} == -1) { |
| 4110 |
|
## XML5: No parse error. |
| 4111 |
|
!!!parse-error (type => 'unclosed md'); ## TODO: type |
| 4112 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state". |
| 4113 |
|
!!!next-input-character; |
| 4114 |
|
!!!emit ($self->{ct}); |
| 4115 |
|
redo A; |
| 4116 |
|
} else { |
| 4117 |
|
## XML5: Switch to the "DOCTYPE bogus comment state". |
| 4118 |
|
!!!parse-error (type => 'unquoted attr value'); ## TODO: type |
| 4119 |
|
$self->{ca}->{value} = ''; |
| 4120 |
|
$self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE; |
| 4121 |
|
## Reconsume. |
| 4122 |
|
redo A; |
| 4123 |
|
} |
| 4124 |
|
} elsif ($self->{state} == BEFORE_ALLOWED_TOKEN_STATE) { |
| 4125 |
|
if ($is_space->{$self->{nc}}) { |
| 4126 |
|
## Stay in the state. |
| 4127 |
|
!!!next-input-character; |
| 4128 |
|
redo A; |
| 4129 |
|
} elsif ($self->{nc} == 0x007C) { # | |
| 4130 |
|
!!!parse-error (type => 'empty allowed token'); ## TODO: type |
| 4131 |
|
## Stay in the state. |
| 4132 |
|
!!!next-input-character; |
| 4133 |
|
redo A; |
| 4134 |
|
} elsif ($self->{nc} == 0x0029) { # ) |
| 4135 |
|
!!!parse-error (type => 'empty allowed token'); ## TODO: type |
| 4136 |
|
$self->{state} = AFTER_ALLOWED_TOKENS_STATE; |
| 4137 |
|
!!!next-input-character; |
| 4138 |
|
redo A; |
| 4139 |
|
} elsif ($self->{nc} == 0x003E) { # > |
| 4140 |
|
!!!parse-error (type => 'unclosed allowed tokens'); ## TODO: type |
| 4141 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 4142 |
|
!!!next-input-character; |
| 4143 |
|
!!!emit ($self->{ct}); # ATTLIST |
| 4144 |
|
redo A; |
| 4145 |
|
} elsif ($self->{nc} == -1) { |
| 4146 |
|
## XML5: No parse error. |
| 4147 |
|
!!!parse-error (type => 'unclosed md'); ## TODO: type |
| 4148 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state". |
| 4149 |
|
!!!next-input-character; |
| 4150 |
|
!!!emit ($self->{ct}); |
| 4151 |
|
redo A; |
| 4152 |
|
} else { |
| 4153 |
|
push @{$self->{ca}->{tokens}}, chr $self->{nc}; |
| 4154 |
|
$self->{state} = ALLOWED_TOKEN_STATE; |
| 4155 |
|
!!!next-input-character; |
| 4156 |
|
redo A; |
| 4157 |
|
} |
| 4158 |
|
} elsif ($self->{state} == ALLOWED_TOKEN_STATE) { |
| 4159 |
|
if ($is_space->{$self->{nc}}) { |
| 4160 |
|
$self->{state} = AFTER_ALLOWED_TOKEN_STATE; |
| 4161 |
|
!!!next-input-character; |
| 4162 |
|
redo A; |
| 4163 |
|
} elsif ($self->{nc} == 0x007C) { # | |
| 4164 |
|
$self->{state} = BEFORE_ALLOWED_TOKEN_STATE; |
| 4165 |
|
!!!next-input-character; |
| 4166 |
|
redo A; |
| 4167 |
|
} elsif ($self->{nc} == 0x0029) { # ) |
| 4168 |
|
$self->{state} = AFTER_ALLOWED_TOKENS_STATE; |
| 4169 |
|
!!!next-input-character; |
| 4170 |
|
redo A; |
| 4171 |
|
} elsif ($self->{nc} == 0x003E) { # > |
| 4172 |
|
!!!parse-error (type => 'unclosed allowed tokens'); ## TODO: type |
| 4173 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 4174 |
|
!!!next-input-character; |
| 4175 |
|
!!!emit ($self->{ct}); # ATTLIST |
| 4176 |
|
redo A; |
| 4177 |
|
} elsif ($self->{nc} == -1) { |
| 4178 |
|
## XML5: No parse error. |
| 4179 |
|
!!!parse-error (type => 'unclosed md'); ## TODO: type |
| 4180 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state". |
| 4181 |
|
!!!next-input-character; |
| 4182 |
|
!!!emit ($self->{ct}); |
| 4183 |
|
redo A; |
| 4184 |
|
} else { |
| 4185 |
|
$self->{ca}->{tokens}->[-1] .= chr $self->{nc}; |
| 4186 |
|
## Stay in the state. |
| 4187 |
|
!!!next-input-character; |
| 4188 |
|
redo A; |
| 4189 |
|
} |
| 4190 |
|
} elsif ($self->{state} == AFTER_ALLOWED_TOKEN_STATE) { |
| 4191 |
|
if ($is_space->{$self->{nc}}) { |
| 4192 |
|
## Stay in the state. |
| 4193 |
|
!!!next-input-character; |
| 4194 |
|
redo A; |
| 4195 |
|
} elsif ($self->{nc} == 0x007C) { # | |
| 4196 |
|
$self->{state} = BEFORE_ALLOWED_TOKEN_STATE; |
| 4197 |
|
!!!next-input-character; |
| 4198 |
|
redo A; |
| 4199 |
|
} elsif ($self->{nc} == 0x0029) { # ) |
| 4200 |
|
$self->{state} = AFTER_ALLOWED_TOKENS_STATE; |
| 4201 |
|
!!!next-input-character; |
| 4202 |
|
redo A; |
| 4203 |
|
} elsif ($self->{nc} == 0x003E) { # > |
| 4204 |
|
!!!parse-error (type => 'unclosed allowed tokens'); ## TODO: type |
| 4205 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 4206 |
|
!!!next-input-character; |
| 4207 |
|
!!!emit ($self->{ct}); # ATTLIST |
| 4208 |
|
redo A; |
| 4209 |
|
} elsif ($self->{nc} == -1) { |
| 4210 |
|
## XML5: No parse error. |
| 4211 |
|
!!!parse-error (type => 'unclosed md'); ## TODO: type |
| 4212 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state". |
| 4213 |
|
!!!next-input-character; |
| 4214 |
|
!!!emit ($self->{ct}); |
| 4215 |
|
redo A; |
| 4216 |
|
} else { |
| 4217 |
|
!!!parse-error (type => 'space in allowed token', ## TODO: type |
| 4218 |
|
line => $self->{line_prev}, |
| 4219 |
|
column => $self->{column_prev}); |
| 4220 |
|
$self->{ca}->{tokens}->[-1] .= ' ' . chr $self->{nc}; |
| 4221 |
|
$self->{state} = ALLOWED_TOKEN_STATE; |
| 4222 |
|
!!!next-input-character; |
| 4223 |
|
redo A; |
| 4224 |
|
} |
| 4225 |
|
} elsif ($self->{state} == AFTER_ALLOWED_TOKENS_STATE) { |
| 4226 |
|
if ($is_space->{$self->{nc}}) { |
| 4227 |
|
$self->{state} = BEFORE_ATTR_DEFAULT_STATE; |
| 4228 |
|
!!!next-input-character; |
| 4229 |
|
redo A; |
| 4230 |
|
} elsif ($self->{nc} == 0x0023) { # # |
| 4231 |
|
!!!parse-error (type => 'no space before default value'); ## TODO: type |
| 4232 |
|
$self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE; |
| 4233 |
|
!!!next-input-character; |
| 4234 |
|
redo A; |
| 4235 |
|
} elsif ($self->{nc} == 0x0022) { # " |
| 4236 |
|
!!!parse-error (type => 'no space before default value'); ## TODO: type |
| 4237 |
|
$self->{ca}->{value} = ''; |
| 4238 |
|
$self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE; |
| 4239 |
|
!!!next-input-character; |
| 4240 |
|
redo A; |
| 4241 |
|
} elsif ($self->{nc} == 0x0027) { # ' |
| 4242 |
|
!!!parse-error (type => 'no space before default value'); ## TODO: type |
| 4243 |
|
$self->{ca}->{value} = ''; |
| 4244 |
|
$self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE; |
| 4245 |
|
!!!next-input-character; |
| 4246 |
|
redo A; |
| 4247 |
|
} elsif ($self->{nc} == 0x003E) { # > |
| 4248 |
|
!!!parse-error (type => 'no attr default'); ## TODO: type |
| 4249 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 4250 |
|
!!!next-input-character; |
| 4251 |
|
!!!emit ($self->{ct}); # ATTLIST |
| 4252 |
|
redo A; |
| 4253 |
|
} elsif ($self->{nc} == -1) { |
| 4254 |
|
!!!parse-error (type => 'unclosed md'); ## TODO: type |
| 4255 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 4256 |
|
!!!next-input-character; |
| 4257 |
|
!!!emit ($self->{ct}); |
| 4258 |
|
redo A; |
| 4259 |
|
} else { |
| 4260 |
|
!!!parse-error (type => 'unquoted attr value'); ## TODO: type |
| 4261 |
|
$self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE; |
| 4262 |
|
## Reconsume. |
| 4263 |
|
redo A; |
| 4264 |
|
} |
| 4265 |
|
} elsif ($self->{state} == BEFORE_ATTR_DEFAULT_STATE) { |
| 4266 |
|
if ($is_space->{$self->{nc}}) { |
| 4267 |
|
## Stay in the state. |
| 4268 |
|
!!!next-input-character; |
| 4269 |
|
redo A; |
| 4270 |
|
} elsif ($self->{nc} == 0x0023) { # # |
| 4271 |
|
$self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE; |
| 4272 |
|
!!!next-input-character; |
| 4273 |
|
redo A; |
| 4274 |
|
} elsif ($self->{nc} == 0x0022) { # " |
| 4275 |
|
$self->{ca}->{value} = ''; |
| 4276 |
|
$self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE; |
| 4277 |
|
!!!next-input-character; |
| 4278 |
|
redo A; |
| 4279 |
|
} elsif ($self->{nc} == 0x0027) { # ' |
| 4280 |
|
$self->{ca}->{value} = ''; |
| 4281 |
|
$self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE; |
| 4282 |
|
!!!next-input-character; |
| 4283 |
|
redo A; |
| 4284 |
|
} elsif ($self->{nc} == 0x003E) { # > |
| 4285 |
|
!!!parse-error (type => 'no attr default'); ## TODO: type |
| 4286 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 4287 |
|
!!!next-input-character; |
| 4288 |
|
!!!emit ($self->{ct}); # ATTLIST |
| 4289 |
|
redo A; |
| 4290 |
|
} elsif ($self->{nc} == -1) { |
| 4291 |
|
!!!parse-error (type => 'unclosed md'); ## TODO: type |
| 4292 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 4293 |
|
!!!next-input-character; |
| 4294 |
|
!!!emit ($self->{ct}); |
| 4295 |
|
redo A; |
| 4296 |
|
} else { |
| 4297 |
|
!!!parse-error (type => 'unquoted attr value'); ## TODO: type |
| 4298 |
|
$self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE; |
| 4299 |
|
## Reconsume. |
| 4300 |
|
redo A; |
| 4301 |
|
} |
| 4302 |
|
} elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE) { |
| 4303 |
|
if ($is_space->{$self->{nc}}) { |
| 4304 |
|
## XML5: No parse error. |
| 4305 |
|
!!!parse-error (type => 'no default type'); ## TODO: type |
| 4306 |
|
$self->{state} = BOGUS_MD_STATE; |
| 4307 |
|
## Reconsume. |
| 4308 |
|
redo A; |
| 4309 |
|
} elsif ($self->{nc} == 0x0022) { # " |
| 4310 |
|
## XML5: Same as "anything else". |
| 4311 |
|
$self->{ca}->{value} = ''; |
| 4312 |
|
$self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE; |
| 4313 |
|
!!!next-input-character; |
| 4314 |
|
redo A; |
| 4315 |
|
} elsif ($self->{nc} == 0x0027) { # ' |
| 4316 |
|
## XML5: Same as "anything else". |
| 4317 |
|
$self->{ca}->{value} = ''; |
| 4318 |
|
$self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE; |
| 4319 |
|
!!!next-input-character; |
| 4320 |
|
redo A; |
| 4321 |
|
} elsif ($self->{nc} == 0x003E) { # > |
| 4322 |
|
## XML5: Same as "anything else". |
| 4323 |
|
!!!parse-error (type => 'no attr default'); ## TODO: type |
| 4324 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 4325 |
|
!!!next-input-character; |
| 4326 |
|
!!!emit ($self->{ct}); # ATTLIST |
| 4327 |
|
redo A; |
| 4328 |
|
} elsif ($self->{nc} == -1) { |
| 4329 |
|
## XML5: No parse error. |
| 4330 |
|
!!!parse-error (type => 'unclosed md'); ## TODO: type |
| 4331 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state". |
| 4332 |
|
!!!next-input-character; |
| 4333 |
|
!!!emit ($self->{ct}); |
| 4334 |
|
redo A; |
| 4335 |
|
} else { |
| 4336 |
|
$self->{ca}->{default} = chr $self->{nc}; |
| 4337 |
|
$self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE; |
| 4338 |
|
!!!next-input-character; |
| 4339 |
|
redo A; |
| 4340 |
|
} |
| 4341 |
|
} elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE) { |
| 4342 |
|
if ($is_space->{$self->{nc}}) { |
| 4343 |
|
$self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE; |
| 4344 |
|
!!!next-input-character; |
| 4345 |
|
redo A; |
| 4346 |
|
} elsif ($self->{nc} == 0x0022) { # " |
| 4347 |
|
## XML5: Same as "anything else". |
| 4348 |
|
!!!parse-error (type => 'no space before default value'); ## TODO: type |
| 4349 |
|
$self->{ca}->{value} = ''; |
| 4350 |
|
$self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE; |
| 4351 |
|
!!!next-input-character; |
| 4352 |
|
redo A; |
| 4353 |
|
} elsif ($self->{nc} == 0x0027) { # ' |
| 4354 |
|
## XML5: Same as "anything else". |
| 4355 |
|
!!!parse-error (type => 'no space before default value'); ## TODO: type |
| 4356 |
|
$self->{ca}->{value} = ''; |
| 4357 |
|
$self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE; |
| 4358 |
|
!!!next-input-character; |
| 4359 |
|
redo A; |
| 4360 |
|
} elsif ($self->{nc} == 0x003E) { # > |
| 4361 |
|
## XML5: Same as "anything else". |
| 4362 |
|
push @{$self->{ct}->{attrdefs}}, $self->{ca}; |
| 4363 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 4364 |
|
!!!next-input-character; |
| 4365 |
|
!!!emit ($self->{ct}); # ATTLIST |
| 4366 |
|
redo A; |
| 4367 |
|
} elsif ($self->{nc} == -1) { |
| 4368 |
|
## XML5: No parse error. |
| 4369 |
|
!!!parse-error (type => 'unclosed md'); ## TODO: type |
| 4370 |
|
push @{$self->{ct}->{attrdefs}}, $self->{ca}; |
| 4371 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state". |
| 4372 |
|
!!!next-input-character; |
| 4373 |
|
!!!emit ($self->{ct}); |
| 4374 |
|
redo A; |
| 4375 |
|
} else { |
| 4376 |
|
$self->{ca}->{default} .= chr $self->{nc}; |
| 4377 |
|
## Stay in the state. |
| 4378 |
|
!!!next-input-character; |
| 4379 |
|
redo A; |
| 4380 |
|
} |
| 4381 |
|
} elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE) { |
| 4382 |
|
if ($is_space->{$self->{nc}}) { |
| 4383 |
|
## Stay in the state. |
| 4384 |
|
!!!next-input-character; |
| 4385 |
|
redo A; |
| 4386 |
|
} elsif ($self->{nc} == 0x0022) { # " |
| 4387 |
|
$self->{ca}->{value} = ''; |
| 4388 |
|
$self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE; |
| 4389 |
|
!!!next-input-character; |
| 4390 |
|
redo A; |
| 4391 |
|
} elsif ($self->{nc} == 0x0027) { # ' |
| 4392 |
|
$self->{ca}->{value} = ''; |
| 4393 |
|
$self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE; |
| 4394 |
|
!!!next-input-character; |
| 4395 |
|
redo A; |
| 4396 |
|
} elsif ($self->{nc} == 0x003E) { # > |
| 4397 |
|
push @{$self->{ct}->{attrdefs}}, $self->{ca}; |
| 4398 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 4399 |
|
!!!next-input-character; |
| 4400 |
|
!!!emit ($self->{ct}); # ATTLIST |
| 4401 |
|
redo A; |
| 4402 |
|
} elsif ($self->{nc} == -1) { |
| 4403 |
|
## XML5: No parse error. |
| 4404 |
|
!!!parse-error (type => 'unclosed md'); ## TODO: type |
| 4405 |
|
push @{$self->{ct}->{attrdefs}}, $self->{ca}; |
| 4406 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state". |
| 4407 |
|
!!!next-input-character; |
| 4408 |
|
!!!emit ($self->{ct}); |
| 4409 |
|
redo A; |
| 4410 |
|
} else { |
| 4411 |
|
## XML5: Not defined yet. |
| 4412 |
|
if ($self->{ca}->{default} eq 'FIXED') { |
| 4413 |
|
$self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE; |
| 4414 |
|
} else { |
| 4415 |
|
push @{$self->{ct}->{attrdefs}}, $self->{ca}; |
| 4416 |
|
$self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE; |
| 4417 |
|
} |
| 4418 |
|
## Reconsume. |
| 4419 |
|
redo A; |
| 4420 |
|
} |
| 4421 |
|
} elsif ($self->{state} == AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE) { |
| 4422 |
|
if ($is_space->{$self->{nc}} or |
| 4423 |
|
$self->{nc} == -1 or |
| 4424 |
|
$self->{nc} == 0x003E) { # > |
| 4425 |
|
$self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE; |
| 4426 |
|
## Reconsume. |
| 4427 |
|
redo A; |
| 4428 |
|
} else { |
| 4429 |
|
!!!parse-error (type => 'no space before attr name'); ## TODO: type |
| 4430 |
|
$self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE; |
| 4431 |
## Reconsume. |
## Reconsume. |
| 4432 |
redo A; |
redo A; |
| 4433 |
} |
} |
| 4434 |
|
|
| 4435 |
|
} elsif ($self->{state} == BOGUS_MD_STATE) { |
| 4436 |
|
if ($self->{nc} == 0x003E) { # > |
| 4437 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 4438 |
|
!!!next-input-character; |
| 4439 |
|
!!!emit ($self->{ct}); # ATTLIST/ENTITY/NOTATION |
| 4440 |
|
redo A; |
| 4441 |
|
} elsif ($self->{nc} == -1) { |
| 4442 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 4443 |
|
## Reconsume. |
| 4444 |
|
!!!emit ($self->{ct}); # ATTLIST/ENTITY/NOTATION |
| 4445 |
|
redo A; |
| 4446 |
|
} else { |
| 4447 |
|
## Stay in the state. |
| 4448 |
|
!!!next-input-character; |
| 4449 |
|
redo A; |
| 4450 |
|
} |
| 4451 |
} else { |
} else { |
| 4452 |
die "$0: $self->{state}: Unknown state"; |
die "$0: $self->{state}: Unknown state"; |
| 4453 |
} |
} |
| 4458 |
|
|
| 4459 |
1; |
1; |
| 4460 |
## $Date$ |
## $Date$ |
| 4461 |
|
|