177 |
sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE () { 82 } |
sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE () { 82 } |
178 |
sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE () { 83 } |
sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE () { 83 } |
179 |
sub AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE () { 84 } |
sub AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE () { 84 } |
180 |
|
sub BOGUS_MD_STATE () { 85 } |
181 |
|
|
182 |
## Tree constructor state constants (see Whatpm::HTML for the full |
## Tree constructor state constants (see Whatpm::HTML for the full |
183 |
## list and descriptions) |
## list and descriptions) |
2213 |
!!!next-input-character; |
!!!next-input-character; |
2214 |
redo A; |
redo A; |
2215 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
2216 |
!!!cp (166); |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
2217 |
$self->{state} = DATA_STATE; |
!!!cp (166); |
2218 |
$self->{s_kwd} = ''; |
$self->{state} = DATA_STATE; |
2219 |
|
$self->{s_kwd} = ''; |
2220 |
|
} else { |
2221 |
|
!!!cp (166.1); |
2222 |
|
!!!parse-error (type => 'no md def'); ## TODO: type |
2223 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
2224 |
|
} |
2225 |
|
|
2226 |
!!!next-input-character; |
!!!next-input-character; |
2227 |
|
!!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION |
|
!!!emit ($self->{ct}); # DOCTYPE |
|
|
|
|
2228 |
redo A; |
redo A; |
2229 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
2230 |
!!!cp (167); |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
2231 |
!!!parse-error (type => 'unclosed DOCTYPE'); |
!!!cp (167); |
2232 |
$self->{state} = DATA_STATE; |
!!!parse-error (type => 'unclosed DOCTYPE'); |
2233 |
$self->{s_kwd} = ''; |
$self->{state} = DATA_STATE; |
2234 |
## reconsume |
$self->{s_kwd} = ''; |
2235 |
|
$self->{ct}->{quirks} = 1; |
2236 |
$self->{ct}->{quirks} = 1; |
} else { |
2237 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!cp (167.12); |
2238 |
|
!!!parse-error (type => 'unclosed md'); ## TODO: type |
2239 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
2240 |
|
} |
2241 |
|
|
2242 |
|
## Reconsume. |
2243 |
|
!!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION |
2244 |
redo A; |
redo A; |
2245 |
} elsif ($self->{nc} == 0x0050 or # P |
} elsif ($self->{nc} == 0x0050 or # P |
2246 |
$self->{nc} == 0x0070) { # p |
$self->{nc} == 0x0070) { # p |
2256 |
$self->{kwd} = chr $self->{nc}; |
$self->{kwd} = chr $self->{nc}; |
2257 |
!!!next-input-character; |
!!!next-input-character; |
2258 |
redo A; |
redo A; |
2259 |
} elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [ |
## TODO: " and ' for ENTITY |
2260 |
|
} elsif ($self->{is_xml} and |
2261 |
|
$self->{ct}->{type} == DOCTYPE_TOKEN and |
2262 |
|
$self->{nc} == 0x005B) { # [ |
2263 |
!!!cp (167.3); |
!!!cp (167.3); |
2264 |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
2265 |
$self->{ct}->{has_internal_subset} = 1; # DOCTYPE |
$self->{ct}->{has_internal_subset} = 1; # DOCTYPE |
2268 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
2269 |
redo A; |
redo A; |
2270 |
} else { |
} else { |
2271 |
!!!cp (180); |
!!!parse-error (type => 'string after DOCTYPE name'); ## TODO: type |
2272 |
!!!parse-error (type => 'string after DOCTYPE name'); |
|
2273 |
$self->{ct}->{quirks} = 1; |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
2274 |
|
!!!cp (180); |
2275 |
|
$self->{ct}->{quirks} = 1; |
2276 |
|
$self->{state} = BOGUS_DOCTYPE_STATE; |
2277 |
|
} else { |
2278 |
|
!!!cp (180.1); |
2279 |
|
$self->{state} = BOGUS_MD_STATE; |
2280 |
|
} |
2281 |
|
|
|
$self->{state} = BOGUS_DOCTYPE_STATE; |
|
2282 |
!!!next-input-character; |
!!!next-input-character; |
2283 |
redo A; |
redo A; |
2284 |
} |
} |
2320 |
!!!next-input-character; |
!!!next-input-character; |
2321 |
redo A; |
redo A; |
2322 |
} else { |
} else { |
2323 |
!!!cp (169); |
!!!parse-error (type => 'string after DOCTYPE name', ## TODO: type |
|
!!!parse-error (type => 'string after DOCTYPE name', |
|
2324 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
2325 |
column => $self->{column_prev} + 1 - length $self->{kwd}); |
column => $self->{column_prev} + 1 - length $self->{kwd}); |
2326 |
$self->{ct}->{quirks} = 1; |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
2327 |
|
!!!cp (169); |
2328 |
$self->{state} = BOGUS_DOCTYPE_STATE; |
$self->{ct}->{quirks} = 1; |
2329 |
|
$self->{state} = BOGUS_DOCTYPE_STATE; |
2330 |
|
} else { |
2331 |
|
!!!cp (169.1); |
2332 |
|
$self->{state} = BOGUS_MD_STATE; |
2333 |
|
} |
2334 |
## Reconsume. |
## Reconsume. |
2335 |
redo A; |
redo A; |
2336 |
} |
} |
2372 |
!!!next-input-character; |
!!!next-input-character; |
2373 |
redo A; |
redo A; |
2374 |
} else { |
} else { |
2375 |
!!!cp (172); |
!!!parse-error (type => 'string after DOCTYPE name', ## TODO: type |
|
!!!parse-error (type => 'string after DOCTYPE name', |
|
2376 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
2377 |
column => $self->{column_prev} + 1 - length $self->{kwd}); |
column => $self->{column_prev} + 1 - length $self->{kwd}); |
2378 |
$self->{ct}->{quirks} = 1; |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
2379 |
|
!!!cp (172); |
2380 |
$self->{state} = BOGUS_DOCTYPE_STATE; |
$self->{ct}->{quirks} = 1; |
2381 |
|
$self->{state} = BOGUS_DOCTYPE_STATE; |
2382 |
|
} else { |
2383 |
|
!!!cp (172.1); |
2384 |
|
$self->{state} = BOGUS_MD_STATE; |
2385 |
|
} |
2386 |
## Reconsume. |
## Reconsume. |
2387 |
redo A; |
redo A; |
2388 |
} |
} |
2405 |
!!!next-input-character; |
!!!next-input-character; |
2406 |
redo A; |
redo A; |
2407 |
} elsif ($self->{nc} eq 0x003E) { # > |
} elsif ($self->{nc} eq 0x003E) { # > |
|
!!!cp (184); |
|
2408 |
!!!parse-error (type => 'no PUBLIC literal'); |
!!!parse-error (type => 'no PUBLIC literal'); |
2409 |
|
|
2410 |
$self->{state} = DATA_STATE; |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
2411 |
$self->{s_kwd} = ''; |
!!!cp (184); |
2412 |
|
$self->{state} = DATA_STATE; |
2413 |
|
$self->{s_kwd} = ''; |
2414 |
|
$self->{ct}->{quirks} = 1; |
2415 |
|
} else { |
2416 |
|
!!!cp (184.1); |
2417 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
2418 |
|
} |
2419 |
|
|
2420 |
!!!next-input-character; |
!!!next-input-character; |
2421 |
|
!!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION |
|
$self->{ct}->{quirks} = 1; |
|
|
!!!emit ($self->{ct}); # DOCTYPE |
|
|
|
|
2422 |
redo A; |
redo A; |
2423 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
2424 |
!!!cp (185); |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
2425 |
!!!parse-error (type => 'unclosed DOCTYPE'); |
!!!cp (185); |
2426 |
|
!!!parse-error (type => 'unclosed DOCTYPE'); |
2427 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2428 |
$self->{s_kwd} = ''; |
$self->{s_kwd} = ''; |
2429 |
|
$self->{ct}->{quirks} = 1; |
2430 |
|
} else { |
2431 |
|
!!!cp (185.1); |
2432 |
|
!!!parse-error (type => 'unclosed md'); ## TODO: type |
2433 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
2434 |
|
} |
2435 |
|
|
2436 |
## reconsume |
## reconsume |
|
|
|
|
$self->{ct}->{quirks} = 1; |
|
2437 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
|
|
|
2438 |
redo A; |
redo A; |
2439 |
} elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [ |
} elsif ($self->{is_xml} and |
2440 |
|
$self->{ct}->{type} == DOCTYPE_TOKEN and |
2441 |
|
$self->{nc} == 0x005B) { # [ |
2442 |
!!!cp (186.1); |
!!!cp (186.1); |
2443 |
!!!parse-error (type => 'no PUBLIC literal'); |
!!!parse-error (type => 'no PUBLIC literal'); |
2444 |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
2448 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
2449 |
redo A; |
redo A; |
2450 |
} else { |
} else { |
|
!!!cp (186); |
|
2451 |
!!!parse-error (type => 'string after PUBLIC'); |
!!!parse-error (type => 'string after PUBLIC'); |
|
$self->{ct}->{quirks} = 1; |
|
2452 |
|
|
2453 |
$self->{state} = BOGUS_DOCTYPE_STATE; |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
2454 |
|
!!!cp (186); |
2455 |
|
$self->{ct}->{quirks} = 1; |
2456 |
|
$self->{state} = BOGUS_DOCTYPE_STATE; |
2457 |
|
} else { |
2458 |
|
!!!cp (186.2); |
2459 |
|
$self->{state} = BOGUS_MD_STATE; |
2460 |
|
} |
2461 |
|
|
2462 |
!!!next-input-character; |
!!!next-input-character; |
2463 |
redo A; |
redo A; |
2464 |
} |
} |
2469 |
!!!next-input-character; |
!!!next-input-character; |
2470 |
redo A; |
redo A; |
2471 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
|
!!!cp (188); |
|
2472 |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
2473 |
|
|
2474 |
$self->{state} = DATA_STATE; |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
2475 |
$self->{s_kwd} = ''; |
!!!cp (188); |
2476 |
!!!next-input-character; |
$self->{state} = DATA_STATE; |
2477 |
|
$self->{s_kwd} = ''; |
2478 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2479 |
!!!emit ($self->{ct}); # DOCTYPE |
} else { |
2480 |
|
!!!cp (188.1); |
2481 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
2482 |
|
} |
2483 |
|
|
2484 |
|
!!!next-input-character; |
2485 |
|
!!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION |
2486 |
redo A; |
redo A; |
2487 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
|
!!!cp (189); |
|
2488 |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
2489 |
|
|
2490 |
$self->{state} = DATA_STATE; |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
2491 |
$self->{s_kwd} = ''; |
!!!cp (189); |
2492 |
## reconsume |
$self->{state} = DATA_STATE; |
2493 |
|
$self->{s_kwd} = ''; |
2494 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2495 |
|
} else { |
2496 |
|
!!!cp (189.1); |
2497 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
2498 |
|
} |
2499 |
|
|
2500 |
|
## Reconsume. |
2501 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
|
|
|
2502 |
redo A; |
redo A; |
2503 |
} else { |
} else { |
2504 |
!!!cp (190); |
!!!cp (190); |
2505 |
$self->{ct}->{pubid} # DOCTYPE |
$self->{ct}->{pubid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION |
|
.= chr $self->{nc}; |
|
2506 |
$self->{read_until}->($self->{ct}->{pubid}, q[">], |
$self->{read_until}->($self->{ct}->{pubid}, q[">], |
2507 |
length $self->{ct}->{pubid}); |
length $self->{ct}->{pubid}); |
2508 |
|
|
2517 |
!!!next-input-character; |
!!!next-input-character; |
2518 |
redo A; |
redo A; |
2519 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
|
!!!cp (192); |
|
2520 |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
2521 |
|
|
2522 |
$self->{state} = DATA_STATE; |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
2523 |
$self->{s_kwd} = ''; |
!!!cp (192); |
2524 |
!!!next-input-character; |
$self->{state} = DATA_STATE; |
2525 |
|
$self->{s_kwd} = ''; |
2526 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2527 |
!!!emit ($self->{ct}); # DOCTYPE |
} else { |
2528 |
|
!!!cp (192.1); |
2529 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
2530 |
|
} |
2531 |
|
|
2532 |
|
!!!next-input-character; |
2533 |
|
!!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION |
2534 |
redo A; |
redo A; |
2535 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
|
!!!cp (193); |
|
2536 |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
2537 |
|
|
2538 |
$self->{state} = DATA_STATE; |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
2539 |
$self->{s_kwd} = ''; |
!!!cp (193); |
2540 |
|
$self->{state} = DATA_STATE; |
2541 |
|
$self->{s_kwd} = ''; |
2542 |
|
$self->{ct}->{quirks} = 1; |
2543 |
|
} else { |
2544 |
|
!!!cp (193.1); |
2545 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
2546 |
|
} |
2547 |
|
|
2548 |
## reconsume |
## reconsume |
2549 |
|
!!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION |
|
$self->{ct}->{quirks} = 1; |
|
|
!!!emit ($self->{ct}); # DOCTYPE |
|
|
|
|
2550 |
redo A; |
redo A; |
2551 |
} else { |
} else { |
2552 |
!!!cp (194); |
!!!cp (194); |
2553 |
$self->{ct}->{pubid} # DOCTYPE |
$self->{ct}->{pubid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION |
|
.= chr $self->{nc}; |
|
2554 |
$self->{read_until}->($self->{ct}->{pubid}, q['>], |
$self->{read_until}->($self->{ct}->{pubid}, q['>], |
2555 |
length $self->{ct}->{pubid}); |
length $self->{ct}->{pubid}); |
2556 |
|
|
2566 |
redo A; |
redo A; |
2567 |
} elsif ($self->{nc} == 0x0022) { # " |
} elsif ($self->{nc} == 0x0022) { # " |
2568 |
!!!cp (196); |
!!!cp (196); |
2569 |
$self->{ct}->{sysid} = ''; # DOCTYPE |
$self->{ct}->{sysid} = ''; # DOCTYPE/ENTITY/NOTATION |
2570 |
$self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE; |
$self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE; |
2571 |
!!!next-input-character; |
!!!next-input-character; |
2572 |
redo A; |
redo A; |
2573 |
} elsif ($self->{nc} == 0x0027) { # ' |
} elsif ($self->{nc} == 0x0027) { # ' |
2574 |
!!!cp (197); |
!!!cp (197); |
2575 |
$self->{ct}->{sysid} = ''; # DOCTYPE |
$self->{ct}->{sysid} = ''; # DOCTYPE/ENTITY/NOTATION |
2576 |
$self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE; |
$self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE; |
2577 |
!!!next-input-character; |
!!!next-input-character; |
2578 |
redo A; |
redo A; |
2579 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
2580 |
if ($self->{is_xml}) { |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
2581 |
!!!cp (198.1); |
if ($self->{is_xml}) { |
2582 |
!!!parse-error (type => 'no SYSTEM literal'); |
!!!cp (198.1); |
2583 |
|
!!!parse-error (type => 'no SYSTEM literal'); |
2584 |
|
} else { |
2585 |
|
!!!cp (198); |
2586 |
|
} |
2587 |
|
$self->{state} = DATA_STATE; |
2588 |
|
$self->{s_kwd} = ''; |
2589 |
} else { |
} else { |
2590 |
!!!cp (198); |
if ($self->{ct}->{type} == NOTATION_TOKEN) { |
2591 |
|
!!!cp (198.2); |
2592 |
|
} else { |
2593 |
|
!!!cp (198.3); |
2594 |
|
!!!parse-error (type => 'no SYSTEM literal'); |
2595 |
|
} |
2596 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
2597 |
} |
} |
2598 |
$self->{state} = DATA_STATE; |
|
|
$self->{s_kwd} = ''; |
|
2599 |
!!!next-input-character; |
!!!next-input-character; |
2600 |
|
!!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION |
|
!!!emit ($self->{ct}); # DOCTYPE |
|
|
|
|
2601 |
redo A; |
redo A; |
2602 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
2603 |
!!!cp (199); |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
2604 |
!!!parse-error (type => 'unclosed DOCTYPE'); |
!!!cp (199); |
2605 |
|
!!!parse-error (type => 'unclosed DOCTYPE'); |
2606 |
$self->{state} = DATA_STATE; |
|
2607 |
$self->{s_kwd} = ''; |
$self->{state} = DATA_STATE; |
2608 |
|
$self->{s_kwd} = ''; |
2609 |
|
$self->{ct}->{quirks} = 1; |
2610 |
|
} else { |
2611 |
|
!!!parse-error (type => 'unclosed md'); ## TODO: type |
2612 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
2613 |
|
} |
2614 |
|
|
2615 |
## reconsume |
## reconsume |
2616 |
|
!!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION |
|
$self->{ct}->{quirks} = 1; |
|
|
!!!emit ($self->{ct}); # DOCTYPE |
|
|
|
|
2617 |
redo A; |
redo A; |
2618 |
} elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [ |
} elsif ($self->{is_xml} and |
2619 |
|
$self->{ct}->{type} == DOCTYPE_TOKEN and |
2620 |
|
$self->{nc} == 0x005B) { # [ |
2621 |
!!!cp (200.1); |
!!!cp (200.1); |
2622 |
!!!parse-error (type => 'no SYSTEM literal'); |
!!!parse-error (type => 'no SYSTEM literal'); |
2623 |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
2627 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
2628 |
redo A; |
redo A; |
2629 |
} else { |
} else { |
|
!!!cp (200); |
|
2630 |
!!!parse-error (type => 'string after PUBLIC literal'); |
!!!parse-error (type => 'string after PUBLIC literal'); |
|
$self->{ct}->{quirks} = 1; |
|
2631 |
|
|
2632 |
$self->{state} = BOGUS_DOCTYPE_STATE; |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
2633 |
|
!!!cp (200); |
2634 |
|
$self->{ct}->{quirks} = 1; |
2635 |
|
$self->{state} = BOGUS_DOCTYPE_STATE; |
2636 |
|
} else { |
2637 |
|
!!!cp (200.2); |
2638 |
|
$self->{state} = BOGUS_MD_STATE; |
2639 |
|
} |
2640 |
|
|
2641 |
!!!next-input-character; |
!!!next-input-character; |
2642 |
redo A; |
redo A; |
2643 |
} |
} |
2660 |
!!!next-input-character; |
!!!next-input-character; |
2661 |
redo A; |
redo A; |
2662 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
|
!!!cp (204); |
|
2663 |
!!!parse-error (type => 'no SYSTEM literal'); |
!!!parse-error (type => 'no SYSTEM literal'); |
|
$self->{state} = DATA_STATE; |
|
|
$self->{s_kwd} = ''; |
|
2664 |
!!!next-input-character; |
!!!next-input-character; |
2665 |
|
|
2666 |
$self->{ct}->{quirks} = 1; |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
2667 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!cp (204); |
2668 |
|
$self->{state} = DATA_STATE; |
2669 |
|
$self->{s_kwd} = ''; |
2670 |
|
$self->{ct}->{quirks} = 1; |
2671 |
|
} else { |
2672 |
|
!!!cp (204.1); |
2673 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
2674 |
|
} |
2675 |
|
|
2676 |
|
!!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION |
2677 |
redo A; |
redo A; |
2678 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
2679 |
!!!cp (205); |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
2680 |
!!!parse-error (type => 'unclosed DOCTYPE'); |
!!!cp (205); |
2681 |
|
!!!parse-error (type => 'unclosed DOCTYPE'); |
2682 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2683 |
$self->{s_kwd} = ''; |
$self->{s_kwd} = ''; |
2684 |
|
$self->{ct}->{quirks} = 1; |
2685 |
|
} else { |
2686 |
|
!!!cp (205.1); |
2687 |
|
!!!parse-error (type => 'unclosed md'); ## TODO: type |
2688 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
2689 |
|
} |
2690 |
|
|
2691 |
## reconsume |
## reconsume |
2692 |
|
!!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION |
|
$self->{ct}->{quirks} = 1; |
|
|
!!!emit ($self->{ct}); # DOCTYPE |
|
|
|
|
2693 |
redo A; |
redo A; |
2694 |
} elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [ |
} elsif ($self->{is_xml} and |
2695 |
|
$self->{ct}->{type} == DOCTYPE_TOKEN and |
2696 |
|
$self->{nc} == 0x005B) { # [ |
2697 |
!!!cp (206.1); |
!!!cp (206.1); |
2698 |
!!!parse-error (type => 'no SYSTEM literal'); |
!!!parse-error (type => 'no SYSTEM literal'); |
2699 |
|
|
2704 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
2705 |
redo A; |
redo A; |
2706 |
} else { |
} else { |
|
!!!cp (206); |
|
2707 |
!!!parse-error (type => 'string after SYSTEM'); |
!!!parse-error (type => 'string after SYSTEM'); |
|
$self->{ct}->{quirks} = 1; |
|
2708 |
|
|
2709 |
$self->{state} = BOGUS_DOCTYPE_STATE; |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
2710 |
|
!!!cp (206); |
2711 |
|
$self->{ct}->{quirks} = 1; |
2712 |
|
$self->{state} = BOGUS_DOCTYPE_STATE; |
2713 |
|
} else { |
2714 |
|
!!!cp (206.2); |
2715 |
|
$self->{state} = BOGUS_MD_STATE; |
2716 |
|
} |
2717 |
|
|
2718 |
!!!next-input-character; |
!!!next-input-character; |
2719 |
redo A; |
redo A; |
2720 |
} |
} |
2725 |
!!!next-input-character; |
!!!next-input-character; |
2726 |
redo A; |
redo A; |
2727 |
} elsif (not $self->{is_xml} and $self->{nc} == 0x003E) { # > |
} elsif (not $self->{is_xml} and $self->{nc} == 0x003E) { # > |
|
!!!cp (208); |
|
2728 |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
2729 |
|
|
2730 |
$self->{state} = DATA_STATE; |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
2731 |
$self->{s_kwd} = ''; |
!!!cp (208); |
2732 |
|
$self->{state} = DATA_STATE; |
2733 |
|
$self->{s_kwd} = ''; |
2734 |
|
$self->{ct}->{quirks} = 1; |
2735 |
|
} else { |
2736 |
|
!!!cp (208.1); |
2737 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
2738 |
|
} |
2739 |
|
|
2740 |
!!!next-input-character; |
!!!next-input-character; |
2741 |
|
!!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION |
|
$self->{ct}->{quirks} = 1; |
|
|
!!!emit ($self->{ct}); # DOCTYPE |
|
|
|
|
2742 |
redo A; |
redo A; |
2743 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
|
!!!cp (209); |
|
2744 |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
2745 |
|
|
2746 |
$self->{state} = DATA_STATE; |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
2747 |
$self->{s_kwd} = ''; |
!!!cp (209); |
2748 |
|
$self->{state} = DATA_STATE; |
2749 |
|
$self->{s_kwd} = ''; |
2750 |
|
$self->{ct}->{quirks} = 1; |
2751 |
|
} else { |
2752 |
|
!!!cp (209.1); |
2753 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
2754 |
|
} |
2755 |
|
|
2756 |
## reconsume |
## reconsume |
2757 |
|
!!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION |
|
$self->{ct}->{quirks} = 1; |
|
|
!!!emit ($self->{ct}); # DOCTYPE |
|
|
|
|
2758 |
redo A; |
redo A; |
2759 |
} else { |
} else { |
2760 |
!!!cp (210); |
!!!cp (210); |
2761 |
$self->{ct}->{sysid} # DOCTYPE |
$self->{ct}->{sysid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION |
|
.= chr $self->{nc}; |
|
2762 |
$self->{read_until}->($self->{ct}->{sysid}, q[">], |
$self->{read_until}->($self->{ct}->{sysid}, q[">], |
2763 |
length $self->{ct}->{sysid}); |
length $self->{ct}->{sysid}); |
2764 |
|
|
2785 |
|
|
2786 |
redo A; |
redo A; |
2787 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
|
!!!cp (213); |
|
2788 |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
2789 |
|
|
2790 |
$self->{state} = DATA_STATE; |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
2791 |
$self->{s_kwd} = ''; |
!!!cp (213); |
2792 |
## reconsume |
$self->{state} = DATA_STATE; |
2793 |
|
$self->{s_kwd} = ''; |
2794 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2795 |
!!!emit ($self->{ct}); # DOCTYPE |
} else { |
2796 |
|
!!!cp (213.1); |
2797 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
2798 |
|
} |
2799 |
|
|
2800 |
|
## reconsume |
2801 |
|
!!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION |
2802 |
redo A; |
redo A; |
2803 |
} else { |
} else { |
2804 |
!!!cp (214); |
!!!cp (214); |
2805 |
$self->{ct}->{sysid} # DOCTYPE |
$self->{ct}->{sysid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION |
|
.= chr $self->{nc}; |
|
2806 |
$self->{read_until}->($self->{ct}->{sysid}, q['>], |
$self->{read_until}->($self->{ct}->{sysid}, q['>], |
2807 |
length $self->{ct}->{sysid}); |
length $self->{ct}->{sysid}); |
2808 |
|
|
2817 |
!!!next-input-character; |
!!!next-input-character; |
2818 |
redo A; |
redo A; |
2819 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
2820 |
!!!cp (216); |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
2821 |
$self->{state} = DATA_STATE; |
!!!cp (216); |
2822 |
$self->{s_kwd} = ''; |
$self->{state} = DATA_STATE; |
2823 |
!!!next-input-character; |
$self->{s_kwd} = ''; |
2824 |
|
} else { |
2825 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!cp (216.1); |
2826 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
2827 |
|
} |
2828 |
|
|
2829 |
|
!!!next-input-character; |
2830 |
|
!!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION |
2831 |
redo A; |
redo A; |
2832 |
|
## TODO: "NDATA" |
2833 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
2834 |
!!!cp (217); |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
2835 |
!!!parse-error (type => 'unclosed DOCTYPE'); |
!!!cp (217); |
2836 |
$self->{state} = DATA_STATE; |
!!!parse-error (type => 'unclosed DOCTYPE'); |
2837 |
$self->{s_kwd} = ''; |
$self->{state} = DATA_STATE; |
2838 |
## reconsume |
$self->{s_kwd} = ''; |
2839 |
|
$self->{ct}->{quirks} = 1; |
2840 |
$self->{ct}->{quirks} = 1; |
} else { |
2841 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!cp (217.1); |
2842 |
|
!!!parse-error (type => 'unclosed md'); ## TODO: type |
2843 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
2844 |
|
} |
2845 |
|
|
2846 |
|
## reconsume |
2847 |
|
!!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION |
2848 |
redo A; |
redo A; |
2849 |
} elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [ |
} elsif ($self->{is_xml} and |
2850 |
|
$self->{ct}->{type} == DOCTYPE_TOKEN and |
2851 |
|
$self->{nc} == 0x005B) { # [ |
2852 |
!!!cp (218.1); |
!!!cp (218.1); |
2853 |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
2854 |
$self->{ct}->{has_internal_subset} = 1; # DOCTYPE |
$self->{ct}->{has_internal_subset} = 1; # DOCTYPE |
2857 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
2858 |
redo A; |
redo A; |
2859 |
} else { |
} else { |
|
!!!cp (218); |
|
2860 |
!!!parse-error (type => 'string after SYSTEM literal'); |
!!!parse-error (type => 'string after SYSTEM literal'); |
|
#$self->{ct}->{quirks} = 1; |
|
2861 |
|
|
2862 |
$self->{state} = BOGUS_DOCTYPE_STATE; |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
2863 |
|
!!!cp (218); |
2864 |
|
#$self->{ct}->{quirks} = 1; |
2865 |
|
$self->{state} = BOGUS_DOCTYPE_STATE; |
2866 |
|
} else { |
2867 |
|
!!!cp (218.2); |
2868 |
|
$self->{state} = BOGUS_MD_STATE; |
2869 |
|
} |
2870 |
|
|
2871 |
!!!next-input-character; |
!!!next-input-character; |
2872 |
redo A; |
redo A; |
2873 |
} |
} |
3899 |
## XML5: "DOCTYPE ENTITY name state" and "DOCTYPE ATTLIST name state". |
## XML5: "DOCTYPE ENTITY name state" and "DOCTYPE ATTLIST name state". |
3900 |
|
|
3901 |
if ($is_space->{$self->{nc}}) { |
if ($is_space->{$self->{nc}}) { |
3902 |
## TODO: |
if ($self->{ct}->{type} == ATTLIST_TOKEN) { |
3903 |
$self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE; |
$self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE; |
3904 |
|
} elsif ($self->{ct}->{type} == ELEMENT_TOKEN) { |
3905 |
|
## TODO: ... |
3906 |
|
$self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE; |
3907 |
|
} else { # ENTITY/NOTATION |
3908 |
|
$self->{state} = AFTER_DOCTYPE_NAME_STATE; |
3909 |
|
} |
3910 |
!!!next-input-character; |
!!!next-input-character; |
3911 |
redo A; |
redo A; |
3912 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
3913 |
if ($self->{ct}->{type} == ATTLIST_TOKEN) { |
if ($self->{ct}->{type} == ATTLIST_TOKEN) { |
3914 |
# |
# |
3915 |
} else { |
} else { |
3916 |
!!!parse-error (type => 'no md body'); ## TODO: type |
!!!parse-error (type => 'no md def'); ## TODO: type |
3917 |
} |
} |
3918 |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
3919 |
!!!next-input-character; |
!!!next-input-character; |
4303 |
if ($is_space->{$self->{nc}}) { |
if ($is_space->{$self->{nc}}) { |
4304 |
## XML5: No parse error. |
## XML5: No parse error. |
4305 |
!!!parse-error (type => 'no default type'); ## TODO: type |
!!!parse-error (type => 'no default type'); ## TODO: type |
4306 |
$self->{state} = BOGUS_COMMENT_STATE; |
$self->{state} = BOGUS_MD_STATE; |
|
$self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded |
|
4307 |
## Reconsume. |
## Reconsume. |
4308 |
redo A; |
redo A; |
4309 |
} elsif ($self->{nc} == 0x0022) { # " |
} elsif ($self->{nc} == 0x0022) { # " |
4430 |
$self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE; |
$self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE; |
4431 |
## Reconsume. |
## Reconsume. |
4432 |
redo A; |
redo A; |
4433 |
} |
} |
4434 |
|
|
4435 |
|
} elsif ($self->{state} == BOGUS_MD_STATE) { |
4436 |
|
if ($self->{nc} == 0x003E) { # > |
4437 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
4438 |
|
!!!next-input-character; |
4439 |
|
!!!emit ($self->{ct}); # ATTLIST/ENTITY/NOTATION |
4440 |
|
redo A; |
4441 |
|
} elsif ($self->{nc} == -1) { |
4442 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
4443 |
|
## Reconsume. |
4444 |
|
!!!emit ($self->{ct}); # ATTLIST/ENTITY/NOTATION |
4445 |
|
redo A; |
4446 |
|
} else { |
4447 |
|
## Stay in the state. |
4448 |
|
!!!next-input-character; |
4449 |
|
redo A; |
4450 |
|
} |
4451 |
} else { |
} else { |
4452 |
die "$0: $self->{state}: Unknown state"; |
die "$0: $self->{state}: Unknown state"; |
4453 |
} |
} |