| 114 |
sub ENTITY_NAME_STATE () { 49 } |
sub ENTITY_NAME_STATE () { 49 } |
| 115 |
sub PCDATA_STATE () { 50 } # "data state" in the spec |
sub PCDATA_STATE () { 50 } # "data state" in the spec |
| 116 |
|
|
| 117 |
|
## XML states |
| 118 |
|
sub PI_STATE () { 51 } |
| 119 |
|
sub PI_TARGET_STATE () { 52 } |
| 120 |
|
sub PI_TARGET_AFTER_STATE () { 53 } |
| 121 |
|
sub PI_DATA_STATE () { 54 } |
| 122 |
|
sub PI_AFTER_STATE () { 55 } |
| 123 |
|
sub PI_DATA_AFTER_STATE () { 56 } |
| 124 |
|
|
| 125 |
## Tree constructor state constants (see Whatpm::HTML for the full |
## Tree constructor state constants (see Whatpm::HTML for the full |
| 126 |
## list and descriptions) |
## list and descriptions) |
| 127 |
|
|
| 183 |
#$self->{level} |
#$self->{level} |
| 184 |
#$self->{set_nc} |
#$self->{set_nc} |
| 185 |
#$self->{parse_error} |
#$self->{parse_error} |
| 186 |
|
#$self->{is_xml} (if XML) |
| 187 |
|
|
| 188 |
$self->{state} = DATA_STATE; # MUST |
$self->{state} = DATA_STATE; # MUST |
| 189 |
#$self->{s_kwd}; # state keyword - initialized when used |
$self->{s_kwd} = ''; # state keyword |
| 190 |
#$self->{entity__value}; # initialized when used |
#$self->{entity__value}; # initialized when used |
| 191 |
#$self->{entity__match}; # initialized when used |
#$self->{entity__match}; # initialized when used |
| 192 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # be |
$self->{content_model} = PCDATA_CONTENT_MODEL; # be |
| 217 |
## ->{value} |
## ->{value} |
| 218 |
## ->{has_reference} == 1 or 0 |
## ->{has_reference} == 1 or 0 |
| 219 |
## ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN) |
## ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN) |
| 220 |
|
## ->{has_reference} == 1 or 0 (CHARACTER_TOKEN) |
| 221 |
## NOTE: The "self-closing flag" is held as |$self->{self_closing}|. |
## NOTE: The "self-closing flag" is held as |$self->{self_closing}|. |
| 222 |
## |->{self_closing}| is used to save the value of |$self->{self_closing}| |
## |->{self_closing}| is used to save the value of |$self->{self_closing}| |
| 223 |
## while the token is pushed back to the stack. |
## while the token is pushed back to the stack. |
| 321 |
} |
} |
| 322 |
} elsif ($self->{nc} == 0x002D) { # - |
} elsif ($self->{nc} == 0x002D) { # - |
| 323 |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
| 324 |
$self->{s_kwd} .= '-'; |
if ($self->{s_kwd} eq '<!-') { |
|
|
|
|
if ($self->{s_kwd} eq '<!--') { |
|
| 325 |
!!!cp (3); |
!!!cp (3); |
| 326 |
$self->{escape} = 1; # unless $self->{escape}; |
$self->{escape} = 1; # unless $self->{escape}; |
| 327 |
$self->{s_kwd} = '--'; |
$self->{s_kwd} = '--'; |
| 328 |
# |
# |
| 329 |
} elsif ($self->{s_kwd} eq '---') { |
} elsif ($self->{s_kwd} eq '-') { |
| 330 |
!!!cp (4); |
!!!cp (4); |
| 331 |
$self->{s_kwd} = '--'; |
$self->{s_kwd} = '--'; |
| 332 |
# |
# |
| 333 |
|
} elsif ($self->{s_kwd} eq '<!' or $self->{s_kwd} eq '-') { |
| 334 |
|
!!!cp (4.1); |
| 335 |
|
$self->{s_kwd} .= '-'; |
| 336 |
|
# |
| 337 |
} else { |
} else { |
| 338 |
!!!cp (5); |
!!!cp (5); |
| 339 |
|
$self->{s_kwd} = '-'; |
| 340 |
# |
# |
| 341 |
} |
} |
| 342 |
} |
} |
| 372 |
if ($self->{s_kwd} eq '--') { |
if ($self->{s_kwd} eq '--') { |
| 373 |
!!!cp (8); |
!!!cp (8); |
| 374 |
delete $self->{escape}; |
delete $self->{escape}; |
| 375 |
|
# |
| 376 |
} else { |
} else { |
| 377 |
!!!cp (9); |
!!!cp (9); |
| 378 |
|
# |
| 379 |
} |
} |
| 380 |
|
} elsif ($self->{is_xml} and $self->{s_kwd} eq ']]') { |
| 381 |
|
!!!cp (9.1); |
| 382 |
|
!!!parse-error (type => 'unmatched mse', ## TODO: type |
| 383 |
|
line => $self->{line_prev}, |
| 384 |
|
column => $self->{column_prev} - 1); |
| 385 |
|
# |
| 386 |
} else { |
} else { |
| 387 |
!!!cp (10); |
!!!cp (10); |
| 388 |
|
# |
| 389 |
} |
} |
| 390 |
|
|
| 391 |
$self->{s_kwd} = ''; |
$self->{s_kwd} = ''; |
| 392 |
# |
# |
| 393 |
|
} elsif ($self->{nc} == 0x005D) { # ] |
| 394 |
|
if ($self->{s_kwd} eq ']' or $self->{s_kwd} eq '') { |
| 395 |
|
!!!cp (10.1); |
| 396 |
|
$self->{s_kwd} .= ']'; |
| 397 |
|
} elsif ($self->{s_kwd} eq ']]') { |
| 398 |
|
!!!cp (10.2); |
| 399 |
|
# |
| 400 |
|
} else { |
| 401 |
|
!!!cp (10.3); |
| 402 |
|
$self->{s_kwd} = ''; |
| 403 |
|
} |
| 404 |
|
# |
| 405 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 406 |
!!!cp (11); |
!!!cp (11); |
| 407 |
$self->{s_kwd} = ''; |
$self->{s_kwd} = ''; |
| 419 |
data => chr $self->{nc}, |
data => chr $self->{nc}, |
| 420 |
line => $self->{line}, column => $self->{column}, |
line => $self->{line}, column => $self->{column}, |
| 421 |
}; |
}; |
| 422 |
if ($self->{read_until}->($token->{data}, q[-!<>&], |
if ($self->{read_until}->($token->{data}, q{-!<>&\]}, |
| 423 |
length $token->{data})) { |
length $token->{data})) { |
| 424 |
$self->{s_kwd} = ''; |
$self->{s_kwd} = ''; |
| 425 |
} |
} |
| 426 |
|
|
| 427 |
## Stay in the data state. |
## Stay in the data state. |
| 428 |
if ($self->{content_model} == PCDATA_CONTENT_MODEL) { |
if (not $self->{is_xml} and |
| 429 |
|
$self->{content_model} == PCDATA_CONTENT_MODEL) { |
| 430 |
!!!cp (13); |
!!!cp (13); |
| 431 |
$self->{state} = PCDATA_STATE; |
$self->{state} = PCDATA_STATE; |
| 432 |
} else { |
} else { |
| 454 |
|
|
| 455 |
## reconsume |
## reconsume |
| 456 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 457 |
|
$self->{s_kwd} = ''; |
| 458 |
!!!emit ({type => CHARACTER_TOKEN, data => '<', |
!!!emit ({type => CHARACTER_TOKEN, data => '<', |
| 459 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 460 |
column => $self->{column_prev}, |
column => $self->{column_prev}, |
| 476 |
!!!cp (19); |
!!!cp (19); |
| 477 |
$self->{ct} |
$self->{ct} |
| 478 |
= {type => START_TAG_TOKEN, |
= {type => START_TAG_TOKEN, |
| 479 |
tag_name => chr ($self->{nc} + 0x0020), |
tag_name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)), |
| 480 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 481 |
column => $self->{column_prev}}; |
column => $self->{column_prev}}; |
| 482 |
$self->{state} = TAG_NAME_STATE; |
$self->{state} = TAG_NAME_STATE; |
| 498 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 499 |
column => $self->{column_prev}); |
column => $self->{column_prev}); |
| 500 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 501 |
|
$self->{s_kwd} = ''; |
| 502 |
!!!next-input-character; |
!!!next-input-character; |
| 503 |
|
|
| 504 |
!!!emit ({type => CHARACTER_TOKEN, data => '<>', |
!!!emit ({type => CHARACTER_TOKEN, data => '<>', |
| 508 |
|
|
| 509 |
redo A; |
redo A; |
| 510 |
} elsif ($self->{nc} == 0x003F) { # ? |
} elsif ($self->{nc} == 0x003F) { # ? |
| 511 |
!!!cp (22); |
if ($self->{is_xml}) { |
| 512 |
!!!parse-error (type => 'pio', |
!!!cp (22.1); |
| 513 |
line => $self->{line_prev}, |
$self->{state} = PI_STATE; |
| 514 |
column => $self->{column_prev}); |
!!!next-input-character; |
| 515 |
$self->{state} = BOGUS_COMMENT_STATE; |
redo A; |
| 516 |
$self->{ct} = {type => COMMENT_TOKEN, data => '', |
} else { |
| 517 |
line => $self->{line_prev}, |
!!!cp (22); |
| 518 |
column => $self->{column_prev}, |
!!!parse-error (type => 'pio', |
| 519 |
}; |
line => $self->{line_prev}, |
| 520 |
## $self->{nc} is intentionally left as is |
column => $self->{column_prev}); |
| 521 |
redo A; |
$self->{state} = BOGUS_COMMENT_STATE; |
| 522 |
} else { |
$self->{ct} = {type => COMMENT_TOKEN, data => '', |
| 523 |
|
line => $self->{line_prev}, |
| 524 |
|
column => $self->{column_prev}, |
| 525 |
|
}; |
| 526 |
|
## $self->{nc} is intentionally left as is |
| 527 |
|
redo A; |
| 528 |
|
} |
| 529 |
|
} elsif (not $self->{is_xml} or $is_space->{$self->{nc}}) { |
| 530 |
!!!cp (23); |
!!!cp (23); |
| 531 |
!!!parse-error (type => 'bare stago', |
!!!parse-error (type => 'bare stago', |
| 532 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 533 |
column => $self->{column_prev}); |
column => $self->{column_prev}); |
| 534 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 535 |
|
$self->{s_kwd} = ''; |
| 536 |
## reconsume |
## reconsume |
| 537 |
|
|
| 538 |
!!!emit ({type => CHARACTER_TOKEN, data => '<', |
!!!emit ({type => CHARACTER_TOKEN, data => '<', |
| 541 |
}); |
}); |
| 542 |
|
|
| 543 |
redo A; |
redo A; |
| 544 |
|
} else { |
| 545 |
|
## XML5: "<:" is a parse error. |
| 546 |
|
!!!cp (23.1); |
| 547 |
|
$self->{ct} = {type => START_TAG_TOKEN, |
| 548 |
|
tag_name => chr ($self->{nc}), |
| 549 |
|
line => $self->{line_prev}, |
| 550 |
|
column => $self->{column_prev}}; |
| 551 |
|
$self->{state} = TAG_NAME_STATE; |
| 552 |
|
!!!next-input-character; |
| 553 |
|
redo A; |
| 554 |
} |
} |
| 555 |
} else { |
} else { |
| 556 |
die "$0: $self->{content_model} in tag open"; |
die "$0: $self->{content_model} in tag open"; |
| 571 |
## NOTE: See <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>. |
## NOTE: See <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>. |
| 572 |
!!!cp (28); |
!!!cp (28); |
| 573 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 574 |
|
$self->{s_kwd} = ''; |
| 575 |
## Reconsume. |
## Reconsume. |
| 576 |
!!!emit ({type => CHARACTER_TOKEN, data => '</', |
!!!emit ({type => CHARACTER_TOKEN, data => '</', |
| 577 |
line => $l, column => $c, |
line => $l, column => $c, |
| 585 |
!!!cp (29); |
!!!cp (29); |
| 586 |
$self->{ct} |
$self->{ct} |
| 587 |
= {type => END_TAG_TOKEN, |
= {type => END_TAG_TOKEN, |
| 588 |
tag_name => chr ($self->{nc} + 0x0020), |
tag_name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)), |
| 589 |
line => $l, column => $c}; |
line => $l, column => $c}; |
| 590 |
$self->{state} = TAG_NAME_STATE; |
$self->{state} = TAG_NAME_STATE; |
| 591 |
!!!next-input-character; |
!!!next-input-character; |
| 605 |
line => $self->{line_prev}, ## "<" in "</>" |
line => $self->{line_prev}, ## "<" in "</>" |
| 606 |
column => $self->{column_prev} - 1); |
column => $self->{column_prev} - 1); |
| 607 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 608 |
|
$self->{s_kwd} = ''; |
| 609 |
!!!next-input-character; |
!!!next-input-character; |
| 610 |
redo A; |
redo A; |
| 611 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 612 |
!!!cp (32); |
!!!cp (32); |
| 613 |
!!!parse-error (type => 'bare etago'); |
!!!parse-error (type => 'bare etago'); |
| 614 |
|
$self->{s_kwd} = ''; |
| 615 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 616 |
# reconsume |
# reconsume |
| 617 |
|
|
| 651 |
} else { |
} else { |
| 652 |
!!!cp (25); |
!!!cp (25); |
| 653 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 654 |
|
$self->{s_kwd} = ''; |
| 655 |
## Reconsume. |
## Reconsume. |
| 656 |
!!!emit ({type => CHARACTER_TOKEN, |
!!!emit ({type => CHARACTER_TOKEN, |
| 657 |
data => '</' . $self->{s_kwd}, |
data => '</' . $self->{s_kwd}, |
| 670 |
!!!cp (26); |
!!!cp (26); |
| 671 |
## Reconsume. |
## Reconsume. |
| 672 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 673 |
|
$self->{s_kwd} = ''; |
| 674 |
!!!emit ({type => CHARACTER_TOKEN, |
!!!emit ({type => CHARACTER_TOKEN, |
| 675 |
data => '</' . $self->{s_kwd}, |
data => '</' . $self->{s_kwd}, |
| 676 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 712 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 713 |
} |
} |
| 714 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 715 |
|
$self->{s_kwd} = ''; |
| 716 |
!!!next-input-character; |
!!!next-input-character; |
| 717 |
|
|
| 718 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
| 721 |
} elsif (0x0041 <= $self->{nc} and |
} elsif (0x0041 <= $self->{nc} and |
| 722 |
$self->{nc} <= 0x005A) { # A..Z |
$self->{nc} <= 0x005A) { # A..Z |
| 723 |
!!!cp (38); |
!!!cp (38); |
| 724 |
$self->{ct}->{tag_name} .= chr ($self->{nc} + 0x0020); |
$self->{ct}->{tag_name} |
| 725 |
|
.= chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)); |
| 726 |
# start tag or end tag |
# start tag or end tag |
| 727 |
## Stay in this state |
## Stay in this state |
| 728 |
!!!next-input-character; |
!!!next-input-character; |
| 745 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 746 |
} |
} |
| 747 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 748 |
|
$self->{s_kwd} = ''; |
| 749 |
# reconsume |
# reconsume |
| 750 |
|
|
| 751 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
| 786 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 787 |
} |
} |
| 788 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 789 |
|
$self->{s_kwd} = ''; |
| 790 |
!!!next-input-character; |
!!!next-input-character; |
| 791 |
|
|
| 792 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
| 796 |
$self->{nc} <= 0x005A) { # A..Z |
$self->{nc} <= 0x005A) { # A..Z |
| 797 |
!!!cp (49); |
!!!cp (49); |
| 798 |
$self->{ca} |
$self->{ca} |
| 799 |
= {name => chr ($self->{nc} + 0x0020), |
= {name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)), |
| 800 |
value => '', |
value => '', |
| 801 |
line => $self->{line}, column => $self->{column}}; |
line => $self->{line}, column => $self->{column}}; |
| 802 |
$self->{state} = ATTRIBUTE_NAME_STATE; |
$self->{state} = ATTRIBUTE_NAME_STATE; |
| 824 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 825 |
} |
} |
| 826 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 827 |
|
$self->{s_kwd} = ''; |
| 828 |
# reconsume |
# reconsume |
| 829 |
|
|
| 830 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
| 890 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 891 |
} |
} |
| 892 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 893 |
|
$self->{s_kwd} = ''; |
| 894 |
!!!next-input-character; |
!!!next-input-character; |
| 895 |
|
|
| 896 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
| 899 |
} elsif (0x0041 <= $self->{nc} and |
} elsif (0x0041 <= $self->{nc} and |
| 900 |
$self->{nc} <= 0x005A) { # A..Z |
$self->{nc} <= 0x005A) { # A..Z |
| 901 |
!!!cp (63); |
!!!cp (63); |
| 902 |
$self->{ca}->{name} .= chr ($self->{nc} + 0x0020); |
$self->{ca}->{name} |
| 903 |
|
.= chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)); |
| 904 |
## Stay in the state |
## Stay in the state |
| 905 |
!!!next-input-character; |
!!!next-input-character; |
| 906 |
redo A; |
redo A; |
| 929 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 930 |
} |
} |
| 931 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 932 |
|
$self->{s_kwd} = ''; |
| 933 |
# reconsume |
# reconsume |
| 934 |
|
|
| 935 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
| 976 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 977 |
} |
} |
| 978 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 979 |
|
$self->{s_kwd} = ''; |
| 980 |
!!!next-input-character; |
!!!next-input-character; |
| 981 |
|
|
| 982 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
| 986 |
$self->{nc} <= 0x005A) { # A..Z |
$self->{nc} <= 0x005A) { # A..Z |
| 987 |
!!!cp (76); |
!!!cp (76); |
| 988 |
$self->{ca} |
$self->{ca} |
| 989 |
= {name => chr ($self->{nc} + 0x0020), |
= {name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)), |
| 990 |
value => '', |
value => '', |
| 991 |
line => $self->{line}, column => $self->{column}}; |
line => $self->{line}, column => $self->{column}}; |
| 992 |
$self->{state} = ATTRIBUTE_NAME_STATE; |
$self->{state} = ATTRIBUTE_NAME_STATE; |
| 1014 |
} else { |
} else { |
| 1015 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1016 |
} |
} |
| 1017 |
|
$self->{s_kwd} = ''; |
| 1018 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1019 |
# reconsume |
# reconsume |
| 1020 |
|
|
| 1076 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1077 |
} |
} |
| 1078 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1079 |
|
$self->{s_kwd} = ''; |
| 1080 |
!!!next-input-character; |
!!!next-input-character; |
| 1081 |
|
|
| 1082 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
| 1100 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1101 |
} |
} |
| 1102 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1103 |
|
$self->{s_kwd} = ''; |
| 1104 |
## reconsume |
## reconsume |
| 1105 |
|
|
| 1106 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
| 1153 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1154 |
} |
} |
| 1155 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1156 |
|
$self->{s_kwd} = ''; |
| 1157 |
## reconsume |
## reconsume |
| 1158 |
|
|
| 1159 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
| 1205 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1206 |
} |
} |
| 1207 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1208 |
|
$self->{s_kwd} = ''; |
| 1209 |
## reconsume |
## reconsume |
| 1210 |
|
|
| 1211 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
| 1256 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1257 |
} |
} |
| 1258 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1259 |
|
$self->{s_kwd} = ''; |
| 1260 |
!!!next-input-character; |
!!!next-input-character; |
| 1261 |
|
|
| 1262 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
| 1280 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1281 |
} |
} |
| 1282 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1283 |
|
$self->{s_kwd} = ''; |
| 1284 |
## reconsume |
## reconsume |
| 1285 |
|
|
| 1286 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
| 1329 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1330 |
} |
} |
| 1331 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1332 |
|
$self->{s_kwd} = ''; |
| 1333 |
!!!next-input-character; |
!!!next-input-character; |
| 1334 |
|
|
| 1335 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
| 1357 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1358 |
} |
} |
| 1359 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1360 |
|
$self->{s_kwd} = ''; |
| 1361 |
## Reconsume. |
## Reconsume. |
| 1362 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
| 1363 |
redo A; |
redo A; |
| 1388 |
} |
} |
| 1389 |
|
|
| 1390 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1391 |
|
$self->{s_kwd} = ''; |
| 1392 |
!!!next-input-character; |
!!!next-input-character; |
| 1393 |
|
|
| 1394 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
| 1411 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1412 |
} |
} |
| 1413 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1414 |
|
$self->{s_kwd} = ''; |
| 1415 |
## Reconsume. |
## Reconsume. |
| 1416 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
| 1417 |
redo A; |
redo A; |
| 1432 |
if ($self->{nc} == 0x003E) { # > |
if ($self->{nc} == 0x003E) { # > |
| 1433 |
!!!cp (124); |
!!!cp (124); |
| 1434 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1435 |
|
$self->{s_kwd} = ''; |
| 1436 |
!!!next-input-character; |
!!!next-input-character; |
| 1437 |
|
|
| 1438 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
| 1440 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 1441 |
!!!cp (125); |
!!!cp (125); |
| 1442 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1443 |
|
$self->{s_kwd} = ''; |
| 1444 |
## reconsume |
## reconsume |
| 1445 |
|
|
| 1446 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
| 1472 |
$self->{s_kwd} = chr $self->{nc}; |
$self->{s_kwd} = chr $self->{nc}; |
| 1473 |
!!!next-input-character; |
!!!next-input-character; |
| 1474 |
redo A; |
redo A; |
| 1475 |
} elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and |
} elsif ((($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and |
| 1476 |
$self->{open_elements}->[-1]->[1] & FOREIGN_EL and |
$self->{open_elements}->[-1]->[1] & FOREIGN_EL) or |
| 1477 |
|
$self->{is_xml}) and |
| 1478 |
$self->{nc} == 0x005B) { # [ |
$self->{nc} == 0x005B) { # [ |
| 1479 |
!!!cp (135.4); |
!!!cp (135.4); |
| 1480 |
$self->{state} = MD_CDATA_STATE; |
$self->{state} = MD_CDATA_STATE; |
| 1583 |
redo A; |
redo A; |
| 1584 |
} elsif ($self->{s_kwd} eq '[CDATA' and |
} elsif ($self->{s_kwd} eq '[CDATA' and |
| 1585 |
$self->{nc} == 0x005B) { # [ |
$self->{nc} == 0x005B) { # [ |
| 1586 |
!!!cp (135.2); |
if ($self->{is_xml} and |
| 1587 |
|
not $self->{tainted} and |
| 1588 |
|
@{$self->{open_elements} or []} == 0) { |
| 1589 |
|
!!!cp (135.2); |
| 1590 |
|
!!!parse-error (type => 'cdata outside of root element', |
| 1591 |
|
line => $self->{line_prev}, |
| 1592 |
|
column => $self->{column_prev} - 7); |
| 1593 |
|
$self->{tainted} = 1; |
| 1594 |
|
} else { |
| 1595 |
|
!!!cp (135.21); |
| 1596 |
|
} |
| 1597 |
|
|
| 1598 |
$self->{ct} = {type => CHARACTER_TOKEN, |
$self->{ct} = {type => CHARACTER_TOKEN, |
| 1599 |
data => '', |
data => '', |
| 1600 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 1626 |
!!!cp (138); |
!!!cp (138); |
| 1627 |
!!!parse-error (type => 'bogus comment'); |
!!!parse-error (type => 'bogus comment'); |
| 1628 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1629 |
|
$self->{s_kwd} = ''; |
| 1630 |
!!!next-input-character; |
!!!next-input-character; |
| 1631 |
|
|
| 1632 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
| 1636 |
!!!cp (139); |
!!!cp (139); |
| 1637 |
!!!parse-error (type => 'unclosed comment'); |
!!!parse-error (type => 'unclosed comment'); |
| 1638 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1639 |
|
$self->{s_kwd} = ''; |
| 1640 |
## reconsume |
## reconsume |
| 1641 |
|
|
| 1642 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
| 1660 |
!!!cp (142); |
!!!cp (142); |
| 1661 |
!!!parse-error (type => 'bogus comment'); |
!!!parse-error (type => 'bogus comment'); |
| 1662 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1663 |
|
$self->{s_kwd} = ''; |
| 1664 |
!!!next-input-character; |
!!!next-input-character; |
| 1665 |
|
|
| 1666 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
| 1670 |
!!!cp (143); |
!!!cp (143); |
| 1671 |
!!!parse-error (type => 'unclosed comment'); |
!!!parse-error (type => 'unclosed comment'); |
| 1672 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1673 |
|
$self->{s_kwd} = ''; |
| 1674 |
## reconsume |
## reconsume |
| 1675 |
|
|
| 1676 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
| 1694 |
!!!cp (146); |
!!!cp (146); |
| 1695 |
!!!parse-error (type => 'unclosed comment'); |
!!!parse-error (type => 'unclosed comment'); |
| 1696 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1697 |
|
$self->{s_kwd} = ''; |
| 1698 |
## reconsume |
## reconsume |
| 1699 |
|
|
| 1700 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
| 1720 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 1721 |
!!!cp (149); |
!!!cp (149); |
| 1722 |
!!!parse-error (type => 'unclosed comment'); |
!!!parse-error (type => 'unclosed comment'); |
| 1723 |
|
$self->{s_kwd} = ''; |
| 1724 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1725 |
|
$self->{s_kwd} = ''; |
| 1726 |
## reconsume |
## reconsume |
| 1727 |
|
|
| 1728 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
| 1739 |
if ($self->{nc} == 0x003E) { # > |
if ($self->{nc} == 0x003E) { # > |
| 1740 |
!!!cp (151); |
!!!cp (151); |
| 1741 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1742 |
|
$self->{s_kwd} = ''; |
| 1743 |
!!!next-input-character; |
!!!next-input-character; |
| 1744 |
|
|
| 1745 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
| 1758 |
!!!cp (153); |
!!!cp (153); |
| 1759 |
!!!parse-error (type => 'unclosed comment'); |
!!!parse-error (type => 'unclosed comment'); |
| 1760 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1761 |
|
$self->{s_kwd} = ''; |
| 1762 |
## reconsume |
## reconsume |
| 1763 |
|
|
| 1764 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
| 1797 |
!!!cp (158); |
!!!cp (158); |
| 1798 |
!!!parse-error (type => 'no DOCTYPE name'); |
!!!parse-error (type => 'no DOCTYPE name'); |
| 1799 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1800 |
|
$self->{s_kwd} = ''; |
| 1801 |
!!!next-input-character; |
!!!next-input-character; |
| 1802 |
|
|
| 1803 |
!!!emit ($self->{ct}); # DOCTYPE (quirks) |
!!!emit ($self->{ct}); # DOCTYPE (quirks) |
| 1807 |
!!!cp (159); |
!!!cp (159); |
| 1808 |
!!!parse-error (type => 'no DOCTYPE name'); |
!!!parse-error (type => 'no DOCTYPE name'); |
| 1809 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1810 |
|
$self->{s_kwd} = ''; |
| 1811 |
## reconsume |
## reconsume |
| 1812 |
|
|
| 1813 |
!!!emit ($self->{ct}); # DOCTYPE (quirks) |
!!!emit ($self->{ct}); # DOCTYPE (quirks) |
| 1831 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
| 1832 |
!!!cp (162); |
!!!cp (162); |
| 1833 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1834 |
|
$self->{s_kwd} = ''; |
| 1835 |
!!!next-input-character; |
!!!next-input-character; |
| 1836 |
|
|
| 1837 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
| 1841 |
!!!cp (163); |
!!!cp (163); |
| 1842 |
!!!parse-error (type => 'unclosed DOCTYPE'); |
!!!parse-error (type => 'unclosed DOCTYPE'); |
| 1843 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1844 |
|
$self->{s_kwd} = ''; |
| 1845 |
## reconsume |
## reconsume |
| 1846 |
|
|
| 1847 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 1865 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
| 1866 |
!!!cp (166); |
!!!cp (166); |
| 1867 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1868 |
|
$self->{s_kwd} = ''; |
| 1869 |
!!!next-input-character; |
!!!next-input-character; |
| 1870 |
|
|
| 1871 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
| 1875 |
!!!cp (167); |
!!!cp (167); |
| 1876 |
!!!parse-error (type => 'unclosed DOCTYPE'); |
!!!parse-error (type => 'unclosed DOCTYPE'); |
| 1877 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1878 |
|
$self->{s_kwd} = ''; |
| 1879 |
## reconsume |
## reconsume |
| 1880 |
|
|
| 1881 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 2004 |
!!!parse-error (type => 'no PUBLIC literal'); |
!!!parse-error (type => 'no PUBLIC literal'); |
| 2005 |
|
|
| 2006 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2007 |
|
$self->{s_kwd} = ''; |
| 2008 |
!!!next-input-character; |
!!!next-input-character; |
| 2009 |
|
|
| 2010 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 2016 |
!!!parse-error (type => 'unclosed DOCTYPE'); |
!!!parse-error (type => 'unclosed DOCTYPE'); |
| 2017 |
|
|
| 2018 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2019 |
|
$self->{s_kwd} = ''; |
| 2020 |
## reconsume |
## reconsume |
| 2021 |
|
|
| 2022 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 2043 |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
| 2044 |
|
|
| 2045 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2046 |
|
$self->{s_kwd} = ''; |
| 2047 |
!!!next-input-character; |
!!!next-input-character; |
| 2048 |
|
|
| 2049 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 2055 |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
| 2056 |
|
|
| 2057 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2058 |
|
$self->{s_kwd} = ''; |
| 2059 |
## reconsume |
## reconsume |
| 2060 |
|
|
| 2061 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 2084 |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
| 2085 |
|
|
| 2086 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2087 |
|
$self->{s_kwd} = ''; |
| 2088 |
!!!next-input-character; |
!!!next-input-character; |
| 2089 |
|
|
| 2090 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 2096 |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
| 2097 |
|
|
| 2098 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2099 |
|
$self->{s_kwd} = ''; |
| 2100 |
## reconsume |
## reconsume |
| 2101 |
|
|
| 2102 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 2135 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
| 2136 |
!!!cp (198); |
!!!cp (198); |
| 2137 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2138 |
|
$self->{s_kwd} = ''; |
| 2139 |
!!!next-input-character; |
!!!next-input-character; |
| 2140 |
|
|
| 2141 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
| 2146 |
!!!parse-error (type => 'unclosed DOCTYPE'); |
!!!parse-error (type => 'unclosed DOCTYPE'); |
| 2147 |
|
|
| 2148 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2149 |
|
$self->{s_kwd} = ''; |
| 2150 |
## reconsume |
## reconsume |
| 2151 |
|
|
| 2152 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 2184 |
!!!cp (204); |
!!!cp (204); |
| 2185 |
!!!parse-error (type => 'no SYSTEM literal'); |
!!!parse-error (type => 'no SYSTEM literal'); |
| 2186 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2187 |
|
$self->{s_kwd} = ''; |
| 2188 |
!!!next-input-character; |
!!!next-input-character; |
| 2189 |
|
|
| 2190 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 2196 |
!!!parse-error (type => 'unclosed DOCTYPE'); |
!!!parse-error (type => 'unclosed DOCTYPE'); |
| 2197 |
|
|
| 2198 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2199 |
|
$self->{s_kwd} = ''; |
| 2200 |
## reconsume |
## reconsume |
| 2201 |
|
|
| 2202 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 2223 |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
| 2224 |
|
|
| 2225 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2226 |
|
$self->{s_kwd} = ''; |
| 2227 |
!!!next-input-character; |
!!!next-input-character; |
| 2228 |
|
|
| 2229 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 2235 |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
| 2236 |
|
|
| 2237 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2238 |
|
$self->{s_kwd} = ''; |
| 2239 |
## reconsume |
## reconsume |
| 2240 |
|
|
| 2241 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 2264 |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
| 2265 |
|
|
| 2266 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2267 |
|
$self->{s_kwd} = ''; |
| 2268 |
!!!next-input-character; |
!!!next-input-character; |
| 2269 |
|
|
| 2270 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 2276 |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
| 2277 |
|
|
| 2278 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2279 |
|
$self->{s_kwd} = ''; |
| 2280 |
## reconsume |
## reconsume |
| 2281 |
|
|
| 2282 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 2303 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
| 2304 |
!!!cp (216); |
!!!cp (216); |
| 2305 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2306 |
|
$self->{s_kwd} = ''; |
| 2307 |
!!!next-input-character; |
!!!next-input-character; |
| 2308 |
|
|
| 2309 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
| 2313 |
!!!cp (217); |
!!!cp (217); |
| 2314 |
!!!parse-error (type => 'unclosed DOCTYPE'); |
!!!parse-error (type => 'unclosed DOCTYPE'); |
| 2315 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2316 |
|
$self->{s_kwd} = ''; |
| 2317 |
## reconsume |
## reconsume |
| 2318 |
|
|
| 2319 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 2333 |
if ($self->{nc} == 0x003E) { # > |
if ($self->{nc} == 0x003E) { # > |
| 2334 |
!!!cp (219); |
!!!cp (219); |
| 2335 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2336 |
|
$self->{s_kwd} = ''; |
| 2337 |
!!!next-input-character; |
!!!next-input-character; |
| 2338 |
|
|
| 2339 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
| 2342 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 2343 |
!!!cp (220); |
!!!cp (220); |
| 2344 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2345 |
|
$self->{s_kwd} = ''; |
| 2346 |
## reconsume |
## reconsume |
| 2347 |
|
|
| 2348 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
| 2368 |
!!!next-input-character; |
!!!next-input-character; |
| 2369 |
redo A; |
redo A; |
| 2370 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 2371 |
|
if ($self->{is_xml}) { |
| 2372 |
|
!!!cp (221.11); |
| 2373 |
|
!!!parse-error (type => 'no mse'); ## TODO: type |
| 2374 |
|
} else { |
| 2375 |
|
!!!cp (221.12); |
| 2376 |
|
} |
| 2377 |
|
|
| 2378 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2379 |
|
$self->{s_kwd} = ''; |
| 2380 |
!!!next-input-character; |
!!!next-input-character; |
| 2381 |
if (length $self->{ct}->{data}) { # character |
if (length $self->{ct}->{data}) { # character |
| 2382 |
!!!cp (221.2); |
!!!cp (221.2); |
| 2415 |
} elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) { |
} elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) { |
| 2416 |
if ($self->{nc} == 0x003E) { # > |
if ($self->{nc} == 0x003E) { # > |
| 2417 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2418 |
|
$self->{s_kwd} = ''; |
| 2419 |
!!!next-input-character; |
!!!next-input-character; |
| 2420 |
if (length $self->{ct}->{data}) { # character |
if (length $self->{ct}->{data}) { # character |
| 2421 |
!!!cp (221.7); |
!!!cp (221.7); |
| 2483 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
| 2484 |
!!!cp (997); |
!!!cp (997); |
| 2485 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 2486 |
|
$self->{s_kwd} = ''; |
| 2487 |
## Reconsume. |
## Reconsume. |
| 2488 |
!!!emit ({type => CHARACTER_TOKEN, data => '&', |
!!!emit ({type => CHARACTER_TOKEN, data => '&', |
| 2489 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 2494 |
!!!cp (996); |
!!!cp (996); |
| 2495 |
$self->{ca}->{value} .= '&'; |
$self->{ca}->{value} .= '&'; |
| 2496 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 2497 |
|
$self->{s_kwd} = ''; |
| 2498 |
## Reconsume. |
## Reconsume. |
| 2499 |
redo A; |
redo A; |
| 2500 |
} |
} |
| 2525 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
| 2526 |
!!!cp (1019); |
!!!cp (1019); |
| 2527 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 2528 |
|
$self->{s_kwd} = ''; |
| 2529 |
## Reconsume. |
## Reconsume. |
| 2530 |
!!!emit ({type => CHARACTER_TOKEN, |
!!!emit ({type => CHARACTER_TOKEN, |
| 2531 |
data => '&#', |
data => '&#', |
| 2537 |
!!!cp (993); |
!!!cp (993); |
| 2538 |
$self->{ca}->{value} .= '&#'; |
$self->{ca}->{value} .= '&#'; |
| 2539 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 2540 |
|
$self->{s_kwd} = ''; |
| 2541 |
## Reconsume. |
## Reconsume. |
| 2542 |
redo A; |
redo A; |
| 2543 |
} |
} |
| 2583 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
| 2584 |
!!!cp (992); |
!!!cp (992); |
| 2585 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 2586 |
|
$self->{s_kwd} = ''; |
| 2587 |
## Reconsume. |
## Reconsume. |
| 2588 |
!!!emit ({type => CHARACTER_TOKEN, data => chr $code, |
!!!emit ({type => CHARACTER_TOKEN, data => chr $code, |
| 2589 |
|
has_reference => 1, |
| 2590 |
line => $l, column => $c, |
line => $l, column => $c, |
| 2591 |
}); |
}); |
| 2592 |
redo A; |
redo A; |
| 2595 |
$self->{ca}->{value} .= chr $code; |
$self->{ca}->{value} .= chr $code; |
| 2596 |
$self->{ca}->{has_reference} = 1; |
$self->{ca}->{has_reference} = 1; |
| 2597 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 2598 |
|
$self->{s_kwd} = ''; |
| 2599 |
## Reconsume. |
## Reconsume. |
| 2600 |
redo A; |
redo A; |
| 2601 |
} |
} |
| 2621 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
| 2622 |
!!!cp (1005); |
!!!cp (1005); |
| 2623 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 2624 |
|
$self->{s_kwd} = ''; |
| 2625 |
## Reconsume. |
## Reconsume. |
| 2626 |
!!!emit ({type => CHARACTER_TOKEN, |
!!!emit ({type => CHARACTER_TOKEN, |
| 2627 |
data => '&' . $self->{s_kwd}, |
data => '&' . $self->{s_kwd}, |
| 2633 |
!!!cp (989); |
!!!cp (989); |
| 2634 |
$self->{ca}->{value} .= '&' . $self->{s_kwd}; |
$self->{ca}->{value} .= '&' . $self->{s_kwd}; |
| 2635 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 2636 |
|
$self->{s_kwd} = ''; |
| 2637 |
## Reconsume. |
## Reconsume. |
| 2638 |
redo A; |
redo A; |
| 2639 |
} |
} |
| 2696 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
| 2697 |
!!!cp (988); |
!!!cp (988); |
| 2698 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 2699 |
|
$self->{s_kwd} = ''; |
| 2700 |
## Reconsume. |
## Reconsume. |
| 2701 |
!!!emit ({type => CHARACTER_TOKEN, data => chr $code, |
!!!emit ({type => CHARACTER_TOKEN, data => chr $code, |
| 2702 |
|
has_reference => 1, |
| 2703 |
line => $l, column => $c, |
line => $l, column => $c, |
| 2704 |
}); |
}); |
| 2705 |
redo A; |
redo A; |
| 2708 |
$self->{ca}->{value} .= chr $code; |
$self->{ca}->{value} .= chr $code; |
| 2709 |
$self->{ca}->{has_reference} = 1; |
$self->{ca}->{has_reference} = 1; |
| 2710 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 2711 |
|
$self->{s_kwd} = ''; |
| 2712 |
## Reconsume. |
## Reconsume. |
| 2713 |
redo A; |
redo A; |
| 2714 |
} |
} |
| 2791 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
| 2792 |
!!!cp (986); |
!!!cp (986); |
| 2793 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 2794 |
|
$self->{s_kwd} = ''; |
| 2795 |
## Reconsume. |
## Reconsume. |
| 2796 |
!!!emit ({type => CHARACTER_TOKEN, |
!!!emit ({type => CHARACTER_TOKEN, |
| 2797 |
data => $data, |
data => $data, |
| 2798 |
|
has_reference => $has_ref, |
| 2799 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 2800 |
column => $self->{column_prev} + 1 - length $self->{s_kwd}, |
column => $self->{column_prev} + 1 - length $self->{s_kwd}, |
| 2801 |
}); |
}); |
| 2805 |
$self->{ca}->{value} .= $data; |
$self->{ca}->{value} .= $data; |
| 2806 |
$self->{ca}->{has_reference} = 1 if $has_ref; |
$self->{ca}->{has_reference} = 1 if $has_ref; |
| 2807 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 2808 |
|
$self->{s_kwd} = ''; |
| 2809 |
|
## Reconsume. |
| 2810 |
|
redo A; |
| 2811 |
|
} |
| 2812 |
|
|
| 2813 |
|
## XML-only states |
| 2814 |
|
|
| 2815 |
|
} elsif ($self->{state} == PI_STATE) { |
| 2816 |
|
if ($is_space->{$self->{nc}} or |
| 2817 |
|
$self->{nc} == 0x003F or # ? ## XML5: Same as "Anything else" |
| 2818 |
|
$self->{nc} == -1) { |
| 2819 |
|
!!!parse-error (type => 'bare pio', ## TODO: type |
| 2820 |
|
line => $self->{line_prev}, |
| 2821 |
|
column => $self->{column_prev} |
| 2822 |
|
- 1 * ($self->{nc} != -1)); |
| 2823 |
|
$self->{state} = BOGUS_COMMENT_STATE; |
| 2824 |
## Reconsume. |
## Reconsume. |
| 2825 |
|
$self->{ct} = {type => COMMENT_TOKEN, |
| 2826 |
|
data => '?', |
| 2827 |
|
line => $self->{line_prev}, |
| 2828 |
|
column => $self->{column_prev} |
| 2829 |
|
- 1 * ($self->{nc} != -1), |
| 2830 |
|
}; |
| 2831 |
|
redo A; |
| 2832 |
|
} else { |
| 2833 |
|
$self->{ct} = {type => PI_TOKEN, |
| 2834 |
|
target => chr $self->{nc}, |
| 2835 |
|
data => '', |
| 2836 |
|
line => $self->{line_prev}, |
| 2837 |
|
column => $self->{column_prev} - 1, |
| 2838 |
|
}; |
| 2839 |
|
$self->{state} = PI_TARGET_STATE; |
| 2840 |
|
!!!next-input-character; |
| 2841 |
redo A; |
redo A; |
| 2842 |
} |
} |
| 2843 |
|
} elsif ($self->{state} == PI_TARGET_STATE) { |
| 2844 |
|
if ($is_space->{$self->{nc}}) { |
| 2845 |
|
$self->{state} = PI_TARGET_AFTER_STATE; |
| 2846 |
|
!!!next-input-character; |
| 2847 |
|
redo A; |
| 2848 |
|
} elsif ($self->{nc} == -1) { |
| 2849 |
|
!!!parse-error (type => 'no pic'); ## TODO: type |
| 2850 |
|
$self->{state} = DATA_STATE; |
| 2851 |
|
$self->{s_kwd} = ''; |
| 2852 |
|
## Reconsume. |
| 2853 |
|
!!!emit ($self->{ct}); # pi |
| 2854 |
|
redo A; |
| 2855 |
|
} elsif ($self->{nc} == 0x003F) { # ? |
| 2856 |
|
$self->{state} = PI_AFTER_STATE; |
| 2857 |
|
!!!next-input-character; |
| 2858 |
|
redo A; |
| 2859 |
|
} else { |
| 2860 |
|
## XML5: typo ("tag name" -> "target") |
| 2861 |
|
$self->{ct}->{target} .= chr $self->{nc}; # pi |
| 2862 |
|
!!!next-input-character; |
| 2863 |
|
redo A; |
| 2864 |
|
} |
| 2865 |
|
} elsif ($self->{state} == PI_TARGET_AFTER_STATE) { |
| 2866 |
|
if ($is_space->{$self->{nc}}) { |
| 2867 |
|
## Stay in the state. |
| 2868 |
|
!!!next-input-character; |
| 2869 |
|
redo A; |
| 2870 |
|
} else { |
| 2871 |
|
$self->{state} = PI_DATA_STATE; |
| 2872 |
|
## Reprocess. |
| 2873 |
|
redo A; |
| 2874 |
|
} |
| 2875 |
|
} elsif ($self->{state} == PI_DATA_STATE) { |
| 2876 |
|
if ($self->{nc} == 0x003F) { # ? |
| 2877 |
|
$self->{state} = PI_DATA_AFTER_STATE; |
| 2878 |
|
!!!next-input-character; |
| 2879 |
|
redo A; |
| 2880 |
|
} elsif ($self->{nc} == -1) { |
| 2881 |
|
!!!parse-error (type => 'no pic'); ## TODO: type |
| 2882 |
|
$self->{state} = DATA_STATE; |
| 2883 |
|
$self->{s_kwd} = ''; |
| 2884 |
|
## Reprocess. |
| 2885 |
|
!!!emit ($self->{ct}); # pi |
| 2886 |
|
redo A; |
| 2887 |
|
} else { |
| 2888 |
|
$self->{ct}->{data} .= chr $self->{nc}; # pi |
| 2889 |
|
$self->{read_until}->($self->{ct}->{data}, q[?], |
| 2890 |
|
length $self->{ct}->{data}); |
| 2891 |
|
## Stay in the state. |
| 2892 |
|
!!!next-input-character; |
| 2893 |
|
## Continue in this state with the character just consumed. |
| 2894 |
|
redo A; |
| 2895 |
|
} |
| 2896 |
|
} elsif ($self->{state} == PI_AFTER_STATE) { |
| 2897 |
|
if ($self->{nc} == 0x003E) { # > |
| 2898 |
|
$self->{state} = DATA_STATE; |
| 2899 |
|
$self->{s_kwd} = ''; |
| 2900 |
|
!!!next-input-character; |
| 2901 |
|
!!!emit ($self->{ct}); # pi |
| 2902 |
|
redo A; |
| 2903 |
|
} elsif ($self->{nc} == 0x003F) { # ? |
| 2904 |
|
!!!parse-error (type => 'no s after target', ## TODO: type |
| 2905 |
|
line => $self->{line_prev}, |
| 2906 |
|
column => $self->{column_prev}); ## XML5: no error |
| 2907 |
|
$self->{ct}->{data} .= '?'; |
| 2908 |
|
$self->{state} = PI_DATA_AFTER_STATE; |
| 2909 |
|
!!!next-input-character; |
| 2910 |
|
redo A; |
| 2911 |
|
} else { |
| 2912 |
|
!!!parse-error (type => 'no s after target', ## TODO: type |
| 2913 |
|
line => $self->{line_prev}, |
| 2914 |
|
column => $self->{column_prev} |
| 2915 |
|
+ 1 * ($self->{nc} == -1)); ## XML5: no error |
| 2916 |
|
$self->{ct}->{data} .= '?'; ## XML5: not appended |
| 2917 |
|
$self->{state} = PI_DATA_STATE; |
| 2918 |
|
## Reprocess. |
| 2919 |
|
redo A; |
| 2920 |
|
} |
| 2921 |
|
} elsif ($self->{state} == PI_DATA_AFTER_STATE) { |
| 2922 |
|
## XML5: Same as "pi after state" in XML5 |
| 2923 |
|
if ($self->{nc} == 0x003E) { # > |
| 2924 |
|
$self->{state} = DATA_STATE; |
| 2925 |
|
$self->{s_kwd} = ''; |
| 2926 |
|
!!!next-input-character; |
| 2927 |
|
!!!emit ($self->{ct}); # pi |
| 2928 |
|
redo A; |
| 2929 |
|
} elsif ($self->{nc} == 0x003F) { # ? |
| 2930 |
|
$self->{ct}->{data} .= '?'; |
| 2931 |
|
## Stay in the state. |
| 2932 |
|
!!!next-input-character; |
| 2933 |
|
redo A; |
| 2934 |
|
} else { |
| 2935 |
|
$self->{ct}->{data} .= '?'; ## XML5: not appended |
| 2936 |
|
$self->{state} = PI_DATA_STATE; |
| 2937 |
|
## Reprocess. |
| 2938 |
|
redo A; |
| 2939 |
|
} |
| 2940 |
|
|
| 2941 |
} else { |
} else { |
| 2942 |
die "$0: $self->{state}: Unknown state"; |
die "$0: $self->{state}: Unknown state"; |
| 2943 |
} |
} |