| 114 |
sub ENTITY_NAME_STATE () { 49 } |
sub ENTITY_NAME_STATE () { 49 } |
| 115 |
sub PCDATA_STATE () { 50 } # "data state" in the spec |
sub PCDATA_STATE () { 50 } # "data state" in the spec |
| 116 |
|
|
| 117 |
|
## State constants for the XML-specific tokenizer states.  Each value
## is a distinct small integer that is stored in |$self->{state}| and
## compared numerically (|==|) by the tokenizer; the numbering
## continues from the preceding state constants (|PCDATA_STATE| is 50).
## The empty prototype lets perl inline each sub as a constant.
##
## |PI_STATE| is entered from the tag open state when "<" is followed
## by "?" and |$self->{is_xml}| is true; when |is_xml| is false the same
## input raises the 'pio' parse error and switches to the bogus comment
## state instead.
##
## NOTE(review): The remaining |PI_*| states are presumably the later
## steps of processing instruction tokenization (target, data, and the
## closing "?>"); their handlers are not visible here - confirm.
## XML states |
| 118 |
|
sub PI_STATE () { 51 } |
| 119 |
|
sub PI_TARGET_STATE () { 52 } |
| 120 |
|
sub PI_TARGET_AFTER_STATE () { 53 } |
| 121 |
|
sub PI_DATA_STATE () { 54 } |
| 122 |
|
sub PI_AFTER_STATE () { 55 } |
| 123 |
|
sub PI_DATA_AFTER_STATE () { 56 } |
| 124 |
|
|
| 125 |
## Tree constructor state constants (see Whatpm::HTML for the full |
## Tree constructor state constants (see Whatpm::HTML for the full |
| 126 |
## list and descriptions) |
## list and descriptions) |
| 127 |
|
|
| 186 |
#$self->{is_xml} (if XML) |
#$self->{is_xml} (if XML) |
| 187 |
|
|
| 188 |
$self->{state} = DATA_STATE; # MUST |
$self->{state} = DATA_STATE; # MUST |
| 189 |
#$self->{s_kwd}; # state keyword - initialized when used |
$self->{s_kwd} = ''; # state keyword |
| 190 |
#$self->{entity__value}; # initialized when used |
#$self->{entity__value}; # initialized when used |
| 191 |
#$self->{entity__match}; # initialized when used |
#$self->{entity__match}; # initialized when used |
| 192 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # be |
$self->{content_model} = PCDATA_CONTENT_MODEL; # be |
| 227 |
## ->{value} |
## ->{value} |
| 228 |
## ->{has_reference} == 1 or 0 |
## ->{has_reference} == 1 or 0 |
| 229 |
## ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN) |
## ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN) |
| 230 |
|
## ->{has_reference} == 1 or 0 (CHARACTER_TOKEN) |
| 231 |
## NOTE: The "self-closing flag" is held as |$self->{self_closing}|. |
## NOTE: The "self-closing flag" is held as |$self->{self_closing}|. |
| 232 |
## |->{self_closing}| is used to save the value of |$self->{self_closing}| |
## |->{self_closing}| is used to save the value of |$self->{self_closing}| |
| 233 |
## while the token is pushed back to the stack. |
## while the token is pushed back to the stack. |
| 371 |
} |
} |
| 372 |
} elsif ($self->{nc} == 0x002D) { # - |
} elsif ($self->{nc} == 0x002D) { # - |
| 373 |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
| 374 |
$self->{s_kwd} .= '-'; |
if ($self->{s_kwd} eq '<!-') { |
|
|
|
|
if ($self->{s_kwd} eq '<!--') { |
|
| 375 |
|
|
| 376 |
$self->{escape} = 1; # unless $self->{escape}; |
$self->{escape} = 1; # unless $self->{escape}; |
| 377 |
$self->{s_kwd} = '--'; |
$self->{s_kwd} = '--'; |
| 378 |
# |
# |
| 379 |
} elsif ($self->{s_kwd} eq '---') { |
} elsif ($self->{s_kwd} eq '-') { |
| 380 |
|
|
| 381 |
$self->{s_kwd} = '--'; |
$self->{s_kwd} = '--'; |
| 382 |
# |
# |
| 383 |
|
} elsif ($self->{s_kwd} eq '<!' or $self->{s_kwd} eq '-') { |
| 384 |
|
|
| 385 |
|
$self->{s_kwd} .= '-'; |
| 386 |
|
# |
| 387 |
} else { |
} else { |
| 388 |
|
|
| 389 |
|
$self->{s_kwd} = '-'; |
| 390 |
# |
# |
| 391 |
} |
} |
| 392 |
} |
} |
| 432 |
if ($self->{s_kwd} eq '--') { |
if ($self->{s_kwd} eq '--') { |
| 433 |
|
|
| 434 |
delete $self->{escape}; |
delete $self->{escape}; |
| 435 |
|
# |
| 436 |
} else { |
} else { |
| 437 |
|
|
| 438 |
|
# |
| 439 |
} |
} |
| 440 |
|
} elsif ($self->{is_xml} and $self->{s_kwd} eq ']]') { |
| 441 |
|
|
| 442 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unmatched mse', ## TODO: type |
| 443 |
|
line => $self->{line_prev}, |
| 444 |
|
column => $self->{column_prev} - 1); |
| 445 |
|
# |
| 446 |
} else { |
} else { |
| 447 |
|
|
| 448 |
|
# |
| 449 |
} |
} |
| 450 |
|
|
| 451 |
$self->{s_kwd} = ''; |
$self->{s_kwd} = ''; |
| 452 |
# |
# |
| 453 |
|
} elsif ($self->{nc} == 0x005D) { # ] |
| 454 |
|
if ($self->{s_kwd} eq ']' or $self->{s_kwd} eq '') { |
| 455 |
|
|
| 456 |
|
$self->{s_kwd} .= ']'; |
| 457 |
|
} elsif ($self->{s_kwd} eq ']]') { |
| 458 |
|
|
| 459 |
|
# |
| 460 |
|
} else { |
| 461 |
|
|
| 462 |
|
$self->{s_kwd} = ''; |
| 463 |
|
} |
| 464 |
|
# |
| 465 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 466 |
|
|
| 467 |
$self->{s_kwd} = ''; |
$self->{s_kwd} = ''; |
| 479 |
data => chr $self->{nc}, |
data => chr $self->{nc}, |
| 480 |
line => $self->{line}, column => $self->{column}, |
line => $self->{line}, column => $self->{column}, |
| 481 |
}; |
}; |
| 482 |
if ($self->{read_until}->($token->{data}, q[-!<>&], |
if ($self->{read_until}->($token->{data}, q{-!<>&\]}, |
| 483 |
length $token->{data})) { |
length $token->{data})) { |
| 484 |
$self->{s_kwd} = ''; |
$self->{s_kwd} = ''; |
| 485 |
} |
} |
| 486 |
|
|
| 487 |
## Stay in the data state. |
## Stay in the data state. |
| 488 |
if ($self->{content_model} == PCDATA_CONTENT_MODEL) { |
if (not $self->{is_xml} and |
| 489 |
|
$self->{content_model} == PCDATA_CONTENT_MODEL) { |
| 490 |
|
|
| 491 |
$self->{state} = PCDATA_STATE; |
$self->{state} = PCDATA_STATE; |
| 492 |
} else { |
} else { |
| 507 |
return ($token); |
return ($token); |
| 508 |
redo A; |
redo A; |
| 509 |
} elsif ($self->{state} == TAG_OPEN_STATE) { |
} elsif ($self->{state} == TAG_OPEN_STATE) { |
| 510 |
|
## XML5: "tag state". |
| 511 |
|
|
| 512 |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
| 513 |
if ($self->{nc} == 0x002F) { # / |
if ($self->{nc} == 0x002F) { # / |
| 514 |
|
|
| 536 |
|
|
| 537 |
## reconsume |
## reconsume |
| 538 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 539 |
|
$self->{s_kwd} = ''; |
| 540 |
return ({type => CHARACTER_TOKEN, data => '<', |
return ({type => CHARACTER_TOKEN, data => '<', |
| 541 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 542 |
column => $self->{column_prev}, |
column => $self->{column_prev}, |
| 620 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 621 |
column => $self->{column_prev}); |
column => $self->{column_prev}); |
| 622 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 623 |
|
$self->{s_kwd} = ''; |
| 624 |
|
|
| 625 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 626 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 640 |
|
|
| 641 |
redo A; |
redo A; |
| 642 |
} elsif ($self->{nc} == 0x003F) { # ? |
} elsif ($self->{nc} == 0x003F) { # ? |
| 643 |
|
if ($self->{is_xml}) { |
| 644 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'pio', |
|
| 645 |
line => $self->{line_prev}, |
$self->{state} = PI_STATE; |
| 646 |
column => $self->{column_prev}); |
|
| 647 |
$self->{state} = BOGUS_COMMENT_STATE; |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 648 |
$self->{ct} = {type => COMMENT_TOKEN, data => '', |
$self->{line_prev} = $self->{line}; |
| 649 |
line => $self->{line_prev}, |
$self->{column_prev} = $self->{column}; |
| 650 |
column => $self->{column_prev}, |
$self->{column}++; |
| 651 |
}; |
$self->{nc} |
| 652 |
## $self->{nc} is intentionally left as is |
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 653 |
redo A; |
} else { |
| 654 |
} else { |
$self->{set_nc}->($self); |
| 655 |
|
} |
| 656 |
|
|
| 657 |
|
redo A; |
| 658 |
|
} else { |
| 659 |
|
|
| 660 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'pio', |
| 661 |
|
line => $self->{line_prev}, |
| 662 |
|
column => $self->{column_prev}); |
| 663 |
|
$self->{state} = BOGUS_COMMENT_STATE; |
| 664 |
|
$self->{ct} = {type => COMMENT_TOKEN, data => '', |
| 665 |
|
line => $self->{line_prev}, |
| 666 |
|
column => $self->{column_prev}, |
| 667 |
|
}; |
| 668 |
|
## $self->{nc} is intentionally left as is |
| 669 |
|
redo A; |
| 670 |
|
} |
| 671 |
|
} elsif (not $self->{is_xml} or $is_space->{$self->{nc}}) { |
| 672 |
|
|
| 673 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago', |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago', |
| 674 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 675 |
column => $self->{column_prev}); |
column => $self->{column_prev}); |
| 676 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 677 |
|
$self->{s_kwd} = ''; |
| 678 |
## reconsume |
## reconsume |
| 679 |
|
|
| 680 |
return ({type => CHARACTER_TOKEN, data => '<', |
return ({type => CHARACTER_TOKEN, data => '<', |
| 683 |
}); |
}); |
| 684 |
|
|
| 685 |
redo A; |
redo A; |
| 686 |
|
} else { |
| 687 |
|
## XML5: "<:" is a parse error. |
| 688 |
|
|
| 689 |
|
$self->{ct} = {type => START_TAG_TOKEN, |
| 690 |
|
tag_name => chr ($self->{nc}), |
| 691 |
|
line => $self->{line_prev}, |
| 692 |
|
column => $self->{column_prev}}; |
| 693 |
|
$self->{state} = TAG_NAME_STATE; |
| 694 |
|
|
| 695 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 696 |
|
$self->{line_prev} = $self->{line}; |
| 697 |
|
$self->{column_prev} = $self->{column}; |
| 698 |
|
$self->{column}++; |
| 699 |
|
$self->{nc} |
| 700 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 701 |
|
} else { |
| 702 |
|
$self->{set_nc}->($self); |
| 703 |
|
} |
| 704 |
|
|
| 705 |
|
redo A; |
| 706 |
} |
} |
| 707 |
} else { |
} else { |
| 708 |
die "$0: $self->{content_model} in tag open"; |
die "$0: $self->{content_model} in tag open"; |
| 711 |
## NOTE: The "close tag open state" in the spec is implemented as |
## NOTE: The "close tag open state" in the spec is implemented as |
| 712 |
## |CLOSE_TAG_OPEN_STATE| and |CDATA_RCDATA_CLOSE_TAG_STATE|. |
## |CLOSE_TAG_OPEN_STATE| and |CDATA_RCDATA_CLOSE_TAG_STATE|. |
| 713 |
|
|
| 714 |
|
## XML5: "end tag state". |
| 715 |
|
|
| 716 |
my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</" |
my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</" |
| 717 |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
| 718 |
if (defined $self->{last_stag_name}) { |
if (defined $self->{last_stag_name}) { |
| 725 |
## NOTE: See <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>. |
## NOTE: See <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>. |
| 726 |
|
|
| 727 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 728 |
|
$self->{s_kwd} = ''; |
| 729 |
## Reconsume. |
## Reconsume. |
| 730 |
return ({type => CHARACTER_TOKEN, data => '</', |
return ({type => CHARACTER_TOKEN, data => '</', |
| 731 |
line => $l, column => $c, |
line => $l, column => $c, |
| 774 |
|
|
| 775 |
redo A; |
redo A; |
| 776 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
|
|
|
| 777 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'empty end tag', |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'empty end tag', |
| 778 |
line => $self->{line_prev}, ## "<" in "</>" |
line => $self->{line_prev}, ## "<" in "</>" |
| 779 |
column => $self->{column_prev} - 1); |
column => $self->{column_prev} - 1); |
| 780 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 781 |
|
$self->{s_kwd} = ''; |
| 782 |
|
if ($self->{is_xml}) { |
| 783 |
|
|
| 784 |
|
## XML5: No parse error. |
| 785 |
|
|
| 786 |
|
## NOTE: This parser raises a parse error, since it supports |
| 787 |
|
## XML1, not XML5. |
| 788 |
|
|
| 789 |
|
## NOTE: A short end tag token. |
| 790 |
|
my $ct = {type => END_TAG_TOKEN, |
| 791 |
|
tag_name => '', |
| 792 |
|
line => $self->{line_prev}, |
| 793 |
|
column => $self->{column_prev} - 1, |
| 794 |
|
}; |
| 795 |
|
|
| 796 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 797 |
|
$self->{line_prev} = $self->{line}; |
| 798 |
|
$self->{column_prev} = $self->{column}; |
| 799 |
|
$self->{column}++; |
| 800 |
|
$self->{nc} |
| 801 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 802 |
|
} else { |
| 803 |
|
$self->{set_nc}->($self); |
| 804 |
|
} |
| 805 |
|
|
| 806 |
|
return ($ct); |
| 807 |
|
} else { |
| 808 |
|
|
| 809 |
|
|
| 810 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 811 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 812 |
$self->{column_prev} = $self->{column}; |
$self->{column_prev} = $self->{column}; |
| 817 |
$self->{set_nc}->($self); |
$self->{set_nc}->($self); |
| 818 |
} |
} |
| 819 |
|
|
| 820 |
|
} |
| 821 |
redo A; |
redo A; |
| 822 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 823 |
|
|
| 824 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bare etago'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bare etago'); |
| 825 |
|
$self->{s_kwd} = ''; |
| 826 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 827 |
# reconsume |
# reconsume |
| 828 |
|
|
| 831 |
}); |
}); |
| 832 |
|
|
| 833 |
redo A; |
redo A; |
| 834 |
} else { |
} elsif (not $self->{is_xml} or |
| 835 |
|
$is_space->{$self->{nc}}) { |
| 836 |
|
|
| 837 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus end tag'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus end tag', |
| 838 |
|
line => $self->{line_prev}, # "<" of "</" |
| 839 |
|
column => $self->{column_prev} - 1); |
| 840 |
$self->{state} = BOGUS_COMMENT_STATE; |
$self->{state} = BOGUS_COMMENT_STATE; |
| 841 |
$self->{ct} = {type => COMMENT_TOKEN, data => '', |
$self->{ct} = {type => COMMENT_TOKEN, data => '', |
| 842 |
line => $self->{line_prev}, # "<" of "</" |
line => $self->{line_prev}, # "<" of "</" |
| 849 |
## generated from the bogus end tag, as defined in the |
## generated from the bogus end tag, as defined in the |
| 850 |
## "bogus comment state" entry. |
## "bogus comment state" entry. |
| 851 |
redo A; |
redo A; |
| 852 |
|
} else { |
| 853 |
|
## XML5: "</:" is a parse error. |
| 854 |
|
|
| 855 |
|
$self->{ct} = {type => END_TAG_TOKEN, |
| 856 |
|
tag_name => chr ($self->{nc}), |
| 857 |
|
line => $l, column => $c}; |
| 858 |
|
$self->{state} = TAG_NAME_STATE; ## XML5: "end tag name state". |
| 859 |
|
|
| 860 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 861 |
|
$self->{line_prev} = $self->{line}; |
| 862 |
|
$self->{column_prev} = $self->{column}; |
| 863 |
|
$self->{column}++; |
| 864 |
|
$self->{nc} |
| 865 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 866 |
|
} else { |
| 867 |
|
$self->{set_nc}->($self); |
| 868 |
|
} |
| 869 |
|
|
| 870 |
|
redo A; |
| 871 |
} |
} |
| 872 |
} elsif ($self->{state} == CDATA_RCDATA_CLOSE_TAG_STATE) { |
} elsif ($self->{state} == CDATA_RCDATA_CLOSE_TAG_STATE) { |
| 873 |
my $ch = substr $self->{last_stag_name}, length $self->{s_kwd}, 1; |
my $ch = substr $self->{last_stag_name}, length $self->{s_kwd}, 1; |
| 894 |
} else { |
} else { |
| 895 |
|
|
| 896 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 897 |
|
$self->{s_kwd} = ''; |
| 898 |
## Reconsume. |
## Reconsume. |
| 899 |
return ({type => CHARACTER_TOKEN, |
return ({type => CHARACTER_TOKEN, |
| 900 |
data => '</' . $self->{s_kwd}, |
data => '</' . $self->{s_kwd}, |
| 913 |
|
|
| 914 |
## Reconsume. |
## Reconsume. |
| 915 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 916 |
|
$self->{s_kwd} = ''; |
| 917 |
return ({type => CHARACTER_TOKEN, |
return ({type => CHARACTER_TOKEN, |
| 918 |
data => '</' . $self->{s_kwd}, |
data => '</' . $self->{s_kwd}, |
| 919 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 965 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 966 |
} |
} |
| 967 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 968 |
|
$self->{s_kwd} = ''; |
| 969 |
|
|
| 970 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 971 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 1018 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1019 |
} |
} |
| 1020 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1021 |
|
$self->{s_kwd} = ''; |
| 1022 |
# reconsume |
# reconsume |
| 1023 |
|
|
| 1024 |
return ($self->{ct}); # start tag or end tag |
return ($self->{ct}); # start tag or end tag |
| 1089 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1090 |
} |
} |
| 1091 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1092 |
|
$self->{s_kwd} = ''; |
| 1093 |
|
|
| 1094 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 1095 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 1157 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1158 |
} |
} |
| 1159 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1160 |
|
$self->{s_kwd} = ''; |
| 1161 |
# reconsume |
# reconsume |
| 1162 |
|
|
| 1163 |
return ($self->{ct}); # start tag or end tag |
return ($self->{ct}); # start tag or end tag |
| 1253 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1254 |
} |
} |
| 1255 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1256 |
|
$self->{s_kwd} = ''; |
| 1257 |
|
|
| 1258 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 1259 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 1322 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1323 |
} |
} |
| 1324 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1325 |
|
$self->{s_kwd} = ''; |
| 1326 |
# reconsume |
# reconsume |
| 1327 |
|
|
| 1328 |
return ($self->{ct}); # start tag or end tag |
return ($self->{ct}); # start tag or end tag |
| 1399 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1400 |
} |
} |
| 1401 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1402 |
|
$self->{s_kwd} = ''; |
| 1403 |
|
|
| 1404 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 1405 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 1467 |
} else { |
} else { |
| 1468 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1469 |
} |
} |
| 1470 |
|
$self->{s_kwd} = ''; |
| 1471 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1472 |
# reconsume |
# reconsume |
| 1473 |
|
|
| 1569 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1570 |
} |
} |
| 1571 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1572 |
|
$self->{s_kwd} = ''; |
| 1573 |
|
|
| 1574 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 1575 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 1603 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1604 |
} |
} |
| 1605 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1606 |
|
$self->{s_kwd} = ''; |
| 1607 |
## reconsume |
## reconsume |
| 1608 |
|
|
| 1609 |
return ($self->{ct}); # start tag or end tag |
return ($self->{ct}); # start tag or end tag |
| 1686 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1687 |
} |
} |
| 1688 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1689 |
|
$self->{s_kwd} = ''; |
| 1690 |
## reconsume |
## reconsume |
| 1691 |
|
|
| 1692 |
return ($self->{ct}); # start tag or end tag |
return ($self->{ct}); # start tag or end tag |
| 1768 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1769 |
} |
} |
| 1770 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1771 |
|
$self->{s_kwd} = ''; |
| 1772 |
## reconsume |
## reconsume |
| 1773 |
|
|
| 1774 |
return ($self->{ct}); # start tag or end tag |
return ($self->{ct}); # start tag or end tag |
| 1849 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1850 |
} |
} |
| 1851 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1852 |
|
$self->{s_kwd} = ''; |
| 1853 |
|
|
| 1854 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 1855 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 1883 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1884 |
} |
} |
| 1885 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1886 |
|
$self->{s_kwd} = ''; |
| 1887 |
## reconsume |
## reconsume |
| 1888 |
|
|
| 1889 |
return ($self->{ct}); # start tag or end tag |
return ($self->{ct}); # start tag or end tag |
| 1952 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1953 |
} |
} |
| 1954 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1955 |
|
$self->{s_kwd} = ''; |
| 1956 |
|
|
| 1957 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 1958 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 2000 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 2001 |
} |
} |
| 2002 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2003 |
|
$self->{s_kwd} = ''; |
| 2004 |
## Reconsume. |
## Reconsume. |
| 2005 |
return ($self->{ct}); # start tag or end tag |
return ($self->{ct}); # start tag or end tag |
| 2006 |
redo A; |
redo A; |
| 2031 |
} |
} |
| 2032 |
|
|
| 2033 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2034 |
|
$self->{s_kwd} = ''; |
| 2035 |
|
|
| 2036 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 2037 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 2064 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 2065 |
} |
} |
| 2066 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2067 |
|
$self->{s_kwd} = ''; |
| 2068 |
## Reconsume. |
## Reconsume. |
| 2069 |
return ($self->{ct}); # start tag or end tag |
return ($self->{ct}); # start tag or end tag |
| 2070 |
redo A; |
redo A; |
| 2085 |
if ($self->{nc} == 0x003E) { # > |
if ($self->{nc} == 0x003E) { # > |
| 2086 |
|
|
| 2087 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2088 |
|
$self->{s_kwd} = ''; |
| 2089 |
|
|
| 2090 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 2091 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 2103 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 2104 |
|
|
| 2105 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2106 |
|
$self->{s_kwd} = ''; |
| 2107 |
## reconsume |
## reconsume |
| 2108 |
|
|
| 2109 |
return ($self->{ct}); # comment |
return ($self->{ct}); # comment |
| 2205 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 2206 |
column => $self->{column_prev} - 2, |
column => $self->{column_prev} - 2, |
| 2207 |
}; |
}; |
| 2208 |
$self->{state} = COMMENT_START_STATE; |
$self->{state} = COMMENT_START_STATE; ## XML5: "comment state". |
| 2209 |
|
|
| 2210 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 2211 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 2268 |
} elsif ((length $self->{s_kwd}) == 6 and |
} elsif ((length $self->{s_kwd}) == 6 and |
| 2269 |
($self->{nc} == 0x0045 or # E |
($self->{nc} == 0x0045 or # E |
| 2270 |
$self->{nc} == 0x0065)) { # e |
$self->{nc} == 0x0065)) { # e |
| 2271 |
|
if ($self->{s_kwd} ne 'DOCTYP') { |
| 2272 |
|
|
| 2273 |
|
## XML5: case-sensitive. |
| 2274 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO |
| 2275 |
|
text => 'DOCTYPE', |
| 2276 |
|
line => $self->{line_prev}, |
| 2277 |
|
column => $self->{column_prev} - 5); |
| 2278 |
|
} else { |
| 2279 |
|
|
| 2280 |
|
} |
| 2281 |
$self->{state} = DOCTYPE_STATE; |
$self->{state} = DOCTYPE_STATE; |
| 2282 |
$self->{ct} = {type => DOCTYPE_TOKEN, |
$self->{ct} = {type => DOCTYPE_TOKEN, |
| 2283 |
quirks => 1, |
quirks => 1, |
| 2335 |
redo A; |
redo A; |
| 2336 |
} elsif ($self->{s_kwd} eq '[CDATA' and |
} elsif ($self->{s_kwd} eq '[CDATA' and |
| 2337 |
$self->{nc} == 0x005B) { # [ |
$self->{nc} == 0x005B) { # [ |
| 2338 |
|
if ($self->{is_xml} and |
| 2339 |
|
not $self->{tainted} and |
| 2340 |
|
@{$self->{open_elements} or []} == 0) { |
| 2341 |
|
|
| 2342 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'cdata outside of root element', |
| 2343 |
|
line => $self->{line_prev}, |
| 2344 |
|
column => $self->{column_prev} - 7); |
| 2345 |
|
$self->{tainted} = 1; |
| 2346 |
|
} else { |
| 2347 |
|
|
| 2348 |
|
} |
| 2349 |
|
|
| 2350 |
$self->{ct} = {type => CHARACTER_TOKEN, |
$self->{ct} = {type => CHARACTER_TOKEN, |
| 2351 |
data => '', |
data => '', |
| 2352 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 2398 |
|
|
| 2399 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment'); |
| 2400 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2401 |
|
$self->{s_kwd} = ''; |
| 2402 |
|
|
| 2403 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 2404 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 2418 |
|
|
| 2419 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment'); |
| 2420 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2421 |
|
$self->{s_kwd} = ''; |
| 2422 |
## reconsume |
## reconsume |
| 2423 |
|
|
| 2424 |
return ($self->{ct}); # comment |
return ($self->{ct}); # comment |
| 2462 |
|
|
| 2463 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment'); |
| 2464 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2465 |
|
$self->{s_kwd} = ''; |
| 2466 |
|
|
| 2467 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 2468 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 2482 |
|
|
| 2483 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment'); |
| 2484 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2485 |
|
$self->{s_kwd} = ''; |
| 2486 |
## reconsume |
## reconsume |
| 2487 |
|
|
| 2488 |
return ($self->{ct}); # comment |
return ($self->{ct}); # comment |
| 2526 |
|
|
| 2527 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment'); |
| 2528 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2529 |
|
$self->{s_kwd} = ''; |
| 2530 |
## reconsume |
## reconsume |
| 2531 |
|
|
| 2532 |
return ($self->{ct}); # comment |
return ($self->{ct}); # comment |
| 2554 |
redo A; |
redo A; |
| 2555 |
} |
} |
| 2556 |
} elsif ($self->{state} == COMMENT_END_DASH_STATE) { |
} elsif ($self->{state} == COMMENT_END_DASH_STATE) { |
| 2557 |
|
## XML5: "comment dash state". |
| 2558 |
|
|
| 2559 |
if ($self->{nc} == 0x002D) { # - |
if ($self->{nc} == 0x002D) { # - |
| 2560 |
|
|
| 2561 |
$self->{state} = COMMENT_END_STATE; |
$self->{state} = COMMENT_END_STATE; |
| 2574 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 2575 |
|
|
| 2576 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment'); |
| 2577 |
|
$self->{s_kwd} = ''; |
| 2578 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2579 |
|
$self->{s_kwd} = ''; |
| 2580 |
## reconsume |
## reconsume |
| 2581 |
|
|
| 2582 |
return ($self->{ct}); # comment |
return ($self->{ct}); # comment |
| 2603 |
if ($self->{nc} == 0x003E) { # > |
if ($self->{nc} == 0x003E) { # > |
| 2604 |
|
|
| 2605 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2606 |
|
$self->{s_kwd} = ''; |
| 2607 |
|
|
| 2608 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 2609 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 2621 |
redo A; |
redo A; |
| 2622 |
} elsif ($self->{nc} == 0x002D) { # - |
} elsif ($self->{nc} == 0x002D) { # - |
| 2623 |
|
|
| 2624 |
|
## XML5: Not a parse error. |
| 2625 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment', |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment', |
| 2626 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 2627 |
column => $self->{column_prev}); |
column => $self->{column_prev}); |
| 2643 |
|
|
| 2644 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment'); |
| 2645 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2646 |
|
$self->{s_kwd} = ''; |
| 2647 |
## reconsume |
## reconsume |
| 2648 |
|
|
| 2649 |
return ($self->{ct}); # comment |
return ($self->{ct}); # comment |
| 2651 |
redo A; |
redo A; |
| 2652 |
} else { |
} else { |
| 2653 |
|
|
| 2654 |
|
## XML5: Not a parse error. |
| 2655 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment', |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment', |
| 2656 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 2657 |
column => $self->{column_prev}); |
column => $self->{column_prev}); |
| 2713 |
|
|
| 2714 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name'); |
| 2715 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2716 |
|
$self->{s_kwd} = ''; |
| 2717 |
|
|
| 2718 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 2719 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 2733 |
|
|
| 2734 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name'); |
| 2735 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2736 |
|
$self->{s_kwd} = ''; |
| 2737 |
## reconsume |
## reconsume |
| 2738 |
|
|
| 2739 |
return ($self->{ct}); # DOCTYPE (quirks) |
return ($self->{ct}); # DOCTYPE (quirks) |
| 2777 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
| 2778 |
|
|
| 2779 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2780 |
|
$self->{s_kwd} = ''; |
| 2781 |
|
|
| 2782 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 2783 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 2797 |
|
|
| 2798 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE'); |
| 2799 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2800 |
|
$self->{s_kwd} = ''; |
| 2801 |
## reconsume |
## reconsume |
| 2802 |
|
|
| 2803 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 2841 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
| 2842 |
|
|
| 2843 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2844 |
|
$self->{s_kwd} = ''; |
| 2845 |
|
|
| 2846 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 2847 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 2861 |
|
|
| 2862 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE'); |
| 2863 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2864 |
|
$self->{s_kwd} = ''; |
| 2865 |
## reconsume |
## reconsume |
| 2866 |
|
|
| 2867 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 3090 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal'); |
| 3091 |
|
|
| 3092 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3093 |
|
$self->{s_kwd} = ''; |
| 3094 |
|
|
| 3095 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3096 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 3112 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE'); |
| 3113 |
|
|
| 3114 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3115 |
|
$self->{s_kwd} = ''; |
| 3116 |
## reconsume |
## reconsume |
| 3117 |
|
|
| 3118 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 3159 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal'); |
| 3160 |
|
|
| 3161 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3162 |
|
$self->{s_kwd} = ''; |
| 3163 |
|
|
| 3164 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3165 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 3181 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal'); |
| 3182 |
|
|
| 3183 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3184 |
|
$self->{s_kwd} = ''; |
| 3185 |
## reconsume |
## reconsume |
| 3186 |
|
|
| 3187 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 3230 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal'); |
| 3231 |
|
|
| 3232 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3233 |
|
$self->{s_kwd} = ''; |
| 3234 |
|
|
| 3235 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3236 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 3252 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal'); |
| 3253 |
|
|
| 3254 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3255 |
|
$self->{s_kwd} = ''; |
| 3256 |
## reconsume |
## reconsume |
| 3257 |
|
|
| 3258 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 3331 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
| 3332 |
|
|
| 3333 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3334 |
|
$self->{s_kwd} = ''; |
| 3335 |
|
|
| 3336 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3337 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 3352 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE'); |
| 3353 |
|
|
| 3354 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3355 |
|
$self->{s_kwd} = ''; |
| 3356 |
## reconsume |
## reconsume |
| 3357 |
|
|
| 3358 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 3430 |
|
|
| 3431 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal'); |
| 3432 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3433 |
|
$self->{s_kwd} = ''; |
| 3434 |
|
|
| 3435 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3436 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 3452 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE'); |
| 3453 |
|
|
| 3454 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3455 |
|
$self->{s_kwd} = ''; |
| 3456 |
## reconsume |
## reconsume |
| 3457 |
|
|
| 3458 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 3499 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal'); |
| 3500 |
|
|
| 3501 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3502 |
|
$self->{s_kwd} = ''; |
| 3503 |
|
|
| 3504 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3505 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 3521 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal'); |
| 3522 |
|
|
| 3523 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3524 |
|
$self->{s_kwd} = ''; |
| 3525 |
## reconsume |
## reconsume |
| 3526 |
|
|
| 3527 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 3570 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal'); |
| 3571 |
|
|
| 3572 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3573 |
|
$self->{s_kwd} = ''; |
| 3574 |
|
|
| 3575 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3576 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 3592 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal'); |
| 3593 |
|
|
| 3594 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3595 |
|
$self->{s_kwd} = ''; |
| 3596 |
## reconsume |
## reconsume |
| 3597 |
|
|
| 3598 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 3639 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
| 3640 |
|
|
| 3641 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3642 |
|
$self->{s_kwd} = ''; |
| 3643 |
|
|
| 3644 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3645 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 3659 |
|
|
| 3660 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE'); |
| 3661 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3662 |
|
$self->{s_kwd} = ''; |
| 3663 |
## reconsume |
## reconsume |
| 3664 |
|
|
| 3665 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 3689 |
if ($self->{nc} == 0x003E) { # > |
if ($self->{nc} == 0x003E) { # > |
| 3690 |
|
|
| 3691 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3692 |
|
$self->{s_kwd} = ''; |
| 3693 |
|
|
| 3694 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3695 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 3708 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 3709 |
|
|
| 3710 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3711 |
|
$self->{s_kwd} = ''; |
| 3712 |
## reconsume |
## reconsume |
| 3713 |
|
|
| 3714 |
return ($self->{ct}); # DOCTYPE |
return ($self->{ct}); # DOCTYPE |
| 3737 |
## NOTE: "CDATA section state" in the state is jointly implemented |
## NOTE: "CDATA section state" in the state is jointly implemented |
| 3738 |
## by three states, |CDATA_SECTION_STATE|, |CDATA_SECTION_MSE1_STATE|, |
## by three states, |CDATA_SECTION_STATE|, |CDATA_SECTION_MSE1_STATE|, |
| 3739 |
## and |CDATA_SECTION_MSE2_STATE|. |
## and |CDATA_SECTION_MSE2_STATE|. |
| 3740 |
|
|
| 3741 |
|
## XML5: "CDATA state". |
| 3742 |
|
|
| 3743 |
if ($self->{nc} == 0x005D) { # ] |
if ($self->{nc} == 0x005D) { # ] |
| 3744 |
|
|
| 3756 |
|
|
| 3757 |
redo A; |
redo A; |
| 3758 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 3759 |
|
if ($self->{is_xml}) { |
| 3760 |
|
|
| 3761 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no mse'); ## TODO: type |
| 3762 |
|
} else { |
| 3763 |
|
|
| 3764 |
|
} |
| 3765 |
|
|
| 3766 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3767 |
|
$self->{s_kwd} = ''; |
| 3768 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
## Reconsume. |
|
$self->{line_prev} = $self->{line}; |
|
|
$self->{column_prev} = $self->{column}; |
|
|
$self->{column}++; |
|
|
$self->{nc} |
|
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
|
|
} else { |
|
|
$self->{set_nc}->($self); |
|
|
} |
|
|
|
|
| 3769 |
if (length $self->{ct}->{data}) { # character |
if (length $self->{ct}->{data}) { # character |
| 3770 |
|
|
| 3771 |
return ($self->{ct}); # character |
return ($self->{ct}); # character |
| 3798 |
|
|
| 3799 |
## ISSUE: "text tokens" in spec. |
## ISSUE: "text tokens" in spec. |
| 3800 |
} elsif ($self->{state} == CDATA_SECTION_MSE1_STATE) { |
} elsif ($self->{state} == CDATA_SECTION_MSE1_STATE) { |
| 3801 |
|
## XML5: "CDATA bracket state". |
| 3802 |
|
|
| 3803 |
if ($self->{nc} == 0x005D) { # ] |
if ($self->{nc} == 0x005D) { # ] |
| 3804 |
|
|
| 3805 |
$self->{state} = CDATA_SECTION_MSE2_STATE; |
$self->{state} = CDATA_SECTION_MSE2_STATE; |
| 3817 |
redo A; |
redo A; |
| 3818 |
} else { |
} else { |
| 3819 |
|
|
| 3820 |
|
## XML5: If EOF, "]" is not appended and changed to the data state. |
| 3821 |
$self->{ct}->{data} .= ']'; |
$self->{ct}->{data} .= ']'; |
| 3822 |
$self->{state} = CDATA_SECTION_STATE; |
$self->{state} = CDATA_SECTION_STATE; ## XML5: Stay in the state. |
| 3823 |
## Reconsume. |
## Reconsume. |
| 3824 |
redo A; |
redo A; |
| 3825 |
} |
} |
| 3826 |
} elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) { |
} elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) { |
| 3827 |
|
## XML5: "CDATA end state". |
| 3828 |
|
|
| 3829 |
if ($self->{nc} == 0x003E) { # > |
if ($self->{nc} == 0x003E) { # > |
| 3830 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3831 |
|
$self->{s_kwd} = ''; |
| 3832 |
|
|
| 3833 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3834 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 3868 |
|
|
| 3869 |
$self->{ct}->{data} .= ']]'; # character |
$self->{ct}->{data} .= ']]'; # character |
| 3870 |
$self->{state} = CDATA_SECTION_STATE; |
$self->{state} = CDATA_SECTION_STATE; |
| 3871 |
## Reconsume. |
## Reconsume. ## XML5: Emit. |
| 3872 |
redo A; |
redo A; |
| 3873 |
} |
} |
| 3874 |
} elsif ($self->{state} == ENTITY_STATE) { |
} elsif ($self->{state} == ENTITY_STATE) { |
| 3936 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
| 3937 |
|
|
| 3938 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 3939 |
|
$self->{s_kwd} = ''; |
| 3940 |
## Reconsume. |
## Reconsume. |
| 3941 |
return ({type => CHARACTER_TOKEN, data => '&', |
return ({type => CHARACTER_TOKEN, data => '&', |
| 3942 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 3947 |
|
|
| 3948 |
$self->{ca}->{value} .= '&'; |
$self->{ca}->{value} .= '&'; |
| 3949 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 3950 |
|
$self->{s_kwd} = ''; |
| 3951 |
## Reconsume. |
## Reconsume. |
| 3952 |
redo A; |
redo A; |
| 3953 |
} |
} |
| 3998 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
| 3999 |
|
|
| 4000 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 4001 |
|
$self->{s_kwd} = ''; |
| 4002 |
## Reconsume. |
## Reconsume. |
| 4003 |
return ({type => CHARACTER_TOKEN, |
return ({type => CHARACTER_TOKEN, |
| 4004 |
data => '&#', |
data => '&#', |
| 4010 |
|
|
| 4011 |
$self->{ca}->{value} .= '&#'; |
$self->{ca}->{value} .= '&#'; |
| 4012 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 4013 |
|
$self->{s_kwd} = ''; |
| 4014 |
## Reconsume. |
## Reconsume. |
| 4015 |
redo A; |
redo A; |
| 4016 |
} |
} |
| 4076 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
| 4077 |
|
|
| 4078 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 4079 |
|
$self->{s_kwd} = ''; |
| 4080 |
## Reconsume. |
## Reconsume. |
| 4081 |
return ({type => CHARACTER_TOKEN, data => chr $code, |
return ({type => CHARACTER_TOKEN, data => chr $code, |
| 4082 |
|
has_reference => 1, |
| 4083 |
line => $l, column => $c, |
line => $l, column => $c, |
| 4084 |
}); |
}); |
| 4085 |
redo A; |
redo A; |
| 4088 |
$self->{ca}->{value} .= chr $code; |
$self->{ca}->{value} .= chr $code; |
| 4089 |
$self->{ca}->{has_reference} = 1; |
$self->{ca}->{has_reference} = 1; |
| 4090 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 4091 |
|
$self->{s_kwd} = ''; |
| 4092 |
## Reconsume. |
## Reconsume. |
| 4093 |
redo A; |
redo A; |
| 4094 |
} |
} |
| 4114 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
| 4115 |
|
|
| 4116 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 4117 |
|
$self->{s_kwd} = ''; |
| 4118 |
## Reconsume. |
## Reconsume. |
| 4119 |
return ({type => CHARACTER_TOKEN, |
return ({type => CHARACTER_TOKEN, |
| 4120 |
data => '&' . $self->{s_kwd}, |
data => '&' . $self->{s_kwd}, |
| 4126 |
|
|
| 4127 |
$self->{ca}->{value} .= '&' . $self->{s_kwd}; |
$self->{ca}->{value} .= '&' . $self->{s_kwd}; |
| 4128 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 4129 |
|
$self->{s_kwd} = ''; |
| 4130 |
## Reconsume. |
## Reconsume. |
| 4131 |
redo A; |
redo A; |
| 4132 |
} |
} |
| 4229 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
| 4230 |
|
|
| 4231 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 4232 |
|
$self->{s_kwd} = ''; |
| 4233 |
## Reconsume. |
## Reconsume. |
| 4234 |
return ({type => CHARACTER_TOKEN, data => chr $code, |
return ({type => CHARACTER_TOKEN, data => chr $code, |
| 4235 |
|
has_reference => 1, |
| 4236 |
line => $l, column => $c, |
line => $l, column => $c, |
| 4237 |
}); |
}); |
| 4238 |
redo A; |
redo A; |
| 4241 |
$self->{ca}->{value} .= chr $code; |
$self->{ca}->{value} .= chr $code; |
| 4242 |
$self->{ca}->{has_reference} = 1; |
$self->{ca}->{has_reference} = 1; |
| 4243 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 4244 |
|
$self->{s_kwd} = ''; |
| 4245 |
## Reconsume. |
## Reconsume. |
| 4246 |
redo A; |
redo A; |
| 4247 |
} |
} |
| 4354 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
| 4355 |
|
|
| 4356 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 4357 |
|
$self->{s_kwd} = ''; |
| 4358 |
## Reconsume. |
## Reconsume. |
| 4359 |
return ({type => CHARACTER_TOKEN, |
return ({type => CHARACTER_TOKEN, |
| 4360 |
data => $data, |
data => $data, |
| 4361 |
|
has_reference => $has_ref, |
| 4362 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 4363 |
column => $self->{column_prev} + 1 - length $self->{s_kwd}, |
column => $self->{column_prev} + 1 - length $self->{s_kwd}, |
| 4364 |
}); |
}); |
| 4368 |
$self->{ca}->{value} .= $data; |
$self->{ca}->{value} .= $data; |
| 4369 |
$self->{ca}->{has_reference} = 1 if $has_ref; |
$self->{ca}->{has_reference} = 1 if $has_ref; |
| 4370 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 4371 |
|
$self->{s_kwd} = ''; |
| 4372 |
|
## Reconsume. |
| 4373 |
|
redo A; |
| 4374 |
|
} |
| 4375 |
|
|
| 4376 |
|
## XML-only states |
| 4377 |
|
|
| 4378 |
|
} elsif ($self->{state} == PI_STATE) { |
| 4379 |
|
if ($is_space->{$self->{nc}} or |
| 4380 |
|
$self->{nc} == 0x003F or # ? ## XML5: Same as "Anything else" |
| 4381 |
|
$self->{nc} == -1) { |
| 4382 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bare pio', ## TODO: type |
| 4383 |
|
line => $self->{line_prev}, |
| 4384 |
|
column => $self->{column_prev} |
| 4385 |
|
- 1 * ($self->{nc} != -1)); |
| 4386 |
|
$self->{state} = BOGUS_COMMENT_STATE; |
| 4387 |
|
## Reconsume. |
| 4388 |
|
$self->{ct} = {type => COMMENT_TOKEN, |
| 4389 |
|
data => '?', |
| 4390 |
|
line => $self->{line_prev}, |
| 4391 |
|
column => $self->{column_prev} |
| 4392 |
|
- 1 * ($self->{nc} != -1), |
| 4393 |
|
}; |
| 4394 |
|
redo A; |
| 4395 |
|
} else { |
| 4396 |
|
$self->{ct} = {type => PI_TOKEN, |
| 4397 |
|
target => chr $self->{nc}, |
| 4398 |
|
data => '', |
| 4399 |
|
line => $self->{line_prev}, |
| 4400 |
|
column => $self->{column_prev} - 1, |
| 4401 |
|
}; |
| 4402 |
|
$self->{state} = PI_TARGET_STATE; |
| 4403 |
|
|
| 4404 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4405 |
|
$self->{line_prev} = $self->{line}; |
| 4406 |
|
$self->{column_prev} = $self->{column}; |
| 4407 |
|
$self->{column}++; |
| 4408 |
|
$self->{nc} |
| 4409 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 4410 |
|
} else { |
| 4411 |
|
$self->{set_nc}->($self); |
| 4412 |
|
} |
| 4413 |
|
|
| 4414 |
|
redo A; |
| 4415 |
|
} |
| 4416 |
|
} elsif ($self->{state} == PI_TARGET_STATE) { |
| 4417 |
|
if ($is_space->{$self->{nc}}) { |
| 4418 |
|
$self->{state} = PI_TARGET_AFTER_STATE; |
| 4419 |
|
|
| 4420 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4421 |
|
$self->{line_prev} = $self->{line}; |
| 4422 |
|
$self->{column_prev} = $self->{column}; |
| 4423 |
|
$self->{column}++; |
| 4424 |
|
$self->{nc} |
| 4425 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 4426 |
|
} else { |
| 4427 |
|
$self->{set_nc}->($self); |
| 4428 |
|
} |
| 4429 |
|
|
| 4430 |
|
redo A; |
| 4431 |
|
} elsif ($self->{nc} == -1) { |
| 4432 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type |
| 4433 |
|
$self->{state} = DATA_STATE; |
| 4434 |
|
$self->{s_kwd} = ''; |
| 4435 |
## Reconsume. |
## Reconsume. |
| 4436 |
|
return ($self->{ct}); # pi |
| 4437 |
|
redo A; |
| 4438 |
|
} elsif ($self->{nc} == 0x003F) { # ? |
| 4439 |
|
$self->{state} = PI_AFTER_STATE; |
| 4440 |
|
|
| 4441 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4442 |
|
$self->{line_prev} = $self->{line}; |
| 4443 |
|
$self->{column_prev} = $self->{column}; |
| 4444 |
|
$self->{column}++; |
| 4445 |
|
$self->{nc} |
| 4446 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 4447 |
|
} else { |
| 4448 |
|
$self->{set_nc}->($self); |
| 4449 |
|
} |
| 4450 |
|
|
| 4451 |
|
redo A; |
| 4452 |
|
} else { |
| 4453 |
|
## XML5: typo ("tag name" -> "target") |
| 4454 |
|
$self->{ct}->{target} .= chr $self->{nc}; # pi |
| 4455 |
|
|
| 4456 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4457 |
|
$self->{line_prev} = $self->{line}; |
| 4458 |
|
$self->{column_prev} = $self->{column}; |
| 4459 |
|
$self->{column}++; |
| 4460 |
|
$self->{nc} |
| 4461 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 4462 |
|
} else { |
| 4463 |
|
$self->{set_nc}->($self); |
| 4464 |
|
} |
| 4465 |
|
|
| 4466 |
|
redo A; |
| 4467 |
|
} |
| 4468 |
|
} elsif ($self->{state} == PI_TARGET_AFTER_STATE) { |
| 4469 |
|
if ($is_space->{$self->{nc}}) { |
| 4470 |
|
## Stay in the state. |
| 4471 |
|
|
| 4472 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4473 |
|
$self->{line_prev} = $self->{line}; |
| 4474 |
|
$self->{column_prev} = $self->{column}; |
| 4475 |
|
$self->{column}++; |
| 4476 |
|
$self->{nc} |
| 4477 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 4478 |
|
} else { |
| 4479 |
|
$self->{set_nc}->($self); |
| 4480 |
|
} |
| 4481 |
|
|
| 4482 |
|
redo A; |
| 4483 |
|
} else { |
| 4484 |
|
$self->{state} = PI_DATA_STATE; |
| 4485 |
|
## Reprocess. |
| 4486 |
|
redo A; |
| 4487 |
|
} |
| 4488 |
|
} elsif ($self->{state} == PI_DATA_STATE) { |
| 4489 |
|
if ($self->{nc} == 0x003F) { # ? |
| 4490 |
|
$self->{state} = PI_DATA_AFTER_STATE; |
| 4491 |
|
|
| 4492 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4493 |
|
$self->{line_prev} = $self->{line}; |
| 4494 |
|
$self->{column_prev} = $self->{column}; |
| 4495 |
|
$self->{column}++; |
| 4496 |
|
$self->{nc} |
| 4497 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 4498 |
|
} else { |
| 4499 |
|
$self->{set_nc}->($self); |
| 4500 |
|
} |
| 4501 |
|
|
| 4502 |
|
redo A; |
| 4503 |
|
} elsif ($self->{nc} == -1) { |
| 4504 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type |
| 4505 |
|
$self->{state} = DATA_STATE; |
| 4506 |
|
$self->{s_kwd} = ''; |
| 4507 |
|
## Reprocess. |
| 4508 |
|
return ($self->{ct}); # pi |
| 4509 |
|
redo A; |
| 4510 |
|
} else { |
| 4511 |
|
$self->{ct}->{data} .= chr $self->{nc}; # pi |
| 4512 |
|
$self->{read_until}->($self->{ct}->{data}, q[?], |
| 4513 |
|
length $self->{ct}->{data}); |
| 4514 |
|
## Stay in the state. |
| 4515 |
|
|
| 4516 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4517 |
|
$self->{line_prev} = $self->{line}; |
| 4518 |
|
$self->{column_prev} = $self->{column}; |
| 4519 |
|
$self->{column}++; |
| 4520 |
|
$self->{nc} |
| 4521 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 4522 |
|
} else { |
| 4523 |
|
$self->{set_nc}->($self); |
| 4524 |
|
} |
| 4525 |
|
|
| 4526 |
|
## Reprocess. |
| 4527 |
|
redo A; |
| 4528 |
|
} |
| 4529 |
|
} elsif ($self->{state} == PI_AFTER_STATE) { |
| 4530 |

## Branch for PI_AFTER_STATE: dispatch on the current input
## character |$self->{nc}|.
##   ">"       - finish: switch to DATA_STATE and return the current
##               token |$self->{ct}|.
##   "?"       - report an error, append "?" to the token data and
##               switch to PI_DATA_AFTER_STATE.
##   otherwise - report an error, append "?" to the token data and
##               reprocess the same character in PI_DATA_STATE.
if ($self->{nc} == 0x003E) { # > |
| 4531 |

$self->{state} = DATA_STATE; |
| 4532 |

$self->{s_kwd} = ''; |
| 4533 |


| 4534 |

## Advance to the next input character: take it from
## |char_buffer| when unread characters remain there (saving the
## current line/column as the "previous" position and bumping the
## column), otherwise let the |set_nc| callback supply it.
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4535 |

$self->{line_prev} = $self->{line}; |
| 4536 |

$self->{column_prev} = $self->{column}; |
| 4537 |

$self->{column}++; |
| 4538 |

$self->{nc} |
| 4539 |

= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 4540 |

} else { |
| 4541 |

$self->{set_nc}->($self); |
| 4542 |

} |
| 4543 |


| 4544 |

return ($self->{ct}); # pi |
| 4545 |

## Not reached: the |return| above exits first.
redo A; |
| 4546 |

} elsif ($self->{nc} == 0x003F) { # ? |
| 4547 |

## The error is reported at the saved previous position
## (|line_prev| / |column_prev|), not at the current character.
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no s after target', ## TODO: type |
| 4548 |

line => $self->{line_prev}, |
| 4549 |

column => $self->{column_prev}); ## XML5: no error |
| 4550 |

$self->{ct}->{data} .= '?'; |
| 4551 |

$self->{state} = PI_DATA_AFTER_STATE; |
| 4552 |


| 4553 |

## Advance to the next input character (same buffer-or-callback
## step as in the ">" case above).
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4554 |

$self->{line_prev} = $self->{line}; |
| 4555 |

$self->{column_prev} = $self->{column}; |
| 4556 |

$self->{column}++; |
| 4557 |

$self->{nc} |
| 4558 |

= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 4559 |

} else { |
| 4560 |

$self->{set_nc}->($self); |
| 4561 |

} |
| 4562 |


| 4563 |

redo A; |
| 4564 |

} else { |
| 4565 |

## Same error as the "?" case; the reported column is
## |column_prev|, plus one when |nc| is -1.
## NOTE(review): -1 is presumably the end-of-input marker - confirm
## against the code that sets |nc|.
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no s after target', ## TODO: type |
| 4566 |

line => $self->{line_prev}, |
| 4567 |

column => $self->{column_prev} |
| 4568 |

+ 1 * ($self->{nc} == -1)); ## XML5: no error |
| 4569 |

$self->{ct}->{data} .= '?'; ## XML5: not appended |
| 4570 |

$self->{state} = PI_DATA_STATE; |
| 4571 |

## Reprocess. |
| 4572 |
## The current character is not consumed here; the loop restarts
## and handles it under the new state.
redo A; |
redo A; |
| 4573 |
} |
} |
| 4574 |
|
} elsif ($self->{state} == PI_DATA_AFTER_STATE) { |
| 4575 |

## Branch for PI_DATA_AFTER_STATE: dispatch on the current input
## character |$self->{nc}|.  Unlike the PI_AFTER_STATE branch, no
## parse error is reported in any of the cases here.
##   ">"       - finish: switch to DATA_STATE and return the current
##               token |$self->{ct}|.
##   "?"       - append "?" to the token data and stay in this state.
##   otherwise - append "?" to the token data and reprocess the same
##               character in PI_DATA_STATE.
## XML5: Same as "pi after state" in XML5 |
| 4576 |

if ($self->{nc} == 0x003E) { # > |
| 4577 |

$self->{state} = DATA_STATE; |
| 4578 |

$self->{s_kwd} = ''; |
| 4579 |


| 4580 |

## Advance to the next input character: take it from
## |char_buffer| when unread characters remain there (saving the
## current line/column as the "previous" position and bumping the
## column), otherwise let the |set_nc| callback supply it.
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4581 |

$self->{line_prev} = $self->{line}; |
| 4582 |

$self->{column_prev} = $self->{column}; |
| 4583 |

$self->{column}++; |
| 4584 |

$self->{nc} |
| 4585 |

= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 4586 |

} else { |
| 4587 |

$self->{set_nc}->($self); |
| 4588 |

} |
| 4589 |


| 4590 |

return ($self->{ct}); # pi |
| 4591 |

## Not reached: the |return| above exits first.
redo A; |
| 4592 |

} elsif ($self->{nc} == 0x003F) { # ? |
| 4593 |

$self->{ct}->{data} .= '?'; |
| 4594 |

## Stay in the state. |
| 4595 |


| 4596 |

## Advance to the next input character (same buffer-or-callback
## step as in the ">" case above).
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4597 |

$self->{line_prev} = $self->{line}; |
| 4598 |

$self->{column_prev} = $self->{column}; |
| 4599 |

$self->{column}++; |
| 4600 |

$self->{nc} |
| 4601 |

= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 4602 |

} else { |
| 4603 |

$self->{set_nc}->($self); |
| 4604 |

} |
| 4605 |


| 4606 |

redo A; |
| 4607 |

} else { |
| 4608 |

$self->{ct}->{data} .= '?'; ## XML5: not appended |
| 4609 |

$self->{state} = PI_DATA_STATE; |
| 4610 |

## Reprocess. |
| 4611 |

## The current character is not consumed here; the loop restarts
## and handles it under the new state.
redo A; |
| 4612 |

} |
| 4613 |
|
|
| 4614 |
} else { |
} else { |
| 4615 |
## None of the preceding |$self->{state}| tests matched: abort,
## including the offending state value in the message.
die "$0: $self->{state}: Unknown state"; |
die "$0: $self->{state}: Unknown state"; |
| 4616 |
} |
} |