| 2 |
use strict; |
use strict; |
| 3 |
our $VERSION=do{my @r=(q$Revision$=~/\d+/g);sprintf "%d."."%02d" x $#r,@r}; |
our $VERSION=do{my @r=(q$Revision$=~/\d+/g);sprintf "%d."."%02d" x $#r,@r}; |
| 4 |
|
|
| 5 |
|
BEGIN { |
| 6 |
|
require Exporter; |
| 7 |
|
push our @ISA, 'Exporter'; |
| 8 |
|
|
| 9 |
|
our @EXPORT_OK = qw( |
| 10 |
|
DOCTYPE_TOKEN |
| 11 |
|
COMMENT_TOKEN |
| 12 |
|
START_TAG_TOKEN |
| 13 |
|
END_TAG_TOKEN |
| 14 |
|
END_OF_FILE_TOKEN |
| 15 |
|
CHARACTER_TOKEN |
| 16 |
|
PI_TOKEN |
| 17 |
|
ABORT_TOKEN |
| 18 |
|
); |
| 19 |
|
|
| 20 |
|
our %EXPORT_TAGS = ( |
| 21 |
|
token => [qw( |
| 22 |
|
DOCTYPE_TOKEN |
| 23 |
|
COMMENT_TOKEN |
| 24 |
|
START_TAG_TOKEN |
| 25 |
|
END_TAG_TOKEN |
| 26 |
|
END_OF_FILE_TOKEN |
| 27 |
|
CHARACTER_TOKEN |
| 28 |
|
PI_TOKEN |
| 29 |
|
ABORT_TOKEN |
| 30 |
|
)], |
| 31 |
|
); |
| 32 |
|
} |
| 33 |
|
|
| 34 |
|
## Token types |
| 35 |
|
|
| 36 |
|
sub DOCTYPE_TOKEN () { 1 } |
| 37 |
|
sub COMMENT_TOKEN () { 2 } |
| 38 |
|
sub START_TAG_TOKEN () { 3 } |
| 39 |
|
sub END_TAG_TOKEN () { 4 } |
| 40 |
|
sub END_OF_FILE_TOKEN () { 5 } |
| 41 |
|
sub CHARACTER_TOKEN () { 6 } |
| 42 |
|
sub PI_TOKEN () { 7 } # XML5 |
| 43 |
|
sub ABORT_TOKEN () { 8 } # Not a token actually |
| 44 |
|
|
| 45 |
package Whatpm::HTML; |
package Whatpm::HTML; |
| 46 |
|
|
| 47 |
|
BEGIN { Whatpm::HTML::Tokenizer->import (':token') } |
| 48 |
|
|
| 49 |
## Content model flags |
## Content model flags |
| 50 |
|
|
| 51 |
sub CM_ENTITY () { 0b001 } # & markup in data |
sub CM_ENTITY () { 0b001 } # & markup in data |
| 114 |
sub ENTITY_NAME_STATE () { 49 } |
sub ENTITY_NAME_STATE () { 49 } |
| 115 |
sub PCDATA_STATE () { 50 } # "data state" in the spec |
sub PCDATA_STATE () { 50 } # "data state" in the spec |
| 116 |
|
|
| 117 |
## Token types |
## XML states |
| 118 |
|
sub PI_STATE () { 51 } |
| 119 |
sub DOCTYPE_TOKEN () { 1 } |
sub PI_TARGET_STATE () { 52 } |
| 120 |
sub COMMENT_TOKEN () { 2 } |
sub PI_TARGET_AFTER_STATE () { 53 } |
| 121 |
sub START_TAG_TOKEN () { 3 } |
sub PI_DATA_STATE () { 54 } |
| 122 |
sub END_TAG_TOKEN () { 4 } |
sub PI_AFTER_STATE () { 55 } |
| 123 |
sub END_OF_FILE_TOKEN () { 5 } |
sub PI_DATA_AFTER_STATE () { 56 } |
|
sub CHARACTER_TOKEN () { 6 } |
|
| 124 |
|
|
| 125 |
## Tree constructor state constants (see Whatpm::HTML for the full |
## Tree constructor state constants (see Whatpm::HTML for the full |
| 126 |
## list and descriptions) |
## list and descriptions) |
| 183 |
#$self->{level} |
#$self->{level} |
| 184 |
#$self->{set_nc} |
#$self->{set_nc} |
| 185 |
#$self->{parse_error} |
#$self->{parse_error} |
| 186 |
|
#$self->{is_xml} (if XML) |
| 187 |
|
|
| 188 |
$self->{state} = DATA_STATE; # MUST |
$self->{state} = DATA_STATE; # MUST |
| 189 |
#$self->{s_kwd}; # state keyword - initialized when used |
$self->{s_kwd} = ''; # state keyword |
| 190 |
#$self->{entity__value}; # initialized when used |
#$self->{entity__value}; # initialized when used |
| 191 |
#$self->{entity__match}; # initialized when used |
#$self->{entity__match}; # initialized when used |
| 192 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # be |
$self->{content_model} = PCDATA_CONTENT_MODEL; # be |
| 227 |
## ->{value} |
## ->{value} |
| 228 |
## ->{has_reference} == 1 or 0 |
## ->{has_reference} == 1 or 0 |
| 229 |
## ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN) |
## ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN) |
| 230 |
|
## ->{has_reference} == 1 or 0 (CHARACTER_TOKEN) |
| 231 |
## NOTE: The "self-closing flag" is hold as |$self->{self_closing}|. |
## NOTE: The "self-closing flag" is hold as |$self->{self_closing}|. |
| 232 |
## |->{self_closing}| is used to save the value of |$self->{self_closing}| |
## |->{self_closing}| is used to save the value of |$self->{self_closing}| |
| 233 |
## while the token is pushed back to the stack. |
## while the token is pushed back to the stack. |
| 371 |
} |
} |
| 372 |
} elsif ($self->{nc} == 0x002D) { # - |
} elsif ($self->{nc} == 0x002D) { # - |
| 373 |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
| 374 |
$self->{s_kwd} .= '-'; |
if ($self->{s_kwd} eq '<!-') { |
|
|
|
|
if ($self->{s_kwd} eq '<!--') { |
|
| 375 |
|
|
| 376 |
$self->{escape} = 1; # unless $self->{escape}; |
$self->{escape} = 1; # unless $self->{escape}; |
| 377 |
$self->{s_kwd} = '--'; |
$self->{s_kwd} = '--'; |
| 378 |
# |
# |
| 379 |
} elsif ($self->{s_kwd} eq '---') { |
} elsif ($self->{s_kwd} eq '-') { |
| 380 |
|
|
| 381 |
$self->{s_kwd} = '--'; |
$self->{s_kwd} = '--'; |
| 382 |
# |
# |
| 383 |
|
} elsif ($self->{s_kwd} eq '<!' or $self->{s_kwd} eq '-') { |
| 384 |
|
|
| 385 |
|
$self->{s_kwd} .= '-'; |
| 386 |
|
# |
| 387 |
} else { |
} else { |
| 388 |
|
|
| 389 |
|
$self->{s_kwd} = '-'; |
| 390 |
# |
# |
| 391 |
} |
} |
| 392 |
} |
} |
| 432 |
if ($self->{s_kwd} eq '--') { |
if ($self->{s_kwd} eq '--') { |
| 433 |
|
|
| 434 |
delete $self->{escape}; |
delete $self->{escape}; |
| 435 |
|
# |
| 436 |
} else { |
} else { |
| 437 |
|
|
| 438 |
|
# |
| 439 |
} |
} |
| 440 |
|
} elsif ($self->{is_xml} and $self->{s_kwd} eq ']]') { |
| 441 |
|
|
| 442 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unmatched mse', ## TODO: type |
| 443 |
|
line => $self->{line_prev}, |
| 444 |
|
column => $self->{column_prev} - 1); |
| 445 |
|
# |
| 446 |
} else { |
} else { |
| 447 |
|
|
| 448 |
|
# |
| 449 |
} |
} |
| 450 |
|
|
| 451 |
$self->{s_kwd} = ''; |
$self->{s_kwd} = ''; |
| 452 |
# |
# |
| 453 |
|
} elsif ($self->{nc} == 0x005D) { # ] |
| 454 |
|
if ($self->{s_kwd} eq ']' or $self->{s_kwd} eq '') { |
| 455 |
|
|
| 456 |
|
$self->{s_kwd} .= ']'; |
| 457 |
|
} elsif ($self->{s_kwd} eq ']]') { |
| 458 |
|
|
| 459 |
|
# |
| 460 |
|
} else { |
| 461 |
|
|
| 462 |
|
$self->{s_kwd} = ''; |
| 463 |
|
} |
| 464 |
|
# |
| 465 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 466 |
|
|
| 467 |
$self->{s_kwd} = ''; |
$self->{s_kwd} = ''; |
| 479 |
data => chr $self->{nc}, |
data => chr $self->{nc}, |
| 480 |
line => $self->{line}, column => $self->{column}, |
line => $self->{line}, column => $self->{column}, |
| 481 |
}; |
}; |
| 482 |
if ($self->{read_until}->($token->{data}, q[-!<>&], |
if ($self->{read_until}->($token->{data}, q{-!<>&\]}, |
| 483 |
length $token->{data})) { |
length $token->{data})) { |
| 484 |
$self->{s_kwd} = ''; |
$self->{s_kwd} = ''; |
| 485 |
} |
} |
| 486 |
|
|
| 487 |
## Stay in the data state. |
## Stay in the data state. |
| 488 |
if ($self->{content_model} == PCDATA_CONTENT_MODEL) { |
if (not $self->{is_xml} and |
| 489 |
|
$self->{content_model} == PCDATA_CONTENT_MODEL) { |
| 490 |
|
|
| 491 |
$self->{state} = PCDATA_STATE; |
$self->{state} = PCDATA_STATE; |
| 492 |
} else { |
} else { |
| 534 |
|
|
| 535 |
## reconsume |
## reconsume |
| 536 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 537 |
|
$self->{s_kwd} = ''; |
| 538 |
return ({type => CHARACTER_TOKEN, data => '<', |
return ({type => CHARACTER_TOKEN, data => '<', |
| 539 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 540 |
column => $self->{column_prev}, |
column => $self->{column_prev}, |
| 576 |
|
|
| 577 |
$self->{ct} |
$self->{ct} |
| 578 |
= {type => START_TAG_TOKEN, |
= {type => START_TAG_TOKEN, |
| 579 |
tag_name => chr ($self->{nc} + 0x0020), |
tag_name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)), |
| 580 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 581 |
column => $self->{column_prev}}; |
column => $self->{column_prev}}; |
| 582 |
$self->{state} = TAG_NAME_STATE; |
$self->{state} = TAG_NAME_STATE; |
| 618 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 619 |
column => $self->{column_prev}); |
column => $self->{column_prev}); |
| 620 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 621 |
|
$self->{s_kwd} = ''; |
| 622 |
|
|
| 623 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 624 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 638 |
|
|
| 639 |
redo A; |
redo A; |
| 640 |
} elsif ($self->{nc} == 0x003F) { # ? |
} elsif ($self->{nc} == 0x003F) { # ? |
| 641 |
|
if ($self->{is_xml}) { |
| 642 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'pio', |
|
| 643 |
line => $self->{line_prev}, |
$self->{state} = PI_STATE; |
| 644 |
column => $self->{column_prev}); |
|
| 645 |
$self->{state} = BOGUS_COMMENT_STATE; |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 646 |
$self->{ct} = {type => COMMENT_TOKEN, data => '', |
$self->{line_prev} = $self->{line}; |
| 647 |
line => $self->{line_prev}, |
$self->{column_prev} = $self->{column}; |
| 648 |
column => $self->{column_prev}, |
$self->{column}++; |
| 649 |
}; |
$self->{nc} |
| 650 |
## $self->{nc} is intentionally left as is |
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 651 |
redo A; |
} else { |
| 652 |
|
$self->{set_nc}->($self); |
| 653 |
|
} |
| 654 |
|
|
| 655 |
|
redo A; |
| 656 |
|
} else { |
| 657 |
|
|
| 658 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'pio', |
| 659 |
|
line => $self->{line_prev}, |
| 660 |
|
column => $self->{column_prev}); |
| 661 |
|
$self->{state} = BOGUS_COMMENT_STATE; |
| 662 |
|
$self->{ct} = {type => COMMENT_TOKEN, data => '', |
| 663 |
|
line => $self->{line_prev}, |
| 664 |
|
column => $self->{column_prev}, |
| 665 |
|
}; |
| 666 |
|
## $self->{nc} is intentionally left as is |
| 667 |
|
redo A; |
| 668 |
|
} |
| 669 |
} else { |
} else { |
| 670 |
|
|
| 671 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago', |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago', |
| 672 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 673 |
column => $self->{column_prev}); |
column => $self->{column_prev}); |
| 674 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 675 |
|
$self->{s_kwd} = ''; |
| 676 |
## reconsume |
## reconsume |
| 677 |
|
|
| 678 |
return ({type => CHARACTER_TOKEN, data => '<', |
return ({type => CHARACTER_TOKEN, data => '<', |
| 701 |
## NOTE: See <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>. |
## NOTE: See <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>. |
| 702 |
|
|
| 703 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 704 |
|
$self->{s_kwd} = ''; |
| 705 |
## Reconsume. |
## Reconsume. |
| 706 |
return ({type => CHARACTER_TOKEN, data => '</', |
return ({type => CHARACTER_TOKEN, data => '</', |
| 707 |
line => $l, column => $c, |
line => $l, column => $c, |
| 715 |
|
|
| 716 |
$self->{ct} |
$self->{ct} |
| 717 |
= {type => END_TAG_TOKEN, |
= {type => END_TAG_TOKEN, |
| 718 |
tag_name => chr ($self->{nc} + 0x0020), |
tag_name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)), |
| 719 |
line => $l, column => $c}; |
line => $l, column => $c}; |
| 720 |
$self->{state} = TAG_NAME_STATE; |
$self->{state} = TAG_NAME_STATE; |
| 721 |
|
|
| 755 |
line => $self->{line_prev}, ## "<" in "</>" |
line => $self->{line_prev}, ## "<" in "</>" |
| 756 |
column => $self->{column_prev} - 1); |
column => $self->{column_prev} - 1); |
| 757 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 758 |
|
$self->{s_kwd} = ''; |
| 759 |
|
|
| 760 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 761 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 771 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 772 |
|
|
| 773 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bare etago'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bare etago'); |
| 774 |
|
$self->{s_kwd} = ''; |
| 775 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 776 |
# reconsume |
# reconsume |
| 777 |
|
|
| 821 |
} else { |
} else { |
| 822 |
|
|
| 823 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 824 |
|
$self->{s_kwd} = ''; |
| 825 |
## Reconsume. |
## Reconsume. |
| 826 |
return ({type => CHARACTER_TOKEN, |
return ({type => CHARACTER_TOKEN, |
| 827 |
data => '</' . $self->{s_kwd}, |
data => '</' . $self->{s_kwd}, |
| 840 |
|
|
| 841 |
## Reconsume. |
## Reconsume. |
| 842 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 843 |
|
$self->{s_kwd} = ''; |
| 844 |
return ({type => CHARACTER_TOKEN, |
return ({type => CHARACTER_TOKEN, |
| 845 |
data => '</' . $self->{s_kwd}, |
data => '</' . $self->{s_kwd}, |
| 846 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 892 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 893 |
} |
} |
| 894 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 895 |
|
$self->{s_kwd} = ''; |
| 896 |
|
|
| 897 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 898 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 911 |
} elsif (0x0041 <= $self->{nc} and |
} elsif (0x0041 <= $self->{nc} and |
| 912 |
$self->{nc} <= 0x005A) { # A..Z |
$self->{nc} <= 0x005A) { # A..Z |
| 913 |
|
|
| 914 |
$self->{ct}->{tag_name} .= chr ($self->{nc} + 0x0020); |
$self->{ct}->{tag_name} |
| 915 |
|
.= chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)); |
| 916 |
# start tag or end tag |
# start tag or end tag |
| 917 |
## Stay in this state |
## Stay in this state |
| 918 |
|
|
| 945 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 946 |
} |
} |
| 947 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 948 |
|
$self->{s_kwd} = ''; |
| 949 |
# reconsume |
# reconsume |
| 950 |
|
|
| 951 |
return ($self->{ct}); # start tag or end tag |
return ($self->{ct}); # start tag or end tag |
| 1016 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1017 |
} |
} |
| 1018 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1019 |
|
$self->{s_kwd} = ''; |
| 1020 |
|
|
| 1021 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 1022 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 1036 |
$self->{nc} <= 0x005A) { # A..Z |
$self->{nc} <= 0x005A) { # A..Z |
| 1037 |
|
|
| 1038 |
$self->{ca} |
$self->{ca} |
| 1039 |
= {name => chr ($self->{nc} + 0x0020), |
= {name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)), |
| 1040 |
value => '', |
value => '', |
| 1041 |
line => $self->{line}, column => $self->{column}}; |
line => $self->{line}, column => $self->{column}}; |
| 1042 |
$self->{state} = ATTRIBUTE_NAME_STATE; |
$self->{state} = ATTRIBUTE_NAME_STATE; |
| 1084 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1085 |
} |
} |
| 1086 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1087 |
|
$self->{s_kwd} = ''; |
| 1088 |
# reconsume |
# reconsume |
| 1089 |
|
|
| 1090 |
return ($self->{ct}); # start tag or end tag |
return ($self->{ct}); # start tag or end tag |
| 1180 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1181 |
} |
} |
| 1182 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1183 |
|
$self->{s_kwd} = ''; |
| 1184 |
|
|
| 1185 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 1186 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 1199 |
} elsif (0x0041 <= $self->{nc} and |
} elsif (0x0041 <= $self->{nc} and |
| 1200 |
$self->{nc} <= 0x005A) { # A..Z |
$self->{nc} <= 0x005A) { # A..Z |
| 1201 |
|
|
| 1202 |
$self->{ca}->{name} .= chr ($self->{nc} + 0x0020); |
$self->{ca}->{name} |
| 1203 |
|
.= chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)); |
| 1204 |
## Stay in the state |
## Stay in the state |
| 1205 |
|
|
| 1206 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 1249 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1250 |
} |
} |
| 1251 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1252 |
|
$self->{s_kwd} = ''; |
| 1253 |
# reconsume |
# reconsume |
| 1254 |
|
|
| 1255 |
return ($self->{ct}); # start tag or end tag |
return ($self->{ct}); # start tag or end tag |
| 1326 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1327 |
} |
} |
| 1328 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1329 |
|
$self->{s_kwd} = ''; |
| 1330 |
|
|
| 1331 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 1332 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 1346 |
$self->{nc} <= 0x005A) { # A..Z |
$self->{nc} <= 0x005A) { # A..Z |
| 1347 |
|
|
| 1348 |
$self->{ca} |
$self->{ca} |
| 1349 |
= {name => chr ($self->{nc} + 0x0020), |
= {name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)), |
| 1350 |
value => '', |
value => '', |
| 1351 |
line => $self->{line}, column => $self->{column}}; |
line => $self->{line}, column => $self->{column}}; |
| 1352 |
$self->{state} = ATTRIBUTE_NAME_STATE; |
$self->{state} = ATTRIBUTE_NAME_STATE; |
| 1394 |
} else { |
} else { |
| 1395 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1396 |
} |
} |
| 1397 |
|
$self->{s_kwd} = ''; |
| 1398 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1399 |
# reconsume |
# reconsume |
| 1400 |
|
|
| 1496 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1497 |
} |
} |
| 1498 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1499 |
|
$self->{s_kwd} = ''; |
| 1500 |
|
|
| 1501 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 1502 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 1530 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1531 |
} |
} |
| 1532 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1533 |
|
$self->{s_kwd} = ''; |
| 1534 |
## reconsume |
## reconsume |
| 1535 |
|
|
| 1536 |
return ($self->{ct}); # start tag or end tag |
return ($self->{ct}); # start tag or end tag |
| 1613 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1614 |
} |
} |
| 1615 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1616 |
|
$self->{s_kwd} = ''; |
| 1617 |
## reconsume |
## reconsume |
| 1618 |
|
|
| 1619 |
return ($self->{ct}); # start tag or end tag |
return ($self->{ct}); # start tag or end tag |
| 1695 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1696 |
} |
} |
| 1697 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1698 |
|
$self->{s_kwd} = ''; |
| 1699 |
## reconsume |
## reconsume |
| 1700 |
|
|
| 1701 |
return ($self->{ct}); # start tag or end tag |
return ($self->{ct}); # start tag or end tag |
| 1776 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1777 |
} |
} |
| 1778 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1779 |
|
$self->{s_kwd} = ''; |
| 1780 |
|
|
| 1781 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 1782 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 1810 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1811 |
} |
} |
| 1812 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1813 |
|
$self->{s_kwd} = ''; |
| 1814 |
## reconsume |
## reconsume |
| 1815 |
|
|
| 1816 |
return ($self->{ct}); # start tag or end tag |
return ($self->{ct}); # start tag or end tag |
| 1879 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1880 |
} |
} |
| 1881 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1882 |
|
$self->{s_kwd} = ''; |
| 1883 |
|
|
| 1884 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 1885 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 1927 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1928 |
} |
} |
| 1929 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1930 |
|
$self->{s_kwd} = ''; |
| 1931 |
## Reconsume. |
## Reconsume. |
| 1932 |
return ($self->{ct}); # start tag or end tag |
return ($self->{ct}); # start tag or end tag |
| 1933 |
redo A; |
redo A; |
| 1958 |
} |
} |
| 1959 |
|
|
| 1960 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1961 |
|
$self->{s_kwd} = ''; |
| 1962 |
|
|
| 1963 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 1964 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 1991 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
| 1992 |
} |
} |
| 1993 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1994 |
|
$self->{s_kwd} = ''; |
| 1995 |
## Reconsume. |
## Reconsume. |
| 1996 |
return ($self->{ct}); # start tag or end tag |
return ($self->{ct}); # start tag or end tag |
| 1997 |
redo A; |
redo A; |
| 2012 |
if ($self->{nc} == 0x003E) { # > |
if ($self->{nc} == 0x003E) { # > |
| 2013 |
|
|
| 2014 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2015 |
|
$self->{s_kwd} = ''; |
| 2016 |
|
|
| 2017 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 2018 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 2030 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 2031 |
|
|
| 2032 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2033 |
|
$self->{s_kwd} = ''; |
| 2034 |
## reconsume |
## reconsume |
| 2035 |
|
|
| 2036 |
return ($self->{ct}); # comment |
return ($self->{ct}); # comment |
| 2092 |
} |
} |
| 2093 |
|
|
| 2094 |
redo A; |
redo A; |
| 2095 |
} elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and |
} elsif ((($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and |
| 2096 |
$self->{open_elements}->[-1]->[1] & FOREIGN_EL and |
$self->{open_elements}->[-1]->[1] & FOREIGN_EL) or |
| 2097 |
|
$self->{is_xml}) and |
| 2098 |
$self->{nc} == 0x005B) { # [ |
$self->{nc} == 0x005B) { # [ |
| 2099 |
|
|
| 2100 |
$self->{state} = MD_CDATA_STATE; |
$self->{state} = MD_CDATA_STATE; |
| 2253 |
redo A; |
redo A; |
| 2254 |
} elsif ($self->{s_kwd} eq '[CDATA' and |
} elsif ($self->{s_kwd} eq '[CDATA' and |
| 2255 |
$self->{nc} == 0x005B) { # [ |
$self->{nc} == 0x005B) { # [ |
| 2256 |
|
if ($self->{is_xml} and |
| 2257 |
|
not $self->{tainted} and |
| 2258 |
|
@{$self->{open_elements} or []} == 0) { |
| 2259 |
|
|
| 2260 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'cdata outside of root element', |
| 2261 |
|
line => $self->{line_prev}, |
| 2262 |
|
column => $self->{column_prev} - 7); |
| 2263 |
|
$self->{tainted} = 1; |
| 2264 |
|
} else { |
| 2265 |
|
|
| 2266 |
|
} |
| 2267 |
|
|
| 2268 |
$self->{ct} = {type => CHARACTER_TOKEN, |
$self->{ct} = {type => CHARACTER_TOKEN, |
| 2269 |
data => '', |
data => '', |
| 2270 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 2316 |
|
|
| 2317 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment'); |
| 2318 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2319 |
|
$self->{s_kwd} = ''; |
| 2320 |
|
|
| 2321 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 2322 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 2336 |
|
|
| 2337 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment'); |
| 2338 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2339 |
|
$self->{s_kwd} = ''; |
| 2340 |
## reconsume |
## reconsume |
| 2341 |
|
|
| 2342 |
return ($self->{ct}); # comment |
return ($self->{ct}); # comment |
| 2380 |
|
|
| 2381 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment'); |
| 2382 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2383 |
|
$self->{s_kwd} = ''; |
| 2384 |
|
|
| 2385 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 2386 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 2400 |
|
|
| 2401 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment'); |
| 2402 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2403 |
|
$self->{s_kwd} = ''; |
| 2404 |
## reconsume |
## reconsume |
| 2405 |
|
|
| 2406 |
return ($self->{ct}); # comment |
return ($self->{ct}); # comment |
| 2444 |
|
|
| 2445 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment'); |
| 2446 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2447 |
|
$self->{s_kwd} = ''; |
| 2448 |
## reconsume |
## reconsume |
| 2449 |
|
|
| 2450 |
return ($self->{ct}); # comment |
return ($self->{ct}); # comment |
| 2490 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 2491 |
|
|
| 2492 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment'); |
| 2493 |
|
$self->{s_kwd} = ''; |
| 2494 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2495 |
|
$self->{s_kwd} = ''; |
| 2496 |
## reconsume |
## reconsume |
| 2497 |
|
|
| 2498 |
return ($self->{ct}); # comment |
return ($self->{ct}); # comment |
| 2519 |
if ($self->{nc} == 0x003E) { # > |
if ($self->{nc} == 0x003E) { # > |
| 2520 |
|
|
| 2521 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2522 |
|
$self->{s_kwd} = ''; |
| 2523 |
|
|
| 2524 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 2525 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 2558 |
|
|
| 2559 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment'); |
| 2560 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2561 |
|
$self->{s_kwd} = ''; |
| 2562 |
## reconsume |
## reconsume |
| 2563 |
|
|
| 2564 |
return ($self->{ct}); # comment |
return ($self->{ct}); # comment |
| 2627 |
|
|
| 2628 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name'); |
| 2629 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2630 |
|
$self->{s_kwd} = ''; |
| 2631 |
|
|
| 2632 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 2633 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 2647 |
|
|
| 2648 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name'); |
| 2649 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2650 |
|
$self->{s_kwd} = ''; |
| 2651 |
## reconsume |
## reconsume |
| 2652 |
|
|
| 2653 |
return ($self->{ct}); # DOCTYPE (quirks) |
return ($self->{ct}); # DOCTYPE (quirks) |
| 2691 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
| 2692 |
|
|
| 2693 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2694 |
|
$self->{s_kwd} = ''; |
| 2695 |
|
|
| 2696 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 2697 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 2711 |
|
|
| 2712 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE'); |
| 2713 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2714 |
|
$self->{s_kwd} = ''; |
| 2715 |
## reconsume |
## reconsume |
| 2716 |
|
|
| 2717 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 2755 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
| 2756 |
|
|
| 2757 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2758 |
|
$self->{s_kwd} = ''; |
| 2759 |
|
|
| 2760 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 2761 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 2775 |
|
|
| 2776 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE'); |
| 2777 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2778 |
|
$self->{s_kwd} = ''; |
| 2779 |
## reconsume |
## reconsume |
| 2780 |
|
|
| 2781 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 3004 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal'); |
| 3005 |
|
|
| 3006 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3007 |
|
$self->{s_kwd} = ''; |
| 3008 |
|
|
| 3009 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3010 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 3026 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE'); |
| 3027 |
|
|
| 3028 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3029 |
|
$self->{s_kwd} = ''; |
| 3030 |
## reconsume |
## reconsume |
| 3031 |
|
|
| 3032 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 3073 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal'); |
| 3074 |
|
|
| 3075 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3076 |
|
$self->{s_kwd} = ''; |
| 3077 |
|
|
| 3078 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3079 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 3095 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal'); |
| 3096 |
|
|
| 3097 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3098 |
|
$self->{s_kwd} = ''; |
| 3099 |
## reconsume |
## reconsume |
| 3100 |
|
|
| 3101 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 3144 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal'); |
| 3145 |
|
|
| 3146 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3147 |
|
$self->{s_kwd} = ''; |
| 3148 |
|
|
| 3149 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3150 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 3166 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal'); |
| 3167 |
|
|
| 3168 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3169 |
|
$self->{s_kwd} = ''; |
| 3170 |
## reconsume |
## reconsume |
| 3171 |
|
|
| 3172 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 3245 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
| 3246 |
|
|
| 3247 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3248 |
|
$self->{s_kwd} = ''; |
| 3249 |
|
|
| 3250 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3251 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 3266 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE'); |
| 3267 |
|
|
| 3268 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3269 |
|
$self->{s_kwd} = ''; |
| 3270 |
## reconsume |
## reconsume |
| 3271 |
|
|
| 3272 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 3344 |
|
|
| 3345 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal'); |
| 3346 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3347 |
|
$self->{s_kwd} = ''; |
| 3348 |
|
|
| 3349 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3350 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 3366 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE'); |
| 3367 |
|
|
| 3368 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3369 |
|
$self->{s_kwd} = ''; |
| 3370 |
## reconsume |
## reconsume |
| 3371 |
|
|
| 3372 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 3413 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal'); |
| 3414 |
|
|
| 3415 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3416 |
|
$self->{s_kwd} = ''; |
| 3417 |
|
|
| 3418 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3419 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 3435 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal'); |
| 3436 |
|
|
| 3437 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3438 |
|
$self->{s_kwd} = ''; |
| 3439 |
## reconsume |
## reconsume |
| 3440 |
|
|
| 3441 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 3484 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal'); |
| 3485 |
|
|
| 3486 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3487 |
|
$self->{s_kwd} = ''; |
| 3488 |
|
|
| 3489 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3490 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 3506 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal'); |
| 3507 |
|
|
| 3508 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3509 |
|
$self->{s_kwd} = ''; |
| 3510 |
## reconsume |
## reconsume |
| 3511 |
|
|
| 3512 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 3553 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
| 3554 |
|
|
| 3555 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3556 |
|
$self->{s_kwd} = ''; |
| 3557 |
|
|
| 3558 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3559 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 3573 |
|
|
| 3574 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE'); |
| 3575 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3576 |
|
$self->{s_kwd} = ''; |
| 3577 |
## reconsume |
## reconsume |
| 3578 |
|
|
| 3579 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
| 3603 |
if ($self->{nc} == 0x003E) { # > |
if ($self->{nc} == 0x003E) { # > |
| 3604 |
|
|
| 3605 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3606 |
|
$self->{s_kwd} = ''; |
| 3607 |
|
|
| 3608 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3609 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 3622 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 3623 |
|
|
| 3624 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3625 |
|
$self->{s_kwd} = ''; |
| 3626 |
## reconsume |
## reconsume |
| 3627 |
|
|
| 3628 |
return ($self->{ct}); # DOCTYPE |
return ($self->{ct}); # DOCTYPE |
| 3668 |
|
|
| 3669 |
redo A; |
redo A; |
| 3670 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 3671 |
|
if ($self->{is_xml}) { |
| 3672 |
|
|
| 3673 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no mse'); ## TODO: type |
| 3674 |
|
} else { |
| 3675 |
|
|
| 3676 |
|
} |
| 3677 |
|
|
| 3678 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3679 |
|
$self->{s_kwd} = ''; |
| 3680 |
|
|
| 3681 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3682 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 3745 |
} elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) { |
} elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) { |
| 3746 |
if ($self->{nc} == 0x003E) { # > |
if ($self->{nc} == 0x003E) { # > |
| 3747 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3748 |
|
$self->{s_kwd} = ''; |
| 3749 |
|
|
| 3750 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3751 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 3853 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
| 3854 |
|
|
| 3855 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 3856 |
|
$self->{s_kwd} = ''; |
| 3857 |
## Reconsume. |
## Reconsume. |
| 3858 |
return ({type => CHARACTER_TOKEN, data => '&', |
return ({type => CHARACTER_TOKEN, data => '&', |
| 3859 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 3864 |
|
|
| 3865 |
$self->{ca}->{value} .= '&'; |
$self->{ca}->{value} .= '&'; |
| 3866 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 3867 |
|
$self->{s_kwd} = ''; |
| 3868 |
## Reconsume. |
## Reconsume. |
| 3869 |
redo A; |
redo A; |
| 3870 |
} |
} |
| 3915 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
| 3916 |
|
|
| 3917 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 3918 |
|
$self->{s_kwd} = ''; |
| 3919 |
## Reconsume. |
## Reconsume. |
| 3920 |
return ({type => CHARACTER_TOKEN, |
return ({type => CHARACTER_TOKEN, |
| 3921 |
data => '&#', |
data => '&#', |
| 3927 |
|
|
| 3928 |
$self->{ca}->{value} .= '&#'; |
$self->{ca}->{value} .= '&#'; |
| 3929 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 3930 |
|
$self->{s_kwd} = ''; |
| 3931 |
## Reconsume. |
## Reconsume. |
| 3932 |
redo A; |
redo A; |
| 3933 |
} |
} |
| 3993 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
| 3994 |
|
|
| 3995 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 3996 |
|
$self->{s_kwd} = ''; |
| 3997 |
## Reconsume. |
## Reconsume. |
| 3998 |
return ({type => CHARACTER_TOKEN, data => chr $code, |
return ({type => CHARACTER_TOKEN, data => chr $code, |
| 3999 |
|
has_reference => 1, |
| 4000 |
line => $l, column => $c, |
line => $l, column => $c, |
| 4001 |
}); |
}); |
| 4002 |
redo A; |
redo A; |
| 4005 |
$self->{ca}->{value} .= chr $code; |
$self->{ca}->{value} .= chr $code; |
| 4006 |
$self->{ca}->{has_reference} = 1; |
$self->{ca}->{has_reference} = 1; |
| 4007 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 4008 |
|
$self->{s_kwd} = ''; |
| 4009 |
## Reconsume. |
## Reconsume. |
| 4010 |
redo A; |
redo A; |
| 4011 |
} |
} |
| 4031 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
| 4032 |
|
|
| 4033 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 4034 |
|
$self->{s_kwd} = ''; |
| 4035 |
## Reconsume. |
## Reconsume. |
| 4036 |
return ({type => CHARACTER_TOKEN, |
return ({type => CHARACTER_TOKEN, |
| 4037 |
data => '&' . $self->{s_kwd}, |
data => '&' . $self->{s_kwd}, |
| 4043 |
|
|
| 4044 |
$self->{ca}->{value} .= '&' . $self->{s_kwd}; |
$self->{ca}->{value} .= '&' . $self->{s_kwd}; |
| 4045 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 4046 |
|
$self->{s_kwd} = ''; |
| 4047 |
## Reconsume. |
## Reconsume. |
| 4048 |
redo A; |
redo A; |
| 4049 |
} |
} |
| 4146 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
| 4147 |
|
|
| 4148 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 4149 |
|
$self->{s_kwd} = ''; |
| 4150 |
## Reconsume. |
## Reconsume. |
| 4151 |
return ({type => CHARACTER_TOKEN, data => chr $code, |
return ({type => CHARACTER_TOKEN, data => chr $code, |
| 4152 |
|
has_reference => 1, |
| 4153 |
line => $l, column => $c, |
line => $l, column => $c, |
| 4154 |
}); |
}); |
| 4155 |
redo A; |
redo A; |
| 4158 |
$self->{ca}->{value} .= chr $code; |
$self->{ca}->{value} .= chr $code; |
| 4159 |
$self->{ca}->{has_reference} = 1; |
$self->{ca}->{has_reference} = 1; |
| 4160 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 4161 |
|
$self->{s_kwd} = ''; |
| 4162 |
## Reconsume. |
## Reconsume. |
| 4163 |
redo A; |
redo A; |
| 4164 |
} |
} |
| 4271 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
| 4272 |
|
|
| 4273 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 4274 |
|
$self->{s_kwd} = ''; |
| 4275 |
## Reconsume. |
## Reconsume. |
| 4276 |
return ({type => CHARACTER_TOKEN, |
return ({type => CHARACTER_TOKEN, |
| 4277 |
data => $data, |
data => $data, |
| 4278 |
|
has_reference => $has_ref, |
| 4279 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 4280 |
column => $self->{column_prev} + 1 - length $self->{s_kwd}, |
column => $self->{column_prev} + 1 - length $self->{s_kwd}, |
| 4281 |
}); |
}); |
| 4285 |
$self->{ca}->{value} .= $data; |
$self->{ca}->{value} .= $data; |
| 4286 |
$self->{ca}->{has_reference} = 1 if $has_ref; |
$self->{ca}->{has_reference} = 1 if $has_ref; |
| 4287 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 4288 |
|
$self->{s_kwd} = ''; |
| 4289 |
## Reconsume. |
## Reconsume. |
| 4290 |
redo A; |
redo A; |
| 4291 |
} |
} |
| 4292 |
|
|
| 4293 |
|
## XML-only states |
| 4294 |
|
|
| 4295 |
|
} elsif ($self->{state} == PI_STATE) { |
| 4296 |
|
if ($is_space->{$self->{nc}} or |
| 4297 |
|
$self->{nc} == 0x003F or # ? ## XML5: Same as "Anything else" |
| 4298 |
|
$self->{nc} == -1) { |
| 4299 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bare pio', ## TODO: type |
| 4300 |
|
line => $self->{line_prev}, |
| 4301 |
|
column => $self->{column_prev} |
| 4302 |
|
- 1 * ($self->{nc} != -1)); |
| 4303 |
|
$self->{state} = BOGUS_COMMENT_STATE; |
| 4304 |
|
## Reconsume. |
| 4305 |
|
$self->{ct} = {type => COMMENT_TOKEN, |
| 4306 |
|
data => '?', |
| 4307 |
|
line => $self->{line_prev}, |
| 4308 |
|
column => $self->{column_prev} |
| 4309 |
|
- 1 * ($self->{nc} != -1), |
| 4310 |
|
}; |
| 4311 |
|
redo A; |
| 4312 |
|
} else { |
| 4313 |
|
$self->{ct} = {type => PI_TOKEN, |
| 4314 |
|
target => chr $self->{nc}, |
| 4315 |
|
data => '', |
| 4316 |
|
line => $self->{line_prev}, |
| 4317 |
|
column => $self->{column_prev} - 1, |
| 4318 |
|
}; |
| 4319 |
|
$self->{state} = PI_TARGET_STATE; |
| 4320 |
|
|
| 4321 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4322 |
|
$self->{line_prev} = $self->{line}; |
| 4323 |
|
$self->{column_prev} = $self->{column}; |
| 4324 |
|
$self->{column}++; |
| 4325 |
|
$self->{nc} |
| 4326 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 4327 |
|
} else { |
| 4328 |
|
$self->{set_nc}->($self); |
| 4329 |
|
} |
| 4330 |
|
|
| 4331 |
|
redo A; |
| 4332 |
|
} |
| 4333 |
|
} elsif ($self->{state} == PI_TARGET_STATE) { |
| 4334 |
|
if ($is_space->{$self->{nc}}) { |
| 4335 |
|
$self->{state} = PI_TARGET_AFTER_STATE; |
| 4336 |
|
|
| 4337 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4338 |
|
$self->{line_prev} = $self->{line}; |
| 4339 |
|
$self->{column_prev} = $self->{column}; |
| 4340 |
|
$self->{column}++; |
| 4341 |
|
$self->{nc} |
| 4342 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 4343 |
|
} else { |
| 4344 |
|
$self->{set_nc}->($self); |
| 4345 |
|
} |
| 4346 |
|
|
| 4347 |
|
redo A; |
| 4348 |
|
} elsif ($self->{nc} == -1) { |
| 4349 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type |
| 4350 |
|
$self->{state} = DATA_STATE; |
| 4351 |
|
$self->{s_kwd} = ''; |
| 4352 |
|
## Reconsume. |
| 4353 |
|
return ($self->{ct}); # pi |
| 4354 |
|
redo A; |
| 4355 |
|
} elsif ($self->{nc} == 0x003F) { # ? |
| 4356 |
|
$self->{state} = PI_AFTER_STATE; |
| 4357 |
|
|
| 4358 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4359 |
|
$self->{line_prev} = $self->{line}; |
| 4360 |
|
$self->{column_prev} = $self->{column}; |
| 4361 |
|
$self->{column}++; |
| 4362 |
|
$self->{nc} |
| 4363 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 4364 |
|
} else { |
| 4365 |
|
$self->{set_nc}->($self); |
| 4366 |
|
} |
| 4367 |
|
|
| 4368 |
|
redo A; |
| 4369 |
|
} else { |
| 4370 |
|
## XML5: typo ("tag name" -> "target") |
| 4371 |
|
$self->{ct}->{target} .= chr $self->{nc}; # pi |
| 4372 |
|
|
| 4373 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4374 |
|
$self->{line_prev} = $self->{line}; |
| 4375 |
|
$self->{column_prev} = $self->{column}; |
| 4376 |
|
$self->{column}++; |
| 4377 |
|
$self->{nc} |
| 4378 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 4379 |
|
} else { |
| 4380 |
|
$self->{set_nc}->($self); |
| 4381 |
|
} |
| 4382 |
|
|
| 4383 |
|
redo A; |
| 4384 |
|
} |
| 4385 |
|
} elsif ($self->{state} == PI_TARGET_AFTER_STATE) { |
| 4386 |
|
if ($is_space->{$self->{nc}}) { |
| 4387 |
|
## Stay in the state. |
| 4388 |
|
|
| 4389 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4390 |
|
$self->{line_prev} = $self->{line}; |
| 4391 |
|
$self->{column_prev} = $self->{column}; |
| 4392 |
|
$self->{column}++; |
| 4393 |
|
$self->{nc} |
| 4394 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 4395 |
|
} else { |
| 4396 |
|
$self->{set_nc}->($self); |
| 4397 |
|
} |
| 4398 |
|
|
| 4399 |
|
redo A; |
| 4400 |
|
} else { |
| 4401 |
|
$self->{state} = PI_DATA_STATE; |
| 4402 |
|
## Reprocess. |
| 4403 |
|
redo A; |
| 4404 |
|
} |
| 4405 |
|
} elsif ($self->{state} == PI_DATA_STATE) { |
| 4406 |
|
if ($self->{nc} == 0x003F) { # ? |
| 4407 |
|
$self->{state} = PI_DATA_AFTER_STATE; |
| 4408 |
|
|
| 4409 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4410 |
|
$self->{line_prev} = $self->{line}; |
| 4411 |
|
$self->{column_prev} = $self->{column}; |
| 4412 |
|
$self->{column}++; |
| 4413 |
|
$self->{nc} |
| 4414 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 4415 |
|
} else { |
| 4416 |
|
$self->{set_nc}->($self); |
| 4417 |
|
} |
| 4418 |
|
|
| 4419 |
|
redo A; |
| 4420 |
|
} elsif ($self->{nc} == -1) { |
| 4421 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type |
| 4422 |
|
$self->{state} = DATA_STATE; |
| 4423 |
|
$self->{s_kwd} = ''; |
| 4424 |
|
## Reprocess. |
| 4425 |
|
return ($self->{ct}); # pi |
| 4426 |
|
redo A; |
| 4427 |
|
} else { |
| 4428 |
|
$self->{ct}->{data} .= chr $self->{nc}; # pi |
| 4429 |
|
$self->{read_until}->($self->{ct}->{data}, q[?], |
| 4430 |
|
length $self->{ct}->{data}); |
| 4431 |
|
## Stay in the state. |
| 4432 |
|
|
| 4433 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4434 |
|
$self->{line_prev} = $self->{line}; |
| 4435 |
|
$self->{column_prev} = $self->{column}; |
| 4436 |
|
$self->{column}++; |
| 4437 |
|
$self->{nc} |
| 4438 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 4439 |
|
} else { |
| 4440 |
|
$self->{set_nc}->($self); |
| 4441 |
|
} |
| 4442 |
|
|
| 4443 |
|
## Reprocess. |
| 4444 |
|
redo A; |
| 4445 |
|
} |
| 4446 |
|
} elsif ($self->{state} == PI_AFTER_STATE) { |
| 4447 |
|
if ($self->{nc} == 0x003E) { # > |
| 4448 |
|
$self->{state} = DATA_STATE; |
| 4449 |
|
$self->{s_kwd} = ''; |
| 4450 |
|
|
| 4451 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4452 |
|
$self->{line_prev} = $self->{line}; |
| 4453 |
|
$self->{column_prev} = $self->{column}; |
| 4454 |
|
$self->{column}++; |
| 4455 |
|
$self->{nc} |
| 4456 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 4457 |
|
} else { |
| 4458 |
|
$self->{set_nc}->($self); |
| 4459 |
|
} |
| 4460 |
|
|
| 4461 |
|
return ($self->{ct}); # pi |
| 4462 |
|
redo A; |
| 4463 |
|
} elsif ($self->{nc} == 0x003F) { # ? |
| 4464 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no s after target', ## TODO: type |
| 4465 |
|
line => $self->{line_prev}, |
| 4466 |
|
column => $self->{column_prev}); ## XML5: no error |
| 4467 |
|
$self->{ct}->{data} .= '?'; |
| 4468 |
|
$self->{state} = PI_DATA_AFTER_STATE; |
| 4469 |
|
|
| 4470 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4471 |
|
$self->{line_prev} = $self->{line}; |
| 4472 |
|
$self->{column_prev} = $self->{column}; |
| 4473 |
|
$self->{column}++; |
| 4474 |
|
$self->{nc} |
| 4475 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 4476 |
|
} else { |
| 4477 |
|
$self->{set_nc}->($self); |
| 4478 |
|
} |
| 4479 |
|
|
| 4480 |
|
redo A; |
| 4481 |
|
} else { |
| 4482 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no s after target', ## TODO: type |
| 4483 |
|
line => $self->{line_prev}, |
| 4484 |
|
column => $self->{column_prev} |
| 4485 |
|
+ 1 * ($self->{nc} == -1)); ## XML5: no error |
| 4486 |
|
$self->{ct}->{data} .= '?'; ## XML5: not appended |
| 4487 |
|
$self->{state} = PI_DATA_STATE; |
| 4488 |
|
## Reprocess. |
| 4489 |
|
redo A; |
| 4490 |
|
} |
| 4491 |
|
} elsif ($self->{state} == PI_DATA_AFTER_STATE) { |
| 4492 |
|
## XML5: Same as "pi after state" in XML5 |
| 4493 |
|
if ($self->{nc} == 0x003E) { # > |
| 4494 |
|
$self->{state} = DATA_STATE; |
| 4495 |
|
$self->{s_kwd} = ''; |
| 4496 |
|
|
| 4497 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4498 |
|
$self->{line_prev} = $self->{line}; |
| 4499 |
|
$self->{column_prev} = $self->{column}; |
| 4500 |
|
$self->{column}++; |
| 4501 |
|
$self->{nc} |
| 4502 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 4503 |
|
} else { |
| 4504 |
|
$self->{set_nc}->($self); |
| 4505 |
|
} |
| 4506 |
|
|
| 4507 |
|
return ($self->{ct}); # pi |
| 4508 |
|
redo A; |
| 4509 |
|
} elsif ($self->{nc} == 0x003F) { # ? |
| 4510 |
|
$self->{ct}->{data} .= '?'; |
| 4511 |
|
## Stay in the state. |
| 4512 |
|
|
| 4513 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4514 |
|
$self->{line_prev} = $self->{line}; |
| 4515 |
|
$self->{column_prev} = $self->{column}; |
| 4516 |
|
$self->{column}++; |
| 4517 |
|
$self->{nc} |
| 4518 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 4519 |
|
} else { |
| 4520 |
|
$self->{set_nc}->($self); |
| 4521 |
|
} |
| 4522 |
|
|
| 4523 |
|
redo A; |
| 4524 |
|
} else { |
| 4525 |
|
$self->{ct}->{data} .= '?'; ## XML5: not appended |
| 4526 |
|
$self->{state} = PI_DATA_STATE; |
| 4527 |
|
## Reprocess. |
| 4528 |
|
redo A; |
| 4529 |
|
} |
| 4530 |
|
|
| 4531 |
} else { |
} else { |
| 4532 |
die "$0: $self->{state}: Unknown state"; |
die "$0: $self->{state}: Unknown state"; |
| 4533 |
} |
} |