| 114 |
sub ENTITY_NAME_STATE () { 49 } |
sub ENTITY_NAME_STATE () { 49 } |
| 115 |
sub PCDATA_STATE () { 50 } # "data state" in the spec |
sub PCDATA_STATE () { 50 } # "data state" in the spec |
| 116 |
|
|
| 117 |
|
## XML states |
| 118 |
|
## Tokenizer state entered from the tag open state when "?" follows "<"
## and |$self->{is_xml}| is true.  In this state, white space, "?", or
## EOF raises a 'bare pio' parse error and switches to the bogus comment
## state with a comment token whose data is '?'; any other character
## starts a |PI_TOKEN| whose target begins with that character and
## switches to |PI_TARGET_STATE|.
sub PI_STATE () { 51 } |
| 119 |
|
## Tokenizer state that accumulates the processing instruction target
## into |$self->{ct}->{target}|.  White space switches to
## |PI_TARGET_AFTER_STATE|, "?" switches to |PI_AFTER_STATE|, and EOF
## raises a 'no pic' parse error, switches to the data state, and
## returns the current token.
sub PI_TARGET_STATE () { 52 } |
| 120 |
|
## Tokenizer state that skips white space following the processing
## instruction target.  Any non-space character (including EOF)
## switches to |PI_DATA_STATE| and is reprocessed there.
sub PI_TARGET_AFTER_STATE () { 53 } |
| 121 |
|
## Tokenizer state that accumulates processing instruction data into
## |$self->{ct}->{data}|.  "?" switches to |PI_DATA_AFTER_STATE|; EOF
## raises a 'no pic' parse error, switches to the data state, and
## returns the current token; any other character is appended to the
## data, after which |$self->{read_until}| is called with q[?]
## (presumably to bulk-read up to the next "?" -- confirm against the
## |read_until| implementation).
sub PI_DATA_STATE () { 54 } |
| 122 |
|
## Tokenizer state reached from |PI_TARGET_STATE| on "?", i.e. a "?"
## that directly follows the target.  ">" switches to the data state
## and returns the pi token.  Anything else raises a 'no s after target'
## parse error and appends '?' to the data: a further "?" then switches
## to |PI_DATA_AFTER_STATE|, while any other character switches to
## |PI_DATA_STATE| and is reprocessed there.
sub PI_AFTER_STATE () { 55 } |
| 123 |
|
## Tokenizer state reached on "?" within processing instruction data.
## ">" switches to the data state and returns the pi token; a further
## "?" appends '?' to the data and stays in this state; any other
## character appends '?' to the data, switches back to |PI_DATA_STATE|,
## and is reprocessed there.
sub PI_DATA_AFTER_STATE () { 56 } |
| 124 |
|
|
| 125 |
## Tree constructor state constants (see Whatpm::HTML for the full |
## Tree constructor state constants (see Whatpm::HTML for the full |
| 126 |
## list and descriptions) |
## list and descriptions) |
| 127 |
|
|
| 507 |
return ($token); |
return ($token); |
| 508 |
redo A; |
redo A; |
| 509 |
} elsif ($self->{state} == TAG_OPEN_STATE) { |
} elsif ($self->{state} == TAG_OPEN_STATE) { |
| 510 |
|
## XML5: "tag state". |
| 511 |
|
|
| 512 |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
| 513 |
if ($self->{nc} == 0x002F) { # / |
if ($self->{nc} == 0x002F) { # / |
| 514 |
|
|
| 640 |
|
|
| 641 |
redo A; |
redo A; |
| 642 |
} elsif ($self->{nc} == 0x003F) { # ? |
} elsif ($self->{nc} == 0x003F) { # ? |
| 643 |
|
if ($self->{is_xml}) { |
| 644 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'pio', |
|
| 645 |
line => $self->{line_prev}, |
$self->{state} = PI_STATE; |
| 646 |
column => $self->{column_prev}); |
|
| 647 |
$self->{state} = BOGUS_COMMENT_STATE; |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 648 |
$self->{ct} = {type => COMMENT_TOKEN, data => '', |
$self->{line_prev} = $self->{line}; |
| 649 |
line => $self->{line_prev}, |
$self->{column_prev} = $self->{column}; |
| 650 |
column => $self->{column_prev}, |
$self->{column}++; |
| 651 |
}; |
$self->{nc} |
| 652 |
## $self->{nc} is intentionally left as is |
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 653 |
redo A; |
} else { |
| 654 |
} else { |
$self->{set_nc}->($self); |
| 655 |
|
} |
| 656 |
|
|
| 657 |
|
redo A; |
| 658 |
|
} else { |
| 659 |
|
|
| 660 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'pio', |
| 661 |
|
line => $self->{line_prev}, |
| 662 |
|
column => $self->{column_prev}); |
| 663 |
|
$self->{state} = BOGUS_COMMENT_STATE; |
| 664 |
|
$self->{ct} = {type => COMMENT_TOKEN, data => '', |
| 665 |
|
line => $self->{line_prev}, |
| 666 |
|
column => $self->{column_prev}, |
| 667 |
|
}; |
| 668 |
|
## $self->{nc} is intentionally left as is |
| 669 |
|
redo A; |
| 670 |
|
} |
| 671 |
|
} elsif (not $self->{is_xml} or $is_space->{$self->{nc}}) { |
| 672 |
|
|
| 673 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago', |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago', |
| 674 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 683 |
}); |
}); |
| 684 |
|
|
| 685 |
redo A; |
redo A; |
| 686 |
|
} else { |
| 687 |
|
## XML5: "<:" is a parse error. |
| 688 |
|
|
| 689 |
|
$self->{ct} = {type => START_TAG_TOKEN, |
| 690 |
|
tag_name => chr ($self->{nc}), |
| 691 |
|
line => $self->{line_prev}, |
| 692 |
|
column => $self->{column_prev}}; |
| 693 |
|
$self->{state} = TAG_NAME_STATE; |
| 694 |
|
|
| 695 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 696 |
|
$self->{line_prev} = $self->{line}; |
| 697 |
|
$self->{column_prev} = $self->{column}; |
| 698 |
|
$self->{column}++; |
| 699 |
|
$self->{nc} |
| 700 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 701 |
|
} else { |
| 702 |
|
$self->{set_nc}->($self); |
| 703 |
|
} |
| 704 |
|
|
| 705 |
|
redo A; |
| 706 |
} |
} |
| 707 |
} else { |
} else { |
| 708 |
die "$0: $self->{content_model} in tag open"; |
die "$0: $self->{content_model} in tag open"; |
| 711 |
## NOTE: The "close tag open state" in the spec is implemented as |
## NOTE: The "close tag open state" in the spec is implemented as |
| 712 |
## |CLOSE_TAG_OPEN_STATE| and |CDATA_RCDATA_CLOSE_TAG_STATE|. |
## |CLOSE_TAG_OPEN_STATE| and |CDATA_RCDATA_CLOSE_TAG_STATE|. |
| 713 |
|
|
| 714 |
|
## XML5: "end tag state". |
| 715 |
|
|
| 716 |
my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</" |
my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</" |
| 717 |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
| 718 |
if (defined $self->{last_stag_name}) { |
if (defined $self->{last_stag_name}) { |
| 774 |
|
|
| 775 |
redo A; |
redo A; |
| 776 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
|
|
|
| 777 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'empty end tag', |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'empty end tag', |
| 778 |
line => $self->{line_prev}, ## "<" in "</>" |
line => $self->{line_prev}, ## "<" in "</>" |
| 779 |
column => $self->{column_prev} - 1); |
column => $self->{column_prev} - 1); |
| 780 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 781 |
$self->{s_kwd} = ''; |
$self->{s_kwd} = ''; |
| 782 |
|
if ($self->{is_xml}) { |
| 783 |
|
|
| 784 |
|
## XML5: No parse error. |
| 785 |
|
|
| 786 |
|
## NOTE: This parser raises a parse error, since it supports |
| 787 |
|
## XML1, not XML5. |
| 788 |
|
|
| 789 |
|
## NOTE: A short end tag token. |
| 790 |
|
my $ct = {type => END_TAG_TOKEN, |
| 791 |
|
tag_name => '', |
| 792 |
|
line => $self->{line_prev}, |
| 793 |
|
column => $self->{column_prev} - 1, |
| 794 |
|
}; |
| 795 |
|
|
| 796 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 797 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 798 |
$self->{column_prev} = $self->{column}; |
$self->{column_prev} = $self->{column}; |
| 803 |
$self->{set_nc}->($self); |
$self->{set_nc}->($self); |
| 804 |
} |
} |
| 805 |
|
|
| 806 |
|
return ($ct); |
| 807 |
|
} else { |
| 808 |
|
|
| 809 |
|
|
| 810 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 811 |
|
$self->{line_prev} = $self->{line}; |
| 812 |
|
$self->{column_prev} = $self->{column}; |
| 813 |
|
$self->{column}++; |
| 814 |
|
$self->{nc} |
| 815 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 816 |
|
} else { |
| 817 |
|
$self->{set_nc}->($self); |
| 818 |
|
} |
| 819 |
|
|
| 820 |
|
} |
| 821 |
redo A; |
redo A; |
| 822 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 823 |
|
|
| 831 |
}); |
}); |
| 832 |
|
|
| 833 |
redo A; |
redo A; |
| 834 |
} else { |
} elsif (not $self->{is_xml} or |
| 835 |
|
$is_space->{$self->{nc}}) { |
| 836 |
|
|
| 837 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus end tag'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus end tag', |
| 838 |
|
line => $self->{line_prev}, # "<" of "</" |
| 839 |
|
column => $self->{column_prev} - 1); |
| 840 |
$self->{state} = BOGUS_COMMENT_STATE; |
$self->{state} = BOGUS_COMMENT_STATE; |
| 841 |
$self->{ct} = {type => COMMENT_TOKEN, data => '', |
$self->{ct} = {type => COMMENT_TOKEN, data => '', |
| 842 |
line => $self->{line_prev}, # "<" of "</" |
line => $self->{line_prev}, # "<" of "</" |
| 849 |
## generated from the bogus end tag, as defined in the |
## generated from the bogus end tag, as defined in the |
| 850 |
## "bogus comment state" entry. |
## "bogus comment state" entry. |
| 851 |
redo A; |
redo A; |
| 852 |
|
} else { |
| 853 |
|
## XML5: "</:" is a parse error. |
| 854 |
|
|
| 855 |
|
$self->{ct} = {type => END_TAG_TOKEN, |
| 856 |
|
tag_name => chr ($self->{nc}), |
| 857 |
|
line => $l, column => $c}; |
| 858 |
|
$self->{state} = TAG_NAME_STATE; ## XML5: "end tag name state". |
| 859 |
|
|
| 860 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 861 |
|
$self->{line_prev} = $self->{line}; |
| 862 |
|
$self->{column_prev} = $self->{column}; |
| 863 |
|
$self->{column}++; |
| 864 |
|
$self->{nc} |
| 865 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 866 |
|
} else { |
| 867 |
|
$self->{set_nc}->($self); |
| 868 |
|
} |
| 869 |
|
|
| 870 |
|
redo A; |
| 871 |
} |
} |
| 872 |
} elsif ($self->{state} == CDATA_RCDATA_CLOSE_TAG_STATE) { |
} elsif ($self->{state} == CDATA_RCDATA_CLOSE_TAG_STATE) { |
| 873 |
my $ch = substr $self->{last_stag_name}, length $self->{s_kwd}, 1; |
my $ch = substr $self->{last_stag_name}, length $self->{s_kwd}, 1; |
| 2205 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 2206 |
column => $self->{column_prev} - 2, |
column => $self->{column_prev} - 2, |
| 2207 |
}; |
}; |
| 2208 |
$self->{state} = COMMENT_START_STATE; |
$self->{state} = COMMENT_START_STATE; ## XML5: "comment state". |
| 2209 |
|
|
| 2210 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 2211 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 2268 |
} elsif ((length $self->{s_kwd}) == 6 and |
} elsif ((length $self->{s_kwd}) == 6 and |
| 2269 |
($self->{nc} == 0x0045 or # E |
($self->{nc} == 0x0045 or # E |
| 2270 |
$self->{nc} == 0x0065)) { # e |
$self->{nc} == 0x0065)) { # e |
| 2271 |
|
if ($self->{s_kwd} ne 'DOCTYP') { |
| 2272 |
|
|
| 2273 |
|
## XML5: case-sensitive. |
| 2274 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO |
| 2275 |
|
text => 'DOCTYPE', |
| 2276 |
|
line => $self->{line_prev}, |
| 2277 |
|
column => $self->{column_prev} - 5); |
| 2278 |
|
} else { |
| 2279 |
|
|
| 2280 |
|
} |
| 2281 |
$self->{state} = DOCTYPE_STATE; |
$self->{state} = DOCTYPE_STATE; |
| 2282 |
$self->{ct} = {type => DOCTYPE_TOKEN, |
$self->{ct} = {type => DOCTYPE_TOKEN, |
| 2283 |
quirks => 1, |
quirks => 1, |
| 2335 |
redo A; |
redo A; |
| 2336 |
} elsif ($self->{s_kwd} eq '[CDATA' and |
} elsif ($self->{s_kwd} eq '[CDATA' and |
| 2337 |
$self->{nc} == 0x005B) { # [ |
$self->{nc} == 0x005B) { # [ |
|
|
|
|
|
|
| 2338 |
if ($self->{is_xml} and |
if ($self->{is_xml} and |
| 2339 |
not $self->{tainted} and |
not $self->{tainted} and |
| 2340 |
@{$self->{open_elements} or []} == 0) { |
@{$self->{open_elements} or []} == 0) { |
| 2341 |
|
|
| 2342 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'cdata outside of root element', |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'cdata outside of root element', |
| 2343 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 2344 |
column => $self->{column_prev} - 7); |
column => $self->{column_prev} - 7); |
| 2345 |
$self->{tainted} = 1; |
$self->{tainted} = 1; |
| 2346 |
|
} else { |
| 2347 |
|
|
| 2348 |
} |
} |
| 2349 |
|
|
| 2350 |
$self->{ct} = {type => CHARACTER_TOKEN, |
$self->{ct} = {type => CHARACTER_TOKEN, |
| 2554 |
redo A; |
redo A; |
| 2555 |
} |
} |
| 2556 |
} elsif ($self->{state} == COMMENT_END_DASH_STATE) { |
} elsif ($self->{state} == COMMENT_END_DASH_STATE) { |
| 2557 |
|
## XML5: "comment dash state". |
| 2558 |
|
|
| 2559 |
if ($self->{nc} == 0x002D) { # - |
if ($self->{nc} == 0x002D) { # - |
| 2560 |
|
|
| 2561 |
$self->{state} = COMMENT_END_STATE; |
$self->{state} = COMMENT_END_STATE; |
| 2621 |
redo A; |
redo A; |
| 2622 |
} elsif ($self->{nc} == 0x002D) { # - |
} elsif ($self->{nc} == 0x002D) { # - |
| 2623 |
|
|
| 2624 |
|
## XML5: Not a parse error. |
| 2625 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment', |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment', |
| 2626 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 2627 |
column => $self->{column_prev}); |
column => $self->{column_prev}); |
| 2651 |
redo A; |
redo A; |
| 2652 |
} else { |
} else { |
| 2653 |
|
|
| 2654 |
|
## XML5: Not a parse error. |
| 2655 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment', |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment', |
| 2656 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 2657 |
column => $self->{column_prev}); |
column => $self->{column_prev}); |
| 3737 |
## NOTE: "CDATA section state" in the spec is jointly implemented |
## NOTE: "CDATA section state" in the spec is jointly implemented |
| 3738 |
## by three states, |CDATA_SECTION_STATE|, |CDATA_SECTION_MSE1_STATE|, |
## by three states, |CDATA_SECTION_STATE|, |CDATA_SECTION_MSE1_STATE|, |
| 3739 |
## and |CDATA_SECTION_MSE2_STATE|. |
## and |CDATA_SECTION_MSE2_STATE|. |
| 3740 |
|
|
| 3741 |
|
## XML5: "CDATA state". |
| 3742 |
|
|
| 3743 |
if ($self->{nc} == 0x005D) { # ] |
if ($self->{nc} == 0x005D) { # ] |
| 3744 |
|
|
| 3757 |
redo A; |
redo A; |
| 3758 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 3759 |
if ($self->{is_xml}) { |
if ($self->{is_xml}) { |
| 3760 |
|
|
| 3761 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no mse'); ## TODO: type |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no mse'); ## TODO: type |
| 3762 |
|
} else { |
| 3763 |
|
|
| 3764 |
} |
} |
| 3765 |
|
|
| 3766 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3767 |
$self->{s_kwd} = ''; |
$self->{s_kwd} = ''; |
| 3768 |
|
## Reconsume. |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
|
|
$self->{line_prev} = $self->{line}; |
|
|
$self->{column_prev} = $self->{column}; |
|
|
$self->{column}++; |
|
|
$self->{nc} |
|
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
|
|
} else { |
|
|
$self->{set_nc}->($self); |
|
|
} |
|
|
|
|
| 3769 |
if (length $self->{ct}->{data}) { # character |
if (length $self->{ct}->{data}) { # character |
| 3770 |
|
|
| 3771 |
return ($self->{ct}); # character |
return ($self->{ct}); # character |
| 3798 |
|
|
| 3799 |
## ISSUE: "text tokens" in spec. |
## ISSUE: "text tokens" in spec. |
| 3800 |
} elsif ($self->{state} == CDATA_SECTION_MSE1_STATE) { |
} elsif ($self->{state} == CDATA_SECTION_MSE1_STATE) { |
| 3801 |
|
## XML5: "CDATA bracket state". |
| 3802 |
|
|
| 3803 |
if ($self->{nc} == 0x005D) { # ] |
if ($self->{nc} == 0x005D) { # ] |
| 3804 |
|
|
| 3805 |
$self->{state} = CDATA_SECTION_MSE2_STATE; |
$self->{state} = CDATA_SECTION_MSE2_STATE; |
| 3817 |
redo A; |
redo A; |
| 3818 |
} else { |
} else { |
| 3819 |
|
|
| 3820 |
|
## XML5: If EOF, "]" is not appended and changed to the data state. |
| 3821 |
$self->{ct}->{data} .= ']'; |
$self->{ct}->{data} .= ']'; |
| 3822 |
$self->{state} = CDATA_SECTION_STATE; |
$self->{state} = CDATA_SECTION_STATE; ## XML5: Stay in the state. |
| 3823 |
## Reconsume. |
## Reconsume. |
| 3824 |
redo A; |
redo A; |
| 3825 |
} |
} |
| 3826 |
} elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) { |
} elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) { |
| 3827 |
|
## XML5: "CDATA end state". |
| 3828 |
|
|
| 3829 |
if ($self->{nc} == 0x003E) { # > |
if ($self->{nc} == 0x003E) { # > |
| 3830 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3831 |
$self->{s_kwd} = ''; |
$self->{s_kwd} = ''; |
| 3868 |
|
|
| 3869 |
$self->{ct}->{data} .= ']]'; # character |
$self->{ct}->{data} .= ']]'; # character |
| 3870 |
$self->{state} = CDATA_SECTION_STATE; |
$self->{state} = CDATA_SECTION_STATE; |
| 3871 |
## Reconsume. |
## Reconsume. ## XML5: Emit. |
| 3872 |
redo A; |
redo A; |
| 3873 |
} |
} |
| 3874 |
} elsif ($self->{state} == ENTITY_STATE) { |
} elsif ($self->{state} == ENTITY_STATE) { |
| 4372 |
## Reconsume. |
## Reconsume. |
| 4373 |
redo A; |
redo A; |
| 4374 |
} |
} |
| 4375 |
|
|
| 4376 |
|
## XML-only states |
| 4377 |
|
|
| 4378 |
|
} elsif ($self->{state} == PI_STATE) { |
| 4379 |
|
if ($is_space->{$self->{nc}} or |
| 4380 |
|
$self->{nc} == 0x003F or # ? ## XML5: Same as "Anything else" |
| 4381 |
|
$self->{nc} == -1) { |
| 4382 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bare pio', ## TODO: type |
| 4383 |
|
line => $self->{line_prev}, |
| 4384 |
|
column => $self->{column_prev} |
| 4385 |
|
- 1 * ($self->{nc} != -1)); |
| 4386 |
|
$self->{state} = BOGUS_COMMENT_STATE; |
| 4387 |
|
## Reconsume. |
| 4388 |
|
$self->{ct} = {type => COMMENT_TOKEN, |
| 4389 |
|
data => '?', |
| 4390 |
|
line => $self->{line_prev}, |
| 4391 |
|
column => $self->{column_prev} |
| 4392 |
|
- 1 * ($self->{nc} != -1), |
| 4393 |
|
}; |
| 4394 |
|
redo A; |
| 4395 |
|
} else { |
| 4396 |
|
$self->{ct} = {type => PI_TOKEN, |
| 4397 |
|
target => chr $self->{nc}, |
| 4398 |
|
data => '', |
| 4399 |
|
line => $self->{line_prev}, |
| 4400 |
|
column => $self->{column_prev} - 1, |
| 4401 |
|
}; |
| 4402 |
|
$self->{state} = PI_TARGET_STATE; |
| 4403 |
|
|
| 4404 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4405 |
|
$self->{line_prev} = $self->{line}; |
| 4406 |
|
$self->{column_prev} = $self->{column}; |
| 4407 |
|
$self->{column}++; |
| 4408 |
|
$self->{nc} |
| 4409 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 4410 |
|
} else { |
| 4411 |
|
$self->{set_nc}->($self); |
| 4412 |
|
} |
| 4413 |
|
|
| 4414 |
|
redo A; |
| 4415 |
|
} |
| 4416 |
|
} elsif ($self->{state} == PI_TARGET_STATE) { |
| 4417 |
|
if ($is_space->{$self->{nc}}) { |
| 4418 |
|
$self->{state} = PI_TARGET_AFTER_STATE; |
| 4419 |
|
|
| 4420 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4421 |
|
$self->{line_prev} = $self->{line}; |
| 4422 |
|
$self->{column_prev} = $self->{column}; |
| 4423 |
|
$self->{column}++; |
| 4424 |
|
$self->{nc} |
| 4425 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 4426 |
|
} else { |
| 4427 |
|
$self->{set_nc}->($self); |
| 4428 |
|
} |
| 4429 |
|
|
| 4430 |
|
redo A; |
| 4431 |
|
} elsif ($self->{nc} == -1) { |
| 4432 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type |
| 4433 |
|
$self->{state} = DATA_STATE; |
| 4434 |
|
$self->{s_kwd} = ''; |
| 4435 |
|
## Reconsume. |
| 4436 |
|
return ($self->{ct}); # pi |
| 4437 |
|
redo A; |
| 4438 |
|
} elsif ($self->{nc} == 0x003F) { # ? |
| 4439 |
|
$self->{state} = PI_AFTER_STATE; |
| 4440 |
|
|
| 4441 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4442 |
|
$self->{line_prev} = $self->{line}; |
| 4443 |
|
$self->{column_prev} = $self->{column}; |
| 4444 |
|
$self->{column}++; |
| 4445 |
|
$self->{nc} |
| 4446 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 4447 |
|
} else { |
| 4448 |
|
$self->{set_nc}->($self); |
| 4449 |
|
} |
| 4450 |
|
|
| 4451 |
|
redo A; |
| 4452 |
|
} else { |
| 4453 |
|
## XML5: typo ("tag name" -> "target") |
| 4454 |
|
$self->{ct}->{target} .= chr $self->{nc}; # pi |
| 4455 |
|
|
| 4456 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4457 |
|
$self->{line_prev} = $self->{line}; |
| 4458 |
|
$self->{column_prev} = $self->{column}; |
| 4459 |
|
$self->{column}++; |
| 4460 |
|
$self->{nc} |
| 4461 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 4462 |
|
} else { |
| 4463 |
|
$self->{set_nc}->($self); |
| 4464 |
|
} |
| 4465 |
|
|
| 4466 |
|
redo A; |
| 4467 |
|
} |
| 4468 |
|
} elsif ($self->{state} == PI_TARGET_AFTER_STATE) { |
| 4469 |
|
if ($is_space->{$self->{nc}}) { |
| 4470 |
|
## Stay in the state. |
| 4471 |
|
|
| 4472 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4473 |
|
$self->{line_prev} = $self->{line}; |
| 4474 |
|
$self->{column_prev} = $self->{column}; |
| 4475 |
|
$self->{column}++; |
| 4476 |
|
$self->{nc} |
| 4477 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 4478 |
|
} else { |
| 4479 |
|
$self->{set_nc}->($self); |
| 4480 |
|
} |
| 4481 |
|
|
| 4482 |
|
redo A; |
| 4483 |
|
} else { |
| 4484 |
|
$self->{state} = PI_DATA_STATE; |
| 4485 |
|
## Reprocess. |
| 4486 |
|
redo A; |
| 4487 |
|
} |
| 4488 |
|
} elsif ($self->{state} == PI_DATA_STATE) { |
| 4489 |
|
if ($self->{nc} == 0x003F) { # ? |
| 4490 |
|
$self->{state} = PI_DATA_AFTER_STATE; |
| 4491 |
|
|
| 4492 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4493 |
|
$self->{line_prev} = $self->{line}; |
| 4494 |
|
$self->{column_prev} = $self->{column}; |
| 4495 |
|
$self->{column}++; |
| 4496 |
|
$self->{nc} |
| 4497 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 4498 |
|
} else { |
| 4499 |
|
$self->{set_nc}->($self); |
| 4500 |
|
} |
| 4501 |
|
|
| 4502 |
|
redo A; |
| 4503 |
|
} elsif ($self->{nc} == -1) { |
| 4504 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type |
| 4505 |
|
$self->{state} = DATA_STATE; |
| 4506 |
|
$self->{s_kwd} = ''; |
| 4507 |
|
## Reprocess. |
| 4508 |
|
return ($self->{ct}); # pi |
| 4509 |
|
redo A; |
| 4510 |
|
} else { |
| 4511 |
|
$self->{ct}->{data} .= chr $self->{nc}; # pi |
| 4512 |
|
$self->{read_until}->($self->{ct}->{data}, q[?], |
| 4513 |
|
length $self->{ct}->{data}); |
| 4514 |
|
## Stay in the state. |
| 4515 |
|
|
| 4516 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4517 |
|
$self->{line_prev} = $self->{line}; |
| 4518 |
|
$self->{column_prev} = $self->{column}; |
| 4519 |
|
$self->{column}++; |
| 4520 |
|
$self->{nc} |
| 4521 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 4522 |
|
} else { |
| 4523 |
|
$self->{set_nc}->($self); |
| 4524 |
|
} |
| 4525 |
|
|
| 4526 |
|
## Reprocess. |
| 4527 |
|
redo A; |
| 4528 |
|
} |
| 4529 |
|
} elsif ($self->{state} == PI_AFTER_STATE) { |
| 4530 |
|
if ($self->{nc} == 0x003E) { # > |
| 4531 |
|
$self->{state} = DATA_STATE; |
| 4532 |
|
$self->{s_kwd} = ''; |
| 4533 |
|
|
| 4534 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4535 |
|
$self->{line_prev} = $self->{line}; |
| 4536 |
|
$self->{column_prev} = $self->{column}; |
| 4537 |
|
$self->{column}++; |
| 4538 |
|
$self->{nc} |
| 4539 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 4540 |
|
} else { |
| 4541 |
|
$self->{set_nc}->($self); |
| 4542 |
|
} |
| 4543 |
|
|
| 4544 |
|
return ($self->{ct}); # pi |
| 4545 |
|
redo A; |
| 4546 |
|
} elsif ($self->{nc} == 0x003F) { # ? |
| 4547 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no s after target', ## TODO: type |
| 4548 |
|
line => $self->{line_prev}, |
| 4549 |
|
column => $self->{column_prev}); ## XML5: no error |
| 4550 |
|
$self->{ct}->{data} .= '?'; |
| 4551 |
|
$self->{state} = PI_DATA_AFTER_STATE; |
| 4552 |
|
|
| 4553 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4554 |
|
$self->{line_prev} = $self->{line}; |
| 4555 |
|
$self->{column_prev} = $self->{column}; |
| 4556 |
|
$self->{column}++; |
| 4557 |
|
$self->{nc} |
| 4558 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 4559 |
|
} else { |
| 4560 |
|
$self->{set_nc}->($self); |
| 4561 |
|
} |
| 4562 |
|
|
| 4563 |
|
redo A; |
| 4564 |
|
} else { |
| 4565 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no s after target', ## TODO: type |
| 4566 |
|
line => $self->{line_prev}, |
| 4567 |
|
column => $self->{column_prev} |
| 4568 |
|
+ 1 * ($self->{nc} == -1)); ## XML5: no error |
| 4569 |
|
$self->{ct}->{data} .= '?'; ## XML5: not appended |
| 4570 |
|
$self->{state} = PI_DATA_STATE; |
| 4571 |
|
## Reprocess. |
| 4572 |
|
redo A; |
| 4573 |
|
} |
| 4574 |
|
} elsif ($self->{state} == PI_DATA_AFTER_STATE) { |
| 4575 |
|
## XML5: Same as "pi after state" in XML5 |
| 4576 |
|
if ($self->{nc} == 0x003E) { # > |
| 4577 |
|
$self->{state} = DATA_STATE; |
| 4578 |
|
$self->{s_kwd} = ''; |
| 4579 |
|
|
| 4580 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4581 |
|
$self->{line_prev} = $self->{line}; |
| 4582 |
|
$self->{column_prev} = $self->{column}; |
| 4583 |
|
$self->{column}++; |
| 4584 |
|
$self->{nc} |
| 4585 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 4586 |
|
} else { |
| 4587 |
|
$self->{set_nc}->($self); |
| 4588 |
|
} |
| 4589 |
|
|
| 4590 |
|
return ($self->{ct}); # pi |
| 4591 |
|
redo A; |
| 4592 |
|
} elsif ($self->{nc} == 0x003F) { # ? |
| 4593 |
|
$self->{ct}->{data} .= '?'; |
| 4594 |
|
## Stay in the state. |
| 4595 |
|
|
| 4596 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4597 |
|
$self->{line_prev} = $self->{line}; |
| 4598 |
|
$self->{column_prev} = $self->{column}; |
| 4599 |
|
$self->{column}++; |
| 4600 |
|
$self->{nc} |
| 4601 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 4602 |
|
} else { |
| 4603 |
|
$self->{set_nc}->($self); |
| 4604 |
|
} |
| 4605 |
|
|
| 4606 |
|
redo A; |
| 4607 |
|
} else { |
| 4608 |
|
$self->{ct}->{data} .= '?'; ## XML5: not appended |
| 4609 |
|
$self->{state} = PI_DATA_STATE; |
| 4610 |
|
## Reprocess. |
| 4611 |
|
redo A; |
| 4612 |
|
} |
| 4613 |
|
|
| 4614 |
} else { |
} else { |
| 4615 |
die "$0: $self->{state}: Unknown state"; |
die "$0: $self->{state}: Unknown state"; |
| 4616 |
} |
} |