| 15 |
CHARACTER_TOKEN |
CHARACTER_TOKEN |
| 16 |
PI_TOKEN |
PI_TOKEN |
| 17 |
ABORT_TOKEN |
ABORT_TOKEN |
| 18 |
|
END_OF_DOCTYPE_TOKEN |
| 19 |
); |
); |
| 20 |
|
|
| 21 |
our %EXPORT_TAGS = ( |
our %EXPORT_TAGS = ( |
| 28 |
CHARACTER_TOKEN |
CHARACTER_TOKEN |
| 29 |
PI_TOKEN |
PI_TOKEN |
| 30 |
ABORT_TOKEN |
ABORT_TOKEN |
| 31 |
|
END_OF_DOCTYPE_TOKEN |
| 32 |
)], |
)], |
| 33 |
); |
); |
| 34 |
} |
} |
| 45 |
sub CHARACTER_TOKEN () { 6 } |
sub CHARACTER_TOKEN () { 6 } |
| 46 |
sub PI_TOKEN () { 7 } ## NOTE: XML only. |
sub PI_TOKEN () { 7 } ## NOTE: XML only. |
| 47 |
sub ABORT_TOKEN () { 8 } ## NOTE: For internal processing. |
sub ABORT_TOKEN () { 8 } ## NOTE: For internal processing. |
| 48 |
|
sub END_OF_DOCTYPE_TOKEN () { 9 } ## NOTE: XML only |
| 49 |
|
|
| 50 |
## XML5: XML5 has "empty tag token". In this implementation, it is |
## XML5: XML5 has "empty tag token". In this implementation, it is |
| 51 |
## represented as a start tag token with $self->{self_closing} flag |
## represented as a start tag token with $self->{self_closing} flag |
| 136 |
sub PI_DATA_AFTER_STATE () { 56 } |
sub PI_DATA_AFTER_STATE () { 56 } |
| 137 |
sub DOCTYPE_INTERNAL_SUBSET_STATE () { 57 } |
sub DOCTYPE_INTERNAL_SUBSET_STATE () { 57 } |
| 138 |
sub DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 58 } |
sub DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 58 } |
| 139 |
|
sub DOCTYPE_TAG_STATE () { 59 } |
| 140 |
|
sub BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 60 } |
| 141 |
|
|
| 142 |
## Tree constructor state constants (see Whatpm::HTML for the full |
## Tree constructor state constants (see Whatpm::HTML for the full |
| 143 |
## list and descriptions) |
## list and descriptions) |
| 2188 |
redo A; |
redo A; |
| 2189 |
} |
} |
| 2190 |
} elsif ($self->{state} == BOGUS_COMMENT_STATE) { |
} elsif ($self->{state} == BOGUS_COMMENT_STATE) { |
|
## (only happen if PCDATA state) |
|
|
|
|
| 2191 |
## NOTE: Unlike spec's "bogus comment state", this implementation |
## NOTE: Unlike spec's "bogus comment state", this implementation |
| 2192 |
## consumes characters one-by-one basis. |
## consumes characters one-by-one basis. |
| 2193 |
|
|
| 2194 |
if ($self->{nc} == 0x003E) { # > |
if ($self->{nc} == 0x003E) { # > |
| 2195 |
|
if ($self->{in_subset}) { |
| 2196 |
$self->{state} = DATA_STATE; |
|
| 2197 |
$self->{s_kwd} = ''; |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 2198 |
|
} else { |
| 2199 |
|
|
| 2200 |
|
$self->{state} = DATA_STATE; |
| 2201 |
|
$self->{s_kwd} = ''; |
| 2202 |
|
} |
| 2203 |
|
|
| 2204 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 2205 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 2215 |
return ($self->{ct}); # comment |
return ($self->{ct}); # comment |
| 2216 |
redo A; |
redo A; |
| 2217 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 2218 |
|
if ($self->{in_subset}) { |
| 2219 |
$self->{state} = DATA_STATE; |
|
| 2220 |
$self->{s_kwd} = ''; |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 2221 |
|
} else { |
| 2222 |
|
|
| 2223 |
|
$self->{state} = DATA_STATE; |
| 2224 |
|
$self->{s_kwd} = ''; |
| 2225 |
|
} |
| 2226 |
## reconsume |
## reconsume |
| 2227 |
|
|
| 2228 |
return ($self->{ct}); # comment |
return ($self->{ct}); # comment |
| 2249 |
redo A; |
redo A; |
| 2250 |
} |
} |
| 2251 |
} elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) { |
} elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) { |
| 2252 |
## (only happen if PCDATA state) |
## XML5: "Markup declaration state" and "DOCTYPE markup |
| 2253 |
|
## declaration state". |
| 2254 |
|
|
| 2255 |
if ($self->{nc} == 0x002D) { # - |
if ($self->{nc} == 0x002D) { # - |
| 2256 |
|
|
| 2516 |
|
|
| 2517 |
redo A; |
redo A; |
| 2518 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
|
|
|
| 2519 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment'); |
| 2520 |
$self->{state} = DATA_STATE; |
if ($self->{in_subset}) { |
| 2521 |
$self->{s_kwd} = ''; |
|
| 2522 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 2523 |
|
} else { |
| 2524 |
|
|
| 2525 |
|
$self->{state} = DATA_STATE; |
| 2526 |
|
$self->{s_kwd} = ''; |
| 2527 |
|
} |
| 2528 |
|
|
| 2529 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 2530 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 2541 |
|
|
| 2542 |
redo A; |
redo A; |
| 2543 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
|
|
|
| 2544 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment'); |
| 2545 |
$self->{state} = DATA_STATE; |
if ($self->{in_subset}) { |
| 2546 |
$self->{s_kwd} = ''; |
|
| 2547 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 2548 |
|
} else { |
| 2549 |
|
|
| 2550 |
|
$self->{state} = DATA_STATE; |
| 2551 |
|
$self->{s_kwd} = ''; |
| 2552 |
|
} |
| 2553 |
## reconsume |
## reconsume |
| 2554 |
|
|
| 2555 |
return ($self->{ct}); # comment |
return ($self->{ct}); # comment |
| 2590 |
|
|
| 2591 |
redo A; |
redo A; |
| 2592 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
|
|
|
| 2593 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment'); |
| 2594 |
$self->{state} = DATA_STATE; |
if ($self->{in_subset}) { |
| 2595 |
$self->{s_kwd} = ''; |
|
| 2596 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 2597 |
|
} else { |
| 2598 |
|
|
| 2599 |
|
$self->{state} = DATA_STATE; |
| 2600 |
|
$self->{s_kwd} = ''; |
| 2601 |
|
} |
| 2602 |
|
|
| 2603 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 2604 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 2615 |
|
|
| 2616 |
redo A; |
redo A; |
| 2617 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
|
|
|
| 2618 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment'); |
| 2619 |
$self->{state} = DATA_STATE; |
if ($self->{in_subset}) { |
| 2620 |
$self->{s_kwd} = ''; |
|
| 2621 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 2622 |
|
} else { |
| 2623 |
|
|
| 2624 |
|
$self->{state} = DATA_STATE; |
| 2625 |
|
$self->{s_kwd} = ''; |
| 2626 |
|
} |
| 2627 |
## reconsume |
## reconsume |
| 2628 |
|
|
| 2629 |
return ($self->{ct}); # comment |
return ($self->{ct}); # comment |
| 2664 |
|
|
| 2665 |
redo A; |
redo A; |
| 2666 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
|
|
|
| 2667 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment'); |
| 2668 |
$self->{state} = DATA_STATE; |
if ($self->{in_subset}) { |
| 2669 |
$self->{s_kwd} = ''; |
|
| 2670 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 2671 |
|
} else { |
| 2672 |
|
|
| 2673 |
|
$self->{state} = DATA_STATE; |
| 2674 |
|
$self->{s_kwd} = ''; |
| 2675 |
|
} |
| 2676 |
## reconsume |
## reconsume |
| 2677 |
|
|
| 2678 |
return ($self->{ct}); # comment |
return ($self->{ct}); # comment |
| 2718 |
|
|
| 2719 |
redo A; |
redo A; |
| 2720 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
|
|
|
| 2721 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment'); |
| 2722 |
$self->{state} = DATA_STATE; |
if ($self->{in_subset}) { |
| 2723 |
$self->{s_kwd} = ''; |
|
| 2724 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 2725 |
|
} else { |
| 2726 |
|
|
| 2727 |
|
$self->{state} = DATA_STATE; |
| 2728 |
|
$self->{s_kwd} = ''; |
| 2729 |
|
} |
| 2730 |
## reconsume |
## reconsume |
| 2731 |
|
|
| 2732 |
return ($self->{ct}); # comment |
return ($self->{ct}); # comment |
| 2751 |
} |
} |
| 2752 |
} elsif ($self->{state} == COMMENT_END_STATE) { |
} elsif ($self->{state} == COMMENT_END_STATE) { |
| 2753 |
if ($self->{nc} == 0x003E) { # > |
if ($self->{nc} == 0x003E) { # > |
| 2754 |
|
if ($self->{in_subset}) { |
| 2755 |
$self->{state} = DATA_STATE; |
|
| 2756 |
$self->{s_kwd} = ''; |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 2757 |
|
} else { |
| 2758 |
|
|
| 2759 |
|
$self->{state} = DATA_STATE; |
| 2760 |
|
$self->{s_kwd} = ''; |
| 2761 |
|
} |
| 2762 |
|
|
| 2763 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 2764 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 2795 |
|
|
| 2796 |
redo A; |
redo A; |
| 2797 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
|
|
|
| 2798 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment'); |
| 2799 |
$self->{state} = DATA_STATE; |
if ($self->{in_subset}) { |
| 2800 |
$self->{s_kwd} = ''; |
|
| 2801 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 2802 |
|
} else { |
| 2803 |
|
|
| 2804 |
|
$self->{state} = DATA_STATE; |
| 2805 |
|
$self->{s_kwd} = ''; |
| 2806 |
|
} |
| 2807 |
## reconsume |
## reconsume |
| 2808 |
|
|
| 2809 |
return ($self->{ct}); # comment |
return ($self->{ct}); # comment |
| 2907 |
|
|
| 2908 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name'); |
| 2909 |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 2910 |
|
$self->{ct}->{has_internal_subset} = 1; # DOCTYPE |
| 2911 |
|
$self->{in_subset} = 1; |
| 2912 |
|
|
| 2913 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 2914 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 2920 |
$self->{set_nc}->($self); |
$self->{set_nc}->($self); |
| 2921 |
} |
} |
| 2922 |
|
|
| 2923 |
|
return ($self->{ct}); # DOCTYPE |
| 2924 |
redo A; |
redo A; |
| 2925 |
} else { |
} else { |
| 2926 |
|
|
| 2993 |
} elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [ |
} elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [ |
| 2994 |
|
|
| 2995 |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 2996 |
|
$self->{ct}->{has_internal_subset} = 1; # DOCTYPE |
| 2997 |
|
$self->{in_subset} = 1; |
| 2998 |
|
|
| 2999 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3000 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 3006 |
$self->{set_nc}->($self); |
$self->{set_nc}->($self); |
| 3007 |
} |
} |
| 3008 |
|
|
| 3009 |
|
return ($self->{ct}); # DOCTYPE |
| 3010 |
redo A; |
redo A; |
| 3011 |
} else { |
} else { |
| 3012 |
|
|
| 3113 |
|
|
| 3114 |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 3115 |
$self->{ct}->{has_internal_subset} = 1; # DOCTYPE |
$self->{ct}->{has_internal_subset} = 1; # DOCTYPE |
| 3116 |
|
$self->{in_subset} = 1; |
| 3117 |
|
|
| 3118 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3119 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 3125 |
$self->{set_nc}->($self); |
$self->{set_nc}->($self); |
| 3126 |
} |
} |
| 3127 |
|
|
| 3128 |
|
return ($self->{ct}); # DOCTYPE |
| 3129 |
redo A; |
redo A; |
| 3130 |
} else { |
} else { |
| 3131 |
|
|
| 3369 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal'); |
| 3370 |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 3371 |
$self->{ct}->{has_internal_subset} = 1; # DOCTYPE |
$self->{ct}->{has_internal_subset} = 1; # DOCTYPE |
| 3372 |
|
$self->{in_subset} = 1; |
| 3373 |
|
|
| 3374 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3375 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 3381 |
$self->{set_nc}->($self); |
$self->{set_nc}->($self); |
| 3382 |
} |
} |
| 3383 |
|
|
| 3384 |
|
return ($self->{ct}); # DOCTYPE |
| 3385 |
redo A; |
redo A; |
| 3386 |
} else { |
} else { |
| 3387 |
|
|
| 3633 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal'); |
| 3634 |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 3635 |
$self->{ct}->{has_internal_subset} = 1; # DOCTYPE |
$self->{ct}->{has_internal_subset} = 1; # DOCTYPE |
| 3636 |
|
$self->{in_subset} = 1; |
| 3637 |
|
|
| 3638 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3639 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 3645 |
$self->{set_nc}->($self); |
$self->{set_nc}->($self); |
| 3646 |
} |
} |
| 3647 |
|
|
| 3648 |
|
return ($self->{ct}); # DOCTYPE |
| 3649 |
redo A; |
redo A; |
| 3650 |
} else { |
} else { |
| 3651 |
|
|
| 3753 |
|
|
| 3754 |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 3755 |
$self->{ct}->{has_internal_subset} = 1; # DOCTYPE |
$self->{ct}->{has_internal_subset} = 1; # DOCTYPE |
| 3756 |
|
$self->{in_subset} = 1; |
| 3757 |
|
|
| 3758 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3759 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 3765 |
$self->{set_nc}->($self); |
$self->{set_nc}->($self); |
| 3766 |
} |
} |
| 3767 |
|
|
| 3768 |
|
return ($self->{ct}); # DOCTYPE |
| 3769 |
redo A; |
redo A; |
| 3770 |
} else { |
} else { |
| 3771 |
|
|
| 3978 |
|
|
| 3979 |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 3980 |
$self->{ct}->{has_internal_subset} = 1; # DOCTYPE |
$self->{ct}->{has_internal_subset} = 1; # DOCTYPE |
| 3981 |
|
$self->{in_subset} = 1; |
| 3982 |
|
|
| 3983 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3984 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 3990 |
$self->{set_nc}->($self); |
$self->{set_nc}->($self); |
| 3991 |
} |
} |
| 3992 |
|
|
| 3993 |
|
return ($self->{ct}); # DOCTYPE |
| 3994 |
redo A; |
redo A; |
| 3995 |
} else { |
} else { |
| 3996 |
|
|
| 4032 |
|
|
| 4033 |
redo A; |
redo A; |
| 4034 |
} elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [ |
} elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [ |
| 4035 |
if ($self->{ct}->{has_internal_subset}) { # DOCTYPE |
|
| 4036 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 4037 |
## Stay in the state. |
$self->{ct}->{has_internal_subset} = 1; # DOCTYPE |
| 4038 |
|
$self->{in_subset} = 1; |
| 4039 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
|
|
$self->{line_prev} = $self->{line}; |
|
|
$self->{column_prev} = $self->{column}; |
|
|
$self->{column}++; |
|
|
$self->{nc} |
|
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
|
|
} else { |
|
|
$self->{set_nc}->($self); |
|
|
} |
|
|
|
|
|
redo A; |
|
|
} else { |
|
|
|
|
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
|
|
$self->{ct}->{has_internal_subset} = 1; # DOCTYPE |
|
|
|
|
| 4040 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4041 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 4042 |
$self->{column_prev} = $self->{column}; |
$self->{column_prev} = $self->{column}; |
| 4047 |
$self->{set_nc}->($self); |
$self->{set_nc}->($self); |
| 4048 |
} |
} |
| 4049 |
|
|
| 4050 |
redo A; |
return ($self->{ct}); # DOCTYPE |
| 4051 |
} |
redo A; |
| 4052 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 4053 |
|
|
| 4054 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 4774 |
redo A; |
redo A; |
| 4775 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 4776 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type |
| 4777 |
$self->{state} = DATA_STATE; |
if ($self->{in_subset}) { |
| 4778 |
$self->{s_kwd} = ''; |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 4779 |
|
} else { |
| 4780 |
|
$self->{state} = DATA_STATE; |
| 4781 |
|
$self->{s_kwd} = ''; |
| 4782 |
|
} |
| 4783 |
## Reconsume. |
## Reconsume. |
| 4784 |
return ($self->{ct}); # pi |
return ($self->{ct}); # pi |
| 4785 |
redo A; |
redo A; |
| 4850 |
redo A; |
redo A; |
| 4851 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 4852 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type |
| 4853 |
$self->{state} = DATA_STATE; |
if ($self->{in_subset}) { |
| 4854 |
$self->{s_kwd} = ''; |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 4855 |
|
} else { |
| 4856 |
|
$self->{state} = DATA_STATE; |
| 4857 |
|
$self->{s_kwd} = ''; |
| 4858 |
|
} |
| 4859 |
## Reprocess. |
## Reprocess. |
| 4860 |
return ($self->{ct}); # pi |
return ($self->{ct}); # pi |
| 4861 |
redo A; |
redo A; |
| 4880 |
} |
} |
| 4881 |
} elsif ($self->{state} == PI_AFTER_STATE) { |
} elsif ($self->{state} == PI_AFTER_STATE) { |
| 4882 |
if ($self->{nc} == 0x003E) { # > |
if ($self->{nc} == 0x003E) { # > |
| 4883 |
$self->{state} = DATA_STATE; |
if ($self->{in_subset}) { |
| 4884 |
$self->{s_kwd} = ''; |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 4885 |
|
} else { |
| 4886 |
|
$self->{state} = DATA_STATE; |
| 4887 |
|
$self->{s_kwd} = ''; |
| 4888 |
|
} |
| 4889 |
|
|
| 4890 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4891 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 4930 |
} elsif ($self->{state} == PI_DATA_AFTER_STATE) { |
} elsif ($self->{state} == PI_DATA_AFTER_STATE) { |
| 4931 |
## XML5: Same as "pi after state" in XML5 |
## XML5: Same as "pi after state" in XML5 |
| 4932 |
if ($self->{nc} == 0x003E) { # > |
if ($self->{nc} == 0x003E) { # > |
| 4933 |
$self->{state} = DATA_STATE; |
if ($self->{in_subset}) { |
| 4934 |
$self->{s_kwd} = ''; |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 4935 |
|
} else { |
| 4936 |
|
$self->{state} = DATA_STATE; |
| 4937 |
|
$self->{s_kwd} = ''; |
| 4938 |
|
} |
| 4939 |
|
|
| 4940 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4941 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 4973 |
|
|
| 4974 |
} elsif ($self->{state} == DOCTYPE_INTERNAL_SUBSET_STATE) { |
} elsif ($self->{state} == DOCTYPE_INTERNAL_SUBSET_STATE) { |
| 4975 |
if ($self->{nc} == 0x003C) { # < |
if ($self->{nc} == 0x003C) { # < |
| 4976 |
## TODO: |
$self->{state} = DOCTYPE_TAG_STATE; |
| 4977 |
|
|
| 4978 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4979 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 5003 |
|
|
| 5004 |
redo A; |
redo A; |
| 5005 |
} elsif ($self->{nc} == 0x005D) { # ] |
} elsif ($self->{nc} == 0x005D) { # ] |
| 5006 |
|
delete $self->{in_subset}; |
| 5007 |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_AFTER_STATE; |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_AFTER_STATE; |
| 5008 |
|
|
| 5009 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 5033 |
redo A; |
redo A; |
| 5034 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 5035 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed internal subset'); ## TODO: type |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed internal subset'); ## TODO: type |
| 5036 |
|
delete $self->{in_subset}; |
| 5037 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 5038 |
$self->{s_kwd} = ''; |
$self->{s_kwd} = ''; |
| 5039 |
## Reconsume. |
## Reconsume. |
| 5040 |
return ($self->{ct}); # DOCTYPE |
return ({type => END_OF_DOCTYPE_TOKEN}); |
| 5041 |
redo A; |
redo A; |
| 5042 |
} else { |
} else { |
| 5043 |
unless ($self->{internal_subset_tainted}) { |
unless ($self->{internal_subset_tainted}) { |
| 5074 |
$self->{set_nc}->($self); |
$self->{set_nc}->($self); |
| 5075 |
} |
} |
| 5076 |
|
|
| 5077 |
return ($self->{ct}); # DOCTYPE |
return ({type => END_OF_DOCTYPE_TOKEN}); |
| 5078 |
redo A; |
redo A; |
| 5079 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 5080 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE'); |
| 5081 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 5082 |
$self->{s_kwd} = ''; |
$self->{s_kwd} = ''; |
| 5083 |
## Reconsume. |
## Reconsume. |
| 5084 |
return ($self->{ct}); # DOCTYPE |
return ({type => END_OF_DOCTYPE_TOKEN}); |
| 5085 |
redo A; |
redo A; |
| 5086 |
} else { |
} else { |
| 5087 |
## XML5: No parse error and stay in the state. |
## XML5: No parse error and stay in the state. |
| 5088 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'string after internal subset'); ## TODO: type |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'string after internal subset'); ## TODO: type |
| 5089 |
|
|
| 5090 |
$self->{state} = BOGUS_DOCTYPE_STATE; |
$self->{state} = BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE; |
| 5091 |
|
|
| 5092 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 5093 |
|
$self->{line_prev} = $self->{line}; |
| 5094 |
|
$self->{column_prev} = $self->{column}; |
| 5095 |
|
$self->{column}++; |
| 5096 |
|
$self->{nc} |
| 5097 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 5098 |
|
} else { |
| 5099 |
|
$self->{set_nc}->($self); |
| 5100 |
|
} |
| 5101 |
|
|
| 5102 |
|
redo A; |
| 5103 |
|
} |
| 5104 |
|
} elsif ($self->{state} == BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE) { |
| 5105 |
|
if ($self->{nc} == 0x003E) { # > |
| 5106 |
|
$self->{state} = DATA_STATE; |
| 5107 |
|
$self->{s_kwd} = ''; |
| 5108 |
|
|
| 5109 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 5110 |
|
$self->{line_prev} = $self->{line}; |
| 5111 |
|
$self->{column_prev} = $self->{column}; |
| 5112 |
|
$self->{column}++; |
| 5113 |
|
$self->{nc} |
| 5114 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 5115 |
|
} else { |
| 5116 |
|
$self->{set_nc}->($self); |
| 5117 |
|
} |
| 5118 |
|
|
| 5119 |
|
return ({type => END_OF_DOCTYPE_TOKEN}); |
| 5120 |
|
redo A; |
| 5121 |
|
} elsif ($self->{nc} == -1) { |
| 5122 |
|
$self->{state} = DATA_STATE; |
| 5123 |
|
$self->{s_kwd} = ''; |
| 5124 |
|
## Reconsume. |
| 5125 |
|
return ({type => END_OF_DOCTYPE_TOKEN}); |
| 5126 |
|
redo A; |
| 5127 |
|
} else { |
| 5128 |
|
## Stay in the state. |
| 5129 |
|
|
| 5130 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 5131 |
|
$self->{line_prev} = $self->{line}; |
| 5132 |
|
$self->{column_prev} = $self->{column}; |
| 5133 |
|
$self->{column}++; |
| 5134 |
|
$self->{nc} |
| 5135 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 5136 |
|
} else { |
| 5137 |
|
$self->{set_nc}->($self); |
| 5138 |
|
} |
| 5139 |
|
|
| 5140 |
|
redo A; |
| 5141 |
|
} |
| 5142 |
|
} elsif ($self->{state} == DOCTYPE_TAG_STATE) { |
| 5143 |
|
if ($self->{nc} == 0x0021) { # ! |
| 5144 |
|
$self->{state} = MARKUP_DECLARATION_OPEN_STATE; |
| 5145 |
|
|
| 5146 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 5147 |
|
$self->{line_prev} = $self->{line}; |
| 5148 |
|
$self->{column_prev} = $self->{column}; |
| 5149 |
|
$self->{column}++; |
| 5150 |
|
$self->{nc} |
| 5151 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 5152 |
|
} else { |
| 5153 |
|
$self->{set_nc}->($self); |
| 5154 |
|
} |
| 5155 |
|
|
| 5156 |
|
redo A; |
| 5157 |
|
} elsif ($self->{nc} == 0x003F) { # ? |
| 5158 |
|
$self->{state} = PI_STATE; |
| 5159 |
|
|
| 5160 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 5161 |
|
$self->{line_prev} = $self->{line}; |
| 5162 |
|
$self->{column_prev} = $self->{column}; |
| 5163 |
|
$self->{column}++; |
| 5164 |
|
$self->{nc} |
| 5165 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 5166 |
|
} else { |
| 5167 |
|
$self->{set_nc}->($self); |
| 5168 |
|
} |
| 5169 |
|
|
| 5170 |
|
redo A; |
| 5171 |
|
} elsif ($self->{nc} == -1) { |
| 5172 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago'); |
| 5173 |
|
$self->{state} = DATA_STATE; |
| 5174 |
|
$self->{s_kwd} = ''; |
| 5175 |
|
## Reconsume. |
| 5176 |
|
redo A; |
| 5177 |
|
} else { |
| 5178 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago', ## XML5: Not a parse error. |
| 5179 |
|
line => $self->{line_prev}, |
| 5180 |
|
column => $self->{column_prev}); |
| 5181 |
|
$self->{state} = BOGUS_COMMENT_STATE; |
| 5182 |
|
$self->{ct} = {type => COMMENT_TOKEN, |
| 5183 |
|
data => '', |
| 5184 |
|
}; ## NOTE: Will be discarded. |
| 5185 |
|
|
| 5186 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 5187 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |