803 |
sub BOGUS_DOCTYPE_STATE () { 32 } |
sub BOGUS_DOCTYPE_STATE () { 32 } |
804 |
sub AFTER_ATTRIBUTE_VALUE_QUOTED_STATE () { 33 } |
sub AFTER_ATTRIBUTE_VALUE_QUOTED_STATE () { 33 } |
805 |
sub SELF_CLOSING_START_TAG_STATE () { 34 } |
sub SELF_CLOSING_START_TAG_STATE () { 34 } |
806 |
sub CDATA_BLOCK_STATE () { 35 } |
sub CDATA_SECTION_STATE () { 35 } |
807 |
sub MD_HYPHEN_STATE () { 36 } # "markup declaration open state" in the spec |
sub MD_HYPHEN_STATE () { 36 } # "markup declaration open state" in the spec |
808 |
sub MD_DOCTYPE_STATE () { 37 } # "markup declaration open state" in the spec |
sub MD_DOCTYPE_STATE () { 37 } # "markup declaration open state" in the spec |
809 |
sub MD_CDATA_STATE () { 38 } # "markup declaration open state" in the spec |
sub MD_CDATA_STATE () { 38 } # "markup declaration open state" in the spec |
810 |
sub CDATA_PCDATA_CLOSE_TAG_STATE () { 39 } # "close tag open state" in the spec |
sub CDATA_PCDATA_CLOSE_TAG_STATE () { 39 } # "close tag open state" in the spec |
811 |
|
sub CDATA_SECTION_MSE1_STATE () { 40 } # "CDATA section state" in the spec |
812 |
|
sub CDATA_SECTION_MSE2_STATE () { 41 } # "CDATA section state" in the spec |
813 |
|
|
814 |
sub DOCTYPE_TOKEN () { 1 } |
sub DOCTYPE_TOKEN () { 1 } |
815 |
sub COMMENT_TOKEN () { 2 } |
sub COMMENT_TOKEN () { 2 } |
864 |
$self->{state} = DATA_STATE; # MUST |
$self->{state} = DATA_STATE; # MUST |
865 |
#$self->{state_keyword}; # initialized when used |
#$self->{state_keyword}; # initialized when used |
866 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # be |
$self->{content_model} = PCDATA_CONTENT_MODEL; # be |
867 |
undef $self->{current_token}; # start tag, end tag, comment, or DOCTYPE |
undef $self->{current_token}; |
868 |
undef $self->{current_attribute}; |
undef $self->{current_attribute}; |
869 |
undef $self->{last_emitted_start_tag_name}; |
undef $self->{last_emitted_start_tag_name}; |
870 |
undef $self->{last_attribute_value_state}; |
undef $self->{last_attribute_value_state}; |
2153 |
} elsif ($self->{state_keyword} eq '[CDATA' and |
} elsif ($self->{state_keyword} eq '[CDATA' and |
2154 |
$self->{next_char} == 0x005B) { # [ |
$self->{next_char} == 0x005B) { # [ |
2155 |
!!!cp (135.2); |
!!!cp (135.2); |
2156 |
$self->{state} = CDATA_BLOCK_STATE; |
$self->{current_token} = {type => CHARACTER_TOKEN, |
2157 |
|
data => '', |
2158 |
|
line => $self->{line_prev}, |
2159 |
|
column => $self->{column_prev} - 7}; |
2160 |
|
$self->{state} = CDATA_SECTION_STATE; |
2161 |
!!!next-input-character; |
!!!next-input-character; |
2162 |
redo A; |
redo A; |
2163 |
} else { |
} else { |
2882 |
!!!next-input-character; |
!!!next-input-character; |
2883 |
redo A; |
redo A; |
2884 |
} |
} |
2885 |
} elsif ($self->{state} == CDATA_BLOCK_STATE) { |
} elsif ($self->{state} == CDATA_SECTION_STATE) { |
2886 |
my $s = ''; |
## NOTE: "CDATA section state" in the state is jointly implemented |
2887 |
|
## by three states, |CDATA_SECTION_STATE|, |CDATA_SECTION_MSE1_STATE|, |
2888 |
|
## and |CDATA_SECTION_MSE2_STATE|. |
2889 |
|
|
2890 |
my ($l, $c) = ($self->{line}, $self->{column}); |
if ($self->{next_char} == 0x005D) { # ] |
2891 |
|
!!!cp (221.1); |
2892 |
CS: while ($self->{next_char} != -1) { |
$self->{state} = CDATA_SECTION_MSE1_STATE; |
2893 |
if ($self->{next_char} == 0x005D) { # ] |
!!!next-input-character; |
2894 |
!!!next-input-character; |
redo A; |
2895 |
if ($self->{next_char} == 0x005D) { # ] |
} elsif ($self->{next_char} == -1) { |
2896 |
!!!next-input-character; |
$self->{state} = DATA_STATE; |
2897 |
MDC: { |
!!!next-input-character; |
2898 |
if ($self->{next_char} == 0x003E) { # > |
if (length $self->{current_token}->{data}) { # character |
2899 |
!!!cp (221.1); |
!!!cp (221.2); |
2900 |
!!!next-input-character; |
!!!emit ($self->{current_token}); # character |
|
last CS; |
|
|
} elsif ($self->{next_char} == 0x005D) { # ] |
|
|
!!!cp (221.2); |
|
|
$s .= ']'; |
|
|
!!!next-input-character; |
|
|
redo MDC; |
|
|
} else { |
|
|
!!!cp (221.3); |
|
|
$s .= ']]'; |
|
|
# |
|
|
} |
|
|
} # MDC |
|
|
} else { |
|
|
!!!cp (221.4); |
|
|
$s .= ']'; |
|
|
# |
|
|
} |
|
2901 |
} else { |
} else { |
2902 |
!!!cp (221.5); |
!!!cp (221.3); |
2903 |
# |
## No token to emit. $self->{current_token} is discarded. |
2904 |
} |
} |
2905 |
$s .= chr $self->{next_char}; |
redo A; |
2906 |
|
} else { |
2907 |
|
!!!cp (221.4); |
2908 |
|
$self->{current_token}->{data} .= chr $self->{next_char}; |
2909 |
|
## Stay in the state. |
2910 |
!!!next-input-character; |
!!!next-input-character; |
2911 |
} # CS |
redo A; |
2912 |
|
} |
|
$self->{state} = DATA_STATE; |
|
|
## next-input-character done or EOF, which is reconsumed. |
|
2913 |
|
|
2914 |
if (length $s) { |
## ISSUE: "text tokens" in spec. |
2915 |
|
} elsif ($self->{state} == CDATA_SECTION_MSE1_STATE) { |
2916 |
|
if ($self->{next_char} == 0x005D) { # ] |
2917 |
|
!!!cp (221.5); |
2918 |
|
$self->{state} = CDATA_SECTION_MSE2_STATE; |
2919 |
|
!!!next-input-character; |
2920 |
|
redo A; |
2921 |
|
} else { |
2922 |
!!!cp (221.6); |
!!!cp (221.6); |
2923 |
!!!emit ({type => CHARACTER_TOKEN, data => $s, |
$self->{current_token}->{data} .= ']'; |
2924 |
line => $l, column => $c}); |
$self->{state} = CDATA_SECTION_STATE; |
2925 |
|
## Reconsume. |
2926 |
|
redo A; |
2927 |
|
} |
2928 |
|
} elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) { |
2929 |
|
if ($self->{next_char} == 0x003E) { # > |
2930 |
|
$self->{state} = DATA_STATE; |
2931 |
|
!!!next-input-character; |
2932 |
|
if (length $self->{current_token}->{data}) { # character |
2933 |
|
!!!cp (221.7); |
2934 |
|
!!!emit ($self->{current_token}); # character |
2935 |
|
} else { |
2936 |
|
!!!cp (221.8); |
2937 |
|
## No token to emit. $self->{current_token} is discarded. |
2938 |
|
} |
2939 |
|
redo A; |
2940 |
|
} elsif ($self->{next_char} == 0x005D) { # ] |
2941 |
|
!!!cp (221.9); # character |
2942 |
|
$self->{current_token}->{data} .= ']'; ## Add first "]" of "]]]". |
2943 |
|
## Stay in the state. |
2944 |
|
!!!next-input-character; |
2945 |
|
redo A; |
2946 |
} else { |
} else { |
2947 |
!!!cp (221.7); |
!!!cp (221.11); |
2948 |
|
$self->{current_token}->{data} .= ']]'; # character |
2949 |
|
$self->{state} = CDATA_SECTION_STATE; |
2950 |
|
## Reconsume. |
2951 |
|
redo A; |
2952 |
} |
} |
|
|
|
|
redo A; |
|
|
|
|
|
## ISSUE: "text tokens" in spec. |
|
|
## TODO: Streaming support |
|
2953 |
} else { |
} else { |
2954 |
die "$0: $self->{state}: Unknown state"; |
die "$0: $self->{state}: Unknown state"; |
2955 |
} |
} |