| 105 |
sub COMMENT_START_DASH_STATE () { 15 } |
sub COMMENT_START_DASH_STATE () { 15 } |
| 106 |
sub COMMENT_STATE () { 16 } |
sub COMMENT_STATE () { 16 } |
| 107 |
sub COMMENT_END_STATE () { 17 } |
sub COMMENT_END_STATE () { 17 } |
| 108 |
|
sub COMMENT_END_BANG_STATE () { 102 } |
| 109 |
|
sub COMMENT_END_SPACE_STATE () { 103 } ## LAST |
| 110 |
sub COMMENT_END_DASH_STATE () { 18 } |
sub COMMENT_END_DASH_STATE () { 18 } |
| 111 |
sub BOGUS_COMMENT_STATE () { 19 } |
sub BOGUS_COMMENT_STATE () { 19 } |
| 112 |
sub DOCTYPE_STATE () { 20 } |
sub DOCTYPE_STATE () { 20 } |
| 1250 |
if ({ |
if ({ |
| 1251 |
0x0022 => 1, # " |
0x0022 => 1, # " |
| 1252 |
0x0027 => 1, # ' |
0x0027 => 1, # ' |
| 1253 |
|
0x003C => 1, # < |
| 1254 |
0x003D => 1, # = |
0x003D => 1, # = |
| 1255 |
}->{$self->{nc}}) { |
}->{$self->{nc}}) { |
| 1256 |
|
|
| 1433 |
|
|
| 1434 |
redo A; |
redo A; |
| 1435 |
} else { |
} else { |
| 1436 |
if ($self->{nc} == 0x0022 or # " |
if ({ |
| 1437 |
$self->{nc} == 0x0027) { # ' |
0x0022 => 1, # " |
| 1438 |
|
0x0027 => 1, # ' |
| 1439 |
|
0x003C => 1, # < |
| 1440 |
|
}->{$self->{nc}}) { |
| 1441 |
|
|
| 1442 |
## XML5: Not a parse error. |
## XML5: Not a parse error. |
| 1443 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name'); |
| 1608 |
|
|
| 1609 |
} |
} |
| 1610 |
|
|
| 1611 |
if ($self->{nc} == 0x0022 or # " |
if ({ |
| 1612 |
$self->{nc} == 0x0027) { # ' |
0x0022 => 1, # " |
| 1613 |
|
0x0027 => 1, # ' |
| 1614 |
|
0x003C => 1, # < |
| 1615 |
|
}->{$self->{nc}}) { |
| 1616 |
|
|
| 1617 |
## XML5: Not a parse error. |
## XML5: Not a parse error. |
| 1618 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name'); |
| 1749 |
|
|
| 1750 |
redo A; |
redo A; |
| 1751 |
} else { |
} else { |
| 1752 |
if ($self->{nc} == 0x003D) { # = |
if ($self->{nc} == 0x003D or $self->{nc} == 0x003C) { # =, < |
| 1753 |
|
|
| 1754 |
## XML5: Not a parse error. |
## XML5: Not a parse error. |
| 1755 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value'); |
| 1825 |
} |
} |
| 1826 |
|
|
| 1827 |
redo A; |
redo A; |
| 1828 |
|
} elsif ($self->{is_xml} and |
| 1829 |
|
$is_space->{$self->{nc}}) { |
| 1830 |
|
|
| 1831 |
|
$self->{ca}->{value} .= ' '; |
| 1832 |
|
## Stay in the state. |
| 1833 |
|
|
| 1834 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 1835 |
|
$self->{line_prev} = $self->{line}; |
| 1836 |
|
$self->{column_prev} = $self->{column}; |
| 1837 |
|
$self->{column}++; |
| 1838 |
|
$self->{nc} |
| 1839 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 1840 |
|
} else { |
| 1841 |
|
$self->{set_nc}->($self); |
| 1842 |
|
} |
| 1843 |
|
|
| 1844 |
|
redo A; |
| 1845 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 1846 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed attribute value'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed attribute value'); |
| 1847 |
if ($self->{ct}->{type} == START_TAG_TOKEN) { |
if ($self->{ct}->{type} == START_TAG_TOKEN) { |
| 1889 |
} |
} |
| 1890 |
$self->{ca}->{value} .= chr ($self->{nc}); |
$self->{ca}->{value} .= chr ($self->{nc}); |
| 1891 |
$self->{read_until}->($self->{ca}->{value}, |
$self->{read_until}->($self->{ca}->{value}, |
| 1892 |
q["&<], |
qq["&<\x09\x0C\x20], |
| 1893 |
length $self->{ca}->{value}); |
length $self->{ca}->{value}); |
| 1894 |
|
|
| 1895 |
## Stay in the state |
## Stay in the state |
| 1956 |
} |
} |
| 1957 |
|
|
| 1958 |
redo A; |
redo A; |
| 1959 |
|
} elsif ($self->{is_xml} and |
| 1960 |
|
$is_space->{$self->{nc}}) { |
| 1961 |
|
|
| 1962 |
|
$self->{ca}->{value} .= ' '; |
| 1963 |
|
## Stay in the state. |
| 1964 |
|
|
| 1965 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 1966 |
|
$self->{line_prev} = $self->{line}; |
| 1967 |
|
$self->{column_prev} = $self->{column}; |
| 1968 |
|
$self->{column}++; |
| 1969 |
|
$self->{nc} |
| 1970 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 1971 |
|
} else { |
| 1972 |
|
$self->{set_nc}->($self); |
| 1973 |
|
} |
| 1974 |
|
|
| 1975 |
|
redo A; |
| 1976 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 1977 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed attribute value'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed attribute value'); |
| 1978 |
if ($self->{ct}->{type} == START_TAG_TOKEN) { |
if ($self->{ct}->{type} == START_TAG_TOKEN) { |
| 2020 |
} |
} |
| 2021 |
$self->{ca}->{value} .= chr ($self->{nc}); |
$self->{ca}->{value} .= chr ($self->{nc}); |
| 2022 |
$self->{read_until}->($self->{ca}->{value}, |
$self->{read_until}->($self->{ca}->{value}, |
| 2023 |
q['&<], |
qq['&<\x09\x0C\x20], |
| 2024 |
length $self->{ca}->{value}); |
length $self->{ca}->{value}); |
| 2025 |
|
|
| 2026 |
## Stay in the state |
## Stay in the state |
| 2192 |
0x0022 => 1, # " |
0x0022 => 1, # " |
| 2193 |
0x0027 => 1, # ' |
0x0027 => 1, # ' |
| 2194 |
0x003D => 1, # = |
0x003D => 1, # = |
| 2195 |
|
0x003C => 1, # < |
| 2196 |
}->{$self->{nc}}) { |
}->{$self->{nc}}) { |
| 2197 |
|
|
| 2198 |
## XML5: Not a parse error. |
## XML5: Not a parse error. |
| 2202 |
} |
} |
| 2203 |
$self->{ca}->{value} .= chr ($self->{nc}); |
$self->{ca}->{value} .= chr ($self->{nc}); |
| 2204 |
$self->{read_until}->($self->{ca}->{value}, |
$self->{read_until}->($self->{ca}->{value}, |
| 2205 |
q["'=& >], |
qq["'=& \x09\x0C>], |
| 2206 |
length $self->{ca}->{value}); |
length $self->{ca}->{value}); |
| 2207 |
|
|
| 2208 |
## Stay in the state |
## Stay in the state |
| 2944 |
|
|
| 2945 |
redo A; |
redo A; |
| 2946 |
} |
} |
| 2947 |
} elsif ($self->{state} == COMMENT_END_STATE) { |
} elsif ($self->{state} == COMMENT_END_STATE or |
| 2948 |
|
$self->{state} == COMMENT_END_BANG_STATE) { |
| 2949 |
## XML5: "Comment end state" and "DOCTYPE comment end state". |
## XML5: "Comment end state" and "DOCTYPE comment end state". |
| 2950 |
|
## (No comment end bang state.) |
| 2951 |
|
|
| 2952 |
if ($self->{nc} == 0x003E) { # > |
if ($self->{nc} == 0x003E) { # > |
| 2953 |
if ($self->{in_subset}) { |
if ($self->{in_subset}) { |
| 2974 |
|
|
| 2975 |
redo A; |
redo A; |
| 2976 |
} elsif ($self->{nc} == 0x002D) { # - |
} elsif ($self->{nc} == 0x002D) { # - |
| 2977 |
|
if ($self->{state} == COMMENT_END_BANG_STATE) { |
| 2978 |
|
|
| 2979 |
|
$self->{ct}->{data} .= '--!'; # comment |
| 2980 |
|
$self->{state} = COMMENT_END_DASH_STATE; |
| 2981 |
|
} else { |
| 2982 |
|
|
| 2983 |
|
## XML5: Not a parse error. |
| 2984 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment', |
| 2985 |
|
line => $self->{line_prev}, |
| 2986 |
|
column => $self->{column_prev}); |
| 2987 |
|
$self->{ct}->{data} .= '-'; # comment |
| 2988 |
|
## Stay in the state |
| 2989 |
|
} |
| 2990 |
|
|
| 2991 |
## XML5: Not a parse error. |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 2992 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment', |
$self->{line_prev} = $self->{line}; |
| 2993 |
line => $self->{line_prev}, |
$self->{column_prev} = $self->{column}; |
| 2994 |
column => $self->{column_prev}); |
$self->{column}++; |
| 2995 |
$self->{ct}->{data} .= '-'; # comment |
$self->{nc} |
| 2996 |
## Stay in the state |
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 2997 |
|
} else { |
| 2998 |
|
$self->{set_nc}->($self); |
| 2999 |
|
} |
| 3000 |
|
|
| 3001 |
|
redo A; |
| 3002 |
|
} elsif ($self->{state} != COMMENT_END_BANG_STATE and |
| 3003 |
|
$is_space->{$self->{nc}}) { |
| 3004 |
|
|
| 3005 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'comment end space'); # XXX error type |
| 3006 |
|
$self->{ct}->{data} .= '--' . chr ($self->{nc}); # comment |
| 3007 |
|
$self->{state} = COMMENT_END_SPACE_STATE; |
| 3008 |
|
|
| 3009 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3010 |
|
$self->{line_prev} = $self->{line}; |
| 3011 |
|
$self->{column_prev} = $self->{column}; |
| 3012 |
|
$self->{column}++; |
| 3013 |
|
$self->{nc} |
| 3014 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 3015 |
|
} else { |
| 3016 |
|
$self->{set_nc}->($self); |
| 3017 |
|
} |
| 3018 |
|
|
| 3019 |
|
redo A; |
| 3020 |
|
} elsif ($self->{state} != COMMENT_END_BANG_STATE and |
| 3021 |
|
$self->{nc} == 0x0021) { # ! |
| 3022 |
|
|
| 3023 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'comment end bang'); # XXX error type |
| 3024 |
|
$self->{state} = COMMENT_END_BANG_STATE; |
| 3025 |
|
|
| 3026 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3027 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 3044 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3045 |
$self->{s_kwd} = ''; |
$self->{s_kwd} = ''; |
| 3046 |
} |
} |
| 3047 |
## reconsume |
## Reconsume. |
| 3048 |
|
|
| 3049 |
return ($self->{ct}); # comment |
return ($self->{ct}); # comment |
| 3050 |
|
|
| 3051 |
redo A; |
redo A; |
| 3052 |
} else { |
} else { |
| 3053 |
|
|
| 3054 |
## XML5: Not a parse error. |
if ($self->{state} == COMMENT_END_BANG_STATE) { |
| 3055 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment', |
$self->{ct}->{data} .= '--!' . chr ($self->{nc}); # comment |
| 3056 |
line => $self->{line_prev}, |
} else { |
| 3057 |
column => $self->{column_prev}); |
$self->{ct}->{data} .= '--' . chr ($self->{nc}); # comment |
| 3058 |
$self->{ct}->{data} .= '--' . chr ($self->{nc}); # comment |
} |
| 3059 |
$self->{state} = COMMENT_STATE; |
$self->{state} = COMMENT_STATE; |
| 3060 |
|
|
| 3061 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3070 |
|
|
| 3071 |
redo A; |
redo A; |
| 3072 |
} |
} |
| 3073 |
|
} elsif ($self->{state} == COMMENT_END_SPACE_STATE) { |
| 3074 |
|
## XML5: Not exist. |
| 3075 |
|
|
| 3076 |
|
if ($self->{nc} == 0x003E) { # > |
| 3077 |
|
if ($self->{in_subset}) { |
| 3078 |
|
|
| 3079 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 3080 |
|
} else { |
| 3081 |
|
|
| 3082 |
|
$self->{state} = DATA_STATE; |
| 3083 |
|
$self->{s_kwd} = ''; |
| 3084 |
|
} |
| 3085 |
|
|
| 3086 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3087 |
|
$self->{line_prev} = $self->{line}; |
| 3088 |
|
$self->{column_prev} = $self->{column}; |
| 3089 |
|
$self->{column}++; |
| 3090 |
|
$self->{nc} |
| 3091 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 3092 |
|
} else { |
| 3093 |
|
$self->{set_nc}->($self); |
| 3094 |
|
} |
| 3095 |
|
|
| 3096 |
|
|
| 3097 |
|
return ($self->{ct}); # comment |
| 3098 |
|
|
| 3099 |
|
redo A; |
| 3100 |
|
} elsif ($is_space->{$self->{nc}}) { |
| 3101 |
|
|
| 3102 |
|
$self->{ct}->{data} .= chr ($self->{nc}); # comment |
| 3103 |
|
## Stay in the state. |
| 3104 |
|
|
| 3105 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3106 |
|
$self->{line_prev} = $self->{line}; |
| 3107 |
|
$self->{column_prev} = $self->{column}; |
| 3108 |
|
$self->{column}++; |
| 3109 |
|
$self->{nc} |
| 3110 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 3111 |
|
} else { |
| 3112 |
|
$self->{set_nc}->($self); |
| 3113 |
|
} |
| 3114 |
|
|
| 3115 |
|
redo A; |
| 3116 |
|
} elsif ($self->{nc} == -1) { |
| 3117 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment'); |
| 3118 |
|
if ($self->{in_subset}) { |
| 3119 |
|
|
| 3120 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
| 3121 |
|
} else { |
| 3122 |
|
|
| 3123 |
|
$self->{state} = DATA_STATE; |
| 3124 |
|
$self->{s_kwd} = ''; |
| 3125 |
|
} |
| 3126 |
|
## Reconsume. |
| 3127 |
|
|
| 3128 |
|
return ($self->{ct}); # comment |
| 3129 |
|
|
| 3130 |
|
redo A; |
| 3131 |
|
} else { |
| 3132 |
|
|
| 3133 |
|
$self->{ct}->{data} .= chr ($self->{nc}); # comment |
| 3134 |
|
$self->{state} = COMMENT_STATE; |
| 3135 |
|
|
| 3136 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3137 |
|
$self->{line_prev} = $self->{line}; |
| 3138 |
|
$self->{column_prev} = $self->{column}; |
| 3139 |
|
$self->{column}++; |
| 3140 |
|
$self->{nc} |
| 3141 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 3142 |
|
} else { |
| 3143 |
|
$self->{set_nc}->($self); |
| 3144 |
|
} |
| 3145 |
|
|
| 3146 |
|
redo A; |
| 3147 |
|
} |
| 3148 |
} elsif ($self->{state} == DOCTYPE_STATE) { |
} elsif ($self->{state} == DOCTYPE_STATE) { |
| 3149 |
if ($is_space->{$self->{nc}}) { |
if ($is_space->{$self->{nc}}) { |
| 3150 |
|
|
| 3161 |
} |
} |
| 3162 |
|
|
| 3163 |
redo A; |
redo A; |
| 3164 |
|
} elsif ($self->{nc} == -1) { |
| 3165 |
|
|
| 3166 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE'); |
| 3167 |
|
$self->{ct}->{quirks} = 1; |
| 3168 |
|
|
| 3169 |
|
$self->{state} = DATA_STATE; |
| 3170 |
|
## Reconsume. |
| 3171 |
|
return ($self->{ct}); # DOCTYPE (quirks) |
| 3172 |
|
|
| 3173 |
|
redo A; |
| 3174 |
} else { |
} else { |
| 3175 |
|
|
| 3176 |
## XML5: Unless EOF, switch to the bogus comment state. |
## XML5: Switch to the bogus comment state. |
| 3177 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before DOCTYPE name'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before DOCTYPE name'); |
| 3178 |
$self->{state} = BEFORE_DOCTYPE_NAME_STATE; |
$self->{state} = BEFORE_DOCTYPE_NAME_STATE; |
| 3179 |
## reconsume |
## reconsume |
| 3218 |
return ($self->{ct}); # DOCTYPE (quirks) |
return ($self->{ct}); # DOCTYPE (quirks) |
| 3219 |
|
|
| 3220 |
redo A; |
redo A; |
| 3221 |
|
} elsif (0x0041 <= $self->{nc} and $self->{nc} <= 0x005A) { # A..Z |
| 3222 |
|
|
| 3223 |
|
$self->{ct}->{name} # DOCTYPE |
| 3224 |
|
= chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)); |
| 3225 |
|
delete $self->{ct}->{quirks}; |
| 3226 |
|
$self->{state} = DOCTYPE_NAME_STATE; |
| 3227 |
|
|
| 3228 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3229 |
|
$self->{line_prev} = $self->{line}; |
| 3230 |
|
$self->{column_prev} = $self->{column}; |
| 3231 |
|
$self->{column}++; |
| 3232 |
|
$self->{nc} |
| 3233 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 3234 |
|
} else { |
| 3235 |
|
$self->{set_nc}->($self); |
| 3236 |
|
} |
| 3237 |
|
|
| 3238 |
|
redo A; |
| 3239 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 3240 |
|
|
| 3241 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name'); |
| 3322 |
return ($self->{ct}); # DOCTYPE |
return ($self->{ct}); # DOCTYPE |
| 3323 |
|
|
| 3324 |
redo A; |
redo A; |
| 3325 |
|
} elsif (0x0041 <= $self->{nc} and $self->{nc} <= 0x005A) { # A..Z |
| 3326 |
|
|
| 3327 |
|
$self->{ct}->{name} # DOCTYPE |
| 3328 |
|
.= chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)); |
| 3329 |
|
delete $self->{ct}->{quirks}; |
| 3330 |
|
## Stay in the state. |
| 3331 |
|
|
| 3332 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3333 |
|
$self->{line_prev} = $self->{line}; |
| 3334 |
|
$self->{column_prev} = $self->{column}; |
| 3335 |
|
$self->{column}++; |
| 3336 |
|
$self->{nc} |
| 3337 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 3338 |
|
} else { |
| 3339 |
|
$self->{set_nc}->($self); |
| 3340 |
|
} |
| 3341 |
|
|
| 3342 |
|
redo A; |
| 3343 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 3344 |
|
|
| 3345 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE'); |
| 3371 |
redo A; |
redo A; |
| 3372 |
} else { |
} else { |
| 3373 |
|
|
| 3374 |
$self->{ct}->{name} |
$self->{ct}->{name} .= chr ($self->{nc}); # DOCTYPE |
| 3375 |
.= chr ($self->{nc}); # DOCTYPE |
## Stay in the state. |
|
## Stay in the state |
|
| 3376 |
|
|
| 3377 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3378 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 4835 |
0x003C => 1, 0x0026 => 1, -1 => 1, # <, & |
0x003C => 1, 0x0026 => 1, -1 => 1, # <, & |
| 4836 |
$self->{entity_add} => 1, |
$self->{entity_add} => 1, |
| 4837 |
}->{$self->{nc}}) { |
}->{$self->{nc}}) { |
| 4838 |
|
if ($self->{is_xml}) { |
| 4839 |
|
|
| 4840 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero', |
| 4841 |
|
line => $self->{line_prev}, |
| 4842 |
|
column => $self->{column_prev} |
| 4843 |
|
+ ($self->{nc} == -1 ? 1 : 0)); |
| 4844 |
|
} else { |
| 4845 |
|
|
| 4846 |
|
## No error |
| 4847 |
|
} |
| 4848 |
## Don't consume |
## Don't consume |
|
## No error |
|
| 4849 |
## Return nothing. |
## Return nothing. |
| 4850 |
# |
# |
| 4851 |
} elsif ($self->{nc} == 0x0023) { # # |
} elsif ($self->{nc} == 0x0023) { # # |
| 4864 |
} |
} |
| 4865 |
|
|
| 4866 |
redo A; |
redo A; |
| 4867 |
} elsif ((0x0041 <= $self->{nc} and |
} elsif ($self->{is_xml} or |
| 4868 |
|
(0x0041 <= $self->{nc} and |
| 4869 |
$self->{nc} <= 0x005A) or # A..Z |
$self->{nc} <= 0x005A) or # A..Z |
| 4870 |
(0x0061 <= $self->{nc} and |
(0x0061 <= $self->{nc} and |
| 4871 |
$self->{nc} <= 0x007A)) { # a..z |
$self->{nc} <= 0x007A)) { # a..z |
| 5044 |
my $code = $self->{kwd}; |
my $code = $self->{kwd}; |
| 5045 |
my $l = $self->{line_prev}; |
my $l = $self->{line_prev}; |
| 5046 |
my $c = $self->{column_prev}; |
my $c = $self->{column_prev}; |
| 5047 |
if ($charref_map->{$code}) { |
if ((not $self->{is_xml} and $charref_map->{$code}) or |
| 5048 |
|
($self->{is_xml} and 0xD800 <= $code and $code <= 0xDFFF) or |
| 5049 |
|
($self->{is_xml} and $code == 0x0000)) { |
| 5050 |
|
|
| 5051 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'invalid character reference', |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'invalid character reference', |
| 5052 |
text => (sprintf 'U+%04X', $code), |
text => (sprintf 'U+%04X', $code), |
| 5199 |
my $code = $self->{kwd}; |
my $code = $self->{kwd}; |
| 5200 |
my $l = $self->{line_prev}; |
my $l = $self->{line_prev}; |
| 5201 |
my $c = $self->{column_prev}; |
my $c = $self->{column_prev}; |
| 5202 |
if ($charref_map->{$code}) { |
if ((not $self->{is_xml} and $charref_map->{$code}) or |
| 5203 |
|
($self->{is_xml} and 0xD800 <= $code and $code <= 0xDFFF) or |
| 5204 |
|
($self->{is_xml} and $code == 0x0000)) { |
| 5205 |
|
|
| 5206 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'invalid character reference', |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'invalid character reference', |
| 5207 |
text => (sprintf 'U+%04X', $code), |
text => (sprintf 'U+%04X', $code), |
| 5241 |
$self->{nc} <= 0x007A) or # z |
$self->{nc} <= 0x007A) or # z |
| 5242 |
(0x0030 <= $self->{nc} and # 0 |
(0x0030 <= $self->{nc} and # 0 |
| 5243 |
$self->{nc} <= 0x0039) or # 9 |
$self->{nc} <= 0x0039) or # 9 |
| 5244 |
$self->{nc} == 0x003B) { # ; |
$self->{nc} == 0x003B or # ; |
| 5245 |
|
($self->{is_xml} and |
| 5246 |
|
not ($is_space->{$self->{nc}} or |
| 5247 |
|
{ |
| 5248 |
|
0x003C => 1, 0x0026 => 1, -1 => 1, # <, & |
| 5249 |
|
$self->{entity_add} => 1, |
| 5250 |
|
}->{$self->{nc}}))) { |
| 5251 |
our $EntityChar; |
our $EntityChar; |
| 5252 |
$self->{kwd} .= chr $self->{nc}; |
$self->{kwd} .= chr $self->{nc}; |
| 5253 |
if (defined $EntityChar->{$self->{kwd}} or |
if (defined $EntityChar->{$self->{kwd}} or |
| 5680 |
## XML5: Not defined yet. |
## XML5: Not defined yet. |
| 5681 |
|
|
| 5682 |
## TODO: |
## TODO: |
| 5683 |
|
|
| 5684 |
|
if (not $self->{stop_processing} and |
| 5685 |
|
not $self->{document}->xml_standalone) { |
| 5686 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'stop processing', ## TODO: type |
| 5687 |
|
level => $self->{level}->{info}); |
| 5688 |
|
$self->{stop_processing} = 1; |
| 5689 |
|
} |
| 5690 |
|
|
| 5691 |
|
|
| 5692 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 5693 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 6122 |
} |
} |
| 6123 |
$self->{ct} = {type => ELEMENT_TOKEN, name => '', |
$self->{ct} = {type => ELEMENT_TOKEN, name => '', |
| 6124 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 6125 |
column => $self->{column_prev} - 6}; |
column => $self->{column_prev} - 7}; |
| 6126 |
$self->{state} = DOCTYPE_MD_STATE; |
$self->{state} = DOCTYPE_MD_STATE; |
| 6127 |
|
|
| 6128 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 6190 |
$self->{ct} = {type => ATTLIST_TOKEN, name => '', |
$self->{ct} = {type => ATTLIST_TOKEN, name => '', |
| 6191 |
attrdefs => [], |
attrdefs => [], |
| 6192 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 6193 |
column => $self->{column_prev} - 6}; |
column => $self->{column_prev} - 7}; |
| 6194 |
$self->{state} = DOCTYPE_MD_STATE; |
$self->{state} = DOCTYPE_MD_STATE; |
| 6195 |
|
|
| 6196 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 6259 |
} |
} |
| 6260 |
$self->{ct} = {type => NOTATION_TOKEN, name => '', |
$self->{ct} = {type => NOTATION_TOKEN, name => '', |
| 6261 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 6262 |
column => $self->{column_prev} - 6}; |
column => $self->{column_prev} - 8}; |
| 6263 |
$self->{state} = DOCTYPE_MD_STATE; |
$self->{state} = DOCTYPE_MD_STATE; |
| 6264 |
|
|
| 6265 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 8121 |
redo A; |
redo A; |
| 8122 |
} |
} |
| 8123 |
} elsif ($self->{state} == ENTITY_VALUE_ENTITY_STATE) { |
} elsif ($self->{state} == ENTITY_VALUE_ENTITY_STATE) { |
|
## TODO: XMLize |
|
|
|
|
| 8124 |
if ($is_space->{$self->{nc}} or |
if ($is_space->{$self->{nc}} or |
| 8125 |
{ |
{ |
| 8126 |
0x003C => 1, 0x0026 => 1, -1 => 1, # <, & |
0x003C => 1, 0x0026 => 1, -1 => 1, # <, & |
| 8127 |
$self->{entity_add} => 1, |
$self->{entity_add} => 1, |
| 8128 |
}->{$self->{nc}}) { |
}->{$self->{nc}}) { |
| 8129 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero', |
| 8130 |
|
line => $self->{line_prev}, |
| 8131 |
|
column => $self->{column_prev} |
| 8132 |
|
+ ($self->{nc} == -1 ? 1 : 0)); |
| 8133 |
## Don't consume |
## Don't consume |
|
## No error |
|
| 8134 |
## Return nothing. |
## Return nothing. |
| 8135 |
# |
# |
| 8136 |
} elsif ($self->{nc} == 0x0023) { # # |
} elsif ($self->{nc} == 0x0023) { # # |
| 8149 |
} |
} |
| 8150 |
|
|
| 8151 |
redo A; |
redo A; |
|
} elsif ((0x0041 <= $self->{nc} and |
|
|
$self->{nc} <= 0x005A) or # A..Z |
|
|
(0x0061 <= $self->{nc} and |
|
|
$self->{nc} <= 0x007A)) { # a..z |
|
|
# |
|
| 8152 |
} else { |
} else { |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero'); |
|
|
## Return nothing. |
|
| 8153 |
# |
# |
| 8154 |
} |
} |
| 8155 |
|
|