| 1740 |
|
|
| 1741 |
redo A; |
redo A; |
| 1742 |
} else { |
} else { |
| 1743 |
if ($self->{nc} == 0x003D) { # = |
if ($self->{nc} == 0x003D or $self->{nc} == 0x003C) { # =, < |
| 1744 |
|
|
| 1745 |
## XML5: Not a parse error. |
## XML5: Not a parse error. |
| 1746 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value'); |
| 1816 |
} |
} |
| 1817 |
|
|
| 1818 |
redo A; |
redo A; |
| 1819 |
|
} elsif ($self->{is_xml} and |
| 1820 |
|
$is_space->{$self->{nc}}) { |
| 1821 |
|
|
| 1822 |
|
$self->{ca}->{value} .= ' '; |
| 1823 |
|
## Stay in the state. |
| 1824 |
|
|
| 1825 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 1826 |
|
$self->{line_prev} = $self->{line}; |
| 1827 |
|
$self->{column_prev} = $self->{column}; |
| 1828 |
|
$self->{column}++; |
| 1829 |
|
$self->{nc} |
| 1830 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 1831 |
|
} else { |
| 1832 |
|
$self->{set_nc}->($self); |
| 1833 |
|
} |
| 1834 |
|
|
| 1835 |
|
redo A; |
| 1836 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 1837 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed attribute value'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed attribute value'); |
| 1838 |
if ($self->{ct}->{type} == START_TAG_TOKEN) { |
if ($self->{ct}->{type} == START_TAG_TOKEN) { |
| 1880 |
} |
} |
| 1881 |
$self->{ca}->{value} .= chr ($self->{nc}); |
$self->{ca}->{value} .= chr ($self->{nc}); |
| 1882 |
$self->{read_until}->($self->{ca}->{value}, |
$self->{read_until}->($self->{ca}->{value}, |
| 1883 |
q["&<], |
qq["&<\x09\x0C\x20], |
| 1884 |
length $self->{ca}->{value}); |
length $self->{ca}->{value}); |
| 1885 |
|
|
| 1886 |
## Stay in the state |
## Stay in the state |
| 1947 |
} |
} |
| 1948 |
|
|
| 1949 |
redo A; |
redo A; |
| 1950 |
|
} elsif ($self->{is_xml} and |
| 1951 |
|
$is_space->{$self->{nc}}) { |
| 1952 |
|
|
| 1953 |
|
$self->{ca}->{value} .= ' '; |
| 1954 |
|
## Stay in the state. |
| 1955 |
|
|
| 1956 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 1957 |
|
$self->{line_prev} = $self->{line}; |
| 1958 |
|
$self->{column_prev} = $self->{column}; |
| 1959 |
|
$self->{column}++; |
| 1960 |
|
$self->{nc} |
| 1961 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 1962 |
|
} else { |
| 1963 |
|
$self->{set_nc}->($self); |
| 1964 |
|
} |
| 1965 |
|
|
| 1966 |
|
redo A; |
| 1967 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 1968 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed attribute value'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed attribute value'); |
| 1969 |
if ($self->{ct}->{type} == START_TAG_TOKEN) { |
if ($self->{ct}->{type} == START_TAG_TOKEN) { |
| 2011 |
} |
} |
| 2012 |
$self->{ca}->{value} .= chr ($self->{nc}); |
$self->{ca}->{value} .= chr ($self->{nc}); |
| 2013 |
$self->{read_until}->($self->{ca}->{value}, |
$self->{read_until}->($self->{ca}->{value}, |
| 2014 |
q['&<], |
qq['&<\x09\x0C\x20], |
| 2015 |
length $self->{ca}->{value}); |
length $self->{ca}->{value}); |
| 2016 |
|
|
| 2017 |
## Stay in the state |
## Stay in the state |
| 2183 |
0x0022 => 1, # " |
0x0022 => 1, # " |
| 2184 |
0x0027 => 1, # ' |
0x0027 => 1, # ' |
| 2185 |
0x003D => 1, # = |
0x003D => 1, # = |
| 2186 |
|
0x003C => 1, # < |
| 2187 |
}->{$self->{nc}}) { |
}->{$self->{nc}}) { |
| 2188 |
|
|
| 2189 |
## XML5: Not a parse error. |
## XML5: Not a parse error. |
| 2193 |
} |
} |
| 2194 |
$self->{ca}->{value} .= chr ($self->{nc}); |
$self->{ca}->{value} .= chr ($self->{nc}); |
| 2195 |
$self->{read_until}->($self->{ca}->{value}, |
$self->{read_until}->($self->{ca}->{value}, |
| 2196 |
q["'=& >], |
qq["'=& \x09\x0C>], |
| 2197 |
length $self->{ca}->{value}); |
length $self->{ca}->{value}); |
| 2198 |
|
|
| 2199 |
## Stay in the state |
## Stay in the state |
| 2999 |
redo A; |
redo A; |
| 3000 |
} else { |
} else { |
| 3001 |
|
|
|
## XML5: Not a parse error. |
|
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment', |
|
|
line => $self->{line_prev}, |
|
|
column => $self->{column_prev}); |
|
| 3002 |
$self->{ct}->{data} .= '--' . chr ($self->{nc}); # comment |
$self->{ct}->{data} .= '--' . chr ($self->{nc}); # comment |
| 3003 |
$self->{state} = COMMENT_STATE; |
$self->{state} = COMMENT_STATE; |
| 3004 |
|
|
| 3030 |
} |
} |
| 3031 |
|
|
| 3032 |
redo A; |
redo A; |
| 3033 |
|
} elsif ($self->{nc} == -1) { |
| 3034 |
|
|
| 3035 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE'); |
| 3036 |
|
$self->{ct}->{quirks} = 1; |
| 3037 |
|
|
| 3038 |
|
$self->{state} = DATA_STATE; |
| 3039 |
|
## Reconsume. |
| 3040 |
|
return ($self->{ct}); # DOCTYPE (quirks) |
| 3041 |
|
|
| 3042 |
|
redo A; |
| 3043 |
} else { |
} else { |
| 3044 |
|
|
| 3045 |
## XML5: Unless EOF, switch to the bogus comment state. |
## XML5: Switch to the bogus comment state. |
| 3046 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before DOCTYPE name'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before DOCTYPE name'); |
| 3047 |
$self->{state} = BEFORE_DOCTYPE_NAME_STATE; |
$self->{state} = BEFORE_DOCTYPE_NAME_STATE; |
| 3048 |
## reconsume |
## reconsume |
| 3087 |
return ($self->{ct}); # DOCTYPE (quirks) |
return ($self->{ct}); # DOCTYPE (quirks) |
| 3088 |
|
|
| 3089 |
redo A; |
redo A; |
| 3090 |
|
} elsif (0x0041 <= $self->{nc} and $self->{nc} <= 0x005A) { # A..Z |
| 3091 |
|
|
| 3092 |
|
$self->{ct}->{name} # DOCTYPE |
| 3093 |
|
= chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)); |
| 3094 |
|
delete $self->{ct}->{quirks}; |
| 3095 |
|
$self->{state} = DOCTYPE_NAME_STATE; |
| 3096 |
|
|
| 3097 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3098 |
|
$self->{line_prev} = $self->{line}; |
| 3099 |
|
$self->{column_prev} = $self->{column}; |
| 3100 |
|
$self->{column}++; |
| 3101 |
|
$self->{nc} |
| 3102 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 3103 |
|
} else { |
| 3104 |
|
$self->{set_nc}->($self); |
| 3105 |
|
} |
| 3106 |
|
|
| 3107 |
|
redo A; |
| 3108 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 3109 |
|
|
| 3110 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name'); |
| 3191 |
return ($self->{ct}); # DOCTYPE |
return ($self->{ct}); # DOCTYPE |
| 3192 |
|
|
| 3193 |
redo A; |
redo A; |
| 3194 |
|
} elsif (0x0041 <= $self->{nc} and $self->{nc} <= 0x005A) { # A..Z |
| 3195 |
|
|
| 3196 |
|
$self->{ct}->{name} # DOCTYPE |
| 3197 |
|
.= chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)); |
| 3198 |
|
delete $self->{ct}->{quirks}; |
| 3199 |
|
## Stay in the state. |
| 3200 |
|
|
| 3201 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3202 |
|
$self->{line_prev} = $self->{line}; |
| 3203 |
|
$self->{column_prev} = $self->{column}; |
| 3204 |
|
$self->{column}++; |
| 3205 |
|
$self->{nc} |
| 3206 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 3207 |
|
} else { |
| 3208 |
|
$self->{set_nc}->($self); |
| 3209 |
|
} |
| 3210 |
|
|
| 3211 |
|
redo A; |
| 3212 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 3213 |
|
|
| 3214 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE'); |
| 3240 |
redo A; |
redo A; |
| 3241 |
} else { |
} else { |
| 3242 |
|
|
| 3243 |
$self->{ct}->{name} |
$self->{ct}->{name} .= chr ($self->{nc}); # DOCTYPE |
| 3244 |
.= chr ($self->{nc}); # DOCTYPE |
## Stay in the state. |
|
## Stay in the state |
|
| 3245 |
|
|
| 3246 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3247 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 4913 |
my $code = $self->{kwd}; |
my $code = $self->{kwd}; |
| 4914 |
my $l = $self->{line_prev}; |
my $l = $self->{line_prev}; |
| 4915 |
my $c = $self->{column_prev}; |
my $c = $self->{column_prev}; |
| 4916 |
if ($charref_map->{$code}) { |
if ((not $self->{is_xml} and $charref_map->{$code}) or |
| 4917 |
|
($self->{is_xml} and 0xD800 <= $code and $code <= 0xDFFF) or |
| 4918 |
|
($self->{is_xml} and $code == 0x0000)) { |
| 4919 |
|
|
| 4920 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'invalid character reference', |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'invalid character reference', |
| 4921 |
text => (sprintf 'U+%04X', $code), |
text => (sprintf 'U+%04X', $code), |
| 5068 |
my $code = $self->{kwd}; |
my $code = $self->{kwd}; |
| 5069 |
my $l = $self->{line_prev}; |
my $l = $self->{line_prev}; |
| 5070 |
my $c = $self->{column_prev}; |
my $c = $self->{column_prev}; |
| 5071 |
if ($charref_map->{$code}) { |
if ((not $self->{is_xml} and $charref_map->{$code}) or |
| 5072 |
|
($self->{is_xml} and 0xD800 <= $code and $code <= 0xDFFF) or |
| 5073 |
|
($self->{is_xml} and $code == 0x0000)) { |
| 5074 |
|
|
| 5075 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'invalid character reference', |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'invalid character reference', |
| 5076 |
text => (sprintf 'U+%04X', $code), |
text => (sprintf 'U+%04X', $code), |
| 5549 |
## XML5: Not defined yet. |
## XML5: Not defined yet. |
| 5550 |
|
|
| 5551 |
## TODO: |
## TODO: |
| 5552 |
|
|
| 5553 |
|
if (not $self->{stop_processing} and |
| 5554 |
|
not $self->{document}->xml_standalone) { |
| 5555 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'stop processing', ## TODO: type |
| 5556 |
|
level => $self->{level}->{info}); |
| 5557 |
|
$self->{stop_processing} = 1; |
| 5558 |
|
} |
| 5559 |
|
|
| 5560 |
|
|
| 5561 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 5562 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 5991 |
} |
} |
| 5992 |
$self->{ct} = {type => ELEMENT_TOKEN, name => '', |
$self->{ct} = {type => ELEMENT_TOKEN, name => '', |
| 5993 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 5994 |
column => $self->{column_prev} - 6}; |
column => $self->{column_prev} - 7}; |
| 5995 |
$self->{state} = DOCTYPE_MD_STATE; |
$self->{state} = DOCTYPE_MD_STATE; |
| 5996 |
|
|
| 5997 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 6059 |
$self->{ct} = {type => ATTLIST_TOKEN, name => '', |
$self->{ct} = {type => ATTLIST_TOKEN, name => '', |
| 6060 |
attrdefs => [], |
attrdefs => [], |
| 6061 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 6062 |
column => $self->{column_prev} - 6}; |
column => $self->{column_prev} - 7}; |
| 6063 |
$self->{state} = DOCTYPE_MD_STATE; |
$self->{state} = DOCTYPE_MD_STATE; |
| 6064 |
|
|
| 6065 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 6128 |
} |
} |
| 6129 |
$self->{ct} = {type => NOTATION_TOKEN, name => '', |
$self->{ct} = {type => NOTATION_TOKEN, name => '', |
| 6130 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 6131 |
column => $self->{column_prev} - 6}; |
column => $self->{column_prev} - 8}; |
| 6132 |
$self->{state} = DOCTYPE_MD_STATE; |
$self->{state} = DOCTYPE_MD_STATE; |
| 6133 |
|
|
| 6134 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |