| 1740 |
|
|
| 1741 |
redo A; |
redo A; |
| 1742 |
} else { |
} else { |
| 1743 |
if ($self->{nc} == 0x003D) { # = |
if ($self->{nc} == 0x003D or $self->{nc} == 0x003C) { # =, < |
| 1744 |
|
|
| 1745 |
## XML5: Not a parse error. |
## XML5: Not a parse error. |
| 1746 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value'); |
| 1816 |
} |
} |
| 1817 |
|
|
| 1818 |
redo A; |
redo A; |
| 1819 |
|
} elsif ($self->{is_xml} and |
| 1820 |
|
$is_space->{$self->{nc}}) { |
| 1821 |
|
|
| 1822 |
|
$self->{ca}->{value} .= ' '; |
| 1823 |
|
## Stay in the state. |
| 1824 |
|
|
| 1825 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 1826 |
|
$self->{line_prev} = $self->{line}; |
| 1827 |
|
$self->{column_prev} = $self->{column}; |
| 1828 |
|
$self->{column}++; |
| 1829 |
|
$self->{nc} |
| 1830 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 1831 |
|
} else { |
| 1832 |
|
$self->{set_nc}->($self); |
| 1833 |
|
} |
| 1834 |
|
|
| 1835 |
|
redo A; |
| 1836 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 1837 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed attribute value'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed attribute value'); |
| 1838 |
if ($self->{ct}->{type} == START_TAG_TOKEN) { |
if ($self->{ct}->{type} == START_TAG_TOKEN) { |
| 1880 |
} |
} |
| 1881 |
$self->{ca}->{value} .= chr ($self->{nc}); |
$self->{ca}->{value} .= chr ($self->{nc}); |
| 1882 |
$self->{read_until}->($self->{ca}->{value}, |
$self->{read_until}->($self->{ca}->{value}, |
| 1883 |
q["&<], |
qq["&<\x09\x0C\x20], |
| 1884 |
length $self->{ca}->{value}); |
length $self->{ca}->{value}); |
| 1885 |
|
|
| 1886 |
## Stay in the state |
## Stay in the state |
| 1947 |
} |
} |
| 1948 |
|
|
| 1949 |
redo A; |
redo A; |
| 1950 |
|
} elsif ($self->{is_xml} and |
| 1951 |
|
$is_space->{$self->{nc}}) { |
| 1952 |
|
|
| 1953 |
|
$self->{ca}->{value} .= ' '; |
| 1954 |
|
## Stay in the state. |
| 1955 |
|
|
| 1956 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 1957 |
|
$self->{line_prev} = $self->{line}; |
| 1958 |
|
$self->{column_prev} = $self->{column}; |
| 1959 |
|
$self->{column}++; |
| 1960 |
|
$self->{nc} |
| 1961 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 1962 |
|
} else { |
| 1963 |
|
$self->{set_nc}->($self); |
| 1964 |
|
} |
| 1965 |
|
|
| 1966 |
|
redo A; |
| 1967 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 1968 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed attribute value'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed attribute value'); |
| 1969 |
if ($self->{ct}->{type} == START_TAG_TOKEN) { |
if ($self->{ct}->{type} == START_TAG_TOKEN) { |
| 2011 |
} |
} |
| 2012 |
$self->{ca}->{value} .= chr ($self->{nc}); |
$self->{ca}->{value} .= chr ($self->{nc}); |
| 2013 |
$self->{read_until}->($self->{ca}->{value}, |
$self->{read_until}->($self->{ca}->{value}, |
| 2014 |
q['&<], |
qq['&<\x09\x0C\x20], |
| 2015 |
length $self->{ca}->{value}); |
length $self->{ca}->{value}); |
| 2016 |
|
|
| 2017 |
## Stay in the state |
## Stay in the state |
| 2183 |
0x0022 => 1, # " |
0x0022 => 1, # " |
| 2184 |
0x0027 => 1, # ' |
0x0027 => 1, # ' |
| 2185 |
0x003D => 1, # = |
0x003D => 1, # = |
| 2186 |
|
0x003C => 1, # < |
| 2187 |
}->{$self->{nc}}) { |
}->{$self->{nc}}) { |
| 2188 |
|
|
| 2189 |
## XML5: Not a parse error. |
## XML5: Not a parse error. |
| 2193 |
} |
} |
| 2194 |
$self->{ca}->{value} .= chr ($self->{nc}); |
$self->{ca}->{value} .= chr ($self->{nc}); |
| 2195 |
$self->{read_until}->($self->{ca}->{value}, |
$self->{read_until}->($self->{ca}->{value}, |
| 2196 |
q["'=& >], |
qq["'=& \x09\x0C>], |
| 2197 |
length $self->{ca}->{value}); |
length $self->{ca}->{value}); |
| 2198 |
|
|
| 2199 |
## Stay in the state |
## Stay in the state |
| 4663 |
0x003C => 1, 0x0026 => 1, -1 => 1, # <, & |
0x003C => 1, 0x0026 => 1, -1 => 1, # <, & |
| 4664 |
$self->{entity_add} => 1, |
$self->{entity_add} => 1, |
| 4665 |
}->{$self->{nc}}) { |
}->{$self->{nc}}) { |
| 4666 |
|
if ($self->{is_xml}) { |
| 4667 |
|
|
| 4668 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero', |
| 4669 |
|
line => $self->{line_prev}, |
| 4670 |
|
column => $self->{column_prev} |
| 4671 |
|
+ ($self->{nc} == -1 ? 1 : 0)); |
| 4672 |
|
} else { |
| 4673 |
|
|
| 4674 |
|
## No error |
| 4675 |
|
} |
| 4676 |
## Don't consume |
## Don't consume |
|
## No error |
|
| 4677 |
## Return nothing. |
## Return nothing. |
| 4678 |
# |
# |
| 4679 |
} elsif ($self->{nc} == 0x0023) { # # |
} elsif ($self->{nc} == 0x0023) { # # |
| 4692 |
} |
} |
| 4693 |
|
|
| 4694 |
redo A; |
redo A; |
| 4695 |
} elsif ((0x0041 <= $self->{nc} and |
} elsif ($self->{is_xml} or |
| 4696 |
|
(0x0041 <= $self->{nc} and |
| 4697 |
$self->{nc} <= 0x005A) or # A..Z |
$self->{nc} <= 0x005A) or # A..Z |
| 4698 |
(0x0061 <= $self->{nc} and |
(0x0061 <= $self->{nc} and |
| 4699 |
$self->{nc} <= 0x007A)) { # a..z |
$self->{nc} <= 0x007A)) { # a..z |
| 4747 |
redo A; |
redo A; |
| 4748 |
} |
} |
| 4749 |
} elsif ($self->{state} == ENTITY_HASH_STATE) { |
} elsif ($self->{state} == ENTITY_HASH_STATE) { |
| 4750 |
if ($self->{nc} == 0x0078 or # x |
if ($self->{nc} == 0x0078) { # x |
|
$self->{nc} == 0x0058) { # X |
|
| 4751 |
|
|
| 4752 |
$self->{state} = HEXREF_X_STATE; |
$self->{state} = HEXREF_X_STATE; |
| 4753 |
$self->{kwd} .= chr $self->{nc}; |
$self->{kwd} .= chr $self->{nc}; |
| 4763 |
} |
} |
| 4764 |
|
|
| 4765 |
redo A; |
redo A; |
| 4766 |
|
} elsif ($self->{nc} == 0x0058) { # X |
| 4767 |
|
|
| 4768 |
|
if ($self->{is_xml}) { |
| 4769 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'uppercase hcro'); ## TODO: type |
| 4770 |
|
} |
| 4771 |
|
$self->{state} = HEXREF_X_STATE; |
| 4772 |
|
$self->{kwd} .= chr $self->{nc}; |
| 4773 |
|
|
| 4774 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4775 |
|
$self->{line_prev} = $self->{line}; |
| 4776 |
|
$self->{column_prev} = $self->{column}; |
| 4777 |
|
$self->{column}++; |
| 4778 |
|
$self->{nc} |
| 4779 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 4780 |
|
} else { |
| 4781 |
|
$self->{set_nc}->($self); |
| 4782 |
|
} |
| 4783 |
|
|
| 4784 |
|
redo A; |
| 4785 |
} elsif (0x0030 <= $self->{nc} and |
} elsif (0x0030 <= $self->{nc} and |
| 4786 |
$self->{nc} <= 0x0039) { # 0..9 |
$self->{nc} <= 0x0039) { # 0..9 |
| 4787 |
|
|
| 4872 |
my $code = $self->{kwd}; |
my $code = $self->{kwd}; |
| 4873 |
my $l = $self->{line_prev}; |
my $l = $self->{line_prev}; |
| 4874 |
my $c = $self->{column_prev}; |
my $c = $self->{column_prev}; |
| 4875 |
if ($charref_map->{$code}) { |
if ((not $self->{is_xml} and $charref_map->{$code}) or |
| 4876 |
|
($self->{is_xml} and 0xD800 <= $code and $code <= 0xDFFF) or |
| 4877 |
|
($self->{is_xml} and $code == 0x0000)) { |
| 4878 |
|
|
| 4879 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'invalid character reference', |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'invalid character reference', |
| 4880 |
text => (sprintf 'U+%04X', $code), |
text => (sprintf 'U+%04X', $code), |
| 5027 |
my $code = $self->{kwd}; |
my $code = $self->{kwd}; |
| 5028 |
my $l = $self->{line_prev}; |
my $l = $self->{line_prev}; |
| 5029 |
my $c = $self->{column_prev}; |
my $c = $self->{column_prev}; |
| 5030 |
if ($charref_map->{$code}) { |
if ((not $self->{is_xml} and $charref_map->{$code}) or |
| 5031 |
|
($self->{is_xml} and 0xD800 <= $code and $code <= 0xDFFF) or |
| 5032 |
|
($self->{is_xml} and $code == 0x0000)) { |
| 5033 |
|
|
| 5034 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'invalid character reference', |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'invalid character reference', |
| 5035 |
text => (sprintf 'U+%04X', $code), |
text => (sprintf 'U+%04X', $code), |
| 5063 |
redo A; |
redo A; |
| 5064 |
} |
} |
| 5065 |
} elsif ($self->{state} == ENTITY_NAME_STATE) { |
} elsif ($self->{state} == ENTITY_NAME_STATE) { |
| 5066 |
if (length $self->{kwd} < 30 and |
if ((0x0041 <= $self->{nc} and # a |
| 5067 |
## NOTE: Some number greater than the maximum length of entity name |
$self->{nc} <= 0x005A) or # x |
| 5068 |
((0x0041 <= $self->{nc} and # a |
(0x0061 <= $self->{nc} and # a |
| 5069 |
$self->{nc} <= 0x005A) or # x |
$self->{nc} <= 0x007A) or # z |
| 5070 |
(0x0061 <= $self->{nc} and # a |
(0x0030 <= $self->{nc} and # 0 |
| 5071 |
$self->{nc} <= 0x007A) or # z |
$self->{nc} <= 0x0039) or # 9 |
| 5072 |
(0x0030 <= $self->{nc} and # 0 |
$self->{nc} == 0x003B or # ; |
| 5073 |
$self->{nc} <= 0x0039) or # 9 |
($self->{is_xml} and |
| 5074 |
$self->{nc} == 0x003B)) { # ; |
not ($is_space->{$self->{nc}} or |
| 5075 |
|
{ |
| 5076 |
|
0x003C => 1, 0x0026 => 1, -1 => 1, # <, & |
| 5077 |
|
$self->{entity_add} => 1, |
| 5078 |
|
}->{$self->{nc}}))) { |
| 5079 |
our $EntityChar; |
our $EntityChar; |
| 5080 |
$self->{kwd} .= chr $self->{nc}; |
$self->{kwd} .= chr $self->{nc}; |
| 5081 |
if (defined $EntityChar->{$self->{kwd}}) { |
if (defined $EntityChar->{$self->{kwd}} or |
| 5082 |
|
$self->{ge}->{$self->{kwd}}) { |
| 5083 |
if ($self->{nc} == 0x003B) { # ; |
if ($self->{nc} == 0x003B) { # ; |
| 5084 |
|
if (defined $self->{ge}->{$self->{kwd}}) { |
| 5085 |
$self->{entity__value} = $EntityChar->{$self->{kwd}}; |
if ($self->{ge}->{$self->{kwd}}->{only_text}) { |
| 5086 |
|
|
| 5087 |
|
$self->{entity__value} = $self->{ge}->{$self->{kwd}}->{value}; |
| 5088 |
|
} else { |
| 5089 |
|
if (defined $self->{ge}->{$self->{kwd}}->{notation}) { |
| 5090 |
|
|
| 5091 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unparsed entity', ## TODO: type |
| 5092 |
|
value => $self->{kwd}); |
| 5093 |
|
} else { |
| 5094 |
|
|
| 5095 |
|
} |
| 5096 |
|
$self->{entity__value} = '&' . $self->{kwd}; ## TODO: expand |
| 5097 |
|
} |
| 5098 |
|
} else { |
| 5099 |
|
if ($self->{is_xml}) { |
| 5100 |
|
|
| 5101 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'entity not declared', ## TODO: type |
| 5102 |
|
value => $self->{kwd}, |
| 5103 |
|
level => { |
| 5104 |
|
'amp;' => $self->{level}->{warn}, |
| 5105 |
|
'quot;' => $self->{level}->{warn}, |
| 5106 |
|
'lt;' => $self->{level}->{warn}, |
| 5107 |
|
'gt;' => $self->{level}->{warn}, |
| 5108 |
|
'apos;' => $self->{level}->{warn}, |
| 5109 |
|
}->{$self->{kwd}} || |
| 5110 |
|
$self->{level}->{must}); |
| 5111 |
|
} else { |
| 5112 |
|
|
| 5113 |
|
} |
| 5114 |
|
$self->{entity__value} = $EntityChar->{$self->{kwd}}; |
| 5115 |
|
} |
| 5116 |
$self->{entity__match} = 1; |
$self->{entity__match} = 1; |
| 5117 |
|
|
| 5118 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 5508 |
## XML5: Not defined yet. |
## XML5: Not defined yet. |
| 5509 |
|
|
| 5510 |
## TODO: |
## TODO: |
| 5511 |
|
|
| 5512 |
|
if (not $self->{stop_processing} and |
| 5513 |
|
not $self->{document}->xml_standalone) { |
| 5514 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'stop processing', ## TODO: type |
| 5515 |
|
level => $self->{level}->{info}); |
| 5516 |
|
$self->{stop_processing} = 1; |
| 5517 |
|
} |
| 5518 |
|
|
| 5519 |
|
|
| 5520 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 5521 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 5950 |
} |
} |
| 5951 |
$self->{ct} = {type => ELEMENT_TOKEN, name => '', |
$self->{ct} = {type => ELEMENT_TOKEN, name => '', |
| 5952 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 5953 |
column => $self->{column_prev} - 6}; |
column => $self->{column_prev} - 7}; |
| 5954 |
$self->{state} = DOCTYPE_MD_STATE; |
$self->{state} = DOCTYPE_MD_STATE; |
| 5955 |
|
|
| 5956 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 6018 |
$self->{ct} = {type => ATTLIST_TOKEN, name => '', |
$self->{ct} = {type => ATTLIST_TOKEN, name => '', |
| 6019 |
attrdefs => [], |
attrdefs => [], |
| 6020 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 6021 |
column => $self->{column_prev} - 6}; |
column => $self->{column_prev} - 7}; |
| 6022 |
$self->{state} = DOCTYPE_MD_STATE; |
$self->{state} = DOCTYPE_MD_STATE; |
| 6023 |
|
|
| 6024 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 6087 |
} |
} |
| 6088 |
$self->{ct} = {type => NOTATION_TOKEN, name => '', |
$self->{ct} = {type => NOTATION_TOKEN, name => '', |
| 6089 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 6090 |
column => $self->{column_prev} - 6}; |
column => $self->{column_prev} - 8}; |
| 6091 |
$self->{state} = DOCTYPE_MD_STATE; |
$self->{state} = DOCTYPE_MD_STATE; |
| 6092 |
|
|
| 6093 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 7949 |
redo A; |
redo A; |
| 7950 |
} |
} |
| 7951 |
} elsif ($self->{state} == ENTITY_VALUE_ENTITY_STATE) { |
} elsif ($self->{state} == ENTITY_VALUE_ENTITY_STATE) { |
|
## TODO: XMLize |
|
|
|
|
| 7952 |
if ($is_space->{$self->{nc}} or |
if ($is_space->{$self->{nc}} or |
| 7953 |
{ |
{ |
| 7954 |
0x003C => 1, 0x0026 => 1, -1 => 1, # <, & |
0x003C => 1, 0x0026 => 1, -1 => 1, # <, & |
| 7955 |
$self->{entity_add} => 1, |
$self->{entity_add} => 1, |
| 7956 |
}->{$self->{nc}}) { |
}->{$self->{nc}}) { |
| 7957 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero', |
| 7958 |
|
line => $self->{line_prev}, |
| 7959 |
|
column => $self->{column_prev} |
| 7960 |
|
+ ($self->{nc} == -1 ? 1 : 0)); |
| 7961 |
## Don't consume |
## Don't consume |
|
## No error |
|
| 7962 |
## Return nothing. |
## Return nothing. |
| 7963 |
# |
# |
| 7964 |
} elsif ($self->{nc} == 0x0023) { # # |
} elsif ($self->{nc} == 0x0023) { # # |
| 7977 |
} |
} |
| 7978 |
|
|
| 7979 |
redo A; |
redo A; |
|
} elsif ((0x0041 <= $self->{nc} and |
|
|
$self->{nc} <= 0x005A) or # A..Z |
|
|
(0x0061 <= $self->{nc} and |
|
|
$self->{nc} <= 0x007A)) { # a..z |
|
|
# |
|
| 7980 |
} else { |
} else { |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero'); |
|
|
## Return nothing. |
|
| 7981 |
# |
# |
| 7982 |
} |
} |
| 7983 |
|
|