| 4628 |
0x003C => 1, 0x0026 => 1, -1 => 1, # <, & |
0x003C => 1, 0x0026 => 1, -1 => 1, # <, & |
| 4629 |
$self->{entity_add} => 1, |
$self->{entity_add} => 1, |
| 4630 |
}->{$self->{nc}}) { |
}->{$self->{nc}}) { |
| 4631 |
|
if ($self->{is_xml}) { |
| 4632 |
|
|
| 4633 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero', |
| 4634 |
|
line => $self->{line_prev}, |
| 4635 |
|
column => $self->{column_prev} |
| 4636 |
|
+ ($self->{nc} == -1 ? 1 : 0)); |
| 4637 |
|
} else { |
| 4638 |
|
|
| 4639 |
|
## No error |
| 4640 |
|
} |
| 4641 |
## Don't consume |
## Don't consume |
|
## No error |
|
| 4642 |
## Return nothing. |
## Return nothing. |
| 4643 |
# |
# |
| 4644 |
} elsif ($self->{nc} == 0x0023) { # # |
} elsif ($self->{nc} == 0x0023) { # # |
| 4657 |
} |
} |
| 4658 |
|
|
| 4659 |
redo A; |
redo A; |
| 4660 |
} elsif ((0x0041 <= $self->{nc} and |
} elsif ($self->{is_xml} or |
| 4661 |
|
(0x0041 <= $self->{nc} and |
| 4662 |
$self->{nc} <= 0x005A) or # A..Z |
$self->{nc} <= 0x005A) or # A..Z |
| 4663 |
(0x0061 <= $self->{nc} and |
(0x0061 <= $self->{nc} and |
| 4664 |
$self->{nc} <= 0x007A)) { # a..z |
$self->{nc} <= 0x007A)) { # a..z |
| 4712 |
redo A; |
redo A; |
| 4713 |
} |
} |
| 4714 |
} elsif ($self->{state} == ENTITY_HASH_STATE) { |
} elsif ($self->{state} == ENTITY_HASH_STATE) { |
| 4715 |
if ($self->{nc} == 0x0078 or # x |
if ($self->{nc} == 0x0078) { # x |
| 4716 |
$self->{nc} == 0x0058) { # X |
|
| 4717 |
|
$self->{state} = HEXREF_X_STATE; |
| 4718 |
|
$self->{kwd} .= chr $self->{nc}; |
| 4719 |
|
|
| 4720 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4721 |
|
$self->{line_prev} = $self->{line}; |
| 4722 |
|
$self->{column_prev} = $self->{column}; |
| 4723 |
|
$self->{column}++; |
| 4724 |
|
$self->{nc} |
| 4725 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 4726 |
|
} else { |
| 4727 |
|
$self->{set_nc}->($self); |
| 4728 |
|
} |
| 4729 |
|
|
| 4730 |
|
redo A; |
| 4731 |
|
} elsif ($self->{nc} == 0x0058) { # X |
| 4732 |
|
|
| 4733 |
|
if ($self->{is_xml}) { |
| 4734 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'uppercase hcro'); ## TODO: type |
| 4735 |
|
} |
| 4736 |
$self->{state} = HEXREF_X_STATE; |
$self->{state} = HEXREF_X_STATE; |
| 4737 |
$self->{kwd} .= chr $self->{nc}; |
$self->{kwd} .= chr $self->{nc}; |
| 4738 |
|
|
| 5024 |
redo A; |
redo A; |
| 5025 |
} |
} |
| 5026 |
} elsif ($self->{state} == ENTITY_NAME_STATE) { |
} elsif ($self->{state} == ENTITY_NAME_STATE) { |
| 5027 |
if (length $self->{kwd} < 30 and |
if ((0x0041 <= $self->{nc} and # a |
| 5028 |
## NOTE: Some number greater than the maximum length of entity name |
$self->{nc} <= 0x005A) or # x |
| 5029 |
((0x0041 <= $self->{nc} and # a |
(0x0061 <= $self->{nc} and # a |
| 5030 |
$self->{nc} <= 0x005A) or # x |
$self->{nc} <= 0x007A) or # z |
| 5031 |
(0x0061 <= $self->{nc} and # a |
(0x0030 <= $self->{nc} and # 0 |
| 5032 |
$self->{nc} <= 0x007A) or # z |
$self->{nc} <= 0x0039) or # 9 |
| 5033 |
(0x0030 <= $self->{nc} and # 0 |
$self->{nc} == 0x003B or # ; |
| 5034 |
$self->{nc} <= 0x0039) or # 9 |
($self->{is_xml} and |
| 5035 |
$self->{nc} == 0x003B)) { # ; |
not ($is_space->{$self->{nc}} or |
| 5036 |
|
{ |
| 5037 |
|
0x003C => 1, 0x0026 => 1, -1 => 1, # <, & |
| 5038 |
|
$self->{entity_add} => 1, |
| 5039 |
|
}->{$self->{nc}}))) { |
| 5040 |
our $EntityChar; |
our $EntityChar; |
| 5041 |
$self->{kwd} .= chr $self->{nc}; |
$self->{kwd} .= chr $self->{nc}; |
| 5042 |
if (defined $EntityChar->{$self->{kwd}}) { |
if (defined $EntityChar->{$self->{kwd}} or |
| 5043 |
|
$self->{ge}->{$self->{kwd}}) { |
| 5044 |
if ($self->{nc} == 0x003B) { # ; |
if ($self->{nc} == 0x003B) { # ; |
| 5045 |
|
if (defined $self->{ge}->{$self->{kwd}}) { |
| 5046 |
$self->{entity__value} = $EntityChar->{$self->{kwd}}; |
if ($self->{ge}->{$self->{kwd}}->{only_text}) { |
| 5047 |
|
|
| 5048 |
|
$self->{entity__value} = $self->{ge}->{$self->{kwd}}->{value}; |
| 5049 |
|
} else { |
| 5050 |
|
if (defined $self->{ge}->{$self->{kwd}}->{notation}) { |
| 5051 |
|
|
| 5052 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unparsed entity', ## TODO: type |
| 5053 |
|
value => $self->{kwd}); |
| 5054 |
|
} else { |
| 5055 |
|
|
| 5056 |
|
} |
| 5057 |
|
$self->{entity__value} = '&' . $self->{kwd}; ## TODO: expand |
| 5058 |
|
} |
| 5059 |
|
} else { |
| 5060 |
|
if ($self->{is_xml}) { |
| 5061 |
|
|
| 5062 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'entity not declared', ## TODO: type |
| 5063 |
|
value => $self->{kwd}, |
| 5064 |
|
level => { |
| 5065 |
|
'amp;' => $self->{level}->{warn}, |
| 5066 |
|
'quot;' => $self->{level}->{warn}, |
| 5067 |
|
'lt;' => $self->{level}->{warn}, |
| 5068 |
|
'gt;' => $self->{level}->{warn}, |
| 5069 |
|
'apos;' => $self->{level}->{warn}, |
| 5070 |
|
}->{$self->{kwd}} || |
| 5071 |
|
$self->{level}->{must}); |
| 5072 |
|
} else { |
| 5073 |
|
|
| 5074 |
|
} |
| 5075 |
|
$self->{entity__value} = $EntityChar->{$self->{kwd}}; |
| 5076 |
|
} |
| 5077 |
$self->{entity__match} = 1; |
$self->{entity__match} = 1; |
| 5078 |
|
|
| 5079 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 7902 |
redo A; |
redo A; |
| 7903 |
} |
} |
| 7904 |
} elsif ($self->{state} == ENTITY_VALUE_ENTITY_STATE) { |
} elsif ($self->{state} == ENTITY_VALUE_ENTITY_STATE) { |
|
## TODO: XMLize |
|
|
|
|
| 7905 |
if ($is_space->{$self->{nc}} or |
if ($is_space->{$self->{nc}} or |
| 7906 |
{ |
{ |
| 7907 |
0x003C => 1, 0x0026 => 1, -1 => 1, # <, & |
0x003C => 1, 0x0026 => 1, -1 => 1, # <, & |
| 7908 |
$self->{entity_add} => 1, |
$self->{entity_add} => 1, |
| 7909 |
}->{$self->{nc}}) { |
}->{$self->{nc}}) { |
| 7910 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero', |
| 7911 |
|
line => $self->{line_prev}, |
| 7912 |
|
column => $self->{column_prev} |
| 7913 |
|
+ ($self->{nc} == -1 ? 1 : 0)); |
| 7914 |
## Don't consume |
## Don't consume |
|
## No error |
|
| 7915 |
## Return nothing. |
## Return nothing. |
| 7916 |
# |
# |
| 7917 |
} elsif ($self->{nc} == 0x0023) { # # |
} elsif ($self->{nc} == 0x0023) { # # |
| 7930 |
} |
} |
| 7931 |
|
|
| 7932 |
redo A; |
redo A; |
|
} elsif ((0x0041 <= $self->{nc} and |
|
|
$self->{nc} <= 0x005A) or # A..Z |
|
|
(0x0061 <= $self->{nc} and |
|
|
$self->{nc} <= 0x007A)) { # a..z |
|
|
# |
|
| 7933 |
} else { |
} else { |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero'); |
|
|
## Return nothing. |
|
| 7934 |
# |
# |
| 7935 |
} |
} |
| 7936 |
|
|