| 4703 |
redo A; |
redo A; |
| 4704 |
} |
} |
| 4705 |
} elsif ($self->{state} == ENTITY_HASH_STATE) { |
} elsif ($self->{state} == ENTITY_HASH_STATE) { |
| 4706 |
if ($self->{nc} == 0x0078 or # x |
if ($self->{nc} == 0x0078) { # x |
|
$self->{nc} == 0x0058) { # X |
|
| 4707 |
|
|
| 4708 |
$self->{state} = HEXREF_X_STATE; |
$self->{state} = HEXREF_X_STATE; |
| 4709 |
$self->{kwd} .= chr $self->{nc}; |
$self->{kwd} .= chr $self->{nc}; |
| 4719 |
} |
} |
| 4720 |
|
|
| 4721 |
redo A; |
redo A; |
| 4722 |
|
} elsif ($self->{nc} == 0x0058) { # X |
| 4723 |
|
|
| 4724 |
|
if ($self->{is_xml}) { |
| 4725 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'uppercase hcro'); ## TODO: type |
| 4726 |
|
} |
| 4727 |
|
$self->{state} = HEXREF_X_STATE; |
| 4728 |
|
$self->{kwd} .= chr $self->{nc}; |
| 4729 |
|
|
| 4730 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 4731 |
|
$self->{line_prev} = $self->{line}; |
| 4732 |
|
$self->{column_prev} = $self->{column}; |
| 4733 |
|
$self->{column}++; |
| 4734 |
|
$self->{nc} |
| 4735 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 4736 |
|
} else { |
| 4737 |
|
$self->{set_nc}->($self); |
| 4738 |
|
} |
| 4739 |
|
|
| 4740 |
|
redo A; |
| 4741 |
} elsif (0x0030 <= $self->{nc} and |
} elsif (0x0030 <= $self->{nc} and |
| 4742 |
$self->{nc} <= 0x0039) { # 0..9 |
$self->{nc} <= 0x0039) { # 0..9 |
| 4743 |
|
|
| 5015 |
redo A; |
redo A; |
| 5016 |
} |
} |
| 5017 |
} elsif ($self->{state} == ENTITY_NAME_STATE) { |
} elsif ($self->{state} == ENTITY_NAME_STATE) { |
| 5018 |
if (length $self->{kwd} < 30 and |
if ((0x0041 <= $self->{nc} and # a |
| 5019 |
## NOTE: Some number greater than the maximum length of entity name |
$self->{nc} <= 0x005A) or # x |
| 5020 |
((0x0041 <= $self->{nc} and # a |
(0x0061 <= $self->{nc} and # a |
| 5021 |
$self->{nc} <= 0x005A) or # x |
$self->{nc} <= 0x007A) or # z |
| 5022 |
(0x0061 <= $self->{nc} and # a |
(0x0030 <= $self->{nc} and # 0 |
| 5023 |
$self->{nc} <= 0x007A) or # z |
$self->{nc} <= 0x0039) or # 9 |
| 5024 |
(0x0030 <= $self->{nc} and # 0 |
$self->{nc} == 0x003B) { # ; |
|
$self->{nc} <= 0x0039) or # 9 |
|
|
$self->{nc} == 0x003B)) { # ; |
|
| 5025 |
our $EntityChar; |
our $EntityChar; |
| 5026 |
$self->{kwd} .= chr $self->{nc}; |
$self->{kwd} .= chr $self->{nc}; |
| 5027 |
if (defined $EntityChar->{$self->{kwd}}) { |
if (defined $EntityChar->{$self->{kwd}} or |
| 5028 |
|
$self->{ge}->{$self->{kwd}}) { |
| 5029 |
if ($self->{nc} == 0x003B) { # ; |
if ($self->{nc} == 0x003B) { # ; |
| 5030 |
|
if (defined $self->{ge}->{$self->{kwd}}) { |
| 5031 |
$self->{entity__value} = $EntityChar->{$self->{kwd}}; |
if ($self->{ge}->{$self->{kwd}}->{only_text}) { |
| 5032 |
|
|
| 5033 |
|
$self->{entity__value} = $self->{ge}->{$self->{kwd}}->{value}; |
| 5034 |
|
} else { |
| 5035 |
|
if (defined $self->{ge}->{$self->{kwd}}->{notation}) { |
| 5036 |
|
|
| 5037 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unparsed entity', ## TODO: type |
| 5038 |
|
value => $self->{kwd}); |
| 5039 |
|
} else { |
| 5040 |
|
|
| 5041 |
|
} |
| 5042 |
|
$self->{entity__value} = '&' . $self->{kwd}; ## TODO: expand |
| 5043 |
|
} |
| 5044 |
|
} else { |
| 5045 |
|
if ($self->{is_xml}) { |
| 5046 |
|
|
| 5047 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'entity not declared', ## TODO: type |
| 5048 |
|
value => $self->{kwd}, |
| 5049 |
|
level => { |
| 5050 |
|
'amp;' => $self->{level}->{warn}, |
| 5051 |
|
'quot;' => $self->{level}->{warn}, |
| 5052 |
|
'lt;' => $self->{level}->{warn}, |
| 5053 |
|
'gt;' => $self->{level}->{warn}, |
| 5054 |
|
'apos;' => $self->{level}->{warn}, |
| 5055 |
|
}->{$self->{kwd}} || |
| 5056 |
|
$self->{level}->{must}); |
| 5057 |
|
} else { |
| 5058 |
|
|
| 5059 |
|
} |
| 5060 |
|
$self->{entity__value} = $EntityChar->{$self->{kwd}}; |
| 5061 |
|
} |
| 5062 |
$self->{entity__match} = 1; |
$self->{entity__match} = 1; |
| 5063 |
|
|
| 5064 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |