183 |
## Tokenizer state constants.  Each is a constant sub (empty prototype)
## returning the integer that the tokenizer compares against
## $self->{state}.
sub BEFORE_NOTATION_NAME_STATE () { 88 } |
sub BEFORE_NOTATION_NAME_STATE () { 88 } |
184 |
sub NOTATION_NAME_STATE () { 89 } |
sub NOTATION_NAME_STATE () { 89 } |
185 |
sub AFTER_NOTATION_NAME_STATE () { 90 } |
sub AFTER_NOTATION_NAME_STATE () { 90 } |
186 |
## NOTE(review): BOGUS_MD_STATE is listed twice below (91 and 94), and 91
## is also the value of DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE.  This
## listing appears to pair an old and a new revision of each line, so
## presumably 94 is the current value -- confirm that only one definition
## exists in the real file and that no two states share a number.
sub BOGUS_MD_STATE () { 91 } |
## Entity value states: the two quoted states are entered when a `"` or
## `'` is seen while the current token is a general or parameter entity
## token; ENTITY_VALUE_ENTITY_STATE is entered when `&` is seen inside
## either quoted state.
sub DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE () { 91 } |
187 |
|
sub DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE () { 92 } |
188 |
|
sub ENTITY_VALUE_ENTITY_STATE () { 93 } |
189 |
|
sub BOGUS_MD_STATE () { 94 } |
190 |
|
|
191 |
## Tree constructor state constants (see Whatpm::HTML for the full |
## Tree constructor state constants (see Whatpm::HTML for the full |
192 |
## list and descriptions) |
## list and descriptions) |
2265 |
$self->{kwd} = chr $self->{nc}; |
$self->{kwd} = chr $self->{nc}; |
2266 |
!!!next-input-character; |
!!!next-input-character; |
2267 |
redo A; |
redo A; |
2268 |
## TODO: " and ' for ENTITY |
} elsif ($self->{nc} == 0x0022 and # " |
2269 |
|
($self->{ct}->{type} == GENERAL_ENTITY_TOKEN or |
2270 |
|
$self->{ct}->{type} == PARAMETER_ENTITY_TOKEN)) { |
2271 |
|
!!!cp (167.21); |
2272 |
|
$self->{state} = DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE; |
2273 |
|
$self->{ct}->{value} = ''; # ENTITY |
2274 |
|
!!!next-input-character; |
2275 |
|
redo A; |
2276 |
|
} elsif ($self->{nc} == 0x0027 and # ' |
2277 |
|
($self->{ct}->{type} == GENERAL_ENTITY_TOKEN or |
2278 |
|
$self->{ct}->{type} == PARAMETER_ENTITY_TOKEN)) { |
2279 |
|
!!!cp (167.22); |
2280 |
|
$self->{state} = DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE; |
2281 |
|
$self->{ct}->{value} = ''; # ENTITY |
2282 |
|
!!!next-input-character; |
2283 |
|
redo A; |
2284 |
} elsif ($self->{is_xml} and |
} elsif ($self->{is_xml} and |
2285 |
$self->{ct}->{type} == DOCTYPE_TOKEN and |
$self->{ct}->{type} == DOCTYPE_TOKEN and |
2286 |
$self->{nc} == 0x005B) { # [ |
$self->{nc} == 0x005B) { # [ |
4687 |
!!!next-input-character; |
!!!next-input-character; |
4688 |
redo A; |
redo A; |
4689 |
} |
} |
4690 |
|
} elsif ($self->{state} == DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE) { |
## Collects a double-quoted entity value into $self->{ct}->{value},
## handling one input character ($self->{nc}) per pass through the
## loop labelled A.
4691 |
|
if ($self->{nc} == 0x0022) { # " |
4692 |
|
## Closing quote: the value is complete.
## NOTE(review): the next state is AFTER_NOTATION_NAME_STATE although
## this is an entity value; presumably that state is reused for the
## remainder of the declaration -- confirm.
$self->{state} = AFTER_NOTATION_NAME_STATE; |
4693 |
|
!!!next-input-character; |
4694 |
|
redo A; |
4695 |
|
} elsif ($self->{nc} == 0x0026) { # & |
4696 |
|
## Save this state so that ENTITY_VALUE_ENTITY_STATE can return to it.
$self->{prev_state} = $self->{state}; |
4697 |
|
$self->{state} = ENTITY_VALUE_ENTITY_STATE; |
4698 |
|
## Record the quote that closes this value; ENTITY_VALUE_ENTITY_STATE
## treats the character stored here as one of its terminators.
$self->{entity_add} = 0x0022; # " |
4699 |
|
!!!next-input-character; |
4700 |
|
redo A; |
4701 |
|
## TODO: % |
4702 |
|
} elsif ($self->{nc} == -1) { |
4703 |
|
## $self->{nc} == -1 is presumably the end-of-input marker (confirm).
## The value is unterminated: report it, go back to the internal
## subset state, and emit the entity token collected so far.
!!!parse-error (type => 'unclosed entity value'); ## TODO: type |
4704 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
4705 |
|
## Reconsume. |
4706 |
|
!!!emit ($self->{ct}); # ENTITY |
4707 |
|
redo A; |
4708 |
|
} else { |
4709 |
|
## Any other character is appended to the value as is.
$self->{ct}->{value} .= chr $self->{nc}; # ENTITY |
4710 |
|
!!!next-input-character; |
4711 |
|
redo A; |
4712 |
|
} |
4713 |
|
} elsif ($self->{state} == DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE) { |
## Collects a single-quoted entity value into $self->{ct}->{value},
## handling one input character ($self->{nc}) per pass through the
## loop labelled A.
4714 |
|
if ($self->{nc} == 0x0027) { # ' |
4715 |
|
## Closing quote: the value is complete.
## NOTE(review): the next state is AFTER_NOTATION_NAME_STATE although
## this is an entity value; presumably that state is reused for the
## remainder of the declaration -- confirm.
$self->{state} = AFTER_NOTATION_NAME_STATE; |
4716 |
|
!!!next-input-character; |
4717 |
|
redo A; |
4718 |
|
} elsif ($self->{nc} == 0x0026) { # & |
4719 |
|
## Save this state so that ENTITY_VALUE_ENTITY_STATE can return to it.
$self->{prev_state} = $self->{state}; |
4720 |
|
$self->{state} = ENTITY_VALUE_ENTITY_STATE; |
4721 |
|
## Record the quote that closes this value; ENTITY_VALUE_ENTITY_STATE
## treats the character stored here as one of its terminators.
$self->{entity_add} = 0x0027; # ' |
4722 |
|
!!!next-input-character; |
4723 |
|
redo A; |
4724 |
|
## TODO: % |
4725 |
|
} elsif ($self->{nc} == -1) { |
4726 |
|
## $self->{nc} == -1 is presumably the end-of-input marker (confirm).
## The value is unterminated: report it, go back to the internal
## subset state, and emit the entity token collected so far.
!!!parse-error (type => 'unclosed entity value'); ## TODO: type |
4727 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
4728 |
|
## Reconsume. |
4729 |
|
!!!emit ($self->{ct}); # ENTITY |
4730 |
|
redo A; |
4731 |
|
} else { |
4732 |
|
## Any other character is appended to the value as is.
$self->{ct}->{value} .= chr $self->{nc}; # ENTITY |
4733 |
|
!!!next-input-character; |
4734 |
|
redo A; |
4735 |
|
} |
4736 |
|
} elsif ($self->{state} == ENTITY_VALUE_ENTITY_STATE) { |
## Decides what to do with the character following an `&` that was
## consumed inside a quoted entity value.  Only `#` leaves through a
## different state; every other case falls out of the if/elsif chain
## below, keeps the `&` as literal text, and returns to the quoted
## value state saved in $self->{prev_state}.
4737 |
|
## TODO: XMLize |
4738 |
|
|
4739 |
|
## Terminators: white space, `<`, `&`, -1, or the closing quote that
## the quoted value state stored in $self->{entity_add}.
if ($is_space->{$self->{nc}} or |
4740 |
|
{ |
4741 |
|
0x003C => 1, 0x0026 => 1, -1 => 1, # <, & |
4742 |
|
$self->{entity_add} => 1, |
4743 |
|
}->{$self->{nc}}) { |
4744 |
|
## Don't consume |
4745 |
|
## No error |
4746 |
|
## Return nothing. |
4747 |
|
# |
4748 |
|
} elsif ($self->{nc} == 0x0023) { # # |
4749 |
|
## `&#`: hand over to ENTITY_HASH_STATE.  $self->{ca} is pointed at
## the entity token; presumably the character reference states (not
## visible here) append their result to $self->{ca}->{value}, which
## would then be the entity value -- confirm.
$self->{ca} = $self->{ct}; |
4750 |
|
$self->{state} = ENTITY_HASH_STATE; |
4751 |
|
$self->{kwd} = '#'; |
4752 |
|
!!!next-input-character; |
4753 |
|
redo A; |
4754 |
|
} elsif ((0x0041 <= $self->{nc} and |
4755 |
|
$self->{nc} <= 0x005A) or # A..Z |
4756 |
|
(0x0061 <= $self->{nc} and |
4757 |
|
$self->{nc} <= 0x007A)) { # a..z |
4758 |
|
## An ASCII letter after `&` is accepted without error and is not
## expanded here; the letter is left for the quoted value state.
# |
4759 |
|
} else { |
4760 |
|
!!!parse-error (type => 'bare ero'); |
4761 |
|
## Return nothing. |
4762 |
|
# |
4763 |
|
} |
4764 |
|
|
4765 |
|
## Reached by every branch above except `#`: keep the `&` literally and
## resume the saved quoted value state on the current character.
$self->{ct}->{value} .= '&'; |
4766 |
|
$self->{state} = $self->{prev_state}; |
4767 |
|
## Reconsume. |
4768 |
|
redo A; |
4769 |
} elsif ($self->{state} == AFTER_NOTATION_NAME_STATE) { |
} elsif ($self->{state} == AFTER_NOTATION_NAME_STATE) { |
4770 |
if ($is_space->{$self->{nc}}) { |
if ($is_space->{$self->{nc}}) { |
4771 |
## Stay in the state. |
## Stay in the state. |
4788 |
## Reconsume. |
## Reconsume. |
4789 |
redo A; |
redo A; |
4790 |
} |
} |
|
|
|
|
|
|
4791 |
} elsif ($self->{state} == BOGUS_MD_STATE) { |
} elsif ($self->{state} == BOGUS_MD_STATE) { |
4792 |
if ($self->{nc} == 0x003E) { # > |
if ($self->{nc} == 0x003E) { # > |
4793 |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |