177 |
## Tokenizer state identifiers.  Each is a constant sub (empty prototype)
## returning a small integer; the tokenizer compares $self->{state}
## against them with `==`.
## NOTE(review): this listing shows BOGUS_MD_STATE defined twice (85 and
## 91), and 85 is also the value of BEFORE_NDATA_STATE.  The `85`
## definition looks like the pre-change side of a diff; if both really
## are in one file, Perl reports a constant-subroutine redefinition and
## only one value survives -- confirm that only the `91` definition
## remains.
sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE () { 82 } |
sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE () { 82 } |
178 |
sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE () { 83 } |
sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE () { 83 } |
179 |
sub AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE () { 84 } |
sub AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE () { 84 } |
180 |
sub BOGUS_MD_STATE () { 85 } |
sub BEFORE_NDATA_STATE () { 85 } |
181 |
|
sub NDATA_STATE () { 86 } |
182 |
|
sub AFTER_NDATA_STATE () { 87 } |
183 |
|
sub BEFORE_NOTATION_NAME_STATE () { 88 } |
184 |
|
sub NOTATION_NAME_STATE () { 89 } |
185 |
|
sub AFTER_NOTATION_NAME_STATE () { 90 } |
186 |
|
sub BOGUS_MD_STATE () { 91 } |
187 |
|
|
188 |
## Tree constructor state constants (see Whatpm::HTML for the full |
## Tree constructor state constants (see Whatpm::HTML for the full |
189 |
## list and descriptions) |
## list and descriptions) |
2818 |
} |
} |
2819 |
} elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) { |
} elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) { |
2820 |
if ($is_space->{$self->{nc}}) { |
if ($is_space->{$self->{nc}}) { |
2821 |
!!!cp (215); |
if ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN) { |
2822 |
## Stay in the state |
!!!cp (215.1); |
2823 |
|
$self->{state} = BEFORE_NDATA_STATE; |
2824 |
|
} else { |
2825 |
|
!!!cp (215); |
2826 |
|
## Stay in the state |
2827 |
|
} |
2828 |
!!!next-input-character; |
!!!next-input-character; |
2829 |
redo A; |
redo A; |
2830 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
2840 |
!!!next-input-character; |
!!!next-input-character; |
2841 |
!!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION |
!!!emit ($self->{ct}); # DOCTYPE/ENTITY/NOTATION |
2842 |
redo A; |
redo A; |
2843 |
## TODO: "NDATA" |
} elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and |
2844 |
|
($self->{nc} == 0x004E or # N |
2845 |
|
$self->{nc} == 0x006E)) { # n |
2846 |
|
!!!cp (216.2); |
2847 |
|
!!!parse-error (type => 'no space before NDATA'); ## TODO: type |
2848 |
|
$self->{state} = NDATA_STATE; |
2849 |
|
$self->{kwd} = chr $self->{nc}; |
2850 |
|
!!!next-input-character; |
2851 |
|
redo A; |
2852 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
2853 |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
if ($self->{ct}->{type} == DOCTYPE_TOKEN) { |
2854 |
!!!cp (217); |
!!!cp (217); |
2890 |
!!!next-input-character; |
!!!next-input-character; |
2891 |
redo A; |
redo A; |
2892 |
} |
} |
2893 |
|
} elsif ($self->{state} == BEFORE_NDATA_STATE) { |
## Handles the input that follows the white space after an entity's
## system identifier (this state is entered from
## AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE for GENERAL_ENTITY_TOKEN).
##   white space -> ignored, stay here
##   ">"         -> emit the current token, go to
##                  DOCTYPE_INTERNAL_SUBSET_STATE
##   "N" / "n"   -> start collecting a keyword in $self->{kwd} and go
##                  to NDATA_STATE
##   -1          -> 'unclosed md' parse error, emit the current token
##                  (-1 is presumably the end-of-input marker)
##   otherwise   -> parse error, go to BOGUS_MD_STATE
## The `!!!` lines are not Perl syntax; presumably they are expanded by
## a preprocessing step -- TODO confirm.
2894 |
|
if ($is_space->{$self->{nc}}) { |
2895 |
|
!!!cp (218.3); |
2896 |
|
## Stay in the state. |
2897 |
|
!!!next-input-character; |
2898 |
|
redo A; |
2899 |
|
} elsif ($self->{nc} == 0x003E) { # > |
2900 |
|
!!!cp (218.4); |
2901 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
2902 |
|
!!!next-input-character; |
2903 |
|
!!!emit ($self->{ct}); # ENTITY |
2904 |
|
redo A; |
2905 |
|
} elsif ($self->{nc} == 0x004E or # N |
2906 |
|
$self->{nc} == 0x006E) { # n |
2907 |
|
!!!cp (218.5); |
2908 |
|
$self->{state} = NDATA_STATE; |
2909 |
|
## Remember the first keyword character with its original case;
## NDATA_STATE uses the length of this string to decide which
## character it expects next.
$self->{kwd} = chr $self->{nc}; |
2910 |
|
!!!next-input-character; |
2911 |
|
redo A; |
2912 |
|
} elsif ($self->{nc} == -1) { |
2913 |
|
!!!cp (218.6); |
2914 |
|
!!!parse-error (type => 'unclosed md'); ## TODO: type |
2915 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
2916 |
|
## reconsume |
2917 |
|
!!!emit ($self->{ct}); # ENTITY |
2918 |
|
redo A; |
2919 |
|
} else { |
2920 |
|
!!!cp (218.7); |
2921 |
|
!!!parse-error (type => 'string after SYSTEM literal'); |
2922 |
|
$self->{state} = BOGUS_MD_STATE; |
2923 |
|
## NOTE(review): this fallback consumes the offending character,
## whereas the fallbacks of NDATA_STATE, AFTER_NDATA_STATE and
## AFTER_NOTATION_NAME_STATE are marked "Reconsume" -- confirm the
## difference is intended.
!!!next-input-character; |
2924 |
|
redo A; |
2925 |
|
} |
2926 |
} elsif ($self->{state} == BOGUS_DOCTYPE_STATE) { |
} elsif ($self->{state} == BOGUS_DOCTYPE_STATE) { |
2927 |
if ($self->{nc} == 0x003E) { # > |
if ($self->{nc} == 0x003E) { # > |
2928 |
!!!cp (219); |
!!!cp (219); |
4554 |
## Reconsume. |
## Reconsume. |
4555 |
redo A; |
redo A; |
4556 |
} |
} |
4557 |
|
} elsif ($self->{state} == NDATA_STATE) { |
## Matches the rest of the keyword "NDATA" one character per pass.
## $self->{kwd} already holds the characters matched so far (it is
## seeded with the "N"/"n" by the state that switches here), and its
## length indexes the two tables below: 1 -> D/d, 2 -> A/a, 3 -> T/t.
## Once four characters are held, a final A/a completes the keyword and
## the tokenizer moves to AFTER_NDATA_STATE.  Anything else is a parse
## error and the declaration is handed to BOGUS_MD_STATE.
## NOTE(review): for lengths outside 1..3 the table lookup yields undef,
## and `==` against undef compares with 0 (with an "uninitialized"
## warning when warnings are enabled).  With four characters held, an
## input value of 0 would therefore take the first branch -- confirm
## whether $self->{nc} can be 0 here.
4558 |
|
## ASCII case-insensitive |
4559 |
|
if ($self->{nc} == [ |
4560 |
|
undef, |
4561 |
|
0x0044, # D |
4562 |
|
0x0041, # A |
4563 |
|
0x0054, # T |
4564 |
|
]->[length $self->{kwd}] or |
4565 |
|
$self->{nc} == [ |
4566 |
|
undef, |
4567 |
|
0x0064, # d |
4568 |
|
0x0061, # a |
4569 |
|
0x0074, # t |
4570 |
|
]->[length $self->{kwd}]) { |
4571 |
|
!!!cp (172.2); |
4572 |
|
## Stay in the state. |
4573 |
|
$self->{kwd} .= chr $self->{nc}; |
4574 |
|
!!!next-input-character; |
4575 |
|
redo A; |
4576 |
|
} elsif ((length $self->{kwd}) == 4 and |
4577 |
|
($self->{nc} == 0x0041 or # A |
4578 |
|
$self->{nc} == 0x0061)) { # a |
4579 |
|
## The keyword is accepted in any case, but anything other than the
## exact upper-case spelling is reported as a parse error.
if ($self->{kwd} ne 'NDAT' or $self->{nc} == 0x0061) { # a |
4580 |
|
!!!cp (172.3); |
4581 |
|
!!!parse-error (type => 'lowercase keyword', ## TODO: type |
4582 |
|
text => 'NDATA', |
4583 |
|
line => $self->{line_prev}, |
4584 |
|
column => $self->{column_prev} - 4); |
4585 |
|
} else { |
4586 |
|
!!!cp (172.4); |
4587 |
|
} |
4588 |
|
## Note that the final "A" is not appended to $self->{kwd}; the
## string keeps its four characters when the state changes.
$self->{state} = AFTER_NDATA_STATE; |
4589 |
|
!!!next-input-character; |
4590 |
|
redo A; |
4591 |
|
} else { |
4592 |
|
!!!parse-error (type => 'string after literal', ## TODO: type |
4593 |
|
line => $self->{line_prev}, |
4594 |
|
column => $self->{column_prev} + 1 |
4595 |
|
- length $self->{kwd}); |
4596 |
|
!!!cp (172.5); |
4597 |
|
$self->{state} = BOGUS_MD_STATE; |
4598 |
|
## Reconsume. |
4599 |
|
redo A; |
4600 |
|
} |
4601 |
|
} elsif ($self->{state} == AFTER_NDATA_STATE) { |
## Handles the character immediately after the "NDATA" keyword.
##   white space -> go to BEFORE_NOTATION_NAME_STATE
##   ">"         -> 'no notation name' parse error, emit the current
##                  token, go to DOCTYPE_INTERNAL_SUBSET_STATE
##   -1          -> 'unclosed md' parse error, emit the current token
##                  (-1 is presumably the end-of-input marker)
##   otherwise   -> parse error, go to BOGUS_MD_STATE and reconsume
4602 |
|
if ($is_space->{$self->{nc}}) { |
4603 |
|
$self->{state} = BEFORE_NOTATION_NAME_STATE; |
4604 |
|
!!!next-input-character; |
4605 |
|
redo A; |
4606 |
|
} elsif ($self->{nc} == 0x003E) { # > |
4607 |
|
!!!parse-error (type => 'no notation name'); ## TODO: type |
4608 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
4609 |
|
!!!next-input-character; |
4610 |
|
!!!emit ($self->{ct}); # ENTITY |
4611 |
|
redo A; |
4612 |
|
} elsif ($self->{nc} == -1) { |
4613 |
|
!!!parse-error (type => 'unclosed md'); ## TODO: type |
4614 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
4615 |
|
## NOTE(review): this advances the input after -1 was seen, while
## the same case in BEFORE_NDATA_STATE is marked "reconsume" and
## does not advance -- confirm which behaviour is intended.
!!!next-input-character; |
4616 |
|
!!!emit ($self->{ct}); # ENTITY |
4617 |
|
redo A; |
4618 |
|
} else { |
4619 |
|
## NOTE(review): NDATA_STATE leaves $self->{kwd} at four characters
## (the final "A" is not appended), so the column computed below is
## based on a length of 4 -- confirm the arithmetic is intended.
!!!parse-error (type => 'string after literal', ## TODO: type |
4620 |
|
line => $self->{line_prev}, |
4621 |
|
column => $self->{column_prev} + 1 |
4622 |
|
- length $self->{kwd}); |
4623 |
|
$self->{state} = BOGUS_MD_STATE; |
4624 |
|
## Reconsume. |
4625 |
|
redo A; |
4626 |
|
} |
4627 |
|
} elsif ($self->{state} == BEFORE_NOTATION_NAME_STATE) { |
## Skips white space in front of the notation name.
##   white space -> ignored, stay here
##   ">"         -> 'no notation name' parse error, emit the current
##                  token, go to DOCTYPE_INTERNAL_SUBSET_STATE
##   -1          -> 'unclosed md' parse error, emit the current token
##                  (-1 is presumably the end-of-input marker)
##   otherwise   -> the character becomes the first character of
##                  $self->{ct}->{notation}; go to NOTATION_NAME_STATE
4628 |
|
if ($is_space->{$self->{nc}}) { |
4629 |
|
## Stay in the state. |
4630 |
|
!!!next-input-character; |
4631 |
|
redo A; |
4632 |
|
} elsif ($self->{nc} == 0x003E) { # > |
4633 |
|
!!!parse-error (type => 'no notation name'); ## TODO: type |
4634 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
4635 |
|
!!!next-input-character; |
4636 |
|
!!!emit ($self->{ct}); # ENTITY |
4637 |
|
redo A; |
4638 |
|
} elsif ($self->{nc} == -1) { |
4639 |
|
!!!parse-error (type => 'unclosed md'); ## TODO: type |
4640 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
4641 |
|
## NOTE(review): this advances the input after -1 was seen, while
## the same case in BEFORE_NDATA_STATE is marked "reconsume" and
## does not advance -- confirm which behaviour is intended.
!!!next-input-character; |
4642 |
|
!!!emit ($self->{ct}); # ENTITY |
4643 |
|
redo A; |
4644 |
|
} else { |
4645 |
|
## Plain assignment (not `.=`): this starts a fresh notation name.
$self->{ct}->{notation} = chr $self->{nc}; # ENTITY |
4646 |
|
$self->{state} = NOTATION_NAME_STATE; |
4647 |
|
!!!next-input-character; |
4648 |
|
redo A; |
4649 |
|
} |
4650 |
|
} elsif ($self->{state} == NOTATION_NAME_STATE) { |
## Accumulates the notation name into $self->{ct}->{notation}.
##   white space -> name finished, go to AFTER_NOTATION_NAME_STATE
##   ">"         -> emit the current token, go to
##                  DOCTYPE_INTERNAL_SUBSET_STATE
##   -1          -> 'unclosed md' parse error, emit the current token
##                  (-1 is presumably the end-of-input marker)
##   otherwise   -> append the character to the name, stay here
## No check of which characters may appear in the name is made in this
## branch.
4651 |
|
if ($is_space->{$self->{nc}}) { |
4652 |
|
$self->{state} = AFTER_NOTATION_NAME_STATE; |
4653 |
|
!!!next-input-character; |
4654 |
|
redo A; |
4655 |
|
} elsif ($self->{nc} == 0x003E) { # > |
4656 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
4657 |
|
!!!next-input-character; |
4658 |
|
!!!emit ($self->{ct}); # ENTITY |
4659 |
|
redo A; |
4660 |
|
} elsif ($self->{nc} == -1) { |
4661 |
|
!!!parse-error (type => 'unclosed md'); ## TODO: type |
4662 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
4663 |
|
## NOTE(review): this advances the input after -1 was seen, while
## the same case in BEFORE_NDATA_STATE is marked "reconsume" and
## does not advance -- confirm which behaviour is intended.
!!!next-input-character; |
4664 |
|
!!!emit ($self->{ct}); # ENTITY |
4665 |
|
redo A; |
4666 |
|
} else { |
4667 |
|
$self->{ct}->{notation} .= chr $self->{nc}; # ENTITY |
4668 |
|
## Stay in the state. |
4669 |
|
!!!next-input-character; |
4670 |
|
redo A; |
4671 |
|
} |
4672 |
|
} elsif ($self->{state} == AFTER_NOTATION_NAME_STATE) { |
## Handles input after the notation name and its trailing white space.
##   white space -> ignored, stay here
##   ">"         -> emit the current token, go to
##                  DOCTYPE_INTERNAL_SUBSET_STATE
##   -1          -> 'unclosed md' parse error, emit the current token
##                  (-1 is presumably the end-of-input marker)
##   otherwise   -> 'string after notation name' parse error, go to
##                  BOGUS_MD_STATE and reconsume the character there
4673 |
|
if ($is_space->{$self->{nc}}) { |
4674 |
|
## Stay in the state. |
4675 |
|
!!!next-input-character; |
4676 |
|
redo A; |
4677 |
|
} elsif ($self->{nc} == 0x003E) { # > |
4678 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
4679 |
|
!!!next-input-character; |
4680 |
|
!!!emit ($self->{ct}); # ENTITY |
4681 |
|
redo A; |
4682 |
|
} elsif ($self->{nc} == -1) { |
4683 |
|
!!!parse-error (type => 'unclosed md'); ## TODO: type |
4684 |
|
$self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; |
4685 |
|
## NOTE(review): this advances the input after -1 was seen, while
## the same case in BEFORE_NDATA_STATE is marked "reconsume" and
## does not advance -- confirm which behaviour is intended.
!!!next-input-character; |
4686 |
|
!!!emit ($self->{ct}); # ENTITY |
4687 |
|
redo A; |
4688 |
|
} else { |
4689 |
|
!!!parse-error (type => 'string after notation name'); ## TODO: type |
4690 |
|
$self->{state} = BOGUS_MD_STATE; |
4691 |
|
## Reconsume. |
4692 |
|
redo A; |
4693 |
|
} |
4694 |
|
|
4695 |
|
|
4696 |
} elsif ($self->{state} == BOGUS_MD_STATE) { |
} elsif ($self->{state} == BOGUS_MD_STATE) { |
4697 |
if ($self->{nc} == 0x003E) { # > |
if ($self->{nc} == 0x003E) { # > |