| 466 |
# Anything else |
# Anything else |
| 467 |
my $token = {type => CHARACTER_TOKEN, |
my $token = {type => CHARACTER_TOKEN, |
| 468 |
data => chr $self->{next_char}, |
data => chr $self->{next_char}, |
| 469 |
line => $self->{line}, column => $self->{column}}; |
line => $self->{line}, column => $self->{column}, |
| 470 |
|
}; |
| 471 |
## Stay in the data state |
## Stay in the data state |
| 472 |
!!!next-input-character; |
!!!next-input-character; |
| 473 |
|
|
| 487 |
unless (defined $token) { |
unless (defined $token) { |
| 488 |
!!!cp (13); |
!!!cp (13); |
| 489 |
!!!emit ({type => CHARACTER_TOKEN, data => '&', |
!!!emit ({type => CHARACTER_TOKEN, data => '&', |
| 490 |
line => $l, column => $c}); |
line => $l, column => $c, |
| 491 |
|
}); |
| 492 |
} else { |
} else { |
| 493 |
!!!cp (14); |
!!!cp (14); |
| 494 |
!!!emit ($token); |
!!!emit ($token); |
| 509 |
|
|
| 510 |
!!!emit ({type => CHARACTER_TOKEN, data => '<', |
!!!emit ({type => CHARACTER_TOKEN, data => '<', |
| 511 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 512 |
column => $self->{column_prev}}); |
column => $self->{column_prev}, |
| 513 |
|
}); |
| 514 |
|
|
| 515 |
redo A; |
redo A; |
| 516 |
} |
} |
| 556 |
|
|
| 557 |
!!!emit ({type => CHARACTER_TOKEN, data => '<>', |
!!!emit ({type => CHARACTER_TOKEN, data => '<>', |
| 558 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 559 |
column => $self->{column_prev}}); |
column => $self->{column_prev}, |
| 560 |
|
}); |
| 561 |
|
|
| 562 |
redo A; |
redo A; |
| 563 |
} elsif ($self->{next_char} == 0x003F) { # ? |
} elsif ($self->{next_char} == 0x003F) { # ? |
| 568 |
$self->{state} = BOGUS_COMMENT_STATE; |
$self->{state} = BOGUS_COMMENT_STATE; |
| 569 |
$self->{current_token} = {type => COMMENT_TOKEN, data => '', |
$self->{current_token} = {type => COMMENT_TOKEN, data => '', |
| 570 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 571 |
column => $self->{column_prev}}; |
column => $self->{column_prev}, |
| 572 |
|
}; |
| 573 |
## $self->{next_char} is intentionally left as is |
## $self->{next_char} is intentionally left as is |
| 574 |
redo A; |
redo A; |
| 575 |
} else { |
} else { |
| 580 |
|
|
| 581 |
!!!emit ({type => CHARACTER_TOKEN, data => '<', |
!!!emit ({type => CHARACTER_TOKEN, data => '<', |
| 582 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 583 |
column => $self->{column_prev}}); |
column => $self->{column_prev}, |
| 584 |
|
}); |
| 585 |
|
|
| 586 |
redo A; |
redo A; |
| 587 |
} |
} |
| 610 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 611 |
|
|
| 612 |
!!!emit ({type => CHARACTER_TOKEN, data => '</', |
!!!emit ({type => CHARACTER_TOKEN, data => '</', |
| 613 |
line => $l, column => $c}); |
line => $l, column => $c, |
| 614 |
|
}); |
| 615 |
|
|
| 616 |
redo A; |
redo A; |
| 617 |
} |
} |
| 631 |
!!!back-next-input-character (@next_char); |
!!!back-next-input-character (@next_char); |
| 632 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 633 |
!!!emit ({type => CHARACTER_TOKEN, data => '</', |
!!!emit ({type => CHARACTER_TOKEN, data => '</', |
| 634 |
line => $l, column => $c}); |
line => $l, column => $c, |
| 635 |
|
}); |
| 636 |
redo A; |
redo A; |
| 637 |
} else { |
} else { |
| 638 |
!!!cp (27); |
!!!cp (27); |
| 646 |
# next-input-character is already done |
# next-input-character is already done |
| 647 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 648 |
!!!emit ({type => CHARACTER_TOKEN, data => '</', |
!!!emit ({type => CHARACTER_TOKEN, data => '</', |
| 649 |
line => $l, column => $c}); |
line => $l, column => $c, |
| 650 |
|
}); |
| 651 |
redo A; |
redo A; |
| 652 |
} |
} |
| 653 |
} |
} |
| 686 |
# reconsume |
# reconsume |
| 687 |
|
|
| 688 |
!!!emit ({type => CHARACTER_TOKEN, data => '</', |
!!!emit ({type => CHARACTER_TOKEN, data => '</', |
| 689 |
line => $l, column => $c}); |
line => $l, column => $c, |
| 690 |
|
}); |
| 691 |
|
|
| 692 |
redo A; |
redo A; |
| 693 |
} else { |
} else { |
| 696 |
$self->{state} = BOGUS_COMMENT_STATE; |
$self->{state} = BOGUS_COMMENT_STATE; |
| 697 |
$self->{current_token} = {type => COMMENT_TOKEN, data => '', |
$self->{current_token} = {type => COMMENT_TOKEN, data => '', |
| 698 |
line => $self->{line_prev}, # "<" of "</" |
line => $self->{line_prev}, # "<" of "</" |
| 699 |
column => $self->{column_prev} - 1}; |
column => $self->{column_prev} - 1, |
| 700 |
|
}; |
| 701 |
## $self->{next_char} is intentionally left as is |
## $self->{next_char} is intentionally left as is |
| 702 |
redo A; |
redo A; |
| 703 |
} |
} |
| 714 |
} elsif ($self->{next_char} == 0x003E) { # > |
} elsif ($self->{next_char} == 0x003E) { # > |
| 715 |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
| 716 |
!!!cp (35); |
!!!cp (35); |
|
$self->{current_token}->{first_start_tag} |
|
|
= not defined $self->{last_emitted_start_tag_name}; |
|
| 717 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 718 |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
| 719 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 745 |
!!!parse-error (type => 'unclosed tag'); |
!!!parse-error (type => 'unclosed tag'); |
| 746 |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
| 747 |
!!!cp (39); |
!!!cp (39); |
|
$self->{current_token}->{first_start_tag} |
|
|
= not defined $self->{last_emitted_start_tag_name}; |
|
| 748 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 749 |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
| 750 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 800 |
} elsif ($self->{next_char} == 0x003E) { # > |
} elsif ($self->{next_char} == 0x003E) { # > |
| 801 |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
| 802 |
!!!cp (46); |
!!!cp (46); |
|
$self->{current_token}->{first_start_tag} |
|
|
= not defined $self->{last_emitted_start_tag_name}; |
|
| 803 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 804 |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
| 805 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 821 |
} elsif (0x0041 <= $self->{next_char} and |
} elsif (0x0041 <= $self->{next_char} and |
| 822 |
$self->{next_char} <= 0x005A) { # A..Z |
$self->{next_char} <= 0x005A) { # A..Z |
| 823 |
!!!cp (49); |
!!!cp (49); |
| 824 |
$self->{current_attribute} = {name => chr ($self->{next_char} + 0x0020), |
$self->{current_attribute} |
| 825 |
value => ''}; |
= {name => chr ($self->{next_char} + 0x0020), |
| 826 |
|
value => '', |
| 827 |
|
line => $self->{line}, column => $self->{column}}; |
| 828 |
$self->{state} = ATTRIBUTE_NAME_STATE; |
$self->{state} = ATTRIBUTE_NAME_STATE; |
| 829 |
!!!next-input-character; |
!!!next-input-character; |
| 830 |
redo A; |
redo A; |
| 847 |
!!!parse-error (type => 'unclosed tag'); |
!!!parse-error (type => 'unclosed tag'); |
| 848 |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
| 849 |
!!!cp (52); |
!!!cp (52); |
|
$self->{current_token}->{first_start_tag} |
|
|
= not defined $self->{last_emitted_start_tag_name}; |
|
| 850 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 851 |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
| 852 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 876 |
} else { |
} else { |
| 877 |
!!!cp (56); |
!!!cp (56); |
| 878 |
} |
} |
| 879 |
$self->{current_attribute} = {name => chr ($self->{next_char}), |
$self->{current_attribute} |
| 880 |
value => ''}; |
= {name => chr ($self->{next_char}), |
| 881 |
|
value => '', |
| 882 |
|
line => $self->{line}, column => $self->{column}}; |
| 883 |
$self->{state} = ATTRIBUTE_NAME_STATE; |
$self->{state} = ATTRIBUTE_NAME_STATE; |
| 884 |
!!!next-input-character; |
!!!next-input-character; |
| 885 |
redo A; |
redo A; |
| 889 |
if (exists $self->{current_token}->{attributes} # start tag or end tag |
if (exists $self->{current_token}->{attributes} # start tag or end tag |
| 890 |
->{$self->{current_attribute}->{name}}) { # MUST |
->{$self->{current_attribute}->{name}}) { # MUST |
| 891 |
!!!cp (57); |
!!!cp (57); |
| 892 |
!!!parse-error (type => 'duplicate attribute:'.$self->{current_attribute}->{name}); |
!!!parse-error (type => 'duplicate attribute:'.$self->{current_attribute}->{name}, line => $self->{current_attribute}->{line}, column => $self->{current_attribute}->{column}); |
| 893 |
## Discard $self->{current_attribute} # MUST |
## Discard $self->{current_attribute} # MUST |
| 894 |
} else { |
} else { |
| 895 |
!!!cp (58); |
!!!cp (58); |
| 918 |
$before_leave->(); |
$before_leave->(); |
| 919 |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
| 920 |
!!!cp (61); |
!!!cp (61); |
|
$self->{current_token}->{first_start_tag} |
|
|
= not defined $self->{last_emitted_start_tag_name}; |
|
| 921 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 922 |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
| 923 |
!!!cp (62); |
!!!cp (62); |
| 962 |
$before_leave->(); |
$before_leave->(); |
| 963 |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
| 964 |
!!!cp (66); |
!!!cp (66); |
|
$self->{current_token}->{first_start_tag} |
|
|
= not defined $self->{last_emitted_start_tag_name}; |
|
| 965 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 966 |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
| 967 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 1012 |
} elsif ($self->{next_char} == 0x003E) { # > |
} elsif ($self->{next_char} == 0x003E) { # > |
| 1013 |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
| 1014 |
!!!cp (73); |
!!!cp (73); |
|
$self->{current_token}->{first_start_tag} |
|
|
= not defined $self->{last_emitted_start_tag_name}; |
|
| 1015 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 1016 |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
| 1017 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 1034 |
} elsif (0x0041 <= $self->{next_char} and |
} elsif (0x0041 <= $self->{next_char} and |
| 1035 |
$self->{next_char} <= 0x005A) { # A..Z |
$self->{next_char} <= 0x005A) { # A..Z |
| 1036 |
!!!cp (76); |
!!!cp (76); |
| 1037 |
$self->{current_attribute} = {name => chr ($self->{next_char} + 0x0020), |
$self->{current_attribute} |
| 1038 |
value => ''}; |
= {name => chr ($self->{next_char} + 0x0020), |
| 1039 |
|
value => '', |
| 1040 |
|
line => $self->{line}, column => $self->{column}}; |
| 1041 |
$self->{state} = ATTRIBUTE_NAME_STATE; |
$self->{state} = ATTRIBUTE_NAME_STATE; |
| 1042 |
!!!next-input-character; |
!!!next-input-character; |
| 1043 |
redo A; |
redo A; |
| 1061 |
!!!parse-error (type => 'unclosed tag'); |
!!!parse-error (type => 'unclosed tag'); |
| 1062 |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
| 1063 |
!!!cp (79); |
!!!cp (79); |
|
$self->{current_token}->{first_start_tag} |
|
|
= not defined $self->{last_emitted_start_tag_name}; |
|
| 1064 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 1065 |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
| 1066 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 1082 |
redo A; |
redo A; |
| 1083 |
} else { |
} else { |
| 1084 |
!!!cp (82); |
!!!cp (82); |
| 1085 |
$self->{current_attribute} = {name => chr ($self->{next_char}), |
$self->{current_attribute} |
| 1086 |
value => ''}; |
= {name => chr ($self->{next_char}), |
| 1087 |
|
value => '', |
| 1088 |
|
line => $self->{line}, column => $self->{column}}; |
| 1089 |
$self->{state} = ATTRIBUTE_NAME_STATE; |
$self->{state} = ATTRIBUTE_NAME_STATE; |
| 1090 |
!!!next-input-character; |
!!!next-input-character; |
| 1091 |
redo A; |
redo A; |
| 1118 |
} elsif ($self->{next_char} == 0x003E) { # > |
} elsif ($self->{next_char} == 0x003E) { # > |
| 1119 |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
| 1120 |
!!!cp (87); |
!!!cp (87); |
|
$self->{current_token}->{first_start_tag} |
|
|
= not defined $self->{last_emitted_start_tag_name}; |
|
| 1121 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 1122 |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
| 1123 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 1141 |
!!!parse-error (type => 'unclosed tag'); |
!!!parse-error (type => 'unclosed tag'); |
| 1142 |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
| 1143 |
!!!cp (90); |
!!!cp (90); |
|
$self->{current_token}->{first_start_tag} |
|
|
= not defined $self->{last_emitted_start_tag_name}; |
|
| 1144 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 1145 |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
| 1146 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 1188 |
!!!parse-error (type => 'unclosed attribute value'); |
!!!parse-error (type => 'unclosed attribute value'); |
| 1189 |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
| 1190 |
!!!cp (97); |
!!!cp (97); |
|
$self->{current_token}->{first_start_tag} |
|
|
= not defined $self->{last_emitted_start_tag_name}; |
|
| 1191 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 1192 |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
| 1193 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 1230 |
!!!parse-error (type => 'unclosed attribute value'); |
!!!parse-error (type => 'unclosed attribute value'); |
| 1231 |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
| 1232 |
!!!cp (103); |
!!!cp (103); |
|
$self->{current_token}->{first_start_tag} |
|
|
= not defined $self->{last_emitted_start_tag_name}; |
|
| 1233 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 1234 |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
| 1235 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 1275 |
} elsif ($self->{next_char} == 0x003E) { # > |
} elsif ($self->{next_char} == 0x003E) { # > |
| 1276 |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
| 1277 |
!!!cp (109); |
!!!cp (109); |
|
$self->{current_token}->{first_start_tag} |
|
|
= not defined $self->{last_emitted_start_tag_name}; |
|
| 1278 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 1279 |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
| 1280 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 1298 |
!!!parse-error (type => 'unclosed tag'); |
!!!parse-error (type => 'unclosed tag'); |
| 1299 |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
| 1300 |
!!!cp (112); |
!!!cp (112); |
|
$self->{current_token}->{first_start_tag} |
|
|
= not defined $self->{last_emitted_start_tag_name}; |
|
| 1301 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 1302 |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
| 1303 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 1368 |
} elsif ($self->{next_char} == 0x003E) { # > |
} elsif ($self->{next_char} == 0x003E) { # > |
| 1369 |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
| 1370 |
!!!cp (119); |
!!!cp (119); |
|
$self->{current_token}->{first_start_tag} |
|
|
= not defined $self->{last_emitted_start_tag_name}; |
|
| 1371 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 1372 |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
| 1373 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 1455 |
if ($self->{next_char} == 0x002D) { # - |
if ($self->{next_char} == 0x002D) { # - |
| 1456 |
!!!cp (127); |
!!!cp (127); |
| 1457 |
$self->{current_token} = {type => COMMENT_TOKEN, data => '', |
$self->{current_token} = {type => COMMENT_TOKEN, data => '', |
| 1458 |
line => $l, column => $c}; |
line => $l, column => $c, |
| 1459 |
|
}; |
| 1460 |
$self->{state} = COMMENT_START_STATE; |
$self->{state} = COMMENT_START_STATE; |
| 1461 |
!!!next-input-character; |
!!!next-input-character; |
| 1462 |
redo A; |
redo A; |
| 1494 |
$self->{state} = DOCTYPE_STATE; |
$self->{state} = DOCTYPE_STATE; |
| 1495 |
$self->{current_token} = {type => DOCTYPE_TOKEN, |
$self->{current_token} = {type => DOCTYPE_TOKEN, |
| 1496 |
quirks => 1, |
quirks => 1, |
| 1497 |
line => $l, column => $c}; |
line => $l, column => $c, |
| 1498 |
|
}; |
| 1499 |
!!!next-input-character; |
!!!next-input-character; |
| 1500 |
redo A; |
redo A; |
| 1501 |
} else { |
} else { |
| 1525 |
!!!back-next-input-character (@next_char); |
!!!back-next-input-character (@next_char); |
| 1526 |
$self->{state} = BOGUS_COMMENT_STATE; |
$self->{state} = BOGUS_COMMENT_STATE; |
| 1527 |
$self->{current_token} = {type => COMMENT_TOKEN, data => '', |
$self->{current_token} = {type => COMMENT_TOKEN, data => '', |
| 1528 |
line => $l, column => $c}; |
line => $l, column => $c, |
| 1529 |
|
}; |
| 1530 |
redo A; |
redo A; |
| 1531 |
|
|
| 1532 |
## ISSUE: typos in spec: chacacters, is is a parse error |
## ISSUE: typos in spec: chacacters, is is a parse error |
| 2324 |
} |
} |
| 2325 |
|
|
| 2326 |
return {type => CHARACTER_TOKEN, data => chr $code, |
return {type => CHARACTER_TOKEN, data => chr $code, |
| 2327 |
has_reference => 1, line => $l, column => $c}; |
has_reference => 1, |
| 2328 |
|
line => $l, column => $c, |
| 2329 |
|
}; |
| 2330 |
} # X |
} # X |
| 2331 |
} elsif (0x0030 <= $self->{next_char} and |
} elsif (0x0030 <= $self->{next_char} and |
| 2332 |
$self->{next_char} <= 0x0039) { # 0..9 |
$self->{next_char} <= 0x0039) { # 0..9 |
| 2369 |
} |
} |
| 2370 |
|
|
| 2371 |
return {type => CHARACTER_TOKEN, data => chr $code, has_reference => 1, |
return {type => CHARACTER_TOKEN, data => chr $code, has_reference => 1, |
| 2372 |
line => $l, column => $c}; |
line => $l, column => $c, |
| 2373 |
|
}; |
| 2374 |
} else { |
} else { |
| 2375 |
!!!cp (1019); |
!!!cp (1019); |
| 2376 |
!!!parse-error (type => 'bare nero', line => $l, column => $c); |
!!!parse-error (type => 'bare nero', line => $l, column => $c); |
| 2424 |
if ($match > 0) { |
if ($match > 0) { |
| 2425 |
!!!cp (1023); |
!!!cp (1023); |
| 2426 |
return {type => CHARACTER_TOKEN, data => $value, has_reference => 1, |
return {type => CHARACTER_TOKEN, data => $value, has_reference => 1, |
| 2427 |
line => $l, column => $c}; |
line => $l, column => $c, |
| 2428 |
|
}; |
| 2429 |
} elsif ($match < 0) { |
} elsif ($match < 0) { |
| 2430 |
!!!parse-error (type => 'no refc', line => $l, column => $c); |
!!!parse-error (type => 'no refc', line => $l, column => $c); |
| 2431 |
if ($in_attr and $match < -1) { |
if ($in_attr and $match < -1) { |
| 2432 |
!!!cp (1024); |
!!!cp (1024); |
| 2433 |
return {type => CHARACTER_TOKEN, data => '&'.$entity_name, |
return {type => CHARACTER_TOKEN, data => '&'.$entity_name, |
| 2434 |
line => $l, column => $c}; |
line => $l, column => $c, |
| 2435 |
|
}; |
| 2436 |
} else { |
} else { |
| 2437 |
!!!cp (1025); |
!!!cp (1025); |
| 2438 |
return {type => CHARACTER_TOKEN, data => $value, has_reference => 1, |
return {type => CHARACTER_TOKEN, data => $value, has_reference => 1, |
| 2439 |
line => $l, column => $c}; |
line => $l, column => $c, |
| 2440 |
|
}; |
| 2441 |
} |
} |
| 2442 |
} else { |
} else { |
| 2443 |
!!!cp (1026); |
!!!cp (1026); |
| 2444 |
!!!parse-error (type => 'bare ero', line => $l, column => $c); |
!!!parse-error (type => 'bare ero', line => $l, column => $c); |
| 2445 |
## NOTE: "No characters are consumed" in the spec. |
## NOTE: "No characters are consumed" in the spec. |
| 2446 |
return {type => CHARACTER_TOKEN, data => '&'.$value, |
return {type => CHARACTER_TOKEN, data => '&'.$value, |
| 2447 |
line => $l, column => $c}; |
line => $l, column => $c, |
| 2448 |
|
}; |
| 2449 |
} |
} |
| 2450 |
} else { |
} else { |
| 2451 |
!!!cp (1027); |
!!!cp (1027); |
| 2756 |
!!!cp ('t24'); |
!!!cp ('t24'); |
| 2757 |
$self->{application_cache_selection} |
$self->{application_cache_selection} |
| 2758 |
->($token->{attributes}->{manifest}->{value}); |
->($token->{attributes}->{manifest}->{value}); |
| 2759 |
## ISSUE: No relative reference resolution? |
## ISSUE: Spec is unclear on relative references. |
| 2760 |
|
## According to Hixie (#whatwg 2008-03-19), it should be |
| 2761 |
|
## resolved against the base URI of the document in HTML |
| 2762 |
|
## or xml:base of the element in XHTML. |
| 2763 |
} else { |
} else { |
| 2764 |
!!!cp ('t25'); |
!!!cp ('t25'); |
| 2765 |
$self->{application_cache_selection}->(undef); |
$self->{application_cache_selection}->(undef); |
| 5940 |
if ($prompt_attr) { |
if ($prompt_attr) { |
| 5941 |
!!!cp ('t390'); |
!!!cp ('t390'); |
| 5942 |
push @tokens, {type => CHARACTER_TOKEN, data => $prompt_attr->{value}, |
push @tokens, {type => CHARACTER_TOKEN, data => $prompt_attr->{value}, |
| 5943 |
line => $token->{line}, column => $token->{column}}; |
#line => $token->{line}, column => $token->{column}, |
| 5944 |
|
}; |
| 5945 |
} else { |
} else { |
| 5946 |
!!!cp ('t391'); |
!!!cp ('t391'); |
| 5947 |
push @tokens, {type => CHARACTER_TOKEN, |
push @tokens, {type => CHARACTER_TOKEN, |
| 5948 |
data => 'This is a searchable index. Insert your search keywords here: ', |
data => 'This is a searchable index. Insert your search keywords here: ', |
| 5949 |
line => $token->{line}, column => $token->{column}}; # SHOULD |
#line => $token->{line}, column => $token->{column}, |
| 5950 |
|
}; # SHOULD |
| 5951 |
## TODO: make this configurable |
## TODO: make this configurable |
| 5952 |
} |
} |
| 5953 |
push @tokens, |
push @tokens, |