| 8 |
## doc.write (''); |
## doc.write (''); |
| 9 |
## alert (doc.compatMode); |
## alert (doc.compatMode); |
| 10 |
|
|
|
## ISSUE: HTML5 revision 967 says that the encoding layer MUST NOT |
|
|
## strip BOM and the HTML layer MUST ignore it. Whether we can do it |
|
|
## is not yet clear. |
|
|
## "{U+FEFF}..." in UTF-16BE/UTF-16LE is three or four characters? |
|
|
## "{U+FEFF}..." in GB18030? |
|
|
|
|
| 11 |
## TODO: Control characters and noncharacters are not allowed (HTML5 revision 1263) |
## TODO: Control characters and noncharacters are not allowed (HTML5 revision 1263) |
| 12 |
## TODO: 1252 parse error (revision 1264) |
## TODO: 1252 parse error (revision 1264) |
| 13 |
## TODO: 8859-11 = 874 (revision 1271) |
## TODO: 8859-11 = 874 (revision 1271) |
| 18 |
meta => 1, |
meta => 1, |
| 19 |
hr => 1, |
hr => 1, |
| 20 |
br => 1, |
br => 1, |
| 21 |
img=> 1, |
img => 1, |
| 22 |
embed => 1, |
embed => 1, |
| 23 |
param => 1, |
param => 1, |
| 24 |
area => 1, |
area => 1, |
| 153 |
return $return; |
return $return; |
| 154 |
} # parse_byte_string |
} # parse_byte_string |
| 155 |
|
|
| 156 |
|
## NOTE: HTML5 spec says that the encoding layer MUST NOT strip BOM |
| 157 |
|
## and the HTML layer MUST ignore it. However, we do strip the BOM in |
| 158 |
|
## the encoding layer and the HTML layer does not ignore any U+FEFF, |
| 159 |
|
## because the core part of our HTML parser expects a string of characters, |
| 160 |
|
## not a string of bytes or code units or anything which might contain a BOM. |
| 161 |
|
## Therefore, any parser interface that accepts a string of bytes, |
| 162 |
|
## such as |parse_byte_string| in this module, must ensure that it does |
| 163 |
|
## strip the BOM and never strip any ZWNBSP. |
| 164 |
|
|
| 165 |
*parse_char_string = \&parse_string; |
*parse_char_string = \&parse_string; |
| 166 |
|
|
| 167 |
sub parse_string ($$$;$) { |
sub parse_string ($$$;$) { |
| 286 |
sub DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE () { 30 } |
sub DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE () { 30 } |
| 287 |
sub AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE () { 31 } |
sub AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE () { 31 } |
| 288 |
sub BOGUS_DOCTYPE_STATE () { 32 } |
sub BOGUS_DOCTYPE_STATE () { 32 } |
| 289 |
|
sub AFTER_ATTRIBUTE_VALUE_QUOTED_STATE () { 33 } |
| 290 |
|
|
| 291 |
sub DOCTYPE_TOKEN () { 1 } |
sub DOCTYPE_TOKEN () { 1 } |
| 292 |
sub COMMENT_TOKEN () { 2 } |
sub COMMENT_TOKEN () { 2 } |
| 346 |
## ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN) |
## ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN) |
| 347 |
## ->{public_identifier} (DOCTYPE_TOKEN) |
## ->{public_identifier} (DOCTYPE_TOKEN) |
| 348 |
## ->{system_identifier} (DOCTYPE_TOKEN) |
## ->{system_identifier} (DOCTYPE_TOKEN) |
| 349 |
## ->{correct} == 1 or 0 (DOCTYPE_TOKEN) |
## ->{quirks} == 1 or 0 (DOCTYPE_TOKEN): "force-quirks" flag |
| 350 |
## ->{attributes} isa HASH (START_TAG_TOKEN, END_TAG_TOKEN) |
## ->{attributes} isa HASH (START_TAG_TOKEN, END_TAG_TOKEN) |
| 351 |
## ->{name} |
## ->{name} |
| 352 |
## ->{value} |
## ->{value} |
| 385 |
A: { |
A: { |
| 386 |
if ($self->{state} == DATA_STATE) { |
if ($self->{state} == DATA_STATE) { |
| 387 |
if ($self->{next_input_character} == 0x0026) { # & |
if ($self->{next_input_character} == 0x0026) { # & |
| 388 |
if ($self->{content_model} & CM_ENTITY) { # PCDATA | RCDATA |
if ($self->{content_model} & CM_ENTITY and # PCDATA | RCDATA |
| 389 |
|
not $self->{escape}) { |
| 390 |
$self->{state} = ENTITY_DATA_STATE; |
$self->{state} = ENTITY_DATA_STATE; |
| 391 |
!!!next-input-character; |
!!!next-input-character; |
| 392 |
redo A; |
redo A; |
| 441 |
} elsif ($self->{state} == ENTITY_DATA_STATE) { |
} elsif ($self->{state} == ENTITY_DATA_STATE) { |
| 442 |
## (cannot happen in CDATA state) |
## (cannot happen in CDATA state) |
| 443 |
|
|
| 444 |
my $token = $self->_tokenize_attempt_to_consume_an_entity (0); |
my $token = $self->_tokenize_attempt_to_consume_an_entity (0, -1); |
| 445 |
|
|
| 446 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 447 |
# next-input-character is already done |
# next-input-character is already done |
| 744 |
|
|
| 745 |
redo A; |
redo A; |
| 746 |
} else { |
} else { |
| 747 |
|
if ({ |
| 748 |
|
0x0022 => 1, # " |
| 749 |
|
0x0027 => 1, # ' |
| 750 |
|
0x003D => 1, # = |
| 751 |
|
}->{$self->{next_input_character}}) { |
| 752 |
|
!!!parse-error (type => 'bad attribute name'); |
| 753 |
|
} |
| 754 |
$self->{current_attribute} = {name => chr ($self->{next_input_character}), |
$self->{current_attribute} = {name => chr ($self->{next_input_character}), |
| 755 |
value => ''}; |
value => ''}; |
| 756 |
$self->{state} = ATTRIBUTE_NAME_STATE; |
$self->{state} = ATTRIBUTE_NAME_STATE; |
| 845 |
|
|
| 846 |
redo A; |
redo A; |
| 847 |
} else { |
} else { |
| 848 |
|
if ($self->{next_input_character} == 0x0022 or # " |
| 849 |
|
$self->{next_input_character} == 0x0027) { # ' |
| 850 |
|
!!!parse-error (type => 'bad attribute name'); |
| 851 |
|
} |
| 852 |
$self->{current_attribute}->{name} .= chr ($self->{next_input_character}); |
$self->{current_attribute}->{name} .= chr ($self->{next_input_character}); |
| 853 |
## Stay in the state |
## Stay in the state |
| 854 |
!!!next-input-character; |
!!!next-input-character; |
| 995 |
|
|
| 996 |
redo A; |
redo A; |
| 997 |
} else { |
} else { |
| 998 |
|
if ($self->{next_input_character} == 0x003D) { # = |
| 999 |
|
!!!parse-error (type => 'bad attribute value'); |
| 1000 |
|
} |
| 1001 |
$self->{current_attribute}->{value} .= chr ($self->{next_input_character}); |
$self->{current_attribute}->{value} .= chr ($self->{next_input_character}); |
| 1002 |
$self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE; |
$self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE; |
| 1003 |
!!!next-input-character; |
!!!next-input-character; |
| 1005 |
} |
} |
| 1006 |
} elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) { |
} elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) { |
| 1007 |
if ($self->{next_input_character} == 0x0022) { # " |
if ($self->{next_input_character} == 0x0022) { # " |
| 1008 |
$self->{state} = BEFORE_ATTRIBUTE_NAME_STATE; |
$self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE; |
| 1009 |
!!!next-input-character; |
!!!next-input-character; |
| 1010 |
redo A; |
redo A; |
| 1011 |
} elsif ($self->{next_input_character} == 0x0026) { # & |
} elsif ($self->{next_input_character} == 0x0026) { # & |
| 1041 |
} |
} |
| 1042 |
} elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) { |
} elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) { |
| 1043 |
if ($self->{next_input_character} == 0x0027) { # ' |
if ($self->{next_input_character} == 0x0027) { # ' |
| 1044 |
$self->{state} = BEFORE_ATTRIBUTE_NAME_STATE; |
$self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE; |
| 1045 |
!!!next-input-character; |
!!!next-input-character; |
| 1046 |
redo A; |
redo A; |
| 1047 |
} elsif ($self->{next_input_character} == 0x0026) { # & |
} elsif ($self->{next_input_character} == 0x0026) { # & |
| 1129 |
|
|
| 1130 |
redo A; |
redo A; |
| 1131 |
} else { |
} else { |
| 1132 |
|
if ({ |
| 1133 |
|
0x0022 => 1, # " |
| 1134 |
|
0x0027 => 1, # ' |
| 1135 |
|
0x003D => 1, # = |
| 1136 |
|
}->{$self->{next_input_character}}) { |
| 1137 |
|
!!!parse-error (type => 'bad attribute value'); |
| 1138 |
|
} |
| 1139 |
$self->{current_attribute}->{value} .= chr ($self->{next_input_character}); |
$self->{current_attribute}->{value} .= chr ($self->{next_input_character}); |
| 1140 |
## Stay in the state |
## Stay in the state |
| 1141 |
!!!next-input-character; |
!!!next-input-character; |
| 1142 |
redo A; |
redo A; |
| 1143 |
} |
} |
| 1144 |
} elsif ($self->{state} == ENTITY_IN_ATTRIBUTE_VALUE_STATE) { |
} elsif ($self->{state} == ENTITY_IN_ATTRIBUTE_VALUE_STATE) { |
| 1145 |
my $token = $self->_tokenize_attempt_to_consume_an_entity (1); |
my $token = $self->_tokenize_attempt_to_consume_an_entity |
| 1146 |
|
(1, |
| 1147 |
|
$self->{last_attribute_value_state} |
| 1148 |
|
== ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE ? 0x0022 : # " |
| 1149 |
|
$self->{last_attribute_value_state} |
| 1150 |
|
== ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE ? 0x0027 : # ' |
| 1151 |
|
-1); |
| 1152 |
|
|
| 1153 |
unless (defined $token) { |
unless (defined $token) { |
| 1154 |
$self->{current_attribute}->{value} .= '&'; |
$self->{current_attribute}->{value} .= '&'; |
| 1161 |
$self->{state} = $self->{last_attribute_value_state}; |
$self->{state} = $self->{last_attribute_value_state}; |
| 1162 |
# next-input-character is already done |
# next-input-character is already done |
| 1163 |
redo A; |
redo A; |
| 1164 |
|
} elsif ($self->{state} == AFTER_ATTRIBUTE_VALUE_QUOTED_STATE) { |
| 1165 |
|
if ($self->{next_input_character} == 0x0009 or # HT |
| 1166 |
|
$self->{next_input_character} == 0x000A or # LF |
| 1167 |
|
$self->{next_input_character} == 0x000B or # VT |
| 1168 |
|
$self->{next_input_character} == 0x000C or # FF |
| 1169 |
|
$self->{next_input_character} == 0x0020) { # SP |
| 1170 |
|
$self->{state} = BEFORE_ATTRIBUTE_NAME_STATE; |
| 1171 |
|
!!!next-input-character; |
| 1172 |
|
redo A; |
| 1173 |
|
} elsif ($self->{next_input_character} == 0x003E) { # > |
| 1174 |
|
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
| 1175 |
|
$self->{current_token}->{first_start_tag} |
| 1176 |
|
= not defined $self->{last_emitted_start_tag_name}; |
| 1177 |
|
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 1178 |
|
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
| 1179 |
|
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 1180 |
|
if ($self->{current_token}->{attributes}) { |
| 1181 |
|
!!!parse-error (type => 'end tag attribute'); |
| 1182 |
|
} |
| 1183 |
|
} else { |
| 1184 |
|
die "$0: $self->{current_token}->{type}: Unknown token type"; |
| 1185 |
|
} |
| 1186 |
|
$self->{state} = DATA_STATE; |
| 1187 |
|
!!!next-input-character; |
| 1188 |
|
|
| 1189 |
|
!!!emit ($self->{current_token}); # start tag or end tag |
| 1190 |
|
|
| 1191 |
|
redo A; |
| 1192 |
|
} elsif ($self->{next_input_character} == 0x002F) { # / |
| 1193 |
|
!!!next-input-character; |
| 1194 |
|
if ($self->{next_input_character} == 0x003E and # > |
| 1195 |
|
$self->{current_token}->{type} == START_TAG_TOKEN and |
| 1196 |
|
$permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) { |
| 1197 |
|
# permitted slash |
| 1198 |
|
# |
| 1199 |
|
} else { |
| 1200 |
|
!!!parse-error (type => 'nestc'); |
| 1201 |
|
} |
| 1202 |
|
$self->{state} = BEFORE_ATTRIBUTE_NAME_STATE; |
| 1203 |
|
# next-input-character is already done |
| 1204 |
|
redo A; |
| 1205 |
|
} else { |
| 1206 |
|
!!!parse-error (type => 'no space between attributes'); |
| 1207 |
|
$self->{state} = BEFORE_ATTRIBUTE_NAME_STATE; |
| 1208 |
|
## reconsume |
| 1209 |
|
redo A; |
| 1210 |
|
} |
| 1211 |
} elsif ($self->{state} == BOGUS_COMMENT_STATE) { |
} elsif ($self->{state} == BOGUS_COMMENT_STATE) { |
| 1212 |
## (only happens in the PCDATA state) |
## (only happens in the PCDATA state) |
| 1213 |
|
|
| 1447 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1448 |
!!!next-input-character; |
!!!next-input-character; |
| 1449 |
|
|
| 1450 |
!!!emit ({type => DOCTYPE_TOKEN}); # incorrect |
!!!emit ({type => DOCTYPE_TOKEN, quirks => 1}); |
| 1451 |
|
|
| 1452 |
redo A; |
redo A; |
| 1453 |
} elsif ($self->{next_input_character} == -1) { |
} elsif ($self->{next_input_character} == -1) { |
| 1455 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1456 |
## reconsume |
## reconsume |
| 1457 |
|
|
| 1458 |
!!!emit ({type => DOCTYPE_TOKEN}); # incorrect |
!!!emit ({type => DOCTYPE_TOKEN, quirks => 1}); |
| 1459 |
|
|
| 1460 |
redo A; |
redo A; |
| 1461 |
} else { |
} else { |
| 1462 |
$self->{current_token} |
$self->{current_token} |
| 1463 |
= {type => DOCTYPE_TOKEN, |
= {type => DOCTYPE_TOKEN, |
| 1464 |
name => chr ($self->{next_input_character}), |
name => chr ($self->{next_input_character}), |
| 1465 |
correct => 1}; |
#quirks => 0, |
| 1466 |
|
}; |
| 1467 |
## ISSUE: "Set the token's name name to the" in the spec |
## ISSUE: "Set the token's name name to the" in the spec |
| 1468 |
$self->{state} = DOCTYPE_NAME_STATE; |
$self->{state} = DOCTYPE_NAME_STATE; |
| 1469 |
!!!next-input-character; |
!!!next-input-character; |
| 1491 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1492 |
## reconsume |
## reconsume |
| 1493 |
|
|
| 1494 |
delete $self->{current_token}->{correct}; |
$self->{current_token}->{quirks} = 1; |
| 1495 |
!!!emit ($self->{current_token}); # DOCTYPE |
!!!emit ($self->{current_token}); # DOCTYPE |
| 1496 |
|
|
| 1497 |
redo A; |
redo A; |
| 1523 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1524 |
## reconsume |
## reconsume |
| 1525 |
|
|
| 1526 |
delete $self->{current_token}->{correct}; |
$self->{current_token}->{quirks} = 1; |
| 1527 |
!!!emit ($self->{current_token}); # DOCTYPE |
!!!emit ($self->{current_token}); # DOCTYPE |
| 1528 |
|
|
| 1529 |
redo A; |
redo A; |
| 1587 |
} |
} |
| 1588 |
|
|
| 1589 |
!!!parse-error (type => 'string after DOCTYPE name'); |
!!!parse-error (type => 'string after DOCTYPE name'); |
| 1590 |
|
$self->{current_token}->{quirks} = 1; |
| 1591 |
|
|
| 1592 |
$self->{state} = BOGUS_DOCTYPE_STATE; |
$self->{state} = BOGUS_DOCTYPE_STATE; |
| 1593 |
# next-input-character is already done |
# next-input-character is already done |
| 1594 |
redo A; |
redo A; |
| 1616 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1617 |
!!!next-input-character; |
!!!next-input-character; |
| 1618 |
|
|
| 1619 |
delete $self->{current_token}->{correct}; |
$self->{current_token}->{quirks} = 1; |
| 1620 |
!!!emit ($self->{current_token}); # DOCTYPE |
!!!emit ($self->{current_token}); # DOCTYPE |
| 1621 |
|
|
| 1622 |
redo A; |
redo A; |
| 1626 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1627 |
## reconsume |
## reconsume |
| 1628 |
|
|
| 1629 |
delete $self->{current_token}->{correct}; |
$self->{current_token}->{quirks} = 1; |
| 1630 |
!!!emit ($self->{current_token}); # DOCTYPE |
!!!emit ($self->{current_token}); # DOCTYPE |
| 1631 |
|
|
| 1632 |
redo A; |
redo A; |
| 1633 |
} else { |
} else { |
| 1634 |
!!!parse-error (type => 'string after PUBLIC'); |
!!!parse-error (type => 'string after PUBLIC'); |
| 1635 |
|
$self->{current_token}->{quirks} = 1; |
| 1636 |
|
|
| 1637 |
$self->{state} = BOGUS_DOCTYPE_STATE; |
$self->{state} = BOGUS_DOCTYPE_STATE; |
| 1638 |
!!!next-input-character; |
!!!next-input-character; |
| 1639 |
redo A; |
redo A; |
| 1649 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1650 |
!!!next-input-character; |
!!!next-input-character; |
| 1651 |
|
|
| 1652 |
delete $self->{current_token}->{correct}; |
$self->{current_token}->{quirks} = 1; |
| 1653 |
!!!emit ($self->{current_token}); # DOCTYPE |
!!!emit ($self->{current_token}); # DOCTYPE |
| 1654 |
|
|
| 1655 |
redo A; |
redo A; |
| 1659 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1660 |
## reconsume |
## reconsume |
| 1661 |
|
|
| 1662 |
delete $self->{current_token}->{correct}; |
$self->{current_token}->{quirks} = 1; |
| 1663 |
!!!emit ($self->{current_token}); # DOCTYPE |
!!!emit ($self->{current_token}); # DOCTYPE |
| 1664 |
|
|
| 1665 |
redo A; |
redo A; |
| 1681 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1682 |
!!!next-input-character; |
!!!next-input-character; |
| 1683 |
|
|
| 1684 |
delete $self->{current_token}->{correct}; |
$self->{current_token}->{quirks} = 1; |
| 1685 |
!!!emit ($self->{current_token}); # DOCTYPE |
!!!emit ($self->{current_token}); # DOCTYPE |
| 1686 |
|
|
| 1687 |
redo A; |
redo A; |
| 1691 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1692 |
## reconsume |
## reconsume |
| 1693 |
|
|
| 1694 |
delete $self->{current_token}->{correct}; |
$self->{current_token}->{quirks} = 1; |
| 1695 |
!!!emit ($self->{current_token}); # DOCTYPE |
!!!emit ($self->{current_token}); # DOCTYPE |
| 1696 |
|
|
| 1697 |
redo A; |
redo A; |
| 1733 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1734 |
## reconsume |
## reconsume |
| 1735 |
|
|
| 1736 |
delete $self->{current_token}->{correct}; |
$self->{current_token}->{quirks} = 1; |
| 1737 |
!!!emit ($self->{current_token}); # DOCTYPE |
!!!emit ($self->{current_token}); # DOCTYPE |
| 1738 |
|
|
| 1739 |
redo A; |
redo A; |
| 1740 |
} else { |
} else { |
| 1741 |
!!!parse-error (type => 'string after PUBLIC literal'); |
!!!parse-error (type => 'string after PUBLIC literal'); |
| 1742 |
|
$self->{current_token}->{quirks} = 1; |
| 1743 |
|
|
| 1744 |
$self->{state} = BOGUS_DOCTYPE_STATE; |
$self->{state} = BOGUS_DOCTYPE_STATE; |
| 1745 |
!!!next-input-character; |
!!!next-input-character; |
| 1746 |
redo A; |
redo A; |
| 1768 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1769 |
!!!next-input-character; |
!!!next-input-character; |
| 1770 |
|
|
| 1771 |
delete $self->{current_token}->{correct}; |
$self->{current_token}->{quirks} = 1; |
| 1772 |
!!!emit ($self->{current_token}); # DOCTYPE |
!!!emit ($self->{current_token}); # DOCTYPE |
| 1773 |
|
|
| 1774 |
redo A; |
redo A; |
| 1778 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1779 |
## reconsume |
## reconsume |
| 1780 |
|
|
| 1781 |
delete $self->{current_token}->{correct}; |
$self->{current_token}->{quirks} = 1; |
| 1782 |
!!!emit ($self->{current_token}); # DOCTYPE |
!!!emit ($self->{current_token}); # DOCTYPE |
| 1783 |
|
|
| 1784 |
redo A; |
redo A; |
| 1785 |
} else { |
} else { |
| 1786 |
!!!parse-error (type => 'string after SYSTEM'); |
!!!parse-error (type => 'string after SYSTEM'); |
| 1787 |
|
$self->{current_token}->{quirks} = 1; |
| 1788 |
|
|
| 1789 |
$self->{state} = BOGUS_DOCTYPE_STATE; |
$self->{state} = BOGUS_DOCTYPE_STATE; |
| 1790 |
!!!next-input-character; |
!!!next-input-character; |
| 1791 |
redo A; |
redo A; |
| 1801 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1802 |
!!!next-input-character; |
!!!next-input-character; |
| 1803 |
|
|
| 1804 |
delete $self->{current_token}->{correct}; |
$self->{current_token}->{quirks} = 1; |
| 1805 |
!!!emit ($self->{current_token}); # DOCTYPE |
!!!emit ($self->{current_token}); # DOCTYPE |
| 1806 |
|
|
| 1807 |
redo A; |
redo A; |
| 1811 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1812 |
## reconsume |
## reconsume |
| 1813 |
|
|
| 1814 |
delete $self->{current_token}->{correct}; |
$self->{current_token}->{quirks} = 1; |
| 1815 |
!!!emit ($self->{current_token}); # DOCTYPE |
!!!emit ($self->{current_token}); # DOCTYPE |
| 1816 |
|
|
| 1817 |
redo A; |
redo A; |
| 1833 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1834 |
!!!next-input-character; |
!!!next-input-character; |
| 1835 |
|
|
| 1836 |
delete $self->{current_token}->{correct}; |
$self->{current_token}->{quirks} = 1; |
| 1837 |
!!!emit ($self->{current_token}); # DOCTYPE |
!!!emit ($self->{current_token}); # DOCTYPE |
| 1838 |
|
|
| 1839 |
redo A; |
redo A; |
| 1843 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1844 |
## reconsume |
## reconsume |
| 1845 |
|
|
| 1846 |
delete $self->{current_token}->{correct}; |
$self->{current_token}->{quirks} = 1; |
| 1847 |
!!!emit ($self->{current_token}); # DOCTYPE |
!!!emit ($self->{current_token}); # DOCTYPE |
| 1848 |
|
|
| 1849 |
redo A; |
redo A; |
| 1875 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1876 |
## reconsume |
## reconsume |
| 1877 |
|
|
| 1878 |
delete $self->{current_token}->{correct}; |
$self->{current_token}->{quirks} = 1; |
| 1879 |
!!!emit ($self->{current_token}); # DOCTYPE |
!!!emit ($self->{current_token}); # DOCTYPE |
| 1880 |
|
|
| 1881 |
redo A; |
redo A; |
| 1882 |
} else { |
} else { |
| 1883 |
!!!parse-error (type => 'string after SYSTEM literal'); |
!!!parse-error (type => 'string after SYSTEM literal'); |
| 1884 |
|
#$self->{current_token}->{quirks} = 1; |
| 1885 |
|
|
| 1886 |
$self->{state} = BOGUS_DOCTYPE_STATE; |
$self->{state} = BOGUS_DOCTYPE_STATE; |
| 1887 |
!!!next-input-character; |
!!!next-input-character; |
| 1888 |
redo A; |
redo A; |
| 1892 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1893 |
!!!next-input-character; |
!!!next-input-character; |
| 1894 |
|
|
|
delete $self->{current_token}->{correct}; |
|
| 1895 |
!!!emit ($self->{current_token}); # DOCTYPE |
!!!emit ($self->{current_token}); # DOCTYPE |
| 1896 |
|
|
| 1897 |
redo A; |
redo A; |
| 1900 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1901 |
## reconsume |
## reconsume |
| 1902 |
|
|
|
delete $self->{current_token}->{correct}; |
|
| 1903 |
!!!emit ($self->{current_token}); # DOCTYPE |
!!!emit ($self->{current_token}); # DOCTYPE |
| 1904 |
|
|
| 1905 |
redo A; |
redo A; |
| 1916 |
die "$0: _get_next_token: unexpected case"; |
die "$0: _get_next_token: unexpected case"; |
| 1917 |
} # _get_next_token |
} # _get_next_token |
| 1918 |
|
|
| 1919 |
sub _tokenize_attempt_to_consume_an_entity ($$) { |
sub _tokenize_attempt_to_consume_an_entity ($$$) { |
| 1920 |
my ($self, $in_attr) = @_; |
my ($self, $in_attr, $additional) = @_; |
| 1921 |
|
|
| 1922 |
if ({ |
if ({ |
| 1923 |
0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF, |
0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF, |
| 1924 |
0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, & # 0x000D # CR |
0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, & # 0x000D # CR |
| 1925 |
|
$additional => 1, |
| 1926 |
}->{$self->{next_input_character}}) { |
}->{$self->{next_input_character}}) { |
| 1927 |
## Don't consume |
## Don't consume |
| 1928 |
## No error |
## No error |
| 2155 |
## ISSUE: internalSubset = null?? |
## ISSUE: internalSubset = null?? |
| 2156 |
$self->{document}->append_child ($doctype); |
$self->{document}->append_child ($doctype); |
| 2157 |
|
|
| 2158 |
if (not $token->{correct} or $doctype_name ne 'HTML') { |
if ($token->{quirks} or $doctype_name ne 'HTML') { |
| 2159 |
$self->{document}->manakai_compat_mode ('quirks'); |
$self->{document}->manakai_compat_mode ('quirks'); |
| 2160 |
} elsif (defined $token->{public_identifier}) { |
} elsif (defined $token->{public_identifier}) { |
| 2161 |
my $pubid = $token->{public_identifier}; |
my $pubid = $token->{public_identifier}; |
| 2209 |
"-//NETSCAPE COMM. CORP.//DTD STRICT HTML//EN" => 1, |
"-//NETSCAPE COMM. CORP.//DTD STRICT HTML//EN" => 1, |
| 2210 |
"-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//EN" => 1, |
"-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//EN" => 1, |
| 2211 |
"-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//EN" => 1, |
"-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//EN" => 1, |
| 2212 |
|
"-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED RELAXED 1.0//EN" => 1, |
| 2213 |
|
"-//SOFTQUAD SOFTWARE//DTD HOTMETAL PRO 6.0::19990601::EXTENSIONS TO HTML 4.0//EN" => 1, |
| 2214 |
|
"-//SOFTQUAD//DTD HOTMETAL PRO 4.0::19971010::EXTENSIONS TO HTML 4.0//EN" => 1, |
| 2215 |
"-//SPYGLASS//DTD HTML 2.0 EXTENDED//EN" => 1, |
"-//SPYGLASS//DTD HTML 2.0 EXTENDED//EN" => 1, |
| 2216 |
"-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//EN" => 1, |
"-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//EN" => 1, |
| 2217 |
"-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//EN" => 1, |
"-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//EN" => 1, |
| 5556 |
$p->_initialize_tree_constructor; |
$p->_initialize_tree_constructor; |
| 5557 |
|
|
| 5558 |
## Step 2 |
## Step 2 |
| 5559 |
my $node_ln = $node->local_name; |
my $node_ln = $node->manakai_local_name; |
| 5560 |
$p->{content_model} = { |
$p->{content_model} = { |
| 5561 |
title => RCDATA_CONTENT_MODEL, |
title => RCDATA_CONTENT_MODEL, |
| 5562 |
textarea => RCDATA_CONTENT_MODEL, |
textarea => RCDATA_CONTENT_MODEL, |
| 5596 |
if ($anode->node_type == 1) { |
if ($anode->node_type == 1) { |
| 5597 |
my $nsuri = $anode->namespace_uri; |
my $nsuri = $anode->namespace_uri; |
| 5598 |
if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') { |
if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') { |
| 5599 |
if ($anode->local_name eq 'form') { ## TODO: case? |
if ($anode->manakai_local_name eq 'form') { |
| 5600 |
$p->{form_element} = $anode; |
$p->{form_element} = $anode; |
| 5601 |
last AN; |
last AN; |
| 5602 |
} |
} |