| 8 |
## doc.write (''); |
## doc.write (''); |
| 9 |
## alert (doc.compatMode); |
## alert (doc.compatMode); |
| 10 |
|
|
| 11 |
## ISSUE: HTML5 revision 967 says that the encoding layer MUST NOT |
## TODO: Control characters and noncharacters are not allowed (HTML5 revision 1263) |
| 12 |
## strip BOM and the HTML layer MUST ignore it. Whether we can do it |
## TODO: 1252 parse error (revision 1264) |
| 13 |
## is not yet clear. |
## TODO: 8859-11 = 874 (revision 1271) |
|
## "{U+FEFF}..." in UTF-16BE/UTF-16LE is three or four characters? |
|
|
## "{U+FEFF}..." in GB18030? |
|
| 14 |
|
|
| 15 |
my $permitted_slash_tag_name = { |
my $permitted_slash_tag_name = { |
| 16 |
base => 1, |
base => 1, |
| 18 |
meta => 1, |
meta => 1, |
| 19 |
hr => 1, |
hr => 1, |
| 20 |
br => 1, |
br => 1, |
| 21 |
img=> 1, |
img => 1, |
| 22 |
embed => 1, |
embed => 1, |
| 23 |
param => 1, |
param => 1, |
| 24 |
area => 1, |
area => 1, |
| 153 |
return $return; |
return $return; |
| 154 |
} # parse_byte_string |
} # parse_byte_string |
| 155 |
|
|
| 156 |
|
## NOTE: HTML5 spec says that the encoding layer MUST NOT strip BOM |
| 157 |
|
## and the HTML layer MUST ignore it. However, we do strip the BOM in |
| 158 |
|
## the encoding layer and the HTML layer does not ignore any U+FEFF, |
| 159 |
|
## because the core part of our HTML parser expects a string of characters, |
| 160 |
|
## not a string of bytes or code units or anything which might contain a BOM. |
| 161 |
|
## Therefore, any parser interface that accepts a string of bytes, |
| 162 |
|
## such as |parse_byte_string| in this module, must ensure that it does |
| 163 |
|
## strip the BOM and never strip any ZWNBSP. |
| 164 |
|
|
| 165 |
*parse_char_string = \&parse_string; |
*parse_char_string = \&parse_string; |
| 166 |
|
|
| 167 |
sub parse_string ($$$;$) { |
sub parse_string ($$$;$) { |
| 286 |
sub DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE () { 30 } |
sub DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE () { 30 } |
| 287 |
sub AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE () { 31 } |
sub AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE () { 31 } |
| 288 |
sub BOGUS_DOCTYPE_STATE () { 32 } |
sub BOGUS_DOCTYPE_STATE () { 32 } |
| 289 |
|
sub AFTER_ATTRIBUTE_VALUE_QUOTED_STATE () { 33 } |
| 290 |
|
|
| 291 |
sub DOCTYPE_TOKEN () { 1 } |
sub DOCTYPE_TOKEN () { 1 } |
| 292 |
sub COMMENT_TOKEN () { 2 } |
sub COMMENT_TOKEN () { 2 } |
| 385 |
A: { |
A: { |
| 386 |
if ($self->{state} == DATA_STATE) { |
if ($self->{state} == DATA_STATE) { |
| 387 |
if ($self->{next_input_character} == 0x0026) { # & |
if ($self->{next_input_character} == 0x0026) { # & |
| 388 |
if ($self->{content_model} & CM_ENTITY) { # PCDATA | RCDATA |
if ($self->{content_model} & CM_ENTITY and # PCDATA | RCDATA |
| 389 |
|
not $self->{escape}) { |
| 390 |
$self->{state} = ENTITY_DATA_STATE; |
$self->{state} = ENTITY_DATA_STATE; |
| 391 |
!!!next-input-character; |
!!!next-input-character; |
| 392 |
redo A; |
redo A; |
| 441 |
} elsif ($self->{state} == ENTITY_DATA_STATE) { |
} elsif ($self->{state} == ENTITY_DATA_STATE) { |
| 442 |
## (cannot happen in CDATA state) |
## (cannot happen in CDATA state) |
| 443 |
|
|
| 444 |
my $token = $self->_tokenize_attempt_to_consume_an_entity (0); |
my $token = $self->_tokenize_attempt_to_consume_an_entity (0, -1); |
| 445 |
|
|
| 446 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 447 |
# next-input-character is already done |
# next-input-character is already done |
| 744 |
|
|
| 745 |
redo A; |
redo A; |
| 746 |
} else { |
} else { |
| 747 |
|
if ({ |
| 748 |
|
0x0022 => 1, # " |
| 749 |
|
0x0027 => 1, # ' |
| 750 |
|
0x003D => 1, # = |
| 751 |
|
}->{$self->{next_input_character}}) { |
| 752 |
|
!!!parse-error (type => 'bad attribute name'); |
| 753 |
|
} |
| 754 |
$self->{current_attribute} = {name => chr ($self->{next_input_character}), |
$self->{current_attribute} = {name => chr ($self->{next_input_character}), |
| 755 |
value => ''}; |
value => ''}; |
| 756 |
$self->{state} = ATTRIBUTE_NAME_STATE; |
$self->{state} = ATTRIBUTE_NAME_STATE; |
| 845 |
|
|
| 846 |
redo A; |
redo A; |
| 847 |
} else { |
} else { |
| 848 |
|
if ($self->{next_input_character} == 0x0022 or # " |
| 849 |
|
$self->{next_input_character} == 0x0027) { # ' |
| 850 |
|
!!!parse-error (type => 'bad attribute name'); |
| 851 |
|
} |
| 852 |
$self->{current_attribute}->{name} .= chr ($self->{next_input_character}); |
$self->{current_attribute}->{name} .= chr ($self->{next_input_character}); |
| 853 |
## Stay in the state |
## Stay in the state |
| 854 |
!!!next-input-character; |
!!!next-input-character; |
| 995 |
|
|
| 996 |
redo A; |
redo A; |
| 997 |
} else { |
} else { |
| 998 |
|
if ($self->{next_input_character} == 0x003D) { # = |
| 999 |
|
!!!parse-error (type => 'bad attribute value'); |
| 1000 |
|
} |
| 1001 |
$self->{current_attribute}->{value} .= chr ($self->{next_input_character}); |
$self->{current_attribute}->{value} .= chr ($self->{next_input_character}); |
| 1002 |
$self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE; |
$self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE; |
| 1003 |
!!!next-input-character; |
!!!next-input-character; |
| 1005 |
} |
} |
| 1006 |
} elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) { |
} elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) { |
| 1007 |
if ($self->{next_input_character} == 0x0022) { # " |
if ($self->{next_input_character} == 0x0022) { # " |
| 1008 |
$self->{state} = BEFORE_ATTRIBUTE_NAME_STATE; |
$self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE; |
| 1009 |
!!!next-input-character; |
!!!next-input-character; |
| 1010 |
redo A; |
redo A; |
| 1011 |
} elsif ($self->{next_input_character} == 0x0026) { # & |
} elsif ($self->{next_input_character} == 0x0026) { # & |
| 1041 |
} |
} |
| 1042 |
} elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) { |
} elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) { |
| 1043 |
if ($self->{next_input_character} == 0x0027) { # ' |
if ($self->{next_input_character} == 0x0027) { # ' |
| 1044 |
$self->{state} = BEFORE_ATTRIBUTE_NAME_STATE; |
$self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE; |
| 1045 |
!!!next-input-character; |
!!!next-input-character; |
| 1046 |
redo A; |
redo A; |
| 1047 |
} elsif ($self->{next_input_character} == 0x0026) { # & |
} elsif ($self->{next_input_character} == 0x0026) { # & |
| 1129 |
|
|
| 1130 |
redo A; |
redo A; |
| 1131 |
} else { |
} else { |
| 1132 |
|
if ({ |
| 1133 |
|
0x0022 => 1, # " |
| 1134 |
|
0x0027 => 1, # ' |
| 1135 |
|
0x003D => 1, # = |
| 1136 |
|
}->{$self->{next_input_character}}) { |
| 1137 |
|
!!!parse-error (type => 'bad attribute value'); |
| 1138 |
|
} |
| 1139 |
$self->{current_attribute}->{value} .= chr ($self->{next_input_character}); |
$self->{current_attribute}->{value} .= chr ($self->{next_input_character}); |
| 1140 |
## Stay in the state |
## Stay in the state |
| 1141 |
!!!next-input-character; |
!!!next-input-character; |
| 1142 |
redo A; |
redo A; |
| 1143 |
} |
} |
| 1144 |
} elsif ($self->{state} == ENTITY_IN_ATTRIBUTE_VALUE_STATE) { |
} elsif ($self->{state} == ENTITY_IN_ATTRIBUTE_VALUE_STATE) { |
| 1145 |
my $token = $self->_tokenize_attempt_to_consume_an_entity (1); |
my $token = $self->_tokenize_attempt_to_consume_an_entity |
| 1146 |
|
(1, |
| 1147 |
|
$self->{last_attribute_value_state} |
| 1148 |
|
== ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE ? 0x0022 : # " |
| 1149 |
|
$self->{last_attribute_value_state} |
| 1150 |
|
== ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE ? 0x0027 : # ' |
| 1151 |
|
-1); |
| 1152 |
|
|
| 1153 |
unless (defined $token) { |
unless (defined $token) { |
| 1154 |
$self->{current_attribute}->{value} .= '&'; |
$self->{current_attribute}->{value} .= '&'; |
| 1161 |
$self->{state} = $self->{last_attribute_value_state}; |
$self->{state} = $self->{last_attribute_value_state}; |
| 1162 |
# next-input-character is already done |
# next-input-character is already done |
| 1163 |
redo A; |
redo A; |
| 1164 |
|
} elsif ($self->{state} == AFTER_ATTRIBUTE_VALUE_QUOTED_STATE) { |
| 1165 |
|
if ($self->{next_input_character} == 0x0009 or # HT |
| 1166 |
|
$self->{next_input_character} == 0x000A or # LF |
| 1167 |
|
$self->{next_input_character} == 0x000B or # VT |
| 1168 |
|
$self->{next_input_character} == 0x000C or # FF |
| 1169 |
|
$self->{next_input_character} == 0x0020) { # SP |
| 1170 |
|
$self->{state} = BEFORE_ATTRIBUTE_NAME_STATE; |
| 1171 |
|
!!!next-input-character; |
| 1172 |
|
redo A; |
| 1173 |
|
} elsif ($self->{next_input_character} == 0x003E) { # > |
| 1174 |
|
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
| 1175 |
|
$self->{current_token}->{first_start_tag} |
| 1176 |
|
= not defined $self->{last_emitted_start_tag_name}; |
| 1177 |
|
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 1178 |
|
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
| 1179 |
|
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 1180 |
|
if ($self->{current_token}->{attributes}) { |
| 1181 |
|
!!!parse-error (type => 'end tag attribute'); |
| 1182 |
|
} |
| 1183 |
|
} else { |
| 1184 |
|
die "$0: $self->{current_token}->{type}: Unknown token type"; |
| 1185 |
|
} |
| 1186 |
|
$self->{state} = DATA_STATE; |
| 1187 |
|
!!!next-input-character; |
| 1188 |
|
|
| 1189 |
|
!!!emit ($self->{current_token}); # start tag or end tag |
| 1190 |
|
|
| 1191 |
|
redo A; |
| 1192 |
|
} elsif ($self->{next_input_character} == 0x002F) { # / |
| 1193 |
|
!!!next-input-character; |
| 1194 |
|
if ($self->{next_input_character} == 0x003E and # > |
| 1195 |
|
$self->{current_token}->{type} == START_TAG_TOKEN and |
| 1196 |
|
$permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) { |
| 1197 |
|
# permitted slash |
| 1198 |
|
# |
| 1199 |
|
} else { |
| 1200 |
|
!!!parse-error (type => 'nestc'); |
| 1201 |
|
} |
| 1202 |
|
$self->{state} = BEFORE_ATTRIBUTE_NAME_STATE; |
| 1203 |
|
# next-input-character is already done |
| 1204 |
|
redo A; |
| 1205 |
|
} else { |
| 1206 |
|
!!!parse-error (type => 'no space between attributes'); |
| 1207 |
|
$self->{state} = BEFORE_ATTRIBUTE_NAME_STATE; |
| 1208 |
|
## reconsume |
| 1209 |
|
redo A; |
| 1210 |
|
} |
| 1211 |
} elsif ($self->{state} == BOGUS_COMMENT_STATE) { |
} elsif ($self->{state} == BOGUS_COMMENT_STATE) { |
| 1212 |
## (only happen if PCDATA state) |
## (only happen if PCDATA state) |
| 1213 |
|
|
| 1638 |
$self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE; |
$self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE; |
| 1639 |
!!!next-input-character; |
!!!next-input-character; |
| 1640 |
redo A; |
redo A; |
| 1641 |
|
} elsif ($self->{next_input_character} == 0x003E) { # > |
| 1642 |
|
!!!parse-error (type => 'unclosed PUBLIC literal'); |
| 1643 |
|
|
| 1644 |
|
$self->{state} = DATA_STATE; |
| 1645 |
|
!!!next-input-character; |
| 1646 |
|
|
| 1647 |
|
delete $self->{current_token}->{correct}; |
| 1648 |
|
!!!emit ($self->{current_token}); # DOCTYPE |
| 1649 |
|
|
| 1650 |
|
redo A; |
| 1651 |
} elsif ($self->{next_input_character} == -1) { |
} elsif ($self->{next_input_character} == -1) { |
| 1652 |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
| 1653 |
|
|
| 1670 |
$self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE; |
$self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE; |
| 1671 |
!!!next-input-character; |
!!!next-input-character; |
| 1672 |
redo A; |
redo A; |
| 1673 |
|
} elsif ($self->{next_input_character} == 0x003E) { # > |
| 1674 |
|
!!!parse-error (type => 'unclosed PUBLIC literal'); |
| 1675 |
|
|
| 1676 |
|
$self->{state} = DATA_STATE; |
| 1677 |
|
!!!next-input-character; |
| 1678 |
|
|
| 1679 |
|
delete $self->{current_token}->{correct}; |
| 1680 |
|
!!!emit ($self->{current_token}); # DOCTYPE |
| 1681 |
|
|
| 1682 |
|
redo A; |
| 1683 |
} elsif ($self->{next_input_character} == -1) { |
} elsif ($self->{next_input_character} == -1) { |
| 1684 |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
| 1685 |
|
|
| 1786 |
$self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE; |
$self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE; |
| 1787 |
!!!next-input-character; |
!!!next-input-character; |
| 1788 |
redo A; |
redo A; |
| 1789 |
|
} elsif ($self->{next_input_character} == 0x003E) { # > |
| 1790 |
|
!!!parse-error (type => 'unclosed PUBLIC literal'); |
| 1791 |
|
|
| 1792 |
|
$self->{state} = DATA_STATE; |
| 1793 |
|
!!!next-input-character; |
| 1794 |
|
|
| 1795 |
|
delete $self->{current_token}->{correct}; |
| 1796 |
|
!!!emit ($self->{current_token}); # DOCTYPE |
| 1797 |
|
|
| 1798 |
|
redo A; |
| 1799 |
} elsif ($self->{next_input_character} == -1) { |
} elsif ($self->{next_input_character} == -1) { |
| 1800 |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
| 1801 |
|
|
| 1818 |
$self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE; |
$self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE; |
| 1819 |
!!!next-input-character; |
!!!next-input-character; |
| 1820 |
redo A; |
redo A; |
| 1821 |
|
} elsif ($self->{next_input_character} == 0x003E) { # > |
| 1822 |
|
!!!parse-error (type => 'unclosed PUBLIC literal'); |
| 1823 |
|
|
| 1824 |
|
$self->{state} = DATA_STATE; |
| 1825 |
|
!!!next-input-character; |
| 1826 |
|
|
| 1827 |
|
delete $self->{current_token}->{correct}; |
| 1828 |
|
!!!emit ($self->{current_token}); # DOCTYPE |
| 1829 |
|
|
| 1830 |
|
redo A; |
| 1831 |
} elsif ($self->{next_input_character} == -1) { |
} elsif ($self->{next_input_character} == -1) { |
| 1832 |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
| 1833 |
|
|
| 1907 |
die "$0: _get_next_token: unexpected case"; |
die "$0: _get_next_token: unexpected case"; |
| 1908 |
} # _get_next_token |
} # _get_next_token |
| 1909 |
|
|
| 1910 |
sub _tokenize_attempt_to_consume_an_entity ($$) { |
sub _tokenize_attempt_to_consume_an_entity ($$$) { |
| 1911 |
my ($self, $in_attr) = @_; |
my ($self, $in_attr, $additional) = @_; |
| 1912 |
|
|
| 1913 |
if ({ |
if ({ |
| 1914 |
0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF, |
0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF, |
| 1915 |
0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, & # 0x000D # CR |
0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, & # 0x000D # CR |
| 1916 |
|
$additional => 1, |
| 1917 |
}->{$self->{next_input_character}}) { |
}->{$self->{next_input_character}}) { |
| 1918 |
## Don't consume |
## Don't consume |
| 1919 |
## No error |
## No error |
| 2200 |
"-//NETSCAPE COMM. CORP.//DTD STRICT HTML//EN" => 1, |
"-//NETSCAPE COMM. CORP.//DTD STRICT HTML//EN" => 1, |
| 2201 |
"-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//EN" => 1, |
"-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//EN" => 1, |
| 2202 |
"-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//EN" => 1, |
"-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//EN" => 1, |
| 2203 |
|
"-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED RELAXED 1.0//EN" => 1, |
| 2204 |
|
"-//SOFTQUAD SOFTWARE//DTD HOTMETAL PRO 6.0::19990601::EXTENSIONS TO HTML 4.0//EN" => 1, |
| 2205 |
|
"-//SOFTQUAD//DTD HOTMETAL PRO 4.0::19971010::EXTENSIONS TO HTML 4.0//EN" => 1, |
| 2206 |
"-//SPYGLASS//DTD HTML 2.0 EXTENDED//EN" => 1, |
"-//SPYGLASS//DTD HTML 2.0 EXTENDED//EN" => 1, |
| 2207 |
"-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//EN" => 1, |
"-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//EN" => 1, |
| 2208 |
"-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//EN" => 1, |
"-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//EN" => 1, |
| 3012 |
} elsif ($token->{attributes}->{content}) { |
} elsif ($token->{attributes}->{content}) { |
| 3013 |
## ISSUE: Algorithm name in the spec was incorrect so that not linked to the definition. |
## ISSUE: Algorithm name in the spec was incorrect so that not linked to the definition. |
| 3014 |
if ($token->{attributes}->{content}->{value} |
if ($token->{attributes}->{content}->{value} |
| 3015 |
=~ /\A[^;]*;[\x09-\x0D\x20]*charset[\x09-\x0D\x20]*= |
=~ /\A[^;]*;[\x09-\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt] |
| 3016 |
|
[\x09-\x0D\x20]*= |
| 3017 |
[\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'| |
[\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'| |
| 3018 |
([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) { |
([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) { |
| 3019 |
$self->{change_encoding} |
$self->{change_encoding} |
| 3020 |
->($self, defined $1 ? $1 : defined $2 ? $2 : $3); |
->($self, defined $1 ? $1 : defined $2 ? $2 : $3); |
| 3021 |
|
$meta_el->[0]->get_attribute_node_ns (undef, 'content') |
| 3022 |
|
->set_user_data (manakai_has_reference => |
| 3023 |
|
$token->{attributes}->{content} |
| 3024 |
|
->{has_reference}); |
| 3025 |
} |
} |
| 3026 |
} |
} |
| 3027 |
} else { |
} else { |
| 3031 |
$token->{attributes}->{charset} |
$token->{attributes}->{charset} |
| 3032 |
->{has_reference}); |
->{has_reference}); |
| 3033 |
} |
} |
| 3034 |
|
if ($token->{attributes}->{content}) { |
| 3035 |
|
$meta_el->[0]->get_attribute_node_ns (undef, 'content') |
| 3036 |
|
->set_user_data (manakai_has_reference => |
| 3037 |
|
$token->{attributes}->{content} |
| 3038 |
|
->{has_reference}); |
| 3039 |
|
} |
| 3040 |
} |
} |
| 3041 |
|
|
| 3042 |
pop @{$self->{open_elements}} |
pop @{$self->{open_elements}} |
| 4623 |
} elsif ($token->{attributes}->{content}) { |
} elsif ($token->{attributes}->{content}) { |
| 4624 |
## ISSUE: Algorithm name in the spec was incorrect so that not linked to the definition. |
## ISSUE: Algorithm name in the spec was incorrect so that not linked to the definition. |
| 4625 |
if ($token->{attributes}->{content}->{value} |
if ($token->{attributes}->{content}->{value} |
| 4626 |
=~ /\A[^;]*;[\x09-\x0D\x20]*charset[\x09-\x0D\x20]*= |
=~ /\A[^;]*;[\x09-\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt] |
| 4627 |
|
[\x09-\x0D\x20]*= |
| 4628 |
[\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'| |
[\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'| |
| 4629 |
([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) { |
([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) { |
| 4630 |
$self->{change_encoding} |
$self->{change_encoding} |
| 4631 |
->($self, defined $1 ? $1 : defined $2 ? $2 : $3); |
->($self, defined $1 ? $1 : defined $2 ? $2 : $3); |
| 4632 |
|
$meta_el->[0]->get_attribute_node_ns (undef, 'content') |
| 4633 |
|
->set_user_data (manakai_has_reference => |
| 4634 |
|
$token->{attributes}->{content} |
| 4635 |
|
->{has_reference}); |
| 4636 |
} |
} |
| 4637 |
} |
} |
| 4638 |
} else { |
} else { |
| 4642 |
$token->{attributes}->{charset} |
$token->{attributes}->{charset} |
| 4643 |
->{has_reference}); |
->{has_reference}); |
| 4644 |
} |
} |
| 4645 |
|
if ($token->{attributes}->{content}) { |
| 4646 |
|
$meta_el->[0]->get_attribute_node_ns (undef, 'content') |
| 4647 |
|
->set_user_data (manakai_has_reference => |
| 4648 |
|
$token->{attributes}->{content} |
| 4649 |
|
->{has_reference}); |
| 4650 |
|
} |
| 4651 |
} |
} |
| 4652 |
|
|
| 4653 |
!!!next-token; |
!!!next-token; |
| 5547 |
$p->_initialize_tree_constructor; |
$p->_initialize_tree_constructor; |
| 5548 |
|
|
| 5549 |
## Step 2 |
## Step 2 |
| 5550 |
my $node_ln = $node->local_name; |
my $node_ln = $node->manakai_local_name; |
| 5551 |
$p->{content_model} = { |
$p->{content_model} = { |
| 5552 |
title => RCDATA_CONTENT_MODEL, |
title => RCDATA_CONTENT_MODEL, |
| 5553 |
textarea => RCDATA_CONTENT_MODEL, |
textarea => RCDATA_CONTENT_MODEL, |
| 5587 |
if ($anode->node_type == 1) { |
if ($anode->node_type == 1) { |
| 5588 |
my $nsuri = $anode->namespace_uri; |
my $nsuri = $anode->namespace_uri; |
| 5589 |
if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') { |
if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') { |
| 5590 |
if ($anode->local_name eq 'form') { ## TODO: case? |
if ($anode->manakai_local_name eq 'form') { |
| 5591 |
$p->{form_element} = $anode; |
$p->{form_element} = $anode; |
| 5592 |
last AN; |
last AN; |
| 5593 |
} |
} |