| 66 |
} |
} |
| 67 |
|
|
| 68 |
## NOTE: Used in "generate implied end tags" algorithm. |
## NOTE: Used in "generate implied end tags" algorithm. |
| 69 |
## NOTE: There is code where a modified version of END_TAG_OPTIONAL_EL |
## NOTE: There is code where a modified version of |
| 70 |
## is used in "generate implied end tags" implementation (search for the |
## END_TAG_OPTIONAL_EL is used in "generate implied end tags" |
| 71 |
## function name). |
## implementation (search for the algorithm name). |
| 72 |
sub END_TAG_OPTIONAL_EL () { |
sub END_TAG_OPTIONAL_EL () { |
| 73 |
DD_EL | |
DD_EL | |
| 74 |
DT_EL | |
DT_EL | |
| 75 |
LI_EL | |
LI_EL | |
| 76 |
|
OPTION_EL | |
| 77 |
|
OPTGROUP_EL | |
| 78 |
P_EL | |
P_EL | |
| 79 |
RUBY_COMPONENT_EL |
RUBY_COMPONENT_EL |
| 80 |
} |
} |
| 86 |
LI_EL | |
LI_EL | |
| 87 |
P_EL | |
P_EL | |
| 88 |
|
|
| 89 |
|
## ISSUE: option, optgroup, rt, rp? |
| 90 |
|
|
| 91 |
BODY_EL | |
BODY_EL | |
| 92 |
HTML_EL | |
HTML_EL | |
| 93 |
TABLE_CELL_EL | |
TABLE_CELL_EL | |
| 132 |
FORM_EL | |
FORM_EL | |
| 133 |
FRAMESET_EL | |
FRAMESET_EL | |
| 134 |
HEADING_EL | |
HEADING_EL | |
|
OPTION_EL | |
|
|
OPTGROUP_EL | |
|
| 135 |
SELECT_EL | |
SELECT_EL | |
| 136 |
TABLE_ROW_EL | |
TABLE_ROW_EL | |
| 137 |
TABLE_ROW_GROUP_EL | |
TABLE_ROW_GROUP_EL | |
| 143 |
address => ADDRESS_EL, |
address => ADDRESS_EL, |
| 144 |
applet => MISC_SCOPING_EL, |
applet => MISC_SCOPING_EL, |
| 145 |
area => MISC_SPECIAL_EL, |
area => MISC_SPECIAL_EL, |
| 146 |
|
article => MISC_SPECIAL_EL, |
| 147 |
|
aside => MISC_SPECIAL_EL, |
| 148 |
b => FORMATTING_EL, |
b => FORMATTING_EL, |
| 149 |
base => MISC_SPECIAL_EL, |
base => MISC_SPECIAL_EL, |
| 150 |
basefont => MISC_SPECIAL_EL, |
basefont => MISC_SPECIAL_EL, |
| 158 |
center => MISC_SPECIAL_EL, |
center => MISC_SPECIAL_EL, |
| 159 |
col => MISC_SPECIAL_EL, |
col => MISC_SPECIAL_EL, |
| 160 |
colgroup => MISC_SPECIAL_EL, |
colgroup => MISC_SPECIAL_EL, |
| 161 |
|
command => MISC_SPECIAL_EL, |
| 162 |
|
datagrid => MISC_SPECIAL_EL, |
| 163 |
dd => DD_EL, |
dd => DD_EL, |
| 164 |
|
details => MISC_SPECIAL_EL, |
| 165 |
|
dialog => MISC_SPECIAL_EL, |
| 166 |
dir => MISC_SPECIAL_EL, |
dir => MISC_SPECIAL_EL, |
| 167 |
div => DIV_EL, |
div => DIV_EL, |
| 168 |
dl => MISC_SPECIAL_EL, |
dl => MISC_SPECIAL_EL, |
| 169 |
dt => DT_EL, |
dt => DT_EL, |
| 170 |
em => FORMATTING_EL, |
em => FORMATTING_EL, |
| 171 |
embed => MISC_SPECIAL_EL, |
embed => MISC_SPECIAL_EL, |
| 172 |
|
eventsource => MISC_SPECIAL_EL, |
| 173 |
fieldset => MISC_SPECIAL_EL, |
fieldset => MISC_SPECIAL_EL, |
| 174 |
|
figure => MISC_SPECIAL_EL, |
| 175 |
font => FORMATTING_EL, |
font => FORMATTING_EL, |
| 176 |
|
footer => MISC_SPECIAL_EL, |
| 177 |
form => FORM_EL, |
form => FORM_EL, |
| 178 |
frame => MISC_SPECIAL_EL, |
frame => MISC_SPECIAL_EL, |
| 179 |
frameset => FRAMESET_EL, |
frameset => FRAMESET_EL, |
| 184 |
h5 => HEADING_EL, |
h5 => HEADING_EL, |
| 185 |
h6 => HEADING_EL, |
h6 => HEADING_EL, |
| 186 |
head => MISC_SPECIAL_EL, |
head => MISC_SPECIAL_EL, |
| 187 |
|
header => MISC_SPECIAL_EL, |
| 188 |
hr => MISC_SPECIAL_EL, |
hr => MISC_SPECIAL_EL, |
| 189 |
html => HTML_EL, |
html => HTML_EL, |
| 190 |
i => FORMATTING_EL, |
i => FORMATTING_EL, |
| 191 |
iframe => MISC_SPECIAL_EL, |
iframe => MISC_SPECIAL_EL, |
| 192 |
img => MISC_SPECIAL_EL, |
img => MISC_SPECIAL_EL, |
| 193 |
|
#image => MISC_SPECIAL_EL, ## NOTE: Commented out in the spec. |
| 194 |
input => MISC_SPECIAL_EL, |
input => MISC_SPECIAL_EL, |
| 195 |
isindex => MISC_SPECIAL_EL, |
isindex => MISC_SPECIAL_EL, |
| 196 |
li => LI_EL, |
li => LI_EL, |
| 199 |
marquee => MISC_SCOPING_EL, |
marquee => MISC_SCOPING_EL, |
| 200 |
menu => MISC_SPECIAL_EL, |
menu => MISC_SPECIAL_EL, |
| 201 |
meta => MISC_SPECIAL_EL, |
meta => MISC_SPECIAL_EL, |
| 202 |
|
nav => MISC_SPECIAL_EL, |
| 203 |
nobr => NOBR_EL | FORMATTING_EL, |
nobr => NOBR_EL | FORMATTING_EL, |
| 204 |
noembed => MISC_SPECIAL_EL, |
noembed => MISC_SPECIAL_EL, |
| 205 |
noframes => MISC_SPECIAL_EL, |
noframes => MISC_SPECIAL_EL, |
| 218 |
s => FORMATTING_EL, |
s => FORMATTING_EL, |
| 219 |
script => MISC_SPECIAL_EL, |
script => MISC_SPECIAL_EL, |
| 220 |
select => SELECT_EL, |
select => SELECT_EL, |
| 221 |
|
section => MISC_SPECIAL_EL, |
| 222 |
small => FORMATTING_EL, |
small => FORMATTING_EL, |
| 223 |
spacer => MISC_SPECIAL_EL, |
spacer => MISC_SPECIAL_EL, |
| 224 |
strike => FORMATTING_EL, |
strike => FORMATTING_EL, |
| 249 |
mtext => FOREIGN_FLOW_CONTENT_EL, |
mtext => FOREIGN_FLOW_CONTENT_EL, |
| 250 |
}, |
}, |
| 251 |
$SVG_NS => { |
$SVG_NS => { |
| 252 |
foreignObject => FOREIGN_FLOW_CONTENT_EL, |
foreignObject => FOREIGN_FLOW_CONTENT_EL | MISC_SCOPING_EL, |
| 253 |
desc => FOREIGN_FLOW_CONTENT_EL, |
desc => FOREIGN_FLOW_CONTENT_EL, |
| 254 |
title => FOREIGN_FLOW_CONTENT_EL, |
title => FOREIGN_FLOW_CONTENT_EL, |
| 255 |
}, |
}, |
| 338 |
|
|
| 339 |
## ISSUE: xmlns:xlink="non-xlink-ns" is not an error. |
## ISSUE: xmlns:xlink="non-xlink-ns" is not an error. |
| 340 |
|
|
| 341 |
my $c1_entity_char = { |
my $charref_map = { |
| 342 |
|
0x0D => 0x000A, |
| 343 |
0x80 => 0x20AC, |
0x80 => 0x20AC, |
| 344 |
0x81 => 0xFFFD, |
0x81 => 0xFFFD, |
| 345 |
0x82 => 0x201A, |
0x82 => 0x201A, |
| 372 |
0x9D => 0xFFFD, |
0x9D => 0xFFFD, |
| 373 |
0x9E => 0x017E, |
0x9E => 0x017E, |
| 374 |
0x9F => 0x0178, |
0x9F => 0x0178, |
| 375 |
}; # $c1_entity_char |
}; # $charref_map |
| 376 |
|
$charref_map->{$_} = 0xFFFD |
| 377 |
|
for 0x0000..0x0008, 0x000B, 0x000E..0x001F, 0x007F, |
| 378 |
|
0xD800..0xDFFF, 0xFDD0..0xFDDF, ## ISSUE: 0xFDEF |
| 379 |
|
0xFFFE, 0xFFFF, 0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF, |
| 380 |
|
0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE, |
| 381 |
|
0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF, |
| 382 |
|
0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, 0xDFFFF, 0xEFFFE, |
| 383 |
|
0xEFFFF, 0xFFFFE, 0xFFFFF, 0x10FFFE, 0x10FFFF; |
| 384 |
|
|
| 385 |
|
## TODO: Invoke the reset algorithm when a resettable element is |
| 386 |
|
## created (cf. HTML5 revision 2259). |
| 387 |
|
|
| 388 |
sub parse_byte_string ($$$$;$) { |
sub parse_byte_string ($$$$;$) { |
| 389 |
my $self = shift; |
my $self = shift; |
| 428 |
## TODO: Is this ok? Transfer protocol's parameter should be |
## TODO: Is this ok? Transfer protocol's parameter should be |
| 429 |
## interpreted in its semantics? |
## interpreted in its semantics? |
| 430 |
|
|
|
## ISSUE: Unsupported encoding is not ignored according to the spec. |
|
| 431 |
($char_stream, $e_status) = $charset->get_decode_handle |
($char_stream, $e_status) = $charset->get_decode_handle |
| 432 |
($byte_stream, allow_error_reporting => 1, |
($byte_stream, allow_error_reporting => 1, |
| 433 |
allow_fallback => 1); |
allow_fallback => 1); |
| 435 |
$self->{confident} = 1; |
$self->{confident} = 1; |
| 436 |
last SNIFFING; |
last SNIFFING; |
| 437 |
} else { |
} else { |
| 438 |
## TODO: unsupported error |
!!!parse-error (type => 'charset:not supported', |
| 439 |
|
layer => 'encode', |
| 440 |
|
line => 1, column => 1, |
| 441 |
|
value => $charset_name, |
| 442 |
|
level => $self->{level}->{uncertain}); |
| 443 |
} |
} |
| 444 |
} |
} |
| 445 |
|
|
| 488 |
if (defined $charset_name) { |
if (defined $charset_name) { |
| 489 |
$charset = Message::Charset::Info->get_by_html_name ($charset_name); |
$charset = Message::Charset::Info->get_by_html_name ($charset_name); |
| 490 |
|
|
|
## ISSUE: Unsupported encoding is not ignored according to the spec. |
|
| 491 |
require Whatpm::Charset::DecodeHandle; |
require Whatpm::Charset::DecodeHandle; |
| 492 |
$buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new |
$buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new |
| 493 |
($byte_stream); |
($byte_stream); |
| 918 |
## NOTE: "in foreign content" insertion mode is special; it is combined |
## NOTE: "in foreign content" insertion mode is special; it is combined |
| 919 |
## with the secondary insertion mode. In this parser, they are stored |
## with the secondary insertion mode. In this parser, they are stored |
| 920 |
## together in the bit-or'ed form. |
## together in the bit-or'ed form. |
| 921 |
|
sub IN_CDATA_RCDATA_IM () { 0b1000000000000 } |
| 922 |
|
## NOTE: "in CDATA/RCDATA" insertion mode is also special; it is |
| 923 |
|
## combined with the original insertion mode. In this parser, |
| 924 |
|
## they are stored together in the bit-or'ed form. |
| 925 |
|
|
| 926 |
## NOTE: "initial" and "before html" insertion modes have no constants. |
## NOTE: "initial" and "before html" insertion modes have no constants. |
| 927 |
|
|
| 999 |
## TODO: Polytheistic slash SHOULD NOT be used. (Applied only to atheists.) |
## TODO: Polytheistic slash SHOULD NOT be used. (Applied only to atheists.) |
| 1000 |
## (This requirement was dropped from HTML5 spec, unfortunately.) |
## (This requirement was dropped from HTML5 spec, unfortunately.) |
| 1001 |
|
|
| 1002 |
|
my $is_space = { |
| 1003 |
|
0x0009 => 1, # CHARACTER TABULATION (HT) |
| 1004 |
|
0x000A => 1, # LINE FEED (LF) |
| 1005 |
|
#0x000B => 0, # LINE TABULATION (VT) |
| 1006 |
|
0x000C => 1, # FORM FEED (FF) |
| 1007 |
|
#0x000D => 1, # CARRIAGE RETURN (CR) |
| 1008 |
|
0x0020 => 1, # SPACE (SP) |
| 1009 |
|
}; |
| 1010 |
|
|
| 1011 |
sub _get_next_token ($) { |
sub _get_next_token ($) { |
| 1012 |
my $self = shift; |
my $self = shift; |
| 1013 |
|
|
| 1378 |
redo A; |
redo A; |
| 1379 |
} |
} |
| 1380 |
} else { # after "<{tag-name}" |
} else { # after "<{tag-name}" |
| 1381 |
unless ({ |
unless ($is_space->{$self->{nc}} or |
| 1382 |
0x0009 => 1, # HT |
{ |
|
0x000A => 1, # LF |
|
|
0x000B => 1, # VT |
|
|
0x000C => 1, # FF |
|
|
0x0020 => 1, # SP |
|
| 1383 |
0x003E => 1, # > |
0x003E => 1, # > |
| 1384 |
0x002F => 1, # / |
0x002F => 1, # / |
| 1385 |
-1 => 1, # EOF |
-1 => 1, # EOF |
| 1406 |
} |
} |
| 1407 |
} |
} |
| 1408 |
} elsif ($self->{state} == TAG_NAME_STATE) { |
} elsif ($self->{state} == TAG_NAME_STATE) { |
| 1409 |
if ($self->{nc} == 0x0009 or # HT |
if ($is_space->{$self->{nc}}) { |
|
$self->{nc} == 0x000A or # LF |
|
|
$self->{nc} == 0x000B or # VT |
|
|
$self->{nc} == 0x000C or # FF |
|
|
$self->{nc} == 0x0020) { # SP |
|
| 1410 |
!!!cp (34); |
!!!cp (34); |
| 1411 |
$self->{state} = BEFORE_ATTRIBUTE_NAME_STATE; |
$self->{state} = BEFORE_ATTRIBUTE_NAME_STATE; |
| 1412 |
!!!next-input-character; |
!!!next-input-character; |
| 1478 |
redo A; |
redo A; |
| 1479 |
} |
} |
| 1480 |
} elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) { |
} elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) { |
| 1481 |
if ($self->{nc} == 0x0009 or # HT |
if ($is_space->{$self->{nc}}) { |
|
$self->{nc} == 0x000A or # LF |
|
|
$self->{nc} == 0x000B or # VT |
|
|
$self->{nc} == 0x000C or # FF |
|
|
$self->{nc} == 0x0020) { # SP |
|
| 1482 |
!!!cp (45); |
!!!cp (45); |
| 1483 |
## Stay in the state |
## Stay in the state |
| 1484 |
!!!next-input-character; |
!!!next-input-character; |
| 1574 |
} |
} |
| 1575 |
}; # $before_leave |
}; # $before_leave |
| 1576 |
|
|
| 1577 |
if ($self->{nc} == 0x0009 or # HT |
if ($is_space->{$self->{nc}}) { |
|
$self->{nc} == 0x000A or # LF |
|
|
$self->{nc} == 0x000B or # VT |
|
|
$self->{nc} == 0x000C or # FF |
|
|
$self->{nc} == 0x0020) { # SP |
|
| 1578 |
!!!cp (59); |
!!!cp (59); |
| 1579 |
$before_leave->(); |
$before_leave->(); |
| 1580 |
$self->{state} = AFTER_ATTRIBUTE_NAME_STATE; |
$self->{state} = AFTER_ATTRIBUTE_NAME_STATE; |
| 1657 |
redo A; |
redo A; |
| 1658 |
} |
} |
| 1659 |
} elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) { |
} elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) { |
| 1660 |
if ($self->{nc} == 0x0009 or # HT |
if ($is_space->{$self->{nc}}) { |
|
$self->{nc} == 0x000A or # LF |
|
|
$self->{nc} == 0x000B or # VT |
|
|
$self->{nc} == 0x000C or # FF |
|
|
$self->{nc} == 0x0020) { # SP |
|
| 1661 |
!!!cp (71); |
!!!cp (71); |
| 1662 |
## Stay in the state |
## Stay in the state |
| 1663 |
!!!next-input-character; |
!!!next-input-character; |
| 1744 |
redo A; |
redo A; |
| 1745 |
} |
} |
| 1746 |
} elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) { |
} elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) { |
| 1747 |
if ($self->{nc} == 0x0009 or # HT |
if ($is_space->{$self->{nc}}) { |
|
$self->{nc} == 0x000A or # LF |
|
|
$self->{nc} == 0x000B or # VT |
|
|
$self->{nc} == 0x000C or # FF |
|
|
$self->{nc} == 0x0020) { # SP |
|
| 1748 |
!!!cp (83); |
!!!cp (83); |
| 1749 |
## Stay in the state |
## Stay in the state |
| 1750 |
!!!next-input-character; |
!!!next-input-character; |
| 1925 |
redo A; |
redo A; |
| 1926 |
} |
} |
| 1927 |
} elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) { |
} elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) { |
| 1928 |
if ($self->{nc} == 0x0009 or # HT |
if ($is_space->{$self->{nc}}) { |
|
$self->{nc} == 0x000A or # LF |
|
|
$self->{nc} == 0x000B or # HT |
|
|
$self->{nc} == 0x000C or # FF |
|
|
$self->{nc} == 0x0020) { # SP |
|
| 1929 |
!!!cp (107); |
!!!cp (107); |
| 1930 |
$self->{state} = BEFORE_ATTRIBUTE_NAME_STATE; |
$self->{state} = BEFORE_ATTRIBUTE_NAME_STATE; |
| 1931 |
!!!next-input-character; |
!!!next-input-character; |
| 2007 |
redo A; |
redo A; |
| 2008 |
} |
} |
| 2009 |
} elsif ($self->{state} == AFTER_ATTRIBUTE_VALUE_QUOTED_STATE) { |
} elsif ($self->{state} == AFTER_ATTRIBUTE_VALUE_QUOTED_STATE) { |
| 2010 |
if ($self->{nc} == 0x0009 or # HT |
if ($is_space->{$self->{nc}}) { |
|
$self->{nc} == 0x000A or # LF |
|
|
$self->{nc} == 0x000B or # VT |
|
|
$self->{nc} == 0x000C or # FF |
|
|
$self->{nc} == 0x0020) { # SP |
|
| 2011 |
!!!cp (118); |
!!!cp (118); |
| 2012 |
$self->{state} = BEFORE_ATTRIBUTE_NAME_STATE; |
$self->{state} = BEFORE_ATTRIBUTE_NAME_STATE; |
| 2013 |
!!!next-input-character; |
!!!next-input-character; |
| 2448 |
redo A; |
redo A; |
| 2449 |
} |
} |
| 2450 |
} elsif ($self->{state} == DOCTYPE_STATE) { |
} elsif ($self->{state} == DOCTYPE_STATE) { |
| 2451 |
if ($self->{nc} == 0x0009 or # HT |
if ($is_space->{$self->{nc}}) { |
|
$self->{nc} == 0x000A or # LF |
|
|
$self->{nc} == 0x000B or # VT |
|
|
$self->{nc} == 0x000C or # FF |
|
|
$self->{nc} == 0x0020) { # SP |
|
| 2452 |
!!!cp (155); |
!!!cp (155); |
| 2453 |
$self->{state} = BEFORE_DOCTYPE_NAME_STATE; |
$self->{state} = BEFORE_DOCTYPE_NAME_STATE; |
| 2454 |
!!!next-input-character; |
!!!next-input-character; |
| 2461 |
redo A; |
redo A; |
| 2462 |
} |
} |
| 2463 |
} elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) { |
} elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) { |
| 2464 |
if ($self->{nc} == 0x0009 or # HT |
if ($is_space->{$self->{nc}}) { |
|
$self->{nc} == 0x000A or # LF |
|
|
$self->{nc} == 0x000B or # VT |
|
|
$self->{nc} == 0x000C or # FF |
|
|
$self->{nc} == 0x0020) { # SP |
|
| 2465 |
!!!cp (157); |
!!!cp (157); |
| 2466 |
## Stay in the state |
## Stay in the state |
| 2467 |
!!!next-input-character; |
!!!next-input-character; |
| 2488 |
!!!cp (160); |
!!!cp (160); |
| 2489 |
$self->{ct}->{name} = chr $self->{nc}; |
$self->{ct}->{name} = chr $self->{nc}; |
| 2490 |
delete $self->{ct}->{quirks}; |
delete $self->{ct}->{quirks}; |
|
## ISSUE: "Set the token's name name to the" in the spec |
|
| 2491 |
$self->{state} = DOCTYPE_NAME_STATE; |
$self->{state} = DOCTYPE_NAME_STATE; |
| 2492 |
!!!next-input-character; |
!!!next-input-character; |
| 2493 |
redo A; |
redo A; |
| 2494 |
} |
} |
| 2495 |
} elsif ($self->{state} == DOCTYPE_NAME_STATE) { |
} elsif ($self->{state} == DOCTYPE_NAME_STATE) { |
| 2496 |
## ISSUE: Redundant "First," in the spec. |
## ISSUE: Redundant "First," in the spec. |
| 2497 |
if ($self->{nc} == 0x0009 or # HT |
if ($is_space->{$self->{nc}}) { |
|
$self->{nc} == 0x000A or # LF |
|
|
$self->{nc} == 0x000B or # VT |
|
|
$self->{nc} == 0x000C or # FF |
|
|
$self->{nc} == 0x0020) { # SP |
|
| 2498 |
!!!cp (161); |
!!!cp (161); |
| 2499 |
$self->{state} = AFTER_DOCTYPE_NAME_STATE; |
$self->{state} = AFTER_DOCTYPE_NAME_STATE; |
| 2500 |
!!!next-input-character; |
!!!next-input-character; |
| 2526 |
redo A; |
redo A; |
| 2527 |
} |
} |
| 2528 |
} elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) { |
} elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) { |
| 2529 |
if ($self->{nc} == 0x0009 or # HT |
if ($is_space->{$self->{nc}}) { |
|
$self->{nc} == 0x000A or # LF |
|
|
$self->{nc} == 0x000B or # VT |
|
|
$self->{nc} == 0x000C or # FF |
|
|
$self->{nc} == 0x0020) { # SP |
|
| 2530 |
!!!cp (165); |
!!!cp (165); |
| 2531 |
## Stay in the state |
## Stay in the state |
| 2532 |
!!!next-input-character; |
!!!next-input-character; |
| 2649 |
redo A; |
redo A; |
| 2650 |
} |
} |
| 2651 |
} elsif ($self->{state} == BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE) { |
} elsif ($self->{state} == BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE) { |
| 2652 |
if ({ |
if ($is_space->{$self->{nc}}) { |
|
0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1, |
|
|
#0x000D => 1, # HT, LF, VT, FF, SP, CR |
|
|
}->{$self->{nc}}) { |
|
| 2653 |
!!!cp (181); |
!!!cp (181); |
| 2654 |
## Stay in the state |
## Stay in the state |
| 2655 |
!!!next-input-character; |
!!!next-input-character; |
| 2776 |
redo A; |
redo A; |
| 2777 |
} |
} |
| 2778 |
} elsif ($self->{state} == AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE) { |
} elsif ($self->{state} == AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE) { |
| 2779 |
if ({ |
if ($is_space->{$self->{nc}}) { |
|
0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1, |
|
|
#0x000D => 1, # HT, LF, VT, FF, SP, CR |
|
|
}->{$self->{nc}}) { |
|
| 2780 |
!!!cp (195); |
!!!cp (195); |
| 2781 |
## Stay in the state |
## Stay in the state |
| 2782 |
!!!next-input-character; |
!!!next-input-character; |
| 2822 |
redo A; |
redo A; |
| 2823 |
} |
} |
| 2824 |
} elsif ($self->{state} == BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE) { |
} elsif ($self->{state} == BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE) { |
| 2825 |
if ({ |
if ($is_space->{$self->{nc}}) { |
|
0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1, |
|
|
#0x000D => 1, # HT, LF, VT, FF, SP, CR |
|
|
}->{$self->{nc}}) { |
|
| 2826 |
!!!cp (201); |
!!!cp (201); |
| 2827 |
## Stay in the state |
## Stay in the state |
| 2828 |
!!!next-input-character; |
!!!next-input-character; |
| 2948 |
redo A; |
redo A; |
| 2949 |
} |
} |
| 2950 |
} elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) { |
} elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) { |
| 2951 |
if ({ |
if ($is_space->{$self->{nc}}) { |
|
0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1, |
|
|
#0x000D => 1, # HT, LF, VT, FF, SP, CR |
|
|
}->{$self->{nc}}) { |
|
| 2952 |
!!!cp (215); |
!!!cp (215); |
| 2953 |
## Stay in the state |
## Stay in the state |
| 2954 |
!!!next-input-character; |
!!!next-input-character; |
| 2991 |
redo A; |
redo A; |
| 2992 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 2993 |
!!!cp (220); |
!!!cp (220); |
|
!!!parse-error (type => 'unclosed DOCTYPE'); |
|
| 2994 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 2995 |
## reconsume |
## reconsume |
| 2996 |
|
|
| 3079 |
redo A; |
redo A; |
| 3080 |
} |
} |
| 3081 |
} elsif ($self->{state} == ENTITY_STATE) { |
} elsif ($self->{state} == ENTITY_STATE) { |
| 3082 |
if ({ |
if ($is_space->{$self->{nc}} or |
| 3083 |
0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF, |
{ |
| 3084 |
0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, & |
0x003C => 1, 0x0026 => 1, -1 => 1, # <, & |
| 3085 |
$self->{entity_add} => 1, |
$self->{entity_add} => 1, |
| 3086 |
}->{$self->{nc}}) { |
}->{$self->{nc}}) { |
| 3087 |
!!!cp (1001); |
!!!cp (1001); |
| 3088 |
## Don't consume |
## Don't consume |
| 3089 |
## No error |
## No error |
| 3202 |
my $code = $self->{s_kwd}; |
my $code = $self->{s_kwd}; |
| 3203 |
my $l = $self->{line_prev}; |
my $l = $self->{line_prev}; |
| 3204 |
my $c = $self->{column_prev}; |
my $c = $self->{column_prev}; |
| 3205 |
if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) { |
if ($charref_map->{$code}) { |
| 3206 |
!!!cp (1015); |
!!!cp (1015); |
| 3207 |
!!!parse-error (type => 'invalid character reference', |
!!!parse-error (type => 'invalid character reference', |
| 3208 |
text => (sprintf 'U+%04X', $code), |
text => (sprintf 'U+%04X', $code), |
| 3209 |
line => $l, column => $c); |
line => $l, column => $c); |
| 3210 |
$code = 0xFFFD; |
$code = $charref_map->{$code}; |
| 3211 |
} elsif ($code > 0x10FFFF) { |
} elsif ($code > 0x10FFFF) { |
| 3212 |
!!!cp (1016); |
!!!cp (1016); |
| 3213 |
!!!parse-error (type => 'invalid character reference', |
!!!parse-error (type => 'invalid character reference', |
| 3214 |
text => (sprintf 'U-%08X', $code), |
text => (sprintf 'U-%08X', $code), |
| 3215 |
line => $l, column => $c); |
line => $l, column => $c); |
| 3216 |
$code = 0xFFFD; |
$code = 0xFFFD; |
|
} elsif ($code == 0x000D) { |
|
|
!!!cp (1017); |
|
|
!!!parse-error (type => 'CR character reference', |
|
|
line => $l, column => $c); |
|
|
$code = 0x000A; |
|
|
} elsif (0x80 <= $code and $code <= 0x9F) { |
|
|
!!!cp (1018); |
|
|
!!!parse-error (type => 'C1 character reference', |
|
|
text => (sprintf 'U+%04X', $code), |
|
|
line => $l, column => $c); |
|
|
$code = $c1_entity_char->{$code}; |
|
| 3217 |
} |
} |
| 3218 |
|
|
| 3219 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
| 3310 |
my $code = $self->{s_kwd}; |
my $code = $self->{s_kwd}; |
| 3311 |
my $l = $self->{line_prev}; |
my $l = $self->{line_prev}; |
| 3312 |
my $c = $self->{column_prev}; |
my $c = $self->{column_prev}; |
| 3313 |
if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) { |
if ($charref_map->{$code}) { |
| 3314 |
!!!cp (1008); |
!!!cp (1008); |
| 3315 |
!!!parse-error (type => 'invalid character reference', |
!!!parse-error (type => 'invalid character reference', |
| 3316 |
text => (sprintf 'U+%04X', $code), |
text => (sprintf 'U+%04X', $code), |
| 3317 |
line => $l, column => $c); |
line => $l, column => $c); |
| 3318 |
$code = 0xFFFD; |
$code = $charref_map->{$code}; |
| 3319 |
} elsif ($code > 0x10FFFF) { |
} elsif ($code > 0x10FFFF) { |
| 3320 |
!!!cp (1009); |
!!!cp (1009); |
| 3321 |
!!!parse-error (type => 'invalid character reference', |
!!!parse-error (type => 'invalid character reference', |
| 3322 |
text => (sprintf 'U-%08X', $code), |
text => (sprintf 'U-%08X', $code), |
| 3323 |
line => $l, column => $c); |
line => $l, column => $c); |
| 3324 |
$code = 0xFFFD; |
$code = 0xFFFD; |
|
} elsif ($code == 0x000D) { |
|
|
!!!cp (1010); |
|
|
!!!parse-error (type => 'CR character reference', line => $l, column => $c); |
|
|
$code = 0x000A; |
|
|
} elsif (0x80 <= $code and $code <= 0x9F) { |
|
|
!!!cp (1011); |
|
|
!!!parse-error (type => 'C1 character reference', text => (sprintf 'U+%04X', $code), line => $l, column => $c); |
|
|
$code = $c1_entity_char->{$code}; |
|
| 3325 |
} |
} |
| 3326 |
|
|
| 3327 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
| 3470 |
## When an interactive UA renders the $self->{document} available |
## When an interactive UA renders the $self->{document} available |
| 3471 |
## to the user, or when it begins accepting user input, are |
## to the user, or when it begins accepting user input, are |
| 3472 |
## not defined. |
## not defined. |
|
|
|
|
## Append a character: collect it and all subsequent consecutive |
|
|
## characters and insert one Text node whose data is concatenation |
|
|
## of all those characters. # MUST |
|
| 3473 |
|
|
| 3474 |
!!!next-token; |
!!!next-token; |
| 3475 |
|
|
| 3476 |
undef $self->{form_element}; |
undef $self->{form_element}; |
| 3477 |
undef $self->{head_element}; |
undef $self->{head_element}; |
| 3478 |
|
undef $self->{head_element_inserted}; |
| 3479 |
$self->{open_elements} = []; |
$self->{open_elements} = []; |
| 3480 |
undef $self->{inner_html_node}; |
undef $self->{inner_html_node}; |
| 3481 |
|
|
| 3659 |
!!!ack-later; |
!!!ack-later; |
| 3660 |
return; |
return; |
| 3661 |
} elsif ($token->{type} == CHARACTER_TOKEN) { |
} elsif ($token->{type} == CHARACTER_TOKEN) { |
| 3662 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D |
if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) { |
| 3663 |
## Ignore the token |
## Ignore the token |
| 3664 |
|
|
| 3665 |
unless (length $token->{data}) { |
unless (length $token->{data}) { |
| 3716 |
!!!next-token; |
!!!next-token; |
| 3717 |
redo B; |
redo B; |
| 3718 |
} elsif ($token->{type} == CHARACTER_TOKEN) { |
} elsif ($token->{type} == CHARACTER_TOKEN) { |
| 3719 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D |
if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) { |
| 3720 |
## Ignore the token. |
## Ignore the token. |
| 3721 |
|
|
| 3722 |
unless (length $token->{data}) { |
unless (length $token->{data}) { |
| 3783 |
## NOTE: Reprocess the token. |
## NOTE: Reprocess the token. |
| 3784 |
!!!ack-later; |
!!!ack-later; |
| 3785 |
return; ## Go to the "before head" insertion mode. |
return; ## Go to the "before head" insertion mode. |
|
|
|
|
## ISSUE: There is an issue in the spec |
|
| 3786 |
} # B |
} # B |
| 3787 |
|
|
| 3788 |
die "$0: _tree_construction_root_element: This should never be reached"; |
die "$0: _tree_construction_root_element: This should never be reached"; |
| 3979 |
|
|
| 3980 |
## Step 1 |
## Step 1 |
| 3981 |
my $start_tag_name = $token->{tag_name}; |
my $start_tag_name = $token->{tag_name}; |
| 3982 |
my $el; |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token); |
|
!!!create-element ($el, $HTML_NS, $start_tag_name, $token->{attributes}, $token); |
|
| 3983 |
|
|
| 3984 |
## Step 2 |
## Step 2 |
|
$insert->($el); |
|
|
|
|
|
## Step 3 |
|
| 3985 |
$self->{content_model} = $content_model_flag; # CDATA or RCDATA |
$self->{content_model} = $content_model_flag; # CDATA or RCDATA |
| 3986 |
delete $self->{escape}; # MUST |
delete $self->{escape}; # MUST |
| 3987 |
|
|
| 3988 |
## Step 4 |
## Step 3, 4 |
| 3989 |
my $text = ''; |
$self->{insertion_mode} |= IN_CDATA_RCDATA_IM; |
|
!!!nack ('t40.1'); |
|
|
!!!next-token; |
|
|
while ($token->{type} == CHARACTER_TOKEN) { # or until stop tokenizing |
|
|
!!!cp ('t40'); |
|
|
$text .= $token->{data}; |
|
|
!!!next-token; |
|
|
} |
|
|
|
|
|
## Step 5 |
|
|
if (length $text) { |
|
|
!!!cp ('t41'); |
|
|
my $text = $self->{document}->create_text_node ($text); |
|
|
$el->append_child ($text); |
|
|
} |
|
| 3990 |
|
|
| 3991 |
## Step 6 |
!!!nack ('t40.1'); |
|
$self->{content_model} = PCDATA_CONTENT_MODEL; |
|
|
|
|
|
## Step 7 |
|
|
if ($token->{type} == END_TAG_TOKEN and |
|
|
$token->{tag_name} eq $start_tag_name) { |
|
|
!!!cp ('t42'); |
|
|
## Ignore the token |
|
|
} else { |
|
|
## NOTE: An end-of-file token. |
|
|
if ($content_model_flag == CDATA_CONTENT_MODEL) { |
|
|
!!!cp ('t43'); |
|
|
!!!parse-error (type => 'in CDATA:#eof', token => $token); |
|
|
} elsif ($content_model_flag == RCDATA_CONTENT_MODEL) { |
|
|
!!!cp ('t44'); |
|
|
!!!parse-error (type => 'in RCDATA:#eof', token => $token); |
|
|
} else { |
|
|
die "$0: $content_model_flag in parse_rcdata"; |
|
|
} |
|
|
} |
|
| 3992 |
!!!next-token; |
!!!next-token; |
| 3993 |
}; # $parse_rcdata |
}; # $parse_rcdata |
| 3994 |
|
|
| 3995 |
my $script_start_tag = sub () { |
my $script_start_tag = sub () { |
| 3996 |
|
## Step 1 |
| 3997 |
my $script_el; |
my $script_el; |
| 3998 |
!!!create-element ($script_el, $HTML_NS, 'script', $token->{attributes}, $token); |
!!!create-element ($script_el, $HTML_NS, 'script', $token->{attributes}, $token); |
| 3999 |
|
|
| 4000 |
|
## Step 2 |
| 4001 |
## TODO: mark as "parser-inserted" |
## TODO: mark as "parser-inserted" |
| 4002 |
|
|
| 4003 |
|
## Step 3 |
| 4004 |
|
## TODO: Mark as "already executed", if ... |
| 4005 |
|
|
| 4006 |
|
## Step 4 |
| 4007 |
|
$insert->($script_el); |
| 4008 |
|
|
| 4009 |
|
## ISSUE: $script_el is not put into the stack |
| 4010 |
|
push @{$self->{open_elements}}, [$script_el, $el_category->{script}]; |
| 4011 |
|
|
| 4012 |
|
## Step 5 |
| 4013 |
$self->{content_model} = CDATA_CONTENT_MODEL; |
$self->{content_model} = CDATA_CONTENT_MODEL; |
| 4014 |
delete $self->{escape}; # MUST |
delete $self->{escape}; # MUST |
|
|
|
|
my $text = ''; |
|
|
!!!nack ('t45.1'); |
|
|
!!!next-token; |
|
|
while ($token->{type} == CHARACTER_TOKEN) { |
|
|
!!!cp ('t45'); |
|
|
$text .= $token->{data}; |
|
|
!!!next-token; |
|
|
} # stop if non-character token or tokenizer stops tokenising |
|
|
if (length $text) { |
|
|
!!!cp ('t46'); |
|
|
$script_el->manakai_append_text ($text); |
|
|
} |
|
|
|
|
|
$self->{content_model} = PCDATA_CONTENT_MODEL; |
|
| 4015 |
|
|
| 4016 |
if ($token->{type} == END_TAG_TOKEN and |
## Step 6-7 |
| 4017 |
$token->{tag_name} eq 'script') { |
$self->{insertion_mode} |= IN_CDATA_RCDATA_IM; |
|
!!!cp ('t47'); |
|
|
## Ignore the token |
|
|
} else { |
|
|
!!!cp ('t48'); |
|
|
!!!parse-error (type => 'in CDATA:#eof', token => $token); |
|
|
## ISSUE: And ignore? |
|
|
## TODO: mark as "already executed" |
|
|
} |
|
|
|
|
|
if (defined $self->{inner_html_node}) { |
|
|
!!!cp ('t49'); |
|
|
## TODO: mark as "already executed" |
|
|
} else { |
|
|
!!!cp ('t50'); |
|
|
## TODO: $old_insertion_point = current insertion point |
|
|
## TODO: insertion point = just before the next input character |
|
| 4018 |
|
|
| 4019 |
$insert->($script_el); |
!!!nack ('t40.2'); |
|
|
|
|
## TODO: insertion point = $old_insertion_point (might be "undefined") |
|
|
|
|
|
## TODO: if there is a script that will execute as soon as the parser resume, then... |
|
|
} |
|
|
|
|
| 4020 |
!!!next-token; |
!!!next-token; |
| 4021 |
}; # $script_start_tag |
}; # $script_start_tag |
| 4022 |
|
|
| 4023 |
## NOTE: $open_tables->[-1]->[0] is the "current table" element node. |
## NOTE: $open_tables->[-1]->[0] is the "current table" element node. |
| 4024 |
## NOTE: $open_tables->[-1]->[1] is the "tainted" flag. |
## NOTE: $open_tables->[-1]->[1] is the "tainted" flag. |
| 4025 |
|
## NOTE: $open_tables->[-1]->[2] is set false when non-Text node inserted. |
| 4026 |
my $open_tables = [[$self->{open_elements}->[0]->[0]]]; |
my $open_tables = [[$self->{open_elements}->[0]->[0]]]; |
| 4027 |
|
|
| 4028 |
my $formatting_end_tag = sub { |
my $formatting_end_tag = sub { |
| 4107 |
!!!cp ('t59'); |
!!!cp ('t59'); |
| 4108 |
$furthest_block = $node; |
$furthest_block = $node; |
| 4109 |
$furthest_block_i_in_open = $_; |
$furthest_block_i_in_open = $_; |
| 4110 |
|
## NOTE: The topmost (eldest) node. |
| 4111 |
} elsif ($node->[0] eq $formatting_element->[0]) { |
} elsif ($node->[0] eq $formatting_element->[0]) { |
| 4112 |
!!!cp ('t60'); |
!!!cp ('t60'); |
| 4113 |
last OE; |
last OE; |
| 4254 |
$i = $_; |
$i = $_; |
| 4255 |
} |
} |
| 4256 |
} # OE |
} # OE |
| 4257 |
splice @{$self->{open_elements}}, $i + 1, 1, $clone; |
splice @{$self->{open_elements}}, $i + 1, 0, $clone; |
| 4258 |
|
|
| 4259 |
## Step 14 |
## Step 14 |
| 4260 |
redo FET; |
redo FET; |
| 4297 |
} |
} |
| 4298 |
}; # $insert_to_foster |
}; # $insert_to_foster |
| 4299 |
|
|
| 4300 |
|
## NOTE: Insert a character (MUST): When a character is inserted, if |
| 4301 |
|
## the last node that was inserted by the parser is a Text node and |
| 4302 |
|
## the character has to be inserted after that node, then the |
| 4303 |
|
## character is appended to the Text node. However, if any other |
| 4304 |
|
## node is inserted by the parser, then a new Text node is created |
| 4305 |
|
## and the character is appended as that Text node. If I'm not |
| 4306 |
|
## wrong, for a parser with scripting disabled, there are only two |
| 4307 |
|
## cases where this occurs. One is the case where an element node |
| 4308 |
|
## is inserted to the |head| element. This is covered by using the |
| 4309 |
|
## |$self->{head_element_inserted}| flag. Another is the case where |
| 4310 |
|
## an element or comment is inserted into the |table| subtree while |
| 4311 |
|
## foster parenting happens. This is covered by using the [2] flag |
| 4312 |
|
## of the |$open_tables| structure. All other cases are handled |
| 4313 |
|
## simply by calling |manakai_append_text| method. |
| 4314 |
|
|
| 4315 |
|
## TODO: |<body><script>document.write("a<br>"); |
| 4316 |
|
## document.body.removeChild (document.body.lastChild); |
| 4317 |
|
## document.write ("b")</script>| |
| 4318 |
|
|
| 4319 |
B: while (1) { |
B: while (1) { |
| 4320 |
if ($token->{type} == DOCTYPE_TOKEN) { |
if ($token->{type} == DOCTYPE_TOKEN) { |
| 4321 |
!!!cp ('t73'); |
!!!cp ('t73'); |
| 4363 |
} else { |
} else { |
| 4364 |
!!!cp ('t87'); |
!!!cp ('t87'); |
| 4365 |
$self->{open_elements}->[-1]->[0]->append_child ($comment); |
$self->{open_elements}->[-1]->[0]->append_child ($comment); |
| 4366 |
|
$open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted |
| 4367 |
} |
} |
| 4368 |
!!!next-token; |
!!!next-token; |
| 4369 |
next B; |
next B; |
| 4370 |
|
} elsif ($self->{insertion_mode} & IN_CDATA_RCDATA_IM) { |
| 4371 |
|
if ($token->{type} == CHARACTER_TOKEN) { |
| 4372 |
|
$token->{data} =~ s/^\x0A// if $self->{ignore_newline}; |
| 4373 |
|
delete $self->{ignore_newline}; |
| 4374 |
|
|
| 4375 |
|
if (length $token->{data}) { |
| 4376 |
|
!!!cp ('t43'); |
| 4377 |
|
$self->{open_elements}->[-1]->[0]->manakai_append_text |
| 4378 |
|
($token->{data}); |
| 4379 |
|
} else { |
| 4380 |
|
!!!cp ('t43.1'); |
| 4381 |
|
} |
| 4382 |
|
!!!next-token; |
| 4383 |
|
next B; |
| 4384 |
|
} elsif ($token->{type} == END_TAG_TOKEN) { |
| 4385 |
|
delete $self->{ignore_newline}; |
| 4386 |
|
|
| 4387 |
|
if ($token->{tag_name} eq 'script') { |
| 4388 |
|
!!!cp ('t50'); |
| 4389 |
|
|
| 4390 |
|
## Para 1-2 |
| 4391 |
|
my $script = pop @{$self->{open_elements}}; |
| 4392 |
|
|
| 4393 |
|
## Para 3 |
| 4394 |
|
$self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM; |
| 4395 |
|
|
| 4396 |
|
## Para 4 |
| 4397 |
|
## TODO: $old_insertion_point = $current_insertion_point; |
| 4398 |
|
## TODO: $current_insertion_point = just before $self->{nc}; |
| 4399 |
|
|
| 4400 |
|
## Para 5 |
| 4401 |
|
## TODO: Run the $script->[0]. |
| 4402 |
|
|
| 4403 |
|
## Para 6 |
| 4404 |
|
## TODO: $current_insertion_point = $old_insertion_point; |
| 4405 |
|
|
| 4406 |
|
## Para 7 |
| 4407 |
|
## TODO: if ($pending_external_script) { |
| 4408 |
|
## TODO: ... |
| 4409 |
|
## TODO: } |
| 4410 |
|
|
| 4411 |
|
!!!next-token; |
| 4412 |
|
next B; |
| 4413 |
|
} else { |
| 4414 |
|
!!!cp ('t42'); |
| 4415 |
|
|
| 4416 |
|
pop @{$self->{open_elements}}; |
| 4417 |
|
|
| 4418 |
|
$self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM; |
| 4419 |
|
!!!next-token; |
| 4420 |
|
next B; |
| 4421 |
|
} |
| 4422 |
|
} elsif ($token->{type} == END_OF_FILE_TOKEN) { |
| 4423 |
|
delete $self->{ignore_newline}; |
| 4424 |
|
|
| 4425 |
|
!!!cp ('t44'); |
| 4426 |
|
!!!parse-error (type => 'not closed', |
| 4427 |
|
text => $self->{open_elements}->[-1]->[0] |
| 4428 |
|
->manakai_local_name, |
| 4429 |
|
token => $token); |
| 4430 |
|
|
| 4431 |
|
#if ($self->{open_elements}->[-1]->[1] & SCRIPT_EL) { |
| 4432 |
|
# ## TODO: Mark as "already executed" |
| 4433 |
|
#} |
| 4434 |
|
|
| 4435 |
|
pop @{$self->{open_elements}}; |
| 4436 |
|
|
| 4437 |
|
$self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM; |
| 4438 |
|
## Reprocess. |
| 4439 |
|
next B; |
| 4440 |
|
} else { |
| 4441 |
|
die "$0: $token->{type}: In CDATA/RCDATA: Unknown token type"; |
| 4442 |
|
} |
| 4443 |
} elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) { |
} elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) { |
| 4444 |
if ($token->{type} == CHARACTER_TOKEN) { |
if ($token->{type} == CHARACTER_TOKEN) { |
| 4445 |
!!!cp ('t87.1'); |
!!!cp ('t87.1'); |
| 4552 |
pop @{$self->{open_elements}} |
pop @{$self->{open_elements}} |
| 4553 |
while $self->{open_elements}->[-1]->[1] & FOREIGN_EL; |
while $self->{open_elements}->[-1]->[1] & FOREIGN_EL; |
| 4554 |
|
|
| 4555 |
|
## NOTE: |<span><svg>| ... two parse errors, |<svg>| ... a parse error. |
| 4556 |
|
|
| 4557 |
$self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM; |
$self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM; |
| 4558 |
## Reprocess. |
## Reprocess. |
| 4559 |
next B; |
next B; |
| 4564 |
|
|
| 4565 |
if ($self->{insertion_mode} & HEAD_IMS) { |
if ($self->{insertion_mode} & HEAD_IMS) { |
| 4566 |
if ($token->{type} == CHARACTER_TOKEN) { |
if ($token->{type} == CHARACTER_TOKEN) { |
| 4567 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) { |
| 4568 |
unless ($self->{insertion_mode} == BEFORE_HEAD_IM) { |
unless ($self->{insertion_mode} == BEFORE_HEAD_IM) { |
| 4569 |
!!!cp ('t88.2'); |
if ($self->{head_element_inserted}) { |
| 4570 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
!!!cp ('t88.3'); |
| 4571 |
# |
$self->{open_elements}->[-1]->[0]->append_child |
| 4572 |
|
($self->{document}->create_text_node ($1)); |
| 4573 |
|
delete $self->{head_element_inserted}; |
| 4574 |
|
## NOTE: |</head> <link> | |
| 4575 |
|
# |
| 4576 |
|
} else { |
| 4577 |
|
!!!cp ('t88.2'); |
| 4578 |
|
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
| 4579 |
|
## NOTE: |</head>  | |
| 4580 |
|
# |
| 4581 |
|
} |
| 4582 |
} else { |
} else { |
| 4583 |
!!!cp ('t88.1'); |
!!!cp ('t88.1'); |
| 4584 |
## Ignore the token. |
## Ignore the token. |
| 4674 |
!!!cp ('t97'); |
!!!cp ('t97'); |
| 4675 |
} |
} |
| 4676 |
|
|
| 4677 |
if ($token->{tag_name} eq 'base') { |
if ($token->{tag_name} eq 'base') { |
| 4678 |
if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
| 4679 |
!!!cp ('t98'); |
!!!cp ('t98'); |
| 4680 |
## As if </noscript> |
## As if </noscript> |
| 4681 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 4682 |
!!!parse-error (type => 'in noscript', text => 'base', |
!!!parse-error (type => 'in noscript', text => 'base', |
| 4683 |
token => $token); |
token => $token); |
| 4684 |
|
|
| 4685 |
$self->{insertion_mode} = IN_HEAD_IM; |
$self->{insertion_mode} = IN_HEAD_IM; |
| 4686 |
## Reprocess in the "in head" insertion mode... |
## Reprocess in the "in head" insertion mode... |
| 4687 |
} else { |
} else { |
| 4688 |
!!!cp ('t99'); |
!!!cp ('t99'); |
| 4689 |
} |
} |
| 4690 |
|
|
| 4691 |
## NOTE: There is a "as if in head" code clone. |
## NOTE: There is a "as if in head" code clone. |
| 4692 |
if ($self->{insertion_mode} == AFTER_HEAD_IM) { |
if ($self->{insertion_mode} == AFTER_HEAD_IM) { |
| 4693 |
!!!cp ('t100'); |
!!!cp ('t100'); |
| 4694 |
!!!parse-error (type => 'after head', |
!!!parse-error (type => 'after head', |
| 4695 |
text => $token->{tag_name}, token => $token); |
text => $token->{tag_name}, token => $token); |
| 4696 |
push @{$self->{open_elements}}, |
push @{$self->{open_elements}}, |
| 4697 |
[$self->{head_element}, $el_category->{head}]; |
[$self->{head_element}, $el_category->{head}]; |
| 4698 |
} else { |
$self->{head_element_inserted} = 1; |
| 4699 |
!!!cp ('t101'); |
} else { |
| 4700 |
} |
!!!cp ('t101'); |
| 4701 |
!!!insert-element ($token->{tag_name}, $token->{attributes}, $token); |
} |
| 4702 |
pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec. |
!!!insert-element ($token->{tag_name}, $token->{attributes}, $token); |
| 4703 |
pop @{$self->{open_elements}} # <head> |
pop @{$self->{open_elements}}; |
| 4704 |
if $self->{insertion_mode} == AFTER_HEAD_IM; |
pop @{$self->{open_elements}} # <head> |
| 4705 |
!!!nack ('t101.1'); |
if $self->{insertion_mode} == AFTER_HEAD_IM; |
| 4706 |
!!!next-token; |
!!!nack ('t101.1'); |
| 4707 |
next B; |
!!!next-token; |
| 4708 |
} elsif ($token->{tag_name} eq 'link') { |
next B; |
| 4709 |
## NOTE: There is a "as if in head" code clone. |
} elsif ($token->{tag_name} eq 'link') { |
| 4710 |
if ($self->{insertion_mode} == AFTER_HEAD_IM) { |
## NOTE: There is a "as if in head" code clone. |
| 4711 |
!!!cp ('t102'); |
if ($self->{insertion_mode} == AFTER_HEAD_IM) { |
| 4712 |
!!!parse-error (type => 'after head', |
!!!cp ('t102'); |
| 4713 |
text => $token->{tag_name}, token => $token); |
!!!parse-error (type => 'after head', |
| 4714 |
push @{$self->{open_elements}}, |
text => $token->{tag_name}, token => $token); |
| 4715 |
[$self->{head_element}, $el_category->{head}]; |
push @{$self->{open_elements}}, |
| 4716 |
} else { |
[$self->{head_element}, $el_category->{head}]; |
| 4717 |
!!!cp ('t103'); |
$self->{head_element_inserted} = 1; |
| 4718 |
} |
} else { |
| 4719 |
!!!insert-element ($token->{tag_name}, $token->{attributes}, $token); |
!!!cp ('t103'); |
| 4720 |
pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec. |
} |
| 4721 |
pop @{$self->{open_elements}} # <head> |
!!!insert-element ($token->{tag_name}, $token->{attributes}, $token); |
| 4722 |
if $self->{insertion_mode} == AFTER_HEAD_IM; |
pop @{$self->{open_elements}}; |
| 4723 |
!!!ack ('t103.1'); |
pop @{$self->{open_elements}} # <head> |
| 4724 |
!!!next-token; |
if $self->{insertion_mode} == AFTER_HEAD_IM; |
| 4725 |
next B; |
!!!ack ('t103.1'); |
| 4726 |
} elsif ($token->{tag_name} eq 'meta') { |
!!!next-token; |
| 4727 |
## NOTE: There is a "as if in head" code clone. |
next B; |
| 4728 |
if ($self->{insertion_mode} == AFTER_HEAD_IM) { |
} elsif ($token->{tag_name} eq 'command' or |
| 4729 |
!!!cp ('t104'); |
$token->{tag_name} eq 'eventsource') { |
| 4730 |
!!!parse-error (type => 'after head', |
if ($self->{insertion_mode} == IN_HEAD_IM) { |
| 4731 |
text => $token->{tag_name}, token => $token); |
## NOTE: If the insertion mode at the time of the emission |
| 4732 |
push @{$self->{open_elements}}, |
## of the token was "before head", $self->{insertion_mode} |
| 4733 |
[$self->{head_element}, $el_category->{head}]; |
## is already changed to |IN_HEAD_IM|. |
| 4734 |
} else { |
|
| 4735 |
!!!cp ('t105'); |
## NOTE: There is a "as if in head" code clone. |
| 4736 |
} |
!!!insert-element ($token->{tag_name}, $token->{attributes}, $token); |
| 4737 |
!!!insert-element ($token->{tag_name}, $token->{attributes}, $token); |
pop @{$self->{open_elements}}; |
| 4738 |
my $meta_el = pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec. |
pop @{$self->{open_elements}} # <head> |
| 4739 |
|
if $self->{insertion_mode} == AFTER_HEAD_IM; |
| 4740 |
|
!!!ack ('t103.2'); |
| 4741 |
|
!!!next-token; |
| 4742 |
|
next B; |
| 4743 |
|
} else { |
| 4744 |
|
## NOTE: "in head noscript" or "after head" insertion mode |
| 4745 |
|
## - in these cases, these tags are treated the same as |
| 4746 |
|
## normal in-body tags. |
| 4747 |
|
!!!cp ('t103.3'); |
| 4748 |
|
# |
| 4749 |
|
} |
| 4750 |
|
} elsif ($token->{tag_name} eq 'meta') { |
| 4751 |
|
## NOTE: There is a "as if in head" code clone. |
| 4752 |
|
if ($self->{insertion_mode} == AFTER_HEAD_IM) { |
| 4753 |
|
!!!cp ('t104'); |
| 4754 |
|
!!!parse-error (type => 'after head', |
| 4755 |
|
text => $token->{tag_name}, token => $token); |
| 4756 |
|
push @{$self->{open_elements}}, |
| 4757 |
|
[$self->{head_element}, $el_category->{head}]; |
| 4758 |
|
$self->{head_element_inserted} = 1; |
| 4759 |
|
} else { |
| 4760 |
|
!!!cp ('t105'); |
| 4761 |
|
} |
| 4762 |
|
!!!insert-element ($token->{tag_name}, $token->{attributes}, $token); |
| 4763 |
|
my $meta_el = pop @{$self->{open_elements}}; |
| 4764 |
|
|
| 4765 |
unless ($self->{confident}) { |
unless ($self->{confident}) { |
| 4766 |
if ($token->{attributes}->{charset}) { |
if ($token->{attributes}->{charset}) { |
| 4778 |
} elsif ($token->{attributes}->{content}) { |
} elsif ($token->{attributes}->{content}) { |
| 4779 |
if ($token->{attributes}->{content}->{value} |
if ($token->{attributes}->{content}->{value} |
| 4780 |
=~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt] |
=~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt] |
| 4781 |
[\x09-\x0D\x20]*= |
[\x09\x0A\x0C\x0D\x20]*= |
| 4782 |
[\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'| |
[\x09\x0A\x0C\x0D\x20]*(?>"([^"]*)"|'([^']*)'| |
| 4783 |
([^"'\x09-\x0D\x20][^\x09-\x0D\x20\x3B]*))/x) { |
([^"'\x09\x0A\x0C\x0D\x20] |
| 4784 |
|
[^\x09\x0A\x0C\x0D\x20\x3B]*))/x) { |
| 4785 |
!!!cp ('t107'); |
!!!cp ('t107'); |
| 4786 |
## NOTE: Whether the encoding is supported or not is handled |
## NOTE: Whether the encoding is supported or not is handled |
| 4787 |
## in the {change_encoding} callback. |
## in the {change_encoding} callback. |
| 4818 |
!!!ack ('t110.1'); |
!!!ack ('t110.1'); |
| 4819 |
!!!next-token; |
!!!next-token; |
| 4820 |
next B; |
next B; |
| 4821 |
} elsif ($token->{tag_name} eq 'title') { |
} elsif ($token->{tag_name} eq 'title') { |
| 4822 |
if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
| 4823 |
!!!cp ('t111'); |
!!!cp ('t111'); |
| 4824 |
## As if </noscript> |
## As if </noscript> |
| 4825 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 4826 |
!!!parse-error (type => 'in noscript', text => 'title', |
!!!parse-error (type => 'in noscript', text => 'title', |
| 4827 |
token => $token); |
token => $token); |
| 4828 |
|
|
| 4829 |
$self->{insertion_mode} = IN_HEAD_IM; |
$self->{insertion_mode} = IN_HEAD_IM; |
| 4830 |
## Reprocess in the "in head" insertion mode... |
## Reprocess in the "in head" insertion mode... |
| 4831 |
} elsif ($self->{insertion_mode} == AFTER_HEAD_IM) { |
} elsif ($self->{insertion_mode} == AFTER_HEAD_IM) { |
| 4832 |
!!!cp ('t112'); |
!!!cp ('t112'); |
| 4833 |
!!!parse-error (type => 'after head', |
!!!parse-error (type => 'after head', |
| 4834 |
text => $token->{tag_name}, token => $token); |
text => $token->{tag_name}, token => $token); |
| 4835 |
push @{$self->{open_elements}}, |
push @{$self->{open_elements}}, |
| 4836 |
[$self->{head_element}, $el_category->{head}]; |
[$self->{head_element}, $el_category->{head}]; |
| 4837 |
} else { |
$self->{head_element_inserted} = 1; |
| 4838 |
!!!cp ('t113'); |
} else { |
| 4839 |
} |
!!!cp ('t113'); |
| 4840 |
|
} |
| 4841 |
|
|
| 4842 |
## NOTE: There is a "as if in head" code clone. |
## NOTE: There is a "as if in head" code clone. |
| 4843 |
my $parent = defined $self->{head_element} ? $self->{head_element} |
$parse_rcdata->(RCDATA_CONTENT_MODEL); |
| 4844 |
: $self->{open_elements}->[-1]->[0]; |
## ISSUE: A spec bug [Bug 6038] |
| 4845 |
$parse_rcdata->(RCDATA_CONTENT_MODEL); |
splice @{$self->{open_elements}}, -2, 1, () # <head> |
| 4846 |
pop @{$self->{open_elements}} # <head> |
if ($self->{insertion_mode} & AFTER_HEAD_IM) == AFTER_HEAD_IM; |
| 4847 |
if $self->{insertion_mode} == AFTER_HEAD_IM; |
next B; |
| 4848 |
next B; |
} elsif ($token->{tag_name} eq 'style' or |
| 4849 |
} elsif ($token->{tag_name} eq 'style' or |
$token->{tag_name} eq 'noframes') { |
| 4850 |
$token->{tag_name} eq 'noframes') { |
## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and |
| 4851 |
## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and |
## insertion mode IN_HEAD_IM) |
| 4852 |
## insertion mode IN_HEAD_IM) |
## NOTE: There is a "as if in head" code clone. |
| 4853 |
## NOTE: There is a "as if in head" code clone. |
if ($self->{insertion_mode} == AFTER_HEAD_IM) { |
| 4854 |
if ($self->{insertion_mode} == AFTER_HEAD_IM) { |
!!!cp ('t114'); |
| 4855 |
!!!cp ('t114'); |
!!!parse-error (type => 'after head', |
| 4856 |
!!!parse-error (type => 'after head', |
text => $token->{tag_name}, token => $token); |
| 4857 |
text => $token->{tag_name}, token => $token); |
push @{$self->{open_elements}}, |
| 4858 |
push @{$self->{open_elements}}, |
[$self->{head_element}, $el_category->{head}]; |
| 4859 |
[$self->{head_element}, $el_category->{head}]; |
$self->{head_element_inserted} = 1; |
| 4860 |
} else { |
} else { |
| 4861 |
!!!cp ('t115'); |
!!!cp ('t115'); |
| 4862 |
} |
} |
| 4863 |
$parse_rcdata->(CDATA_CONTENT_MODEL); |
$parse_rcdata->(CDATA_CONTENT_MODEL); |
| 4864 |
pop @{$self->{open_elements}} # <head> |
## ISSUE: A spec bug [Bug 6038] |
| 4865 |
if $self->{insertion_mode} == AFTER_HEAD_IM; |
splice @{$self->{open_elements}}, -2, 1, () # <head> |
| 4866 |
next B; |
if ($self->{insertion_mode} & AFTER_HEAD_IM) == AFTER_HEAD_IM; |
| 4867 |
} elsif ($token->{tag_name} eq 'noscript') { |
next B; |
| 4868 |
|
} elsif ($token->{tag_name} eq 'noscript') { |
| 4869 |
if ($self->{insertion_mode} == IN_HEAD_IM) { |
if ($self->{insertion_mode} == IN_HEAD_IM) { |
| 4870 |
!!!cp ('t116'); |
!!!cp ('t116'); |
| 4871 |
## NOTE: and scripting is disabled |
## NOTE: and scripting is disabled |
| 4886 |
!!!cp ('t118'); |
!!!cp ('t118'); |
| 4887 |
# |
# |
| 4888 |
} |
} |
| 4889 |
} elsif ($token->{tag_name} eq 'script') { |
} elsif ($token->{tag_name} eq 'script') { |
| 4890 |
if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
| 4891 |
!!!cp ('t119'); |
!!!cp ('t119'); |
| 4892 |
## As if </noscript> |
## As if </noscript> |
| 4893 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 4894 |
!!!parse-error (type => 'in noscript', text => 'script', |
!!!parse-error (type => 'in noscript', text => 'script', |
| 4895 |
token => $token); |
token => $token); |
| 4896 |
|
|
| 4897 |
$self->{insertion_mode} = IN_HEAD_IM; |
$self->{insertion_mode} = IN_HEAD_IM; |
| 4898 |
## Reprocess in the "in head" insertion mode... |
## Reprocess in the "in head" insertion mode... |
| 4899 |
} elsif ($self->{insertion_mode} == AFTER_HEAD_IM) { |
} elsif ($self->{insertion_mode} == AFTER_HEAD_IM) { |
| 4900 |
!!!cp ('t120'); |
!!!cp ('t120'); |
| 4901 |
!!!parse-error (type => 'after head', |
!!!parse-error (type => 'after head', |
| 4902 |
text => $token->{tag_name}, token => $token); |
text => $token->{tag_name}, token => $token); |
| 4903 |
push @{$self->{open_elements}}, |
push @{$self->{open_elements}}, |
| 4904 |
[$self->{head_element}, $el_category->{head}]; |
[$self->{head_element}, $el_category->{head}]; |
| 4905 |
} else { |
$self->{head_element_inserted} = 1; |
| 4906 |
!!!cp ('t121'); |
} else { |
| 4907 |
} |
!!!cp ('t121'); |
| 4908 |
|
} |
| 4909 |
|
|
| 4910 |
## NOTE: There is a "as if in head" code clone. |
## NOTE: There is a "as if in head" code clone. |
| 4911 |
$script_start_tag->(); |
$script_start_tag->(); |
| 4912 |
pop @{$self->{open_elements}} # <head> |
## ISSUE: A spec bug [Bug 6038] |
| 4913 |
if $self->{insertion_mode} == AFTER_HEAD_IM; |
splice @{$self->{open_elements}}, -2, 1 # <head> |
| 4914 |
next B; |
if ($self->{insertion_mode} & AFTER_HEAD_IM) == AFTER_HEAD_IM; |
| 4915 |
} elsif ($token->{tag_name} eq 'body' or |
next B; |
| 4916 |
$token->{tag_name} eq 'frameset') { |
} elsif ($token->{tag_name} eq 'body' or |
| 4917 |
|
$token->{tag_name} eq 'frameset') { |
| 4918 |
if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
| 4919 |
!!!cp ('t122'); |
!!!cp ('t122'); |
| 4920 |
## As if </noscript> |
## As if </noscript> |
| 5049 |
} elsif ({ |
} elsif ({ |
| 5050 |
body => 1, html => 1, |
body => 1, html => 1, |
| 5051 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
| 5052 |
if ($self->{insertion_mode} == BEFORE_HEAD_IM or |
## TODO: This branch is entirely redundant. |
| 5053 |
|
if ($self->{insertion_mode} == BEFORE_HEAD_IM or |
| 5054 |
$self->{insertion_mode} == IN_HEAD_IM or |
$self->{insertion_mode} == IN_HEAD_IM or |
| 5055 |
$self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
$self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
| 5056 |
!!!cp ('t140'); |
!!!cp ('t140'); |
| 5222 |
} else { |
} else { |
| 5223 |
die "$0: $token->{type}: Unknown token type"; |
die "$0: $token->{type}: Unknown token type"; |
| 5224 |
} |
} |
|
|
|
|
## ISSUE: An issue in the spec. |
|
| 5225 |
} elsif ($self->{insertion_mode} & BODY_IMS) { |
} elsif ($self->{insertion_mode} & BODY_IMS) { |
| 5226 |
if ($token->{type} == CHARACTER_TOKEN) { |
if ($token->{type} == CHARACTER_TOKEN) { |
| 5227 |
!!!cp ('t150'); |
!!!cp ('t150'); |
| 5593 |
} elsif ($self->{insertion_mode} & TABLE_IMS) { |
} elsif ($self->{insertion_mode} & TABLE_IMS) { |
| 5594 |
if ($token->{type} == CHARACTER_TOKEN) { |
if ($token->{type} == CHARACTER_TOKEN) { |
| 5595 |
if (not $open_tables->[-1]->[1] and # tainted |
if (not $open_tables->[-1]->[1] and # tainted |
| 5596 |
$token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
$token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) { |
| 5597 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
| 5598 |
|
|
| 5599 |
unless (length $token->{data}) { |
unless (length $token->{data}) { |
| 5607 |
|
|
| 5608 |
!!!parse-error (type => 'in table:#text', token => $token); |
!!!parse-error (type => 'in table:#text', token => $token); |
| 5609 |
|
|
| 5610 |
## As if in body, but insert into foster parent element |
## NOTE: As if in body, but insert into the foster parent element. |
| 5611 |
## ISSUE: Spec says that "whenever a node would be inserted |
$reconstruct_active_formatting_elements->($insert_to_foster); |
|
## into the current node" while characters might not |
|
|
## result in a new Text node. |
|
|
$reconstruct_active_formatting_elements->($insert_to_foster); |
|
| 5612 |
|
|
| 5613 |
if ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) { |
if ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) { |
| 5614 |
# MUST |
# MUST |
| 5615 |
my $foster_parent_element; |
my $foster_parent_element; |
| 5616 |
my $next_sibling; |
my $next_sibling; |
| 5617 |
my $prev_sibling; |
my $prev_sibling; |
| 5618 |
OE: for (reverse 0..$#{$self->{open_elements}}) { |
OE: for (reverse 0..$#{$self->{open_elements}}) { |
| 5619 |
if ($self->{open_elements}->[$_]->[1] & TABLE_EL) { |
if ($self->{open_elements}->[$_]->[1] & TABLE_EL) { |
| 5620 |
my $parent = $self->{open_elements}->[$_]->[0]->parent_node; |
my $parent = $self->{open_elements}->[$_]->[0]->parent_node; |
| 5621 |
if (defined $parent and $parent->node_type == 1) { |
if (defined $parent and $parent->node_type == 1) { |
| 5622 |
!!!cp ('t196'); |
$foster_parent_element = $parent; |
| 5623 |
$foster_parent_element = $parent; |
!!!cp ('t196'); |
| 5624 |
$next_sibling = $self->{open_elements}->[$_]->[0]; |
$next_sibling = $self->{open_elements}->[$_]->[0]; |
| 5625 |
$prev_sibling = $next_sibling->previous_sibling; |
$prev_sibling = $next_sibling->previous_sibling; |
| 5626 |
} else { |
# |
|
!!!cp ('t197'); |
|
|
$foster_parent_element = $self->{open_elements}->[$_ - 1]->[0]; |
|
|
$prev_sibling = $foster_parent_element->last_child; |
|
|
} |
|
|
last OE; |
|
|
} |
|
|
} # OE |
|
|
$foster_parent_element = $self->{open_elements}->[0]->[0] and |
|
|
$prev_sibling = $foster_parent_element->last_child |
|
|
unless defined $foster_parent_element; |
|
|
if (defined $prev_sibling and |
|
|
$prev_sibling->node_type == 3) { |
|
|
!!!cp ('t198'); |
|
|
$prev_sibling->manakai_append_text ($token->{data}); |
|
| 5627 |
} else { |
} else { |
| 5628 |
!!!cp ('t199'); |
!!!cp ('t197'); |
| 5629 |
$foster_parent_element->insert_before |
$foster_parent_element = $self->{open_elements}->[$_ - 1]->[0]; |
| 5630 |
($self->{document}->create_text_node ($token->{data}), |
$prev_sibling = $foster_parent_element->last_child; |
| 5631 |
$next_sibling); |
# |
| 5632 |
} |
} |
| 5633 |
|
last OE; |
| 5634 |
|
} |
| 5635 |
|
} # OE |
| 5636 |
|
$foster_parent_element = $self->{open_elements}->[0]->[0] and |
| 5637 |
|
$prev_sibling = $foster_parent_element->last_child |
| 5638 |
|
unless defined $foster_parent_element; |
| 5639 |
|
undef $prev_sibling unless $open_tables->[-1]->[2]; # ~node inserted |
| 5640 |
|
if (defined $prev_sibling and |
| 5641 |
|
$prev_sibling->node_type == 3) { |
| 5642 |
|
!!!cp ('t198'); |
| 5643 |
|
$prev_sibling->manakai_append_text ($token->{data}); |
| 5644 |
|
} else { |
| 5645 |
|
!!!cp ('t199'); |
| 5646 |
|
$foster_parent_element->insert_before |
| 5647 |
|
($self->{document}->create_text_node ($token->{data}), |
| 5648 |
|
$next_sibling); |
| 5649 |
|
} |
| 5650 |
$open_tables->[-1]->[1] = 1; # tainted |
$open_tables->[-1]->[1] = 1; # tainted |
| 5651 |
|
$open_tables->[-1]->[2] = 1; # ~node inserted |
| 5652 |
} else { |
} else { |
| 5653 |
|
## NOTE: Fragment case or in a foster parent'ed element |
| 5654 |
|
## (e.g. |<table><span>a|). In fragment case, whether the |
| 5655 |
|
## character is appended to existing node or a new node is |
| 5656 |
|
## created is irrelevant, since the foster parent'ed nodes |
| 5657 |
|
## are discarded and fragment parsing does not invoke any |
| 5658 |
|
## script. |
| 5659 |
!!!cp ('t200'); |
!!!cp ('t200'); |
| 5660 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data}); |
$self->{open_elements}->[-1]->[0]->manakai_append_text |
| 5661 |
|
($token->{data}); |
| 5662 |
} |
} |
| 5663 |
|
|
| 5664 |
!!!next-token; |
!!!next-token; |
| 5695 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 5696 |
} |
} |
| 5697 |
|
|
| 5698 |
$self->{insertion_mode} = IN_ROW_IM; |
$self->{insertion_mode} = IN_ROW_IM; |
| 5699 |
if ($token->{tag_name} eq 'tr') { |
if ($token->{tag_name} eq 'tr') { |
| 5700 |
!!!cp ('t204'); |
!!!cp ('t204'); |
| 5701 |
!!!insert-element ($token->{tag_name}, $token->{attributes}, $token); |
!!!insert-element ($token->{tag_name}, $token->{attributes}, $token); |
| 5702 |
!!!nack ('t204'); |
$open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted |
| 5703 |
!!!next-token; |
!!!nack ('t204'); |
| 5704 |
next B; |
!!!next-token; |
| 5705 |
} else { |
next B; |
| 5706 |
!!!cp ('t205'); |
} else { |
| 5707 |
!!!insert-element ('tr',, $token); |
!!!cp ('t205'); |
| 5708 |
## reprocess in the "in row" insertion mode |
!!!insert-element ('tr',, $token); |
| 5709 |
} |
## reprocess in the "in row" insertion mode |
| 5710 |
} else { |
} |
| 5711 |
!!!cp ('t206'); |
} else { |
| 5712 |
} |
!!!cp ('t206'); |
| 5713 |
|
} |
| 5714 |
|
|
| 5715 |
## Clear back to table row context |
## Clear back to table row context |
| 5716 |
while (not ($self->{open_elements}->[-1]->[1] |
while (not ($self->{open_elements}->[-1]->[1] |
| 5719 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 5720 |
} |
} |
| 5721 |
|
|
| 5722 |
!!!insert-element ($token->{tag_name}, $token->{attributes}, $token); |
!!!insert-element ($token->{tag_name}, $token->{attributes}, $token); |
| 5723 |
$self->{insertion_mode} = IN_CELL_IM; |
$open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted |
| 5724 |
|
$self->{insertion_mode} = IN_CELL_IM; |
| 5725 |
|
|
| 5726 |
push @$active_formatting_elements, ['#marker', '']; |
push @$active_formatting_elements, ['#marker', '']; |
| 5727 |
|
|
| 5728 |
!!!nack ('t207.1'); |
!!!nack ('t207.1'); |
| 5729 |
|
!!!next-token; |
| 5730 |
|
next B; |
| 5731 |
|
} elsif ({ |
| 5732 |
|
caption => 1, col => 1, colgroup => 1, |
| 5733 |
|
tbody => 1, tfoot => 1, thead => 1, |
| 5734 |
|
tr => 1, # $self->{insertion_mode} == IN_ROW_IM |
| 5735 |
|
}->{$token->{tag_name}}) { |
| 5736 |
|
if ($self->{insertion_mode} == IN_ROW_IM) { |
| 5737 |
|
## As if </tr> |
| 5738 |
|
## have an element in table scope |
| 5739 |
|
my $i; |
| 5740 |
|
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { |
| 5741 |
|
my $node = $self->{open_elements}->[$_]; |
| 5742 |
|
if ($node->[1] & TABLE_ROW_EL) { |
| 5743 |
|
!!!cp ('t208'); |
| 5744 |
|
$i = $_; |
| 5745 |
|
last INSCOPE; |
| 5746 |
|
} elsif ($node->[1] & TABLE_SCOPING_EL) { |
| 5747 |
|
!!!cp ('t209'); |
| 5748 |
|
last INSCOPE; |
| 5749 |
|
} |
| 5750 |
|
} # INSCOPE |
| 5751 |
|
unless (defined $i) { |
| 5752 |
|
!!!cp ('t210'); |
| 5753 |
|
## TODO: This type is wrong. |
| 5754 |
|
!!!parse-error (type => 'unmacthed end tag', |
| 5755 |
|
text => $token->{tag_name}, token => $token); |
| 5756 |
|
## Ignore the token |
| 5757 |
|
!!!nack ('t210.1'); |
| 5758 |
!!!next-token; |
!!!next-token; |
| 5759 |
next B; |
next B; |
| 5760 |
} elsif ({ |
} |
|
caption => 1, col => 1, colgroup => 1, |
|
|
tbody => 1, tfoot => 1, thead => 1, |
|
|
tr => 1, # $self->{insertion_mode} == IN_ROW_IM |
|
|
}->{$token->{tag_name}}) { |
|
|
if ($self->{insertion_mode} == IN_ROW_IM) { |
|
|
## As if </tr> |
|
|
## have an element in table scope |
|
|
my $i; |
|
|
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { |
|
|
my $node = $self->{open_elements}->[$_]; |
|
|
if ($node->[1] & TABLE_ROW_EL) { |
|
|
!!!cp ('t208'); |
|
|
$i = $_; |
|
|
last INSCOPE; |
|
|
} elsif ($node->[1] & TABLE_SCOPING_EL) { |
|
|
!!!cp ('t209'); |
|
|
last INSCOPE; |
|
|
} |
|
|
} # INSCOPE |
|
|
unless (defined $i) { |
|
|
!!!cp ('t210'); |
|
|
## TODO: This type is wrong. |
|
|
!!!parse-error (type => 'unmacthed end tag', |
|
|
text => $token->{tag_name}, token => $token); |
|
|
## Ignore the token |
|
|
!!!nack ('t210.1'); |
|
|
!!!next-token; |
|
|
next B; |
|
|
} |
|
| 5761 |
|
|
| 5762 |
## Clear back to table row context |
## Clear back to table row context |
| 5763 |
while (not ($self->{open_elements}->[-1]->[1] |
while (not ($self->{open_elements}->[-1]->[1] |
| 5827 |
!!!cp ('t218'); |
!!!cp ('t218'); |
| 5828 |
} |
} |
| 5829 |
|
|
| 5830 |
if ($token->{tag_name} eq 'col') { |
if ($token->{tag_name} eq 'col') { |
| 5831 |
## Clear back to table context |
## Clear back to table context |
| 5832 |
while (not ($self->{open_elements}->[-1]->[1] |
while (not ($self->{open_elements}->[-1]->[1] |
| 5833 |
& TABLE_SCOPING_EL)) { |
& TABLE_SCOPING_EL)) { |
| 5834 |
!!!cp ('t219'); |
!!!cp ('t219'); |
| 5835 |
## ISSUE: Can this state be reached? |
## ISSUE: Can this state be reached? |
| 5836 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 5837 |
} |
} |
| 5838 |
|
|
| 5839 |
!!!insert-element ('colgroup',, $token); |
!!!insert-element ('colgroup',, $token); |
| 5840 |
$self->{insertion_mode} = IN_COLUMN_GROUP_IM; |
$self->{insertion_mode} = IN_COLUMN_GROUP_IM; |
| 5841 |
## reprocess |
## reprocess |
| 5842 |
!!!ack-later; |
$open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted |
| 5843 |
next B; |
!!!ack-later; |
| 5844 |
} elsif ({ |
next B; |
| 5845 |
caption => 1, |
} elsif ({ |
| 5846 |
colgroup => 1, |
caption => 1, |
| 5847 |
tbody => 1, tfoot => 1, thead => 1, |
colgroup => 1, |
| 5848 |
}->{$token->{tag_name}}) { |
tbody => 1, tfoot => 1, thead => 1, |
| 5849 |
## Clear back to table context |
}->{$token->{tag_name}}) { |
| 5850 |
|
## Clear back to table context |
| 5851 |
while (not ($self->{open_elements}->[-1]->[1] |
while (not ($self->{open_elements}->[-1]->[1] |
| 5852 |
& TABLE_SCOPING_EL)) { |
& TABLE_SCOPING_EL)) { |
| 5853 |
!!!cp ('t220'); |
!!!cp ('t220'); |
| 5855 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 5856 |
} |
} |
| 5857 |
|
|
| 5858 |
push @$active_formatting_elements, ['#marker', ''] |
push @$active_formatting_elements, ['#marker', ''] |
| 5859 |
if $token->{tag_name} eq 'caption'; |
if $token->{tag_name} eq 'caption'; |
| 5860 |
|
|
| 5861 |
!!!insert-element ($token->{tag_name}, $token->{attributes}, $token); |
!!!insert-element ($token->{tag_name}, $token->{attributes}, $token); |
| 5862 |
$self->{insertion_mode} = { |
$open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted |
| 5863 |
caption => IN_CAPTION_IM, |
$self->{insertion_mode} = { |
| 5864 |
colgroup => IN_COLUMN_GROUP_IM, |
caption => IN_CAPTION_IM, |
| 5865 |
tbody => IN_TABLE_BODY_IM, |
colgroup => IN_COLUMN_GROUP_IM, |
| 5866 |
tfoot => IN_TABLE_BODY_IM, |
tbody => IN_TABLE_BODY_IM, |
| 5867 |
thead => IN_TABLE_BODY_IM, |
tfoot => IN_TABLE_BODY_IM, |
| 5868 |
}->{$token->{tag_name}}; |
thead => IN_TABLE_BODY_IM, |
| 5869 |
!!!next-token; |
}->{$token->{tag_name}}; |
| 5870 |
!!!nack ('t220.1'); |
!!!next-token; |
| 5871 |
next B; |
!!!nack ('t220.1'); |
| 5872 |
} else { |
next B; |
| 5873 |
die "$0: in table: <>: $token->{tag_name}"; |
} else { |
| 5874 |
} |
die "$0: in table: <>: $token->{tag_name}"; |
| 5875 |
|
} |
| 5876 |
} elsif ($token->{tag_name} eq 'table') { |
} elsif ($token->{tag_name} eq 'table') { |
| 5877 |
!!!parse-error (type => 'not closed', |
!!!parse-error (type => 'not closed', |
| 5878 |
text => $self->{open_elements}->[-1]->[0] |
text => $self->{open_elements}->[-1]->[0] |
| 5935 |
!!!cp ('t227.8'); |
!!!cp ('t227.8'); |
| 5936 |
## NOTE: This is a "as if in head" code clone. |
## NOTE: This is a "as if in head" code clone. |
| 5937 |
$parse_rcdata->(CDATA_CONTENT_MODEL); |
$parse_rcdata->(CDATA_CONTENT_MODEL); |
| 5938 |
|
$open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted |
| 5939 |
next B; |
next B; |
| 5940 |
} else { |
} else { |
| 5941 |
!!!cp ('t227.7'); |
!!!cp ('t227.7'); |
| 5946 |
!!!cp ('t227.6'); |
!!!cp ('t227.6'); |
| 5947 |
## NOTE: This is a "as if in head" code clone. |
## NOTE: This is a "as if in head" code clone. |
| 5948 |
$script_start_tag->(); |
$script_start_tag->(); |
| 5949 |
|
$open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted |
| 5950 |
next B; |
next B; |
| 5951 |
} else { |
} else { |
| 5952 |
!!!cp ('t227.5'); |
!!!cp ('t227.5'); |
| 5962 |
text => $token->{tag_name}, token => $token); |
text => $token->{tag_name}, token => $token); |
| 5963 |
|
|
| 5964 |
!!!insert-element ($token->{tag_name}, $token->{attributes}, $token); |
!!!insert-element ($token->{tag_name}, $token->{attributes}, $token); |
| 5965 |
|
$open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted |
| 5966 |
|
|
| 5967 |
## TODO: form element pointer |
## TODO: form element pointer |
| 5968 |
|
|
| 6292 |
} |
} |
| 6293 |
} elsif ($self->{insertion_mode} == IN_COLUMN_GROUP_IM) { |
} elsif ($self->{insertion_mode} == IN_COLUMN_GROUP_IM) { |
| 6294 |
if ($token->{type} == CHARACTER_TOKEN) { |
if ($token->{type} == CHARACTER_TOKEN) { |
| 6295 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) { |
| 6296 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
| 6297 |
unless (length $token->{data}) { |
unless (length $token->{data}) { |
| 6298 |
!!!cp ('t260'); |
!!!cp ('t260'); |
| 6633 |
} |
} |
| 6634 |
} elsif ($self->{insertion_mode} & BODY_AFTER_IMS) { |
} elsif ($self->{insertion_mode} & BODY_AFTER_IMS) { |
| 6635 |
if ($token->{type} == CHARACTER_TOKEN) { |
if ($token->{type} == CHARACTER_TOKEN) { |
| 6636 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) { |
| 6637 |
my $data = $1; |
my $data = $1; |
| 6638 |
## As if in body |
## As if in body |
| 6639 |
$reconstruct_active_formatting_elements->($insert_to_current); |
$reconstruct_active_formatting_elements->($insert_to_current); |
| 6650 |
if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) { |
if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) { |
| 6651 |
!!!cp ('t301'); |
!!!cp ('t301'); |
| 6652 |
!!!parse-error (type => 'after html:#text', token => $token); |
!!!parse-error (type => 'after html:#text', token => $token); |
| 6653 |
|
# |
|
## Reprocess in the "after body" insertion mode. |
|
| 6654 |
} else { |
} else { |
| 6655 |
!!!cp ('t302'); |
!!!cp ('t302'); |
| 6656 |
|
## "after body" insertion mode |
| 6657 |
|
!!!parse-error (type => 'after body:#text', token => $token); |
| 6658 |
|
# |
| 6659 |
} |
} |
|
|
|
|
## "after body" insertion mode |
|
|
!!!parse-error (type => 'after body:#text', token => $token); |
|
| 6660 |
|
|
| 6661 |
$self->{insertion_mode} = IN_BODY_IM; |
$self->{insertion_mode} = IN_BODY_IM; |
| 6662 |
## reprocess |
## reprocess |
| 6666 |
!!!cp ('t303'); |
!!!cp ('t303'); |
| 6667 |
!!!parse-error (type => 'after html', |
!!!parse-error (type => 'after html', |
| 6668 |
text => $token->{tag_name}, token => $token); |
text => $token->{tag_name}, token => $token); |
| 6669 |
|
# |
|
## Reprocess in the "after body" insertion mode. |
|
| 6670 |
} else { |
} else { |
| 6671 |
!!!cp ('t304'); |
!!!cp ('t304'); |
| 6672 |
|
## "after body" insertion mode |
| 6673 |
|
!!!parse-error (type => 'after body', |
| 6674 |
|
text => $token->{tag_name}, token => $token); |
| 6675 |
|
# |
| 6676 |
} |
} |
| 6677 |
|
|
|
## "after body" insertion mode |
|
|
!!!parse-error (type => 'after body', |
|
|
text => $token->{tag_name}, token => $token); |
|
|
|
|
| 6678 |
$self->{insertion_mode} = IN_BODY_IM; |
$self->{insertion_mode} = IN_BODY_IM; |
| 6679 |
!!!ack-later; |
!!!ack-later; |
| 6680 |
## reprocess |
## reprocess |
| 6685 |
!!!parse-error (type => 'after html:/', |
!!!parse-error (type => 'after html:/', |
| 6686 |
text => $token->{tag_name}, token => $token); |
text => $token->{tag_name}, token => $token); |
| 6687 |
|
|
| 6688 |
$self->{insertion_mode} = AFTER_BODY_IM; |
$self->{insertion_mode} = IN_BODY_IM; |
| 6689 |
## Reprocess in the "after body" insertion mode. |
## Reprocess. |
| 6690 |
|
next B; |
| 6691 |
} else { |
} else { |
| 6692 |
!!!cp ('t306'); |
!!!cp ('t306'); |
| 6693 |
} |
} |
| 6725 |
} |
} |
| 6726 |
} elsif ($self->{insertion_mode} & FRAME_IMS) { |
} elsif ($self->{insertion_mode} & FRAME_IMS) { |
| 6727 |
if ($token->{type} == CHARACTER_TOKEN) { |
if ($token->{type} == CHARACTER_TOKEN) { |
| 6728 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) { |
| 6729 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
| 6730 |
|
|
| 6731 |
unless (length $token->{data}) { |
unless (length $token->{data}) { |
| 6735 |
} |
} |
| 6736 |
} |
} |
| 6737 |
|
|
| 6738 |
if ($token->{data} =~ s/^[^\x09\x0A\x0B\x0C\x20]+//) { |
if ($token->{data} =~ s/^[^\x09\x0A\x0C\x20]+//) { |
| 6739 |
if ($self->{insertion_mode} == IN_FRAMESET_IM) { |
if ($self->{insertion_mode} == IN_FRAMESET_IM) { |
| 6740 |
!!!cp ('t311'); |
!!!cp ('t311'); |
| 6741 |
!!!parse-error (type => 'in frameset:#text', token => $token); |
!!!parse-error (type => 'in frameset:#text', token => $token); |
| 6864 |
} else { |
} else { |
| 6865 |
die "$0: $token->{type}: Unknown token type"; |
die "$0: $token->{type}: Unknown token type"; |
| 6866 |
} |
} |
|
|
|
|
## ISSUE: An issue in spec here |
|
| 6867 |
} else { |
} else { |
| 6868 |
die "$0: $self->{insertion_mode}: Unknown insertion mode"; |
die "$0: $self->{insertion_mode}: Unknown insertion mode"; |
| 6869 |
} |
} |
| 6881 |
$parse_rcdata->(CDATA_CONTENT_MODEL); |
$parse_rcdata->(CDATA_CONTENT_MODEL); |
| 6882 |
next B; |
next B; |
| 6883 |
} elsif ({ |
} elsif ({ |
| 6884 |
base => 1, link => 1, |
base => 1, command => 1, eventsource => 1, link => 1, |
| 6885 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
| 6886 |
!!!cp ('t334'); |
!!!cp ('t334'); |
| 6887 |
## NOTE: This is an "as if in head" code clone, only "-t" differs |
## NOTE: This is an "as if in head" code clone, only "-t" differs |
| 6888 |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token); |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token); |
| 6889 |
pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec. |
pop @{$self->{open_elements}}; |
| 6890 |
!!!ack ('t334.1'); |
!!!ack ('t334.1'); |
| 6891 |
!!!next-token; |
!!!next-token; |
| 6892 |
next B; |
next B; |
| 6893 |
} elsif ($token->{tag_name} eq 'meta') { |
} elsif ($token->{tag_name} eq 'meta') { |
| 6894 |
## NOTE: This is an "as if in head" code clone, only "-t" differs |
## NOTE: This is an "as if in head" code clone, only "-t" differs |
| 6895 |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token); |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token); |
| 6896 |
my $meta_el = pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec. |
my $meta_el = pop @{$self->{open_elements}}; |
| 6897 |
|
|
| 6898 |
unless ($self->{confident}) { |
unless ($self->{confident}) { |
| 6899 |
if ($token->{attributes}->{charset}) { |
if ($token->{attributes}->{charset}) { |
| 6910 |
} elsif ($token->{attributes}->{content}) { |
} elsif ($token->{attributes}->{content}) { |
| 6911 |
if ($token->{attributes}->{content}->{value} |
if ($token->{attributes}->{content}->{value} |
| 6912 |
=~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt] |
=~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt] |
| 6913 |
[\x09-\x0D\x20]*= |
[\x09\x0A\x0C\x0D\x20]*= |
| 6914 |
[\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'| |
[\x09\x0A\x0C\x0D\x20]*(?>"([^"]*)"|'([^']*)'| |
| 6915 |
([^"'\x09-\x0D\x20][^\x09-\x0D\x20\x3B]*))/x) { |
([^"'\x09\x0A\x0C\x0D\x20][^\x09\x0A\x0C\x0D\x20\x3B]*)) |
| 6916 |
|
/x) { |
| 6917 |
!!!cp ('t336'); |
!!!cp ('t336'); |
| 6918 |
## NOTE: Whether the encoding is supported or not is handled |
## NOTE: Whether the encoding is supported or not is handled |
| 6919 |
## in the {change_encoding} callback. |
## in the {change_encoding} callback. |
| 6972 |
!!!next-token; |
!!!next-token; |
| 6973 |
next B; |
next B; |
| 6974 |
} elsif ({ |
} elsif ({ |
| 6975 |
address => 1, blockquote => 1, center => 1, dir => 1, |
## NOTE: Start tags for non-phrasing flow content elements |
| 6976 |
div => 1, dl => 1, fieldset => 1, |
|
| 6977 |
h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1, |
## NOTE: The normal one |
| 6978 |
menu => 1, ol => 1, p => 1, ul => 1, |
address => 1, article => 1, aside => 1, blockquote => 1, |
| 6979 |
|
center => 1, datagrid => 1, details => 1, dialog => 1, |
| 6980 |
|
dir => 1, div => 1, dl => 1, fieldset => 1, figure => 1, |
| 6981 |
|
footer => 1, h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, |
| 6982 |
|
h6 => 1, header => 1, menu => 1, nav => 1, ol => 1, p => 1, |
| 6983 |
|
section => 1, ul => 1, |
| 6984 |
|
## NOTE: As normal, but drops leading newline |
| 6985 |
pre => 1, listing => 1, |
pre => 1, listing => 1, |
| 6986 |
|
## NOTE: As normal, but interacts with the form element pointer |
| 6987 |
form => 1, |
form => 1, |
| 6988 |
|
|
| 6989 |
table => 1, |
table => 1, |
| 6990 |
hr => 1, |
hr => 1, |
| 6991 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
| 7052 |
!!!next-token; |
!!!next-token; |
| 7053 |
} |
} |
| 7054 |
next B; |
next B; |
| 7055 |
} elsif ({li => 1, dt => 1, dd => 1}->{$token->{tag_name}}) { |
} elsif ($token->{tag_name} eq 'li') { |
| 7056 |
## has a p element in scope |
## NOTE: As normal, but imply </li> when there's another <li> ... |
| 7057 |
|
|
| 7058 |
|
## NOTE: Special, Scope (<li><foo><li> == <li><foo><li/></foo></li>) |
| 7059 |
|
## Interpreted as <li><foo/></li><li/> (non-conforming) |
| 7060 |
|
## blockquote (O9.27), center (O), dd (Fx3, O, S3.1.2, IE7), |
| 7061 |
|
## dt (Fx, O, S, IE), dl (O), fieldset (O, S, IE), form (Fx, O, S), |
| 7062 |
|
## hn (O), pre (O), applet (O, S), button (O, S), marquee (Fx, O, S), |
| 7063 |
|
## object (Fx) |
| 7064 |
|
## Generate non-tree (non-conforming) |
| 7065 |
|
## basefont (IE7 (where basefont is non-void)), center (IE), |
| 7066 |
|
## form (IE), hn (IE) |
| 7067 |
|
## address, div, p (<li><foo><li> == <li><foo/></li><li/>) |
| 7068 |
|
## Interpreted as <li><foo><li/></foo></li> (non-conforming) |
| 7069 |
|
## div (Fx, S) |
| 7070 |
|
|
| 7071 |
|
my $non_optional; |
| 7072 |
|
my $i = -1; |
| 7073 |
|
|
| 7074 |
|
## 1. |
| 7075 |
|
for my $node (reverse @{$self->{open_elements}}) { |
| 7076 |
|
if ($node->[1] & LI_EL) { |
| 7077 |
|
## 2. (a) As if </li> |
| 7078 |
|
{ |
| 7079 |
|
## If no </li> - not applied |
| 7080 |
|
# |
| 7081 |
|
|
| 7082 |
|
## Otherwise |
| 7083 |
|
|
| 7084 |
|
## 1. generate implied end tags, except for </li> |
| 7085 |
|
# |
| 7086 |
|
|
| 7087 |
|
## 2. If current node != "li", parse error |
| 7088 |
|
if ($non_optional) { |
| 7089 |
|
!!!parse-error (type => 'not closed', |
| 7090 |
|
text => $non_optional->[0]->manakai_local_name, |
| 7091 |
|
token => $token); |
| 7092 |
|
!!!cp ('t355'); |
| 7093 |
|
} else { |
| 7094 |
|
!!!cp ('t356'); |
| 7095 |
|
} |
| 7096 |
|
|
| 7097 |
|
## 3. Pop |
| 7098 |
|
splice @{$self->{open_elements}}, $i; |
| 7099 |
|
} |
| 7100 |
|
|
| 7101 |
|
last; ## 2. (b) goto 5. |
| 7102 |
|
} elsif ( |
| 7103 |
|
## NOTE: not "formatting" and not "phrasing" |
| 7104 |
|
($node->[1] & SPECIAL_EL or |
| 7105 |
|
$node->[1] & SCOPING_EL) and |
| 7106 |
|
## NOTE: "li", "dt", and "dd" are in |SPECIAL_EL|. |
| 7107 |
|
|
| 7108 |
|
(not $node->[1] & ADDRESS_EL) & |
| 7109 |
|
(not $node->[1] & DIV_EL) & |
| 7110 |
|
(not $node->[1] & P_EL)) { |
| 7111 |
|
## 3. |
| 7112 |
|
!!!cp ('t357'); |
| 7113 |
|
last; ## goto 5. |
| 7114 |
|
} elsif ($node->[1] & END_TAG_OPTIONAL_EL) { |
| 7115 |
|
!!!cp ('t358'); |
| 7116 |
|
# |
| 7117 |
|
} else { |
| 7118 |
|
!!!cp ('t359'); |
| 7119 |
|
$non_optional ||= $node; |
| 7120 |
|
# |
| 7121 |
|
} |
| 7122 |
|
## 4. |
| 7123 |
|
## goto 2. |
| 7124 |
|
$i--; |
| 7125 |
|
} |
| 7126 |
|
|
| 7127 |
|
## 5. (a) has a |p| element in scope |
| 7128 |
INSCOPE: for (reverse @{$self->{open_elements}}) { |
INSCOPE: for (reverse @{$self->{open_elements}}) { |
| 7129 |
if ($_->[1] & P_EL) { |
if ($_->[1] & P_EL) { |
| 7130 |
!!!cp ('t353'); |
!!!cp ('t353'); |
| 7131 |
|
|
| 7132 |
|
## NOTE: |<p><li>|, for example. |
| 7133 |
|
|
| 7134 |
!!!back-token; # <x> |
!!!back-token; # <x> |
| 7135 |
$token = {type => END_TAG_TOKEN, tag_name => 'p', |
$token = {type => END_TAG_TOKEN, tag_name => 'p', |
| 7136 |
line => $token->{line}, column => $token->{column}}; |
line => $token->{line}, column => $token->{column}}; |
| 7140 |
last INSCOPE; |
last INSCOPE; |
| 7141 |
} |
} |
| 7142 |
} # INSCOPE |
} # INSCOPE |
| 7143 |
|
|
| 7144 |
## Step 1 |
## 5. (b) insert |
| 7145 |
|
!!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token); |
| 7146 |
|
!!!nack ('t359.1'); |
| 7147 |
|
!!!next-token; |
| 7148 |
|
next B; |
| 7149 |
|
} elsif ($token->{tag_name} eq 'dt' or |
| 7150 |
|
$token->{tag_name} eq 'dd') { |
| 7151 |
|
## NOTE: As normal, but imply </dt> or </dd> when ... |
| 7152 |
|
|
| 7153 |
|
my $non_optional; |
| 7154 |
my $i = -1; |
my $i = -1; |
| 7155 |
my $node = $self->{open_elements}->[$i]; |
|
| 7156 |
my $li_or_dtdd = {li => {li => 1}, |
## 1. |
| 7157 |
dt => {dt => 1, dd => 1}, |
for my $node (reverse @{$self->{open_elements}}) { |
| 7158 |
dd => {dt => 1, dd => 1}}->{$token->{tag_name}}; |
if ($node->[1] & DT_EL or $node->[1] & DD_EL) { |
| 7159 |
LI: { |
## 2. (a) As if </li> |
| 7160 |
## Step 2 |
{ |
| 7161 |
if ($li_or_dtdd->{$node->[0]->manakai_local_name}) { |
## If no </li> - not applied |
| 7162 |
if ($i != -1) { |
# |
| 7163 |
!!!cp ('t355'); |
|
| 7164 |
!!!parse-error (type => 'not closed', |
## Otherwise |
| 7165 |
text => $self->{open_elements}->[-1]->[0] |
|
| 7166 |
->manakai_local_name, |
## 1. generate implied end tags, except for </dt> or </dd> |
| 7167 |
token => $token); |
# |
| 7168 |
} else { |
|
| 7169 |
!!!cp ('t356'); |
## 2. If current node != "dt"|"dd", parse error |
| 7170 |
|
if ($non_optional) { |
| 7171 |
|
!!!parse-error (type => 'not closed', |
| 7172 |
|
text => $non_optional->[0]->manakai_local_name, |
| 7173 |
|
token => $token); |
| 7174 |
|
!!!cp ('t355.1'); |
| 7175 |
|
} else { |
| 7176 |
|
!!!cp ('t356.1'); |
| 7177 |
|
} |
| 7178 |
|
|
| 7179 |
|
## 3. Pop |
| 7180 |
|
splice @{$self->{open_elements}}, $i; |
| 7181 |
} |
} |
| 7182 |
splice @{$self->{open_elements}}, $i; |
|
| 7183 |
last LI; |
last; ## 2. (b) goto 5. |
| 7184 |
|
} elsif ( |
| 7185 |
|
## NOTE: not "formatting" and not "phrasing" |
| 7186 |
|
($node->[1] & SPECIAL_EL or |
| 7187 |
|
$node->[1] & SCOPING_EL) and |
| 7188 |
|
## NOTE: "li", "dt", and "dd" are in |SPECIAL_EL|. |
| 7189 |
|
|
| 7190 |
|
(not $node->[1] & ADDRESS_EL) & |
| 7191 |
|
(not $node->[1] & DIV_EL) & |
| 7192 |
|
(not $node->[1] & P_EL)) { |
| 7193 |
|
## 3. |
| 7194 |
|
!!!cp ('t357.1'); |
| 7195 |
|
last; ## goto 5. |
| 7196 |
|
} elsif ($node->[1] & END_TAG_OPTIONAL_EL) { |
| 7197 |
|
!!!cp ('t358.1'); |
| 7198 |
|
# |
| 7199 |
} else { |
} else { |
| 7200 |
!!!cp ('t357'); |
!!!cp ('t359.1'); |
| 7201 |
} |
$non_optional ||= $node; |
| 7202 |
|
# |
|
## Step 3 |
|
|
if (not ($node->[1] & FORMATTING_EL) and |
|
|
#not $phrasing_category->{$node->[1]} and |
|
|
($node->[1] & SPECIAL_EL or |
|
|
$node->[1] & SCOPING_EL) and |
|
|
not ($node->[1] & ADDRESS_EL) and |
|
|
not ($node->[1] & DIV_EL)) { |
|
|
!!!cp ('t358'); |
|
|
last LI; |
|
| 7203 |
} |
} |
| 7204 |
|
## 4. |
| 7205 |
!!!cp ('t359'); |
## goto 2. |
|
## Step 4 |
|
| 7206 |
$i--; |
$i--; |
| 7207 |
$node = $self->{open_elements}->[$i]; |
} |
| 7208 |
redo LI; |
|
| 7209 |
} # LI |
## 5. (a) has a |p| element in scope |
| 7210 |
|
INSCOPE: for (reverse @{$self->{open_elements}}) { |
| 7211 |
|
if ($_->[1] & P_EL) { |
| 7212 |
|
!!!cp ('t353.1'); |
| 7213 |
|
!!!back-token; # <x> |
| 7214 |
|
$token = {type => END_TAG_TOKEN, tag_name => 'p', |
| 7215 |
|
line => $token->{line}, column => $token->{column}}; |
| 7216 |
|
next B; |
| 7217 |
|
} elsif ($_->[1] & SCOPING_EL) { |
| 7218 |
|
!!!cp ('t354.1'); |
| 7219 |
|
last INSCOPE; |
| 7220 |
|
} |
| 7221 |
|
} # INSCOPE |
| 7222 |
|
|
| 7223 |
|
## 5. (b) insert |
| 7224 |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token); |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token); |
| 7225 |
!!!nack ('t359.1'); |
!!!nack ('t359.2'); |
| 7226 |
!!!next-token; |
!!!next-token; |
| 7227 |
next B; |
next B; |
| 7228 |
} elsif ($token->{tag_name} eq 'plaintext') { |
} elsif ($token->{tag_name} eq 'plaintext') { |
| 7229 |
|
## NOTE: As normal, but effectively ends parsing |
| 7230 |
|
|
| 7231 |
## has a p element in scope |
## has a p element in scope |
| 7232 |
INSCOPE: for (reverse @{$self->{open_elements}}) { |
INSCOPE: for (reverse @{$self->{open_elements}}) { |
| 7233 |
if ($_->[1] & P_EL) { |
if ($_->[1] & P_EL) { |
| 7419 |
next B; |
next B; |
| 7420 |
} |
} |
| 7421 |
} elsif ($token->{tag_name} eq 'textarea') { |
} elsif ($token->{tag_name} eq 'textarea') { |
| 7422 |
my $tag_name = $token->{tag_name}; |
## Step 1 |
| 7423 |
my $el; |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token); |
|
!!!create-element ($el, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token); |
|
| 7424 |
|
|
| 7425 |
|
## Step 2 |
| 7426 |
## TODO: $self->{form_element} if defined |
## TODO: $self->{form_element} if defined |
| 7427 |
|
|
| 7428 |
|
## Step 3 |
| 7429 |
|
$self->{ignore_newline} = 1; |
| 7430 |
|
|
| 7431 |
|
## Step 4 |
| 7432 |
|
## ISSUE: This step is wrong. (r2302 enbugged) |
| 7433 |
|
|
| 7434 |
|
## Step 5 |
| 7435 |
$self->{content_model} = RCDATA_CONTENT_MODEL; |
$self->{content_model} = RCDATA_CONTENT_MODEL; |
| 7436 |
delete $self->{escape}; # MUST |
delete $self->{escape}; # MUST |
| 7437 |
|
|
| 7438 |
$insert->($el); |
## Step 6-7 |
| 7439 |
|
$self->{insertion_mode} |= IN_CDATA_RCDATA_IM; |
| 7440 |
my $text = ''; |
|
| 7441 |
!!!nack ('t392.1'); |
!!!nack ('t392.1'); |
| 7442 |
!!!next-token; |
!!!next-token; |
| 7443 |
if ($token->{type} == CHARACTER_TOKEN) { |
next B; |
| 7444 |
$token->{data} =~ s/^\x0A//; |
} elsif ($token->{tag_name} eq 'optgroup' or |
| 7445 |
unless (length $token->{data}) { |
$token->{tag_name} eq 'option') { |
| 7446 |
!!!cp ('t392'); |
## has an |option| element in scope |
| 7447 |
!!!next-token; |
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { |
| 7448 |
} else { |
my $node = $self->{open_elements}->[$_]; |
| 7449 |
!!!cp ('t393'); |
if ($node->[1] & OPTION_EL) { |
| 7450 |
|
!!!cp ('t397.1'); |
| 7451 |
|
## NOTE: As if </option> |
| 7452 |
|
!!!back-token; # <option> or <optgroup> |
| 7453 |
|
$token = {type => END_TAG_TOKEN, tag_name => 'option', |
| 7454 |
|
line => $token->{line}, column => $token->{column}}; |
| 7455 |
|
next B; |
| 7456 |
|
} elsif ($node->[1] & SCOPING_EL) { |
| 7457 |
|
!!!cp ('t397.2'); |
| 7458 |
|
last INSCOPE; |
| 7459 |
} |
} |
| 7460 |
} else { |
} # INSCOPE |
| 7461 |
!!!cp ('t394'); |
|
| 7462 |
} |
$reconstruct_active_formatting_elements->($insert_to_current); |
| 7463 |
while ($token->{type} == CHARACTER_TOKEN) { |
|
| 7464 |
!!!cp ('t395'); |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token); |
| 7465 |
$text .= $token->{data}; |
|
| 7466 |
!!!next-token; |
!!!nack ('t397.3'); |
|
} |
|
|
if (length $text) { |
|
|
!!!cp ('t396'); |
|
|
$el->manakai_append_text ($text); |
|
|
} |
|
|
|
|
|
$self->{content_model} = PCDATA_CONTENT_MODEL; |
|
|
|
|
|
if ($token->{type} == END_TAG_TOKEN and |
|
|
$token->{tag_name} eq $tag_name) { |
|
|
!!!cp ('t397'); |
|
|
## Ignore the token |
|
|
} else { |
|
|
!!!cp ('t398'); |
|
|
!!!parse-error (type => 'in RCDATA:#eof', token => $token); |
|
|
} |
|
| 7467 |
!!!next-token; |
!!!next-token; |
| 7468 |
next B; |
redo B; |
| 7469 |
} elsif ($token->{tag_name} eq 'rt' or |
} elsif ($token->{tag_name} eq 'rt' or |
| 7470 |
$token->{tag_name} eq 'rp') { |
$token->{tag_name} eq 'rp') { |
| 7471 |
## has a |ruby| element in scope |
## has a |ruby| element in scope |
| 7513 |
|
|
| 7514 |
if ($self->{self_closing}) { |
if ($self->{self_closing}) { |
| 7515 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 7516 |
!!!ack ('t398.1'); |
!!!ack ('t398.6'); |
| 7517 |
} else { |
} else { |
| 7518 |
!!!cp ('t398.2'); |
!!!cp ('t398.7'); |
| 7519 |
$self->{insertion_mode} |= IN_FOREIGN_CONTENT_IM; |
$self->{insertion_mode} |= IN_FOREIGN_CONTENT_IM; |
| 7520 |
## NOTE: |<body><math><mi><svg>| -> "in foreign content" insertion |
## NOTE: |<body><math><mi><svg>| -> "in foreign content" insertion |
| 7521 |
## mode, "in body" (not "in foreign content") secondary insertion |
## mode, "in body" (not "in foreign content") secondary insertion |
| 7526 |
next B; |
next B; |
| 7527 |
} elsif ({ |
} elsif ({ |
| 7528 |
caption => 1, col => 1, colgroup => 1, frame => 1, |
caption => 1, col => 1, colgroup => 1, frame => 1, |
| 7529 |
frameset => 1, head => 1, option => 1, optgroup => 1, |
frameset => 1, head => 1, |
| 7530 |
tbody => 1, td => 1, tfoot => 1, th => 1, |
tbody => 1, td => 1, tfoot => 1, th => 1, |
| 7531 |
thead => 1, tr => 1, |
thead => 1, tr => 1, |
| 7532 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
| 7537 |
!!!nack ('t401.1'); ## NOTE: |<col/>| or |<frame/>| here is an error. |
!!!nack ('t401.1'); ## NOTE: |<col/>| or |<frame/>| here is an error. |
| 7538 |
!!!next-token; |
!!!next-token; |
| 7539 |
next B; |
next B; |
| 7540 |
|
} elsif ($token->{tag_name} eq 'param' or |
| 7541 |
## ISSUE: An issue on HTML5 new elements in the spec. |
$token->{tag_name} eq 'source') { |
| 7542 |
|
!!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token); |
| 7543 |
|
pop @{$self->{open_elements}}; |
| 7544 |
|
|
| 7545 |
|
!!!ack ('t398.5'); |
| 7546 |
|
!!!next-token; |
| 7547 |
|
redo B; |
| 7548 |
} else { |
} else { |
| 7549 |
if ($token->{tag_name} eq 'image') { |
if ($token->{tag_name} eq 'image') { |
| 7550 |
!!!cp ('t384'); |
!!!cp ('t384'); |
| 7567 |
!!!nack ('t380.1'); |
!!!nack ('t380.1'); |
| 7568 |
} elsif ({ |
} elsif ({ |
| 7569 |
b => 1, big => 1, em => 1, font => 1, i => 1, |
b => 1, big => 1, em => 1, font => 1, i => 1, |
| 7570 |
s => 1, small => 1, strile => 1, |
s => 1, small => 1, strike => 1, |
| 7571 |
strong => 1, tt => 1, u => 1, |
strong => 1, tt => 1, u => 1, |
| 7572 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
| 7573 |
!!!cp ('t375'); |
!!!cp ('t375'); |
| 7580 |
!!!ack ('t388.2'); |
!!!ack ('t388.2'); |
| 7581 |
} elsif ({ |
} elsif ({ |
| 7582 |
area => 1, basefont => 1, bgsound => 1, br => 1, |
area => 1, basefont => 1, bgsound => 1, br => 1, |
| 7583 |
embed => 1, img => 1, param => 1, spacer => 1, wbr => 1, |
embed => 1, img => 1, spacer => 1, wbr => 1, |
|
#image => 1, |
|
| 7584 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
| 7585 |
!!!cp ('t388.1'); |
!!!cp ('t388.1'); |
| 7586 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 7621 |
} |
} |
| 7622 |
} |
} |
| 7623 |
|
|
| 7624 |
!!!parse-error (type => 'start tag not allowed', |
## NOTE: |<marquee></body>|, |<svg><foreignobject></body>| |
| 7625 |
|
|
| 7626 |
|
!!!parse-error (type => 'unmatched end tag', |
| 7627 |
text => $token->{tag_name}, token => $token); |
text => $token->{tag_name}, token => $token); |
| 7628 |
## NOTE: Ignore the token. |
## NOTE: Ignore the token. |
| 7629 |
!!!next-token; |
!!!next-token; |
| 7650 |
## up-to-date, though it has same effect as speced. |
## up-to-date, though it has same effect as speced. |
| 7651 |
if (@{$self->{open_elements}} > 1 and |
if (@{$self->{open_elements}} > 1 and |
| 7652 |
$self->{open_elements}->[1]->[1] & BODY_EL) { |
$self->{open_elements}->[1]->[1] & BODY_EL) { |
|
## ISSUE: There is an issue in the spec. |
|
| 7653 |
unless ($self->{open_elements}->[-1]->[1] & BODY_EL) { |
unless ($self->{open_elements}->[-1]->[1] & BODY_EL) { |
| 7654 |
!!!cp ('t406'); |
!!!cp ('t406'); |
| 7655 |
!!!parse-error (type => 'not closed', |
!!!parse-error (type => 'not closed', |
| 7671 |
next B; |
next B; |
| 7672 |
} |
} |
| 7673 |
} elsif ({ |
} elsif ({ |
| 7674 |
address => 1, blockquote => 1, center => 1, dir => 1, |
## NOTE: End tags for non-phrasing flow content elements |
| 7675 |
div => 1, dl => 1, fieldset => 1, listing => 1, |
|
| 7676 |
menu => 1, ol => 1, pre => 1, ul => 1, |
## NOTE: The normal ones |
| 7677 |
|
address => 1, article => 1, aside => 1, blockquote => 1, |
| 7678 |
|
center => 1, datagrid => 1, details => 1, dialog => 1, |
| 7679 |
|
dir => 1, div => 1, dl => 1, fieldset => 1, figure => 1, |
| 7680 |
|
footer => 1, header => 1, listing => 1, menu => 1, nav => 1, |
| 7681 |
|
ol => 1, pre => 1, section => 1, ul => 1, |
| 7682 |
|
|
| 7683 |
|
## NOTE: As normal, but ... optional tags |
| 7684 |
dd => 1, dt => 1, li => 1, |
dd => 1, dt => 1, li => 1, |
| 7685 |
|
|
| 7686 |
applet => 1, button => 1, marquee => 1, object => 1, |
applet => 1, button => 1, marquee => 1, object => 1, |
| 7687 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
| 7688 |
|
## NOTE: Code for <li> start tags includes "as if </li>" code. |
| 7689 |
|
## Code for <dt> or <dd> start tags includes "as if </dt> or |
| 7690 |
|
## </dd>" code. |
| 7691 |
|
|
| 7692 |
## has an element in scope |
## has an element in scope |
| 7693 |
my $i; |
my $i; |
| 7694 |
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { |
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { |
| 7715 |
dd => ($token->{tag_name} ne 'dd'), |
dd => ($token->{tag_name} ne 'dd'), |
| 7716 |
dt => ($token->{tag_name} ne 'dt'), |
dt => ($token->{tag_name} ne 'dt'), |
| 7717 |
li => ($token->{tag_name} ne 'li'), |
li => ($token->{tag_name} ne 'li'), |
| 7718 |
|
option => 1, |
| 7719 |
|
optgroup => 1, |
| 7720 |
p => 1, |
p => 1, |
| 7721 |
rt => 1, |
rt => 1, |
| 7722 |
rp => 1, |
rp => 1, |
| 7749 |
!!!next-token; |
!!!next-token; |
| 7750 |
next B; |
next B; |
| 7751 |
} elsif ($token->{tag_name} eq 'form') { |
} elsif ($token->{tag_name} eq 'form') { |
| 7752 |
|
## NOTE: As normal, but interacts with the form element pointer |
| 7753 |
|
|
| 7754 |
undef $self->{form_element}; |
undef $self->{form_element}; |
| 7755 |
|
|
| 7756 |
## has an element in scope |
## has an element in scope |
| 7798 |
!!!next-token; |
!!!next-token; |
| 7799 |
next B; |
next B; |
| 7800 |
} elsif ({ |
} elsif ({ |
| 7801 |
|
## NOTE: As normal, except acts as a closer for any ... |
| 7802 |
h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1, |
h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1, |
| 7803 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
| 7804 |
## has an element in scope |
## has an element in scope |
| 7844 |
!!!next-token; |
!!!next-token; |
| 7845 |
next B; |
next B; |
| 7846 |
} elsif ($token->{tag_name} eq 'p') { |
} elsif ($token->{tag_name} eq 'p') { |
| 7847 |
|
## NOTE: As normal, except </p> implies <p> and ... |
| 7848 |
|
|
| 7849 |
## has an element in scope |
## has an element in scope |
| 7850 |
|
my $non_optional; |
| 7851 |
my $i; |
my $i; |
| 7852 |
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { |
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { |
| 7853 |
my $node = $self->{open_elements}->[$_]; |
my $node = $self->{open_elements}->[$_]; |
| 7858 |
} elsif ($node->[1] & SCOPING_EL) { |
} elsif ($node->[1] & SCOPING_EL) { |
| 7859 |
!!!cp ('t411.1'); |
!!!cp ('t411.1'); |
| 7860 |
last INSCOPE; |
last INSCOPE; |
| 7861 |
|
} elsif ($node->[1] & END_TAG_OPTIONAL_EL) { |
| 7862 |
|
## NOTE: |END_TAG_OPTIONAL_EL| includes "p" |
| 7863 |
|
!!!cp ('t411.2'); |
| 7864 |
|
# |
| 7865 |
|
} else { |
| 7866 |
|
!!!cp ('t411.3'); |
| 7867 |
|
$non_optional ||= $node; |
| 7868 |
|
# |
| 7869 |
} |
} |
| 7870 |
} # INSCOPE |
} # INSCOPE |
| 7871 |
|
|
| 7872 |
if (defined $i) { |
if (defined $i) { |
| 7873 |
if ($self->{open_elements}->[-1]->[0]->manakai_local_name |
## 1. Generate implied end tags |
| 7874 |
ne $token->{tag_name}) { |
# |
| 7875 |
|
|
| 7876 |
|
## 2. If current node != "p", parse error |
| 7877 |
|
if ($non_optional) { |
| 7878 |
!!!cp ('t412.1'); |
!!!cp ('t412.1'); |
| 7879 |
!!!parse-error (type => 'not closed', |
!!!parse-error (type => 'not closed', |
| 7880 |
text => $self->{open_elements}->[-1]->[0] |
text => $non_optional->[0]->manakai_local_name, |
|
->manakai_local_name, |
|
| 7881 |
token => $token); |
token => $token); |
| 7882 |
} else { |
} else { |
| 7883 |
!!!cp ('t414.1'); |
!!!cp ('t414.1'); |
| 7884 |
} |
} |
| 7885 |
|
|
| 7886 |
|
## 3. Pop |
| 7887 |
splice @{$self->{open_elements}}, $i; |
splice @{$self->{open_elements}}, $i; |
| 7888 |
} else { |
} else { |
| 7889 |
!!!cp ('t413.1'); |
!!!cp ('t413.1'); |
| 7903 |
} elsif ({ |
} elsif ({ |
| 7904 |
a => 1, |
a => 1, |
| 7905 |
b => 1, big => 1, em => 1, font => 1, i => 1, |
b => 1, big => 1, em => 1, font => 1, i => 1, |
| 7906 |
nobr => 1, s => 1, small => 1, strile => 1, |
nobr => 1, s => 1, small => 1, strike => 1, |
| 7907 |
strong => 1, tt => 1, u => 1, |
strong => 1, tt => 1, u => 1, |
| 7908 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
| 7909 |
!!!cp ('t427'); |
!!!cp ('t427'); |
| 7924 |
## Ignore the token. |
## Ignore the token. |
| 7925 |
!!!next-token; |
!!!next-token; |
| 7926 |
next B; |
next B; |
|
} elsif ({ |
|
|
caption => 1, col => 1, colgroup => 1, frame => 1, |
|
|
frameset => 1, head => 1, option => 1, optgroup => 1, |
|
|
tbody => 1, td => 1, tfoot => 1, th => 1, |
|
|
thead => 1, tr => 1, |
|
|
area => 1, basefont => 1, bgsound => 1, |
|
|
embed => 1, hr => 1, iframe => 1, image => 1, |
|
|
img => 1, input => 1, isindex => 1, noembed => 1, |
|
|
noframes => 1, param => 1, select => 1, spacer => 1, |
|
|
table => 1, textarea => 1, wbr => 1, |
|
|
noscript => 0, ## TODO: if scripting is enabled |
|
|
}->{$token->{tag_name}}) { |
|
|
!!!cp ('t429'); |
|
|
!!!parse-error (type => 'unmatched end tag', |
|
|
text => $token->{tag_name}, token => $token); |
|
|
## Ignore the token |
|
|
!!!next-token; |
|
|
next B; |
|
|
|
|
|
## ISSUE: Issue on HTML5 new elements in spec |
|
|
|
|
| 7927 |
} else { |
} else { |
| 7928 |
|
if ($token->{tag_name} eq 'sarcasm') { |
| 7929 |
|
sleep 0.001; # take a deep breath |
| 7930 |
|
} |
| 7931 |
|
|
| 7932 |
## Step 1 |
## Step 1 |
| 7933 |
my $node_i = -1; |
my $node_i = -1; |
| 7934 |
my $node = $self->{open_elements}->[$node_i]; |
my $node = $self->{open_elements}->[$node_i]; |
| 7935 |
|
|
| 7936 |
## Step 2 |
## Step 2 |
| 7937 |
S2: { |
S2: { |
| 7938 |
if ($node->[0]->manakai_local_name eq $token->{tag_name}) { |
my $node_tag_name = $node->[0]->manakai_local_name; |
| 7939 |
|
$node_tag_name =~ tr/A-Z/a-z/; # for SVG camelCase tag names |
| 7940 |
|
if ($node_tag_name eq $token->{tag_name}) { |
| 7941 |
## Step 1 |
## Step 1 |
| 7942 |
## generate implied end tags |
## generate implied end tags |
| 7943 |
while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) { |
while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) { |
| 7950 |
} |
} |
| 7951 |
|
|
| 7952 |
## Step 2 |
## Step 2 |
| 7953 |
if ($self->{open_elements}->[-1]->[0]->manakai_local_name |
my $current_tag_name |
| 7954 |
ne $token->{tag_name}) { |
= $self->{open_elements}->[-1]->[0]->manakai_local_name; |
| 7955 |
|
$current_tag_name =~ tr/A-Z/a-z/; |
| 7956 |
|
if ($current_tag_name ne $token->{tag_name}) { |
| 7957 |
!!!cp ('t431'); |
!!!cp ('t431'); |
| 7958 |
## NOTE: <x><y></x> |
## NOTE: <x><y></x> |
| 7959 |
!!!parse-error (type => 'not closed', |
!!!parse-error (type => 'not closed', |
| 7981 |
## Ignore the token |
## Ignore the token |
| 7982 |
!!!next-token; |
!!!next-token; |
| 7983 |
last S2; |
last S2; |
|
} |
|
| 7984 |
|
|
| 7985 |
|
## NOTE: |<span><dd></span>a|: In Safari 3.1.2 and Opera |
| 7986 |
|
## 9.27, "a" is a child of <dd> (conforming). In |
| 7987 |
|
## Firefox 3.0.2, "a" is a child of <body>. In WinIE 7, |
| 7988 |
|
## "a" is a child of both <body> and <dd>. |
| 7989 |
|
} |
| 7990 |
|
|
| 7991 |
!!!cp ('t434'); |
!!!cp ('t434'); |
| 7992 |
} |
} |
| 7993 |
|
|
| 8223 |
push @{$p->{open_elements}}, [$root, $el_category->{html}]; |
push @{$p->{open_elements}}, [$root, $el_category->{html}]; |
| 8224 |
|
|
| 8225 |
undef $p->{head_element}; |
undef $p->{head_element}; |
| 8226 |
|
undef $p->{head_element_inserted}; |
| 8227 |
|
|
| 8228 |
## Step 6 # MUST |
## Step 6 # MUST |
| 8229 |
$p->_reset_insertion_mode; |
$p->_reset_insertion_mode; |