| 179 |
dt => DTDD_EL, |
dt => DTDD_EL, |
| 180 |
em => FORMATTING_EL, |
em => FORMATTING_EL, |
| 181 |
embed => MISC_SPECIAL_EL, |
embed => MISC_SPECIAL_EL, |
|
eventsource => MISC_SPECIAL_EL, |
|
| 182 |
fieldset => MISC_SPECIAL_EL, |
fieldset => MISC_SPECIAL_EL, |
| 183 |
figure => MISC_SPECIAL_EL, |
figure => MISC_SPECIAL_EL, |
| 184 |
font => FORMATTING_EL, |
font => FORMATTING_EL, |
| 202 |
#image => MISC_SPECIAL_EL, ## NOTE: Commented out in the spec. |
#image => MISC_SPECIAL_EL, ## NOTE: Commented out in the spec. |
| 203 |
input => MISC_SPECIAL_EL, |
input => MISC_SPECIAL_EL, |
| 204 |
isindex => MISC_SPECIAL_EL, |
isindex => MISC_SPECIAL_EL, |
| 205 |
|
## XXX keygen? (Whether a void element is in Special or not does not |
| 206 |
|
## affect the processing, however.) |
| 207 |
li => LI_EL, |
li => LI_EL, |
| 208 |
link => MISC_SPECIAL_EL, |
link => MISC_SPECIAL_EL, |
| 209 |
listing => MISC_SPECIAL_EL, |
listing => MISC_SPECIAL_EL, |
| 912 |
|
|
| 913 |
INITIAL: { |
INITIAL: { |
| 914 |
if ($token->{type} == DOCTYPE_TOKEN) { |
if ($token->{type} == DOCTYPE_TOKEN) { |
| 915 |
## NOTE: Conformance checkers MAY, instead of reporting "not HTML5" |
## NOTE: Conformance checkers MAY, instead of reporting "not |
| 916 |
## error, switch to a conformance checking mode for another |
## HTML5" error, switch to a conformance checking mode for |
| 917 |
## language. |
## another language. (We don't support such mode switching; it |
| 918 |
|
## is nonsense to do anything different from what browsers do.) |
| 919 |
my $doctype_name = $token->{name}; |
my $doctype_name = $token->{name}; |
| 920 |
$doctype_name = '' unless defined $doctype_name; |
$doctype_name = '' unless defined $doctype_name; |
| 921 |
$doctype_name =~ tr/a-z/A-Z/; # ASCII case-insensitive |
my $doctype = $self->{document}->create_document_type_definition |
| 922 |
if (not defined $token->{name} or # <!DOCTYPE> |
($doctype_name); |
| 923 |
defined $token->{sysid}) { |
|
| 924 |
|
$doctype_name =~ tr/A-Z/a-z/; # ASCII case-insensitive |
| 925 |
|
if ($doctype_name ne 'html') { |
| 926 |
!!!cp ('t1'); |
!!!cp ('t1'); |
| 927 |
!!!parse-error (type => 'not HTML5', token => $token); |
!!!parse-error (type => 'not HTML5', token => $token); |
| 928 |
} elsif ($doctype_name ne 'HTML') { |
} elsif (defined $token->{pubid}) { |
| 929 |
!!!cp ('t2'); |
!!!cp ('t2'); |
| 930 |
|
## XXX Obsolete permitted DOCTYPEs |
| 931 |
!!!parse-error (type => 'not HTML5', token => $token); |
!!!parse-error (type => 'not HTML5', token => $token); |
| 932 |
} elsif (defined $token->{pubid}) { |
} elsif (defined $token->{sysid}) { |
| 933 |
if ($token->{pubid} eq 'XSLT-compat') { |
if ($token->{sysid} eq 'about:legacy-compat') { |
| 934 |
!!!cp ('t1.2'); |
!!!cp ('t1.2'); ## <!DOCTYPE HTML SYSTEM "about:legacy-compat"> |
| 935 |
!!!parse-error (type => 'XSLT-compat', token => $token, |
!!!parse-error (type => 'XSLT-compat', token => $token, |
| 936 |
level => $self->{level}->{should}); |
level => $self->{level}->{should}); |
| 937 |
} else { |
} else { |
| 938 |
!!!parse-error (type => 'not HTML5', token => $token); |
!!!parse-error (type => 'not HTML5', token => $token); |
| 939 |
} |
} |
| 940 |
} else { |
} else { ## <!DOCTYPE HTML> |
| 941 |
!!!cp ('t3'); |
!!!cp ('t3'); |
| 942 |
# |
# |
| 943 |
} |
} |
| 944 |
|
|
|
my $doctype = $self->{document}->create_document_type_definition |
|
|
($token->{name}); ## ISSUE: If name is missing (e.g. <!DOCTYPE>)? |
|
| 945 |
## NOTE: Default value for both |public_id| and |system_id| attributes |
## NOTE: Default value for both |public_id| and |system_id| attributes |
| 946 |
## are empty strings, so that we don't set any value in missing cases. |
## are empty strings, so that we don't set any value in missing cases. |
| 947 |
$doctype->public_id ($token->{pubid}) if defined $token->{pubid}; |
$doctype->public_id ($token->{pubid}) if defined $token->{pubid}; |
| 948 |
$doctype->system_id ($token->{sysid}) if defined $token->{sysid}; |
$doctype->system_id ($token->{sysid}) if defined $token->{sysid}; |
| 949 |
|
|
| 950 |
## NOTE: Other DocumentType attributes are null or empty lists. |
## NOTE: Other DocumentType attributes are null or empty lists. |
| 951 |
## In Firefox3, |internalSubset| attribute is set to the empty |
## In Firefox3, |internalSubset| attribute is set to the empty |
| 952 |
## string, while |null| is an allowed value for the attribute |
## string, while |null| is an allowed value for the attribute |
| 953 |
## according to DOM3 Core. |
## according to DOM3 Core. |
| 954 |
$self->{document}->append_child ($doctype); |
$self->{document}->append_child ($doctype); |
| 955 |
|
|
| 956 |
if ($token->{quirks} or $doctype_name ne 'HTML') { |
if ($token->{quirks} or $doctype_name ne 'html') { |
| 957 |
!!!cp ('t4'); |
!!!cp ('t4'); |
| 958 |
$self->{document}->manakai_compat_mode ('quirks'); |
$self->{document}->manakai_compat_mode ('quirks'); |
| 959 |
} elsif (defined $token->{pubid}) { |
} elsif (defined $token->{pubid}) { |
| 1439 |
}; # $script_start_tag |
}; # $script_start_tag |
| 1440 |
|
|
| 1441 |
## NOTE: $open_tables->[-1]->[0] is the "current table" element node. |
## NOTE: $open_tables->[-1]->[0] is the "current table" element node. |
| 1442 |
## NOTE: $open_tables->[-1]->[1] is the "tainted" flag. |
## NOTE: $open_tables->[-1]->[1] is the "tainted" flag (OBSOLETE; unused). |
| 1443 |
## NOTE: $open_tables->[-1]->[2] is set false when non-Text node inserted. |
## NOTE: $open_tables->[-1]->[2] is set false when non-Text node inserted. |
| 1444 |
my $open_tables = [[$self->{open_elements}->[0]->[0]]]; |
my $open_tables = [[$self->{open_elements}->[0]->[0]]]; |
| 1445 |
|
|
| 1735 |
## document.write ("b")</script>| |
## document.write ("b")</script>| |
| 1736 |
|
|
| 1737 |
B: while (1) { |
B: while (1) { |
| 1738 |
|
|
| 1739 |
|
## The "in table text" insertion mode. |
| 1740 |
|
if ($self->{insertion_mode} & TABLE_IMS and |
| 1741 |
|
not $self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and |
| 1742 |
|
not $self->{insertion_mode} & IN_CDATA_RCDATA_IM) { |
| 1743 |
|
C: { |
| 1744 |
|
my $s; |
| 1745 |
|
if ($token->{type} == CHARACTER_TOKEN) { |
| 1746 |
|
!!!cp ('t194'); |
| 1747 |
|
$self->{pending_chars} ||= []; |
| 1748 |
|
push @{$self->{pending_chars}}, $token; |
| 1749 |
|
!!!next-token; |
| 1750 |
|
next B; |
| 1751 |
|
} else { |
| 1752 |
|
if ($self->{pending_chars}) { |
| 1753 |
|
$s = join '', map { $_->{data} } @{$self->{pending_chars}}; |
| 1754 |
|
delete $self->{pending_chars}; |
| 1755 |
|
if ($s =~ /[^\x09\x0A\x0C\x0D\x20]/) { |
| 1756 |
|
!!!cp ('t195'); |
| 1757 |
|
# |
| 1758 |
|
} else { |
| 1759 |
|
!!!cp ('t195.1'); |
| 1760 |
|
#$self->{open_elements}->[-1]->[0]->manakai_append_text ($s); |
| 1761 |
|
$self->{open_elements}->[-1]->[0]->append_child |
| 1762 |
|
($self->{document}->create_text_node ($s)); |
| 1763 |
|
last C; |
| 1764 |
|
} |
| 1765 |
|
} else { |
| 1766 |
|
!!!cp ('t195.2'); |
| 1767 |
|
last C; |
| 1768 |
|
} |
| 1769 |
|
} |
| 1770 |
|
|
| 1771 |
|
## Foster parenting |
| 1772 |
|
!!!parse-error (type => 'in table:#text', token => $token); |
| 1773 |
|
|
| 1774 |
|
## NOTE: As if in body, but insert into the foster parent element. |
| 1775 |
|
$reconstruct_active_formatting_elements->($insert_to_foster); |
| 1776 |
|
|
| 1777 |
|
if ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) { |
| 1778 |
|
# MUST |
| 1779 |
|
my $foster_parent_element; |
| 1780 |
|
my $next_sibling; |
| 1781 |
|
#my $prev_sibling; |
| 1782 |
|
OE: for (reverse 0..$#{$self->{open_elements}}) { |
| 1783 |
|
if ($self->{open_elements}->[$_]->[1] == TABLE_EL) { |
| 1784 |
|
my $parent = $self->{open_elements}->[$_]->[0]->parent_node; |
| 1785 |
|
if (defined $parent and $parent->node_type == 1) { |
| 1786 |
|
$foster_parent_element = $parent; |
| 1787 |
|
!!!cp ('t196'); |
| 1788 |
|
$next_sibling = $self->{open_elements}->[$_]->[0]; |
| 1789 |
|
# $prev_sibling = $next_sibling->previous_sibling; |
| 1790 |
|
# |
| 1791 |
|
} else { |
| 1792 |
|
!!!cp ('t197'); |
| 1793 |
|
$foster_parent_element = $self->{open_elements}->[$_ - 1]->[0]; |
| 1794 |
|
# $prev_sibling = $foster_parent_element->last_child; |
| 1795 |
|
# |
| 1796 |
|
} |
| 1797 |
|
last OE; |
| 1798 |
|
} |
| 1799 |
|
} # OE |
| 1800 |
|
$foster_parent_element = $self->{open_elements}->[0]->[0] #and |
| 1801 |
|
#$prev_sibling = $foster_parent_element->last_child |
| 1802 |
|
unless defined $foster_parent_element; |
| 1803 |
|
#undef $prev_sibling unless $open_tables->[-1]->[2]; # ~node inserted |
| 1804 |
|
#if (defined $prev_sibling and |
| 1805 |
|
# $prev_sibling->node_type == 3) { |
| 1806 |
|
# !!! cp ('t198'); |
| 1807 |
|
# $prev_sibling->manakai_append_text ($s); |
| 1808 |
|
#} else { |
| 1809 |
|
!!!cp ('t199'); |
| 1810 |
|
$foster_parent_element->insert_before |
| 1811 |
|
($self->{document}->create_text_node ($s), $next_sibling); |
| 1812 |
|
#} |
| 1813 |
|
$open_tables->[-1]->[1] = 1; # tainted |
| 1814 |
|
$open_tables->[-1]->[2] = 1; # ~node inserted |
| 1815 |
|
} else { |
| 1816 |
|
## NOTE: Fragment case or in a foster parent'ed element |
| 1817 |
|
## (e.g. |<table><span>a|). In fragment case, whether the |
| 1818 |
|
## character is appended to existing node or a new node is |
| 1819 |
|
## created is irrelevant, since the foster parent'ed nodes |
| 1820 |
|
## are discarded and fragment parsing does not invoke any |
| 1821 |
|
## script. |
| 1822 |
|
!!!cp ('t200'); |
| 1823 |
|
$self->{open_elements}->[-1]->[0]->manakai_append_text ($s); |
| 1824 |
|
} |
| 1825 |
|
} # C |
| 1826 |
|
} # TABLE_IMS |
| 1827 |
|
|
| 1828 |
if ($token->{type} == DOCTYPE_TOKEN) { |
if ($token->{type} == DOCTYPE_TOKEN) { |
| 1829 |
!!!cp ('t73'); |
!!!cp ('t73'); |
| 1830 |
!!!parse-error (type => 'in html:#DOCTYPE', token => $token); |
!!!parse-error (type => 'in html:#DOCTYPE', token => $token); |
| 2243 |
!!!ack ('t103.1'); |
!!!ack ('t103.1'); |
| 2244 |
!!!next-token; |
!!!next-token; |
| 2245 |
next B; |
next B; |
| 2246 |
} elsif ($token->{tag_name} eq 'command' or |
} elsif ($token->{tag_name} eq 'command') { |
|
$token->{tag_name} eq 'eventsource') { |
|
| 2247 |
if ($self->{insertion_mode} == IN_HEAD_IM) { |
if ($self->{insertion_mode} == IN_HEAD_IM) { |
| 2248 |
## NOTE: If the insertion mode at the time of the emission |
## NOTE: If the insertion mode at the time of the emission |
| 2249 |
## of the token was "before head", $self->{insertion_mode} |
## of the token was "before head", $self->{insertion_mode} |
| 3111 |
$insert = $insert_to_current; |
$insert = $insert_to_current; |
| 3112 |
# |
# |
| 3113 |
} elsif ($self->{insertion_mode} & TABLE_IMS) { |
} elsif ($self->{insertion_mode} & TABLE_IMS) { |
| 3114 |
if ($token->{type} == CHARACTER_TOKEN) { |
if ($token->{type} == START_TAG_TOKEN) { |
|
if (not $open_tables->[-1]->[1] and # tainted |
|
|
$token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) { |
|
|
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
|
|
|
|
|
unless (length $token->{data}) { |
|
|
!!!cp ('t194'); |
|
|
!!!next-token; |
|
|
next B; |
|
|
} else { |
|
|
!!!cp ('t195'); |
|
|
} |
|
|
} |
|
|
|
|
|
!!!parse-error (type => 'in table:#text', token => $token); |
|
|
|
|
|
## NOTE: As if in body, but insert into the foster parent element. |
|
|
$reconstruct_active_formatting_elements->($insert_to_foster); |
|
|
|
|
|
if ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) { |
|
|
# MUST |
|
|
my $foster_parent_element; |
|
|
my $next_sibling; |
|
|
my $prev_sibling; |
|
|
OE: for (reverse 0..$#{$self->{open_elements}}) { |
|
|
if ($self->{open_elements}->[$_]->[1] == TABLE_EL) { |
|
|
my $parent = $self->{open_elements}->[$_]->[0]->parent_node; |
|
|
if (defined $parent and $parent->node_type == 1) { |
|
|
$foster_parent_element = $parent; |
|
|
!!!cp ('t196'); |
|
|
$next_sibling = $self->{open_elements}->[$_]->[0]; |
|
|
$prev_sibling = $next_sibling->previous_sibling; |
|
|
# |
|
|
} else { |
|
|
!!!cp ('t197'); |
|
|
$foster_parent_element = $self->{open_elements}->[$_ - 1]->[0]; |
|
|
$prev_sibling = $foster_parent_element->last_child; |
|
|
# |
|
|
} |
|
|
last OE; |
|
|
} |
|
|
} # OE |
|
|
$foster_parent_element = $self->{open_elements}->[0]->[0] and |
|
|
$prev_sibling = $foster_parent_element->last_child |
|
|
unless defined $foster_parent_element; |
|
|
undef $prev_sibling unless $open_tables->[-1]->[2]; # ~node inserted |
|
|
if (defined $prev_sibling and |
|
|
$prev_sibling->node_type == 3) { |
|
|
!!!cp ('t198'); |
|
|
$prev_sibling->manakai_append_text ($token->{data}); |
|
|
} else { |
|
|
!!!cp ('t199'); |
|
|
$foster_parent_element->insert_before |
|
|
($self->{document}->create_text_node ($token->{data}), |
|
|
$next_sibling); |
|
|
} |
|
|
$open_tables->[-1]->[1] = 1; # tainted |
|
|
$open_tables->[-1]->[2] = 1; # ~node inserted |
|
|
} else { |
|
|
## NOTE: Fragment case or in a foster parent'ed element |
|
|
## (e.g. |<table><span>a|). In fragment case, whether the |
|
|
## character is appended to existing node or a new node is |
|
|
## created is irrelevant, since the foster parent'ed nodes |
|
|
## are discarded and fragment parsing does not invoke any |
|
|
## script. |
|
|
!!!cp ('t200'); |
|
|
$self->{open_elements}->[-1]->[0]->manakai_append_text |
|
|
($token->{data}); |
|
|
} |
|
|
|
|
|
!!!next-token; |
|
|
next B; |
|
|
} elsif ($token->{type} == START_TAG_TOKEN) { |
|
| 3115 |
if ({ |
if ({ |
| 3116 |
tr => (($self->{insertion_mode} & IM_MASK) != IN_ROW_IM), |
tr => (($self->{insertion_mode} & IM_MASK) != IN_ROW_IM), |
| 3117 |
th => 1, td => 1, |
th => 1, td => 1, |
| 3379 |
!!!ack-later; |
!!!ack-later; |
| 3380 |
next B; |
next B; |
| 3381 |
} elsif ($token->{tag_name} eq 'style') { |
} elsif ($token->{tag_name} eq 'style') { |
| 3382 |
if (not $open_tables->[-1]->[1]) { # tainted |
!!!cp ('t227.8'); |
| 3383 |
!!!cp ('t227.8'); |
## NOTE: This is an "as if in head" code clone. |
| 3384 |
## NOTE: This is a "as if in head" code clone. |
$parse_rcdata->(CDATA_CONTENT_MODEL); |
| 3385 |
$parse_rcdata->(CDATA_CONTENT_MODEL); |
$open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted |
| 3386 |
$open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted |
next B; |
|
next B; |
|
|
} else { |
|
|
!!!cp ('t227.7'); |
|
|
# |
|
|
} |
|
| 3387 |
} elsif ($token->{tag_name} eq 'script') { |
} elsif ($token->{tag_name} eq 'script') { |
| 3388 |
if (not $open_tables->[-1]->[1]) { # tainted |
!!!cp ('t227.6'); |
| 3389 |
!!!cp ('t227.6'); |
## NOTE: This is an "as if in head" code clone. |
| 3390 |
## NOTE: This is a "as if in head" code clone. |
$script_start_tag->(); |
| 3391 |
$script_start_tag->(); |
$open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted |
| 3392 |
$open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted |
next B; |
|
next B; |
|
|
} else { |
|
|
!!!cp ('t227.5'); |
|
|
# |
|
|
} |
|
| 3393 |
} elsif ($token->{tag_name} eq 'input') { |
} elsif ($token->{tag_name} eq 'input') { |
| 3394 |
if (not $open_tables->[-1]->[1]) { # tainted |
if ($token->{attributes}->{type}) { |
| 3395 |
if ($token->{attributes}->{type}) { ## TODO: case |
my $type = $token->{attributes}->{type}->{value}; |
| 3396 |
my $type = lc $token->{attributes}->{type}->{value}; |
$type =~ tr/A-Z/a-z/; ## ASCII case-insensitive. |
| 3397 |
if ($type eq 'hidden') { |
if ($type eq 'hidden') { |
| 3398 |
!!!cp ('t227.3'); |
!!!cp ('t227.3'); |
| 3399 |
!!!parse-error (type => 'in table', |
!!!parse-error (type => 'in table', |
| 3400 |
text => $token->{tag_name}, token => $token); |
text => $token->{tag_name}, token => $token); |
| 3401 |
|
|
| 3402 |
!!!insert-element ($token->{tag_name}, $token->{attributes}, $token); |
!!!insert-element ($token->{tag_name}, $token->{attributes}, $token); |
| 3403 |
$open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted |
$open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted |
| 3404 |
|
|
| 3405 |
## TODO: form element pointer |
## TODO: form element pointer |
| 3406 |
|
|
| 3407 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 3408 |
|
|
| 3409 |
!!!next-token; |
!!!next-token; |
| 3410 |
!!!ack ('t227.2.1'); |
!!!ack ('t227.2.1'); |
| 3411 |
next B; |
next B; |
|
} else { |
|
|
!!!cp ('t227.2'); |
|
|
# |
|
|
} |
|
| 3412 |
} else { |
} else { |
| 3413 |
!!!cp ('t227.1'); |
!!!cp ('t227.1'); |
| 3414 |
# |
# |
| 3921 |
## Reprocess the token. |
## Reprocess the token. |
| 3922 |
next B; |
next B; |
| 3923 |
} |
} |
| 3924 |
|
} elsif ($token->{tag_name} eq 'script') { |
| 3925 |
|
!!!cp ('t281.3'); |
| 3926 |
|
## NOTE: This is an "as if in head" code clone |
| 3927 |
|
$script_start_tag->(); |
| 3928 |
|
next B; |
| 3929 |
} else { |
} else { |
| 3930 |
!!!cp ('t282'); |
!!!cp ('t282'); |
| 3931 |
!!!parse-error (type => 'in select', |
!!!parse-error (type => 'in select', |
| 4337 |
$parse_rcdata->(CDATA_CONTENT_MODEL); |
$parse_rcdata->(CDATA_CONTENT_MODEL); |
| 4338 |
next B; |
next B; |
| 4339 |
} elsif ({ |
} elsif ({ |
| 4340 |
base => 1, command => 1, eventsource => 1, link => 1, |
base => 1, command => 1, link => 1, |
| 4341 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
| 4342 |
!!!cp ('t334'); |
!!!cp ('t334'); |
| 4343 |
## NOTE: This is an "as if in head" code clone, only "-t" differs |
## NOTE: This is an "as if in head" code clone, only "-t" differs |
| 5049 |
} elsif ({ |
} elsif ({ |
| 5050 |
area => 1, basefont => 1, bgsound => 1, br => 1, |
area => 1, basefont => 1, bgsound => 1, br => 1, |
| 5051 |
embed => 1, img => 1, spacer => 1, wbr => 1, |
embed => 1, img => 1, spacer => 1, wbr => 1, |
| 5052 |
|
keygen => 1, |
| 5053 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
| 5054 |
!!!cp ('t388.1'); |
!!!cp ('t388.1'); |
| 5055 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |