|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unmatched end tag',
text => $token->{tag_name}, token => $token);
## NOTE: Ignore the token.
$token = $self->_get_next_token;
next B;
} # INSCOPE
## 2. If unclosed elements:
for (@{$self->{open_elements}}) {
unless ($_->[1] & ALL_END_TAG_OPTIONAL_EL ||
$_->[1] == OPTGROUP_EL ||
$_->[1] == OPTION_EL ||
$_->[1] == RUBY_COMPONENT_EL) {
$self->{parse_error}->(level => $self->{level}->{must}, type => 'not closed',
text => $_->[0]->manakai_local_name,
token => $token);
last;
} else {
}
}
## 3. Switch the insertion mode.
$self->{insertion_mode} = AFTER_BODY_IM;
$token = $self->_get_next_token;
next B;
} elsif ($token->{tag_name} eq 'html') {
## TODO: Update this code. It seems that the code below is not
## up-to-date, though it has same effect as speced.
if (@{$self->{open_elements}} > 1 and
$self->{open_elements}->[1]->[1] == BODY_EL) {
unless ($self->{open_elements}->[-1]->[1] == BODY_EL) {
$self->{parse_error}->(level => $self->{level}->{must}, type => 'not closed',
text => $self->{open_elements}->[1]->[0]
->manakai_local_name,
token => $token);
} else {
}
$self->{insertion_mode} = AFTER_BODY_IM;
## reprocess
next B;
} else {
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unmatched end tag',
text => $token->{tag_name}, token => $token);
## Ignore the token
$token = $self->_get_next_token;
next B;
}
} elsif ({
## NOTE: End tags for non-phrasing flow content elements
## NOTE: The normal ones
address => 1, article => 1, aside => 1, blockquote => 1,
center => 1, datagrid => 1, details => 1, dialog => 1,
dir => 1, div => 1, dl => 1, fieldset => 1, figure => 1,
footer => 1, header => 1, listing => 1, menu => 1, nav => 1,
ol => 1, pre => 1, section => 1, ul => 1,
## NOTE: As normal, but ... optional tags
dd => 1, dt => 1, li => 1,
applet => 1, button => 1, marquee => 1, object => 1,
}->{$token->{tag_name}}) {
## NOTE: Code for <li> start tags includes "as if </li>" code.
## Code for <dt> or <dd> start tags includes "as if </dt> or
## </dd>" code.
## has an element in scope
my $i;
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
my $node = $self->{open_elements}->[$_];
if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
$i = $_;
last INSCOPE;
} elsif ($node->[1] & SCOPING_EL) {
last INSCOPE;
}
} # INSCOPE
unless (defined $i) { # has an element in scope
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unmatched end tag',
text => $token->{tag_name}, token => $token);
## NOTE: Ignore the token.
} else {
## Step 1. generate implied end tags
while ({
## END_TAG_OPTIONAL_EL
dd => ($token->{tag_name} ne 'dd'),
dt => ($token->{tag_name} ne 'dt'),
li => ($token->{tag_name} ne 'li'),
option => 1,
optgroup => 1,
p => 1,
rt => 1,
rp => 1,
}->{$self->{open_elements}->[-1]->[0]->manakai_local_name}) {
pop @{$self->{open_elements}};
}
## Step 2.
if ($self->{open_elements}->[-1]->[0]->manakai_local_name
ne $token->{tag_name}) {
$self->{parse_error}->(level => $self->{level}->{must}, type => 'not closed',
text => $self->{open_elements}->[-1]->[0]
->manakai_local_name,
token => $token);
} else {
}
## Step 3.
splice @{$self->{open_elements}}, $i;
## Step 4.
$clear_up_to_marker->()
if {
applet => 1, button => 1, marquee => 1, object => 1,
}->{$token->{tag_name}};
}
$token = $self->_get_next_token;
next B;
} elsif ($token->{tag_name} eq 'form') {
## NOTE: As normal, but interacts with the form element pointer
undef $self->{form_element};
## has an element in scope
my $i;
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
my $node = $self->{open_elements}->[$_];
if ($node->[1] == FORM_EL) {
$i = $_;
last INSCOPE;
} elsif ($node->[1] & SCOPING_EL) {
last INSCOPE;
}
} # INSCOPE
unless (defined $i) { # has an element in scope
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unmatched end tag',
text => $token->{tag_name}, token => $token);
## NOTE: Ignore the token.
} else {
## Step 1. generate implied end tags
while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
pop @{$self->{open_elements}};
}
## Step 2.
if ($self->{open_elements}->[-1]->[0]->manakai_local_name
ne $token->{tag_name}) {
$self->{parse_error}->(level => $self->{level}->{must}, type => 'not closed',
text => $self->{open_elements}->[-1]->[0]
->manakai_local_name,
token => $token);
} else {
}
## Step 3.
splice @{$self->{open_elements}}, $i;
}
$token = $self->_get_next_token;
next B;
} elsif ({
## NOTE: As normal, except acts as a closer for any ...
h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
}->{$token->{tag_name}}) {
## has an element in scope
my $i;
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
my $node = $self->{open_elements}->[$_];
if ($node->[1] == HEADING_EL) {
$i = $_;
last INSCOPE;
} elsif ($node->[1] & SCOPING_EL) {
last INSCOPE;
}
} # INSCOPE
unless (defined $i) { # has an element in scope
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unmatched end tag',
text => $token->{tag_name}, token => $token);
## NOTE: Ignore the token.
} else {
## Step 1. generate implied end tags
while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
pop @{$self->{open_elements}};
}
## Step 2.
if ($self->{open_elements}->[-1]->[0]->manakai_local_name
ne $token->{tag_name}) {
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unmatched end tag',
text => $token->{tag_name}, token => $token);
} else {
}
## Step 3.
splice @{$self->{open_elements}}, $i;
}
$token = $self->_get_next_token;
next B;
} elsif ($token->{tag_name} eq 'p') {
## NOTE: As normal, except implies and ...
## has an element in scope
my $non_optional;
my $i;
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
my $node = $self->{open_elements}->[$_];
if ($node->[1] == P_EL) {
$i = $_;
last INSCOPE;
} elsif ($node->[1] & SCOPING_EL) {
last INSCOPE;
} elsif ($node->[1] & END_TAG_OPTIONAL_EL) {
## NOTE: |END_TAG_OPTIONAL_EL| includes "p"
#
} else {
$non_optional ||= $node;
#
}
} # INSCOPE
if (defined $i) {
## 1. Generate implied end tags
#
## 2. If current node != "p", parse error
if ($non_optional) {
$self->{parse_error}->(level => $self->{level}->{must}, type => 'not closed',
text => $non_optional->[0]->manakai_local_name,
token => $token);
} else {
}
## 3. Pop
splice @{$self->{open_elements}}, $i;
} else {
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unmatched end tag',
text => $token->{tag_name}, token => $token);
## As if <p>, then reprocess the current token
my $el;
$el = $self->{document}->create_element_ns
($HTML_NS, [undef, 'p']);
$el->set_user_data (manakai_source_line => $token->{line})
if defined $token->{line};
$el->set_user_data (manakai_source_column => $token->{column})
if defined $token->{column};
$insert->($el);
## NOTE: Not inserted into |$self->{open_elements}|.
}
$token = $self->_get_next_token;
next B;
} elsif ({
a => 1,
b => 1, big => 1, em => 1, font => 1, i => 1,
nobr => 1, s => 1, small => 1, strike => 1,
strong => 1, tt => 1, u => 1,
}->{$token->{tag_name}}) {
$formatting_end_tag->($token);
next B;
} elsif ($token->{tag_name} eq 'br') {
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unmatched end tag',
text => 'br', token => $token);
## As if <br>
$reconstruct_active_formatting_elements->($insert_to_current);
my $el;
$el = $self->{document}->create_element_ns
($HTML_NS, [undef, 'br']);
$el->set_user_data (manakai_source_line => $token->{line})
if defined $token->{line};
$el->set_user_data (manakai_source_column => $token->{column})
if defined $token->{column};
$insert->($el);
## Ignore the token.
$token = $self->_get_next_token;
next B;
} else {
if ($token->{tag_name} eq 'sarcasm') {
sleep 0.001; # take a deep breath
}
## Step 1
my $node_i = -1;
my $node = $self->{open_elements}->[$node_i];
## Step 2
S2: {
my $node_tag_name = $node->[0]->manakai_local_name;
$node_tag_name =~ tr/A-Z/a-z/; # for SVG camelCase tag names
if ($node_tag_name eq $token->{tag_name}) {
## Step 1
## generate implied end tags
while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
## NOTE: ||.
## ISSUE: will also take this code path,
## which seems wrong.
pop @{$self->{open_elements}};
$node_i++;
}
## Step 2
my $current_tag_name
= $self->{open_elements}->[-1]->[0]->manakai_local_name;
$current_tag_name =~ tr/A-Z/a-z/;
if ($current_tag_name ne $token->{tag_name}) {
## NOTE:
$self->{parse_error}->(level => $self->{level}->{must}, type => 'not closed',
text => $self->{open_elements}->[-1]->[0]
->manakai_local_name,
token => $token);
} else {
}
## Step 3
splice @{$self->{open_elements}}, $node_i if $node_i < 0;
$token = $self->_get_next_token;
last S2;
} else {
## Step 3
if (not ($node->[1] & FORMATTING_EL) and
#not $phrasing_category->{$node->[1]} and
($node->[1] & SPECIAL_EL or
$node->[1] & SCOPING_EL)) {
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unmatched end tag',
text => $token->{tag_name}, token => $token);
## Ignore the token
$token = $self->_get_next_token;
last S2;
## NOTE: |a|: In Safari 3.1.2 and Opera
## 9.27, "a" is a child of (conforming). In
## Firefox 3.0.2, "a" is a child of . In WinIE 7,
## "a" is a child of both and .
}
}
## Step 4
$node_i--;
$node = $self->{open_elements}->[$node_i];
## Step 5;
redo S2;
} # S2
next B;
}
}
next B;
} continue { # B
if ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
## NOTE: The code below is executed in cases where it does not have
## to be, but it it is harmless even in those cases.
## has an element in scope
INSCOPE: {
for (reverse 0..$#{$self->{open_elements}}) {
my $node = $self->{open_elements}->[$_];
if ($node->[1] & FOREIGN_EL) {
last INSCOPE;
} elsif ($node->[1] & SCOPING_EL) {
last;
}
}
## NOTE: No foreign element in scope.
$self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
} # INSCOPE
}
} # B
## Stop parsing # MUST
## TODO: script stuffs
} # _tree_construct_main
## XXX: How this method is organized is somewhat out of date, although
## it still does what the current spec documents.
## Replaces the children of /context/ |$node| with the result of
## parsing a character string as HTML.
##
## Arguments (after the invocant |$class|):
##   $node        - /context/.  A Document (node type 9) or an Element
##                  (node type 1); any other node type is fatal.
##   $_[0]        - The character string to parse.  It is accessed
##                  through |@_| (and by reference) rather than copied
##                  into a lexical.
##   $onerror     - Optional code reference receiving parse errors as
##                  name/value pairs.  In the Element case a handler
##                  that |warn|s is used when it is false.
##   $get_wrapper - Optional code reference applied to the input
##                  handle before it is read; defaults to the identity
##                  function.
##
## Dies if |$node| is neither a Document nor an Element.
sub set_inner_html ($$$$;$) {
  my $class = shift;
  my $node = shift; # /context/
  #my $s = \$_[0];
  my $onerror = $_[1];
  my $get_wrapper = $_[2] || sub ($) { return $_[0] };

  ## ISSUE: Should {confident} be true?

  my $nt = $node->node_type;
  if ($nt == 9) { # Document (invoke the algorithm with no /context/ element)
    # MUST

    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.

    ## Step 2 # MUST
    ## NOTE: The child list is copied first so that removal does not
    ## disturb the iteration.
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }

    ## Step 3, 4, 5 # MUST
    $class->parse_char_string ($_[0] => $node, $onerror, $get_wrapper);
  } elsif ($nt == 1) { # Element (invoke the algorithm with /context/ element)
    ## TODO: If non-html element

    ## NOTE: Most of this code is copied from |parse_string|

    ## NOTE: |$get_wrapper| is applied to the input handle below.

    ## F1. Create an HTML document.
    my $this_doc = $node->owner_document;
    my $doc = $this_doc->implementation->create_document;
    $doc->manakai_is_html (1);

    ## F2. Propagate quirkness flag
    $doc->manakai_compat_mode ($this_doc->manakai_compat_mode);

    ## F3. Create an HTML parser
    my $p = $class->new;
    $p->{document} = $doc;

    ## Step 8 # MUST
    $p->{line_prev} = $p->{line} = 1;
    $p->{column_prev} = $p->{column} = 0;
    require Whatpm::Charset::DecodeHandle;
    my $input = Whatpm::Charset::DecodeHandle::CharString->new (\($_[0]));
    $input = $get_wrapper->($input);

    ## Reads the next input character into |$self->{nc}| (-1 at the
    ## end of the input), maintaining the line/column counters.
    ## NOTE: This closure captures |$p|; see the clean-up at the end.
    $p->{set_nc} = sub {
      my $self = shift;

      my $char = '';
      if (defined $self->{next_nc}) {
        ## A character read ahead while handling CR is pending.
        $char = $self->{next_nc};
        delete $self->{next_nc};
        $self->{nc} = ord $char;
      } else {
        $self->{char_buffer} = '';
        $self->{char_buffer_pos} = 0;

        ## Fast path: buffer a run of characters that are none of
        ## NULL, LF, and CR, which need no special handling.
        my $count = $input->manakai_read_until
            ($self->{char_buffer}, qr/[^\x00\x0A\x0D]/,
             $self->{char_buffer_pos});
        if ($count) {
          $self->{line_prev} = $self->{line};
          $self->{column_prev} = $self->{column};
          $self->{column}++;
          $self->{nc}
              = ord substr ($self->{char_buffer},
                            $self->{char_buffer_pos}++, 1);
          return;
        }

        if ($input->read ($char, 1)) {
          $self->{nc} = ord $char;
        } else {
          $self->{nc} = -1;
          return;
        }
      }

      ($p->{line_prev}, $p->{column_prev}) = ($p->{line}, $p->{column});
      $p->{column}++;

      if ($self->{nc} == 0x000A) { # LF
        $p->{line}++;
        $p->{column} = 0;
      } elsif ($self->{nc} == 0x000D) { # CR
        ## TODO: support for abort/streaming
        ## CR and CR LF are both reported as a single LF; a character
        ## other than LF that follows CR is kept for the next call.
        my $next = '';
        if ($input->read ($next, 1) and $next ne "\x0A") {
          $self->{next_nc} = $next;
        }
        $self->{nc} = 0x000A; # LF # MUST
        $p->{line}++;
        $p->{column} = 0;
      } elsif ($self->{nc} == 0x0000) { # NULL
        $self->{parse_error}->(level => $self->{level}->{must}, type => 'NULL');
        $self->{nc} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
      }
    };

    ## Appends a run of characters matching none of |$_[1]|, NULL, LF,
    ## and CR to |$_[0]| at offset |$_[2]|, and returns the number of
    ## characters consumed.
    ## NOTE: This closure captures |$p|; see the clean-up at the end.
    $p->{read_until} = sub {
      #my ($scalar, $specials_range, $offset) = @_;
      return 0 if defined $p->{next_nc};

      my $pattern = qr/[^$_[1]\x00\x0A\x0D]/;
      my $offset = $_[2] || 0;

      if ($p->{char_buffer_pos} < length $p->{char_buffer}) {
        ## Consume from what |set_nc| has already buffered.
        pos ($p->{char_buffer}) = $p->{char_buffer_pos};
        if ($p->{char_buffer} =~ /\G(?>$pattern)+/) {
          substr ($_[0], $offset)
              = substr ($p->{char_buffer}, $-[0], $+[0] - $-[0]);
          my $count = $+[0] - $-[0];
          if ($count) {
            $p->{column} += $count;
            $p->{char_buffer_pos} += $count;
            $p->{line_prev} = $p->{line};
            $p->{column_prev} = $p->{column} - 1;
            $p->{nc} = -1;
          }
          return $count;
        } else {
          return 0;
        }
      } else {
        my $count = $input->manakai_read_until ($_[0], $pattern, $_[2]);
        if ($count) {
          $p->{column} += $count;
          $p->{column_prev} += $count;
          $p->{nc} = -1;
        }
        return $count;
      }
    }; # $p->{read_until}

    ## The default error handler prefers the position of the token,
    ## if any, to the current position of the parser.
    my $ponerror = $onerror || sub {
      my (%opt) = @_;
      my $line = $opt{line};
      my $column = $opt{column};
      if (defined $opt{token} and defined $opt{token}->{line}) {
        $line = $opt{token}->{line};
        $column = $opt{token}->{column};
      }
      warn "Parse error ($opt{type}) at line $line column $column\n";
    };
    $p->{parse_error} = sub {
      $ponerror->(line => $p->{line}, column => $p->{column}, @_);
    };

    my $char_onerror = sub {
      my (undef, $type, %opt) = @_;
      $ponerror->(layer => 'encode',
                  line => $p->{line}, column => $p->{column} + 1,
                  %opt, type => $type);
    }; # $char_onerror
    $input->onerror ($char_onerror);

    $p->_initialize_tokenizer;
    $p->_initialize_tree_constructor;

    ## F4. If /context/ is not undef...

    ## F4.1. content model flag
    my $node_ln = $node->manakai_local_name;
    $p->{content_model} = {
      title => RCDATA_CONTENT_MODEL,
      textarea => RCDATA_CONTENT_MODEL,
      style => CDATA_CONTENT_MODEL,
      script => CDATA_CONTENT_MODEL,
      xmp => CDATA_CONTENT_MODEL,
      iframe => CDATA_CONTENT_MODEL,
      noembed => CDATA_CONTENT_MODEL,
      noframes => CDATA_CONTENT_MODEL,
      noscript => CDATA_CONTENT_MODEL,
      plaintext => PLAINTEXT_CONTENT_MODEL,
    }->{$node_ln};
    $p->{content_model} = PCDATA_CONTENT_MODEL
        unless defined $p->{content_model};

    $p->{inner_html_node} = [$node, $el_category->{$node_ln}];
      ## TODO: Foreign element OK?

    ## F4.2. Root |html| element
    my $root = $doc->create_element_ns
        ('http://www.w3.org/1999/xhtml', [undef, 'html']);

    ## F4.3.
    $doc->append_child ($root);

    ## F4.4.
    push @{$p->{open_elements}}, [$root, $el_category->{html}];

    undef $p->{head_element};
    undef $p->{head_element_inserted};

    ## F4.5.
    $p->_reset_insertion_mode;

    ## F4.6.
    ## The nearest |form| element in the XHTML namespace among /context/
    ## and its ancestors, if any, becomes the form element pointer.
    my $anode = $node;
    AN: while (defined $anode) {
      if ($anode->node_type == 1) {
        my $nsuri = $anode->namespace_uri;
        if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') {
          if ($anode->manakai_local_name eq 'form') {
            $p->{form_element} = $anode;
            last AN;
          }
        }
      }
      $anode = $anode->parent_node;
    } # AN

    ## F.6. Start the parser.
    ## NOTE: |$token| is not declared in this method; it is assigned
    ## here before the tree construction stage is entered.
    {
      my $self = $p;
      $token = $self->_get_next_token;
    }
    $p->_tree_construction_main;

    ## F.7.
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }
    ## ISSUE: mutation events? read-only?

    ## Step 11 # MUST
    @cn = @{$root->child_nodes};
    for (@cn) {
      $this_doc->adopt_node ($_);
      $node->append_child ($_);
    }
    ## ISSUE: mutation events?

    $p->_terminate_tree_constructor;

    ## Break the reference cycles.  The |set_nc|, |read_until|, and
    ## |parse_error| closures are stored in |$p| and capture |$p|, so
    ## that the parser could never be freed while any of them remained.
    ## NOTE: |parse_error| is deleted last so that the value of the
    ## last statement of this branch is unchanged.
    delete $p->{set_nc};
    delete $p->{read_until};
    delete $p->{parse_error}; # delete loop
  } else {
    die "$0: |set_inner_html| is not defined for node of type $nt";
  }
} # set_inner_html
} # tree construction stage
## A class whose only definition is its inheritance from |Error|.
## NOTE(review): Presumably an exception thrown to request that the
## parse be restarted, and |Error| is presumably the CPAN exception
## base class; neither is shown here, and |Error| is not loaded by
## this code -- confirm.
package Whatpm::HTML::RestartParser;
our @ISA;
push @ISA, 'Error';

## The true value returned to |require|.
1;
# $Date: 2009/09/06 02:20:52 $