X redo S4;
        }
      }

      ## Step 7
      $i++;
      $entry = $active_formatting_elements->[$i];
    } # S4

    S7: {
      ## Step 8
      my $clone = [$entry->[0]->clone_node (0), $entry->[1]];

      ## Step 9
      $insert->($clone->[0]);
      push @{$self->{open_elements}}, $clone;

      ## Step 10
      $active_formatting_elements->[$i] = $self->{open_elements}->[-1];

      ## Step 11
      unless ($clone->[0] eq $active_formatting_elements->[-1]->[0]) {
        ## Step 7'
        $i++;
        $entry = $active_formatting_elements->[$i];
        redo S7;
      }
    } # S7
  }; # $reconstruct_active_formatting_elements

  ## Truncates the list of active formatting elements at the last
  ## '#marker' entry: the marker itself and every entry after it are
  ## removed.  Does nothing when the list contains no marker.
  my $clear_up_to_marker = sub {
    for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq '#marker') {
        splice @$active_formatting_elements, $_;
        return;
      }
    }
  }; # $clear_up_to_marker

  ## Current insertion callback; assigned further below.
  my $insert;

  ## Generic CDATA/RCDATA element parsing.
  ##
  ## Argument: the content model flag to switch the tokenizer to
  ## (CDATA_CONTENT_MODEL or RCDATA_CONTENT_MODEL; anything else dies
  ## when the element is not properly closed).
  ##
  ## Creates an HTML element for the current start tag |$token|, hands
  ## it to |$insert|, gathers the following character tokens into one
  ## text node and reports a parse error unless the next token is the
  ## matching end tag.  In both cases that token is consumed and
  ## |$token| is advanced past it.
  my $parse_rcdata = sub ($) {
    my ($content_model_flag) = @_;

    ## Step 1
    my $start_tag_name = $token->{tag_name};
    my $el;
    $el = $self->{document}->create_element_ns
        ($HTML_NS, [undef, $start_tag_name]);
    for my $attr_name (keys %{ $token->{attributes}}) {
      my $attr_t = $token->{attributes}->{$attr_name};
      my $attr = $self->{document}->create_attribute_ns
          (undef, [undef, $attr_name]);
      $attr->value ($attr_t->{value});
      $attr->set_user_data (manakai_source_line => $attr_t->{line});
      $attr->set_user_data (manakai_source_column => $attr_t->{column});
      $el->set_attribute_node_ns ($attr);
    }
    $el->set_user_data (manakai_source_line => $token->{line})
        if defined $token->{line};
    $el->set_user_data (manakai_source_column => $token->{column})
        if defined $token->{column};

    ## Step 2
    $insert->($el);

    ## Step 3
    $self->{content_model} = $content_model_flag; # CDATA or RCDATA
    delete $self->{escape}; # MUST

    ## Step 4
    my $text = '';
    $token = $self->_get_next_token;
    while ($token->{type} == CHARACTER_TOKEN) { # or until stop tokenizing
      $text .= $token->{data};
      $token = $self->_get_next_token;
    }

    ## Step 5
    if (length $text) {
      ## A distinct name avoids shadowing the |$text| accumulator.
      my $text_node = $self->{document}->create_text_node ($text);
      $el->append_child ($text_node);
    }

    ## Step 6
    $self->{content_model} = PCDATA_CONTENT_MODEL;

    ## Step 7
    if ($token->{type} == END_TAG_TOKEN and
        $token->{tag_name} eq $start_tag_name) {
      ## Ignore the token
    } else {
      ## NOTE: An end-of-file token.
      if ($content_model_flag == CDATA_CONTENT_MODEL) {
        $self->{parse_error}->(level => $self->{must_level},
                               type => 'in CDATA:#'.$token->{type},
                               token => $token);
      } elsif ($content_model_flag == RCDATA_CONTENT_MODEL) {
        $self->{parse_error}->(level => $self->{must_level},
                               type => 'in RCDATA:#'.$token->{type},
                               token => $token);
      } else {
        die "$0: $content_model_flag in parse_rcdata";
      }
    }
    $token = $self->_get_next_token;
  }; # $parse_rcdata

  ## Handles a |script| start tag held in |$token|.
  ##
  ## Builds the |script| element with the token's attributes, reads the
  ## following character tokens in the CDATA content model and appends
  ## them as text, and reports a parse error unless the next token is
  ## the |script| end tag.  The element is passed to |$insert| only
  ## when |$self->{inner_html_node}| is not defined.  |$token| is
  ## advanced past the terminating token.
  my $script_start_tag = sub () {
    my $script_el;
    $script_el = $self->{document}->create_element_ns
        ($HTML_NS, [undef, 'script']);
    for my $attr_name (keys %{ $token->{attributes}}) {
      my $attr_t = $token->{attributes}->{$attr_name};
      my $attr = $self->{document}->create_attribute_ns
          (undef, [undef, $attr_name]);
      $attr->value ($attr_t->{value});
      $attr->set_user_data (manakai_source_line => $attr_t->{line});
      $attr->set_user_data (manakai_source_column => $attr_t->{column});
      $script_el->set_attribute_node_ns ($attr);
    }
    $script_el->set_user_data (manakai_source_line => $token->{line})
        if defined $token->{line};
    $script_el->set_user_data (manakai_source_column => $token->{column})
        if defined $token->{column};
    ## TODO: mark as "parser-inserted"

    $self->{content_model} = CDATA_CONTENT_MODEL;
    delete $self->{escape}; # MUST

    my $text = '';
    $token = $self->_get_next_token;
    while ($token->{type} == CHARACTER_TOKEN) {
      $text .= $token->{data};
      $token = $self->_get_next_token;
    } # stop if non-character token or tokenizer stops tokenising
    if (length $text) {
      $script_el->manakai_append_text ($text);
    }

    $self->{content_model} = PCDATA_CONTENT_MODEL;

    if ($token->{type} == END_TAG_TOKEN and
        $token->{tag_name} eq 'script') {
      ## Ignore the token
    } else {
      $self->{parse_error}->(level => $self->{must_level},
                             type => 'in CDATA:#'.$token->{type},
                             token => $token);
      ## ISSUE: And ignore?
      ## TODO: mark as "already executed"
    }

    if (defined $self->{inner_html_node}) {
      ## TODO: mark as "already executed"
    } else {
      ## TODO: $old_insertion_point = current insertion point
      ## TODO: insertion point = just before the next input character

      $insert->($script_el);

      ## TODO: insertion point = $old_insertion_point (might be "undefined")

      ## TODO: if there is a script that will execute as soon as the parser resume, then...
    }

    $token = $self->_get_next_token;
  }; # $script_start_tag

  ## NOTE: $open_tables->[-1]->[0] is the "current table" element node.
  ## NOTE: $open_tables->[-1]->[1] is the "tainted" flag.
  my $open_tables = [[$self->{open_elements}->[0]->[0]]];

  ## Handles an end tag of a formatting element.
  ##
  ## Argument: the end tag token.  Its |tag_name| selects the entry of
  ## the list of active formatting elements to close.
  ##
  ## The block |FET| is re-run (|redo FET|) after every full pass; the
  ## closure returns, after advancing |$token|, as soon as a pass ends
  ## in one of the early exits (no matching entry, element not in
  ## scope / not open, or no furthest block).
  my $formatting_end_tag = sub {
    my $end_tag_token = shift;
    my $tag_name = $end_tag_token->{tag_name};

    ## NOTE: The adoption agency algorithm (AAA).

    FET: {
      ## Step 1
      my $formatting_element;
      my $formatting_element_i_in_active;
      AFE: for (reverse 0..$#$active_formatting_elements) {
        if ($active_formatting_elements->[$_]->[0] eq '#marker') {
          last AFE;
        } elsif ($active_formatting_elements->[$_]->[0]->manakai_local_name
                     eq $tag_name) {
          $formatting_element = $active_formatting_elements->[$_];
          $formatting_element_i_in_active = $_;
          last AFE;
        }
      } # AFE
      unless (defined $formatting_element) {
        $self->{parse_error}->(level => $self->{must_level},
                               type => 'unmatched end tag:'.$tag_name,
                               token => $end_tag_token);
        ## Ignore the token
        $token = $self->_get_next_token;
        return;
      }

      ## has an element in scope
      my $in_scope = 1;
      my $formatting_element_i_in_open;
      INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
        my $node = $self->{open_elements}->[$_];
        if ($node->[0] eq $formatting_element->[0]) {
          if ($in_scope) {
            $formatting_element_i_in_open = $_;
            last INSCOPE;
          } else { # in open elements but not in scope
            ## Report against the end tag being processed, like the
            ## other errors raised by this closure.
            $self->{parse_error}->(level => $self->{must_level},
                                   type => 'unmatched end tag:'.$tag_name,
                                   token => $end_tag_token);
            ## Ignore the token
            $token = $self->_get_next_token;
            return;
          }
        } elsif ($node->[1] & SCOPING_EL) {
          $in_scope = 0;
        }
      } # INSCOPE
      unless (defined $formatting_element_i_in_open) {
        $self->{parse_error}->(level => $self->{must_level},
                               type => 'unmatched end tag:'.$tag_name,
                               token => $end_tag_token);
        ## Remove the formatting element itself.  It is not necessarily
        ## the last entry of the list, so |pop| could drop an unrelated
        ## entry.
        splice @$active_formatting_elements,
            $formatting_element_i_in_active, 1;
        $token = $self->_get_next_token; ## TODO: ok?
        return;
      }
      if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
        $self->{parse_error}->(level => $self->{must_level},
                               type => 'not closed',
                               value => $self->{open_elements}->[-1]->[0]
                                   ->manakai_local_name,
                               token => $end_tag_token);
      }

      ## Step 2
      ## Scanning from the current node towards the formatting element,
      ## the last match wins, i.e. the matching node closest to the
      ## formatting element in the stack.
      my $furthest_block;
      my $furthest_block_i_in_open;
      OE: for (reverse 0..$#{$self->{open_elements}}) {
        my $node = $self->{open_elements}->[$_];
        if (not ($node->[1] & FORMATTING_EL) and
            #not $phrasing_category->{$node->[1]} and
            ($node->[1] & SPECIAL_EL or
             $node->[1] & SCOPING_EL)) { ## Scoping is redundant, maybe
          $furthest_block = $node;
          $furthest_block_i_in_open = $_;
        } elsif ($node->[0] eq $formatting_element->[0]) {
          last OE;
        }
      } # OE

      ## Step 3
      unless (defined $furthest_block) { # MUST
        splice @{$self->{open_elements}}, $formatting_element_i_in_open;
        splice @$active_formatting_elements,
            $formatting_element_i_in_active, 1;
        $token = $self->_get_next_token;
        return;
      }

      ## Step 4
      my $common_ancestor_node
          = $self->{open_elements}->[$formatting_element_i_in_open - 1];

      ## Step 5
      my $furthest_block_parent = $furthest_block->[0]->parent_node;
      if (defined $furthest_block_parent) {
        $furthest_block_parent->remove_child ($furthest_block->[0]);
      }

      ## Step 6
      ## NOTE(review): when the formatting element is the first entry,
      ## index -1 refers to the last entry of the list - confirm that
      ## this is intended.
      my $bookmark_prev_el
          = $active_formatting_elements->[$formatting_element_i_in_active - 1]
              ->[0];

      ## Step 7
      my $node = $furthest_block;
      my $node_i_in_open = $furthest_block_i_in_open;
      my $last_node = $furthest_block;
      S7: {
        ## Step 1
        $node_i_in_open--;
        $node = $self->{open_elements}->[$node_i_in_open];

        ## Step 2
        ## A node that is not in the list of active formatting elements
        ## is dropped from the stack and the next node is examined.
        my $node_i_in_active;
        S7S2: {
          for (reverse 0..$#$active_formatting_elements) {
            if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
              $node_i_in_active = $_;
              last S7S2;
            }
          }
          splice @{$self->{open_elements}}, $node_i_in_open, 1;
          redo S7;
        } # S7S2

        ## Step 3
        last S7 if $node->[0] eq $formatting_element->[0];

        ## Step 4
        if ($last_node->[0] eq $furthest_block->[0]) {
          $bookmark_prev_el = $node->[0];
        }

        ## Step 5
        if ($node->[0]->has_child_nodes ()) {
          my $clone = [$node->[0]->clone_node (0), $node->[1]];
          $active_formatting_elements->[$node_i_in_active] = $clone;
          $self->{open_elements}->[$node_i_in_open] = $clone;
          $node = $clone;
        }

        ## Step 6
        $node->[0]->append_child ($last_node->[0]);

        ## Step 7
        $last_node = $node;

        ## Step 8
        redo S7;
      } # S7

      ## Step 8
      if ($common_ancestor_node->[1] & TABLE_ROWS_EL) {
        my $foster_parent_element;
        my $next_sibling;
        OE: for (reverse 0..$#{$self->{open_elements}}) {
          if ($self->{open_elements}->[$_]->[1] & TABLE_EL) {
            my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
            if (defined $parent and $parent->node_type == 1) {
              $foster_parent_element = $parent;
              $next_sibling = $self->{open_elements}->[$_]->[0];
            } else {
              $foster_parent_element
                  = $self->{open_elements}->[$_ - 1]->[0];
            }
            last OE;
          }
        } # OE
        $foster_parent_element = $self->{open_elements}->[0]->[0]
            unless defined $foster_parent_element;
        $foster_parent_element->insert_before ($last_node->[0], $next_sibling);
        $open_tables->[-1]->[1] = 1; # tainted
      } else {
        $common_ancestor_node->[0]->append_child ($last_node->[0]);
      }

      ## Step 9
      my $clone = [$formatting_element->[0]->clone_node (0),
                   $formatting_element->[1]];

      ## Step 10
      my @cn = @{$furthest_block->[0]->child_nodes};
      $clone->[0]->append_child ($_) for @cn;

      ## Step 11
      $furthest_block->[0]->append_child ($clone->[0]);

      ## Step 12
      ## |$i| tracks the bookmark position; it is decremented when the
      ## formatting element is removed from a lower index afterwards.
      my $i;
      AFE: for (reverse 0..$#$active_formatting_elements) {
        if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
          splice @$active_formatting_elements, $_, 1;
          $i-- and last AFE if defined $i;
        } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
          $i = $_;
        }
      } # AFE
      splice @$active_formatting_elements, $i + 1, 0, $clone;

      ## Step 13
      undef $i;
      OE: for (reverse 0..$#{$self->{open_elements}}) {
        if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
          splice @{$self->{open_elements}}, $_, 1;
          $i-- and last OE if defined $i;
        } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
          $i = $_;
        }
      } # OE
      ## Insert (length 0) the clone immediately below the furthest
      ## block.  A length of 1 would replace, i.e. silently drop, the
      ## open element that currently follows the furthest block.
      splice @{$self->{open_elements}}, $i + 1, 0, $clone;

      ## Step 14
      redo FET;
    } # FET
  }; # $formatting_end_tag

  ## Default insertion callback: appends the given node to the current
  ## node (the last entry of the stack of open elements).
  $insert = my $insert_to_current = sub {
    $self->{open_elements}->[-1]->[0]->append_child ($_[0]);
  }; # $insert_to_current

  ## Insertion callback with foster parenting.
  ##
  ## When the current node is flagged TABLE_ROWS_EL, the child is
  ## inserted before the nearest open TABLE_EL element in that
  ## element's parent (if the parent is an element node), otherwise
  ## appended to the element preceding it in the stack, falling back to
  ## the first open element; the current table is then marked tainted.
  ## In any other case the child is appended to the current node.
  my $insert_to_foster = sub {
    my $child = shift;
    if ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) {
      # MUST
      my $foster_parent_element;
      my $next_sibling;
      OE: for (reverse 0..$#{$self->{open_elements}}) {
        if ($self->{open_elements}->[$_]->[1] & TABLE_EL) {
          my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
          if (defined $parent and $parent->node_type == 1) {
            $foster_parent_element = $parent;
            $next_sibling = $self->{open_elements}->[$_]->[0];
          } else {
            $foster_parent_element
                = $self->{open_elements}->[$_ - 1]->[0];
          }
          last OE;
        }
      } # OE
      $foster_parent_element = $self->{open_elements}->[0]->[0]
          unless defined $foster_parent_element;
      $foster_parent_element->insert_before ($child, $next_sibling);
      $open_tables->[-1]->[1] = 1; # tainted
    } else {
      $self->{open_elements}->[-1]->[0]->append_child ($child);
    }
  }; # $insert_to_foster

  B: while (1) {
    if ($token->{type} == DOCTYPE_TOKEN) {
      $self->{parse_error}->(level => $self->{must_level}, type => 'DOCTYPE in the middle', token => $token);
      ## Ignore the token
      ## Stay in the phase
      $token = $self->_get_next_token;
      next B;
    } elsif ($token->{type} == START_TAG_TOKEN and
             $token->{tag_name} eq 'html') {
      if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
        $self->{parse_error}->(level => $self->{must_level}, type => 'after html:html', token => $token);
        $self->{insertion_mode} = AFTER_BODY_IM;
      } elsif ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
        $self->{parse_error}->(level => $self->{must_level}, type => 'after html:html', token
=> $token); $self->{insertion_mode} = AFTER_FRAMESET_IM; } else { } $self->{parse_error}->(level => $self->{must_level}, type => 'not first start tag', token => $token); my $top_el = $self->{open_elements}->[0]->[0]; for my $attr_name (keys %{$token->{attributes}}) { unless ($top_el->has_attribute_ns (undef, $attr_name)) { $top_el->set_attribute_ns (undef, [undef, $attr_name], $token->{attributes}->{$attr_name}->{value}); } } $token = $self->_get_next_token; next B; } elsif ($token->{type} == COMMENT_TOKEN) { my $comment = $self->{document}->create_comment ($token->{data}); if ($self->{insertion_mode} & AFTER_HTML_IMS) { $self->{document}->append_child ($comment); } elsif ($self->{insertion_mode} == AFTER_BODY_IM) { $self->{open_elements}->[0]->[0]->append_child ($comment); } else { $self->{open_elements}->[-1]->[0]->append_child ($comment); } $token = $self->_get_next_token; next B; } elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) { if ($token->{type} == CHARACTER_TOKEN) { $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data}); $token = $self->_get_next_token; next B; } elsif ($token->{type} == START_TAG_TOKEN) { if ((not {mglyph => 1, malignmark => 1}->{$token->{tag_name}} and $self->{open_elements}->[-1]->[1] & FOREIGN_FLOW_CONTENT_EL) or not ($self->{open_elements}->[-1]->[1] & FOREIGN_EL) or ($token->{tag_name} eq 'svg' and $self->{open_elements}->[-1]->[1] & MML_AXML_EL)) { ## NOTE: "using the rules for secondary insertion mode"then"continue" # } elsif ({ b => 1, big => 1, blockquote => 1, body => 1, br => 1, center => 1, code => 1, dd => 1, div => 1, dl => 1, em => 1, embed => 1, font => 1, h1 => 1, h2 => 1, h3 => 1, ## No h4! 
h5 => 1, h6 => 1, head => 1, hr => 1, i => 1, img => 1, li => 1, menu => 1, meta => 1, nobr => 1, p => 1, pre => 1, ruby => 1, s => 1, small => 1, span => 1, strong => 1, sub => 1, sup => 1, table => 1, tt => 1, u => 1, ul => 1, var => 1, }->{$token->{tag_name}}) { $self->{parse_error}->(level => $self->{must_level}, type => 'not closed', value => $self->{open_elements}->[-1]->[0] ->manakai_local_name, token => $token); pop @{$self->{open_elements}} while $self->{open_elements}->[-1]->[1] & FOREIGN_EL; $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM; ## Reprocess. next B; } else { my $nsuri = $self->{open_elements}->[-1]->[0]->namespace_uri; my $tag_name = $token->{tag_name}; if ($nsuri eq $SVG_NS) { $tag_name = { altglyph => 'altGlyph', altglyphdef => 'altGlyphDef', altglyphitem => 'altGlyphItem', animatecolor => 'animateColor', animatemotion => 'animateMotion', animatetransform => 'animateTransform', clippath => 'clipPath', feblend => 'feBlend', fecolormatrix => 'feColorMatrix', fecomponenttransfer => 'feComponentTransfer', fecomposite => 'feComposite', feconvolvematrix => 'feConvolveMatrix', fediffuselighting => 'feDiffuseLighting', fedisplacementmap => 'feDisplacementMap', fedistantlight => 'feDistantLight', feflood => 'feFlood', fefunca => 'feFuncA', fefuncb => 'feFuncB', fefuncg => 'feFuncG', fefuncr => 'feFuncR', fegaussianblur => 'feGaussianBlur', feimage => 'feImage', femerge => 'feMerge', femergenode => 'feMergeNode', femorphology => 'feMorphology', feoffset => 'feOffset', fepointlight => 'fePointLight', fespecularlighting => 'feSpecularLighting', fespotlight => 'feSpotLight', fetile => 'feTile', feturbulence => 'feTurbulence', foreignobject => 'foreignObject', glyphref => 'glyphRef', lineargradient => 'linearGradient', radialgradient => 'radialGradient', #solidcolor => 'solidColor', ## NOTE: Commented in spec (SVG1.2) textpath => 'textPath', }->{$tag_name} || $tag_name; } ## "adjust SVG attributes" (SVG only) - done in insert-element-f ## "adjust 
foreign attributes" - done in insert-element-f { my $el; $el = $self->{document}->create_element_ns ($nsuri, [undef, $tag_name]); for my $attr_name (keys %{ $token->{attributes}}) { my $attr_t = $token->{attributes}->{$attr_name}; my $attr = $self->{document}->create_attribute_ns ( @{ $foreign_attr_xname->{$attr_name} || [undef, [undef, $nsuri eq $SVG_NS ? ($svg_attr_name->{$attr_name} || $attr_name) : $attr_name]] } ); $attr->value ($attr_t->{value}); $attr->set_user_data (manakai_source_line => $attr_t->{line}); $attr->set_user_data (manakai_source_column => $attr_t->{column}); $el->set_attribute_node_ns ($attr); } $el->set_user_data (manakai_source_line => $token->{line}) if defined $token->{line}; $el->set_user_data (manakai_source_column => $token->{column}) if defined $token->{column}; $insert->($el); push @{$self->{open_elements}}, [$el, ($el_category_f->{$nsuri}->{ $tag_name} || 0) | FOREIGN_EL]; if ( $token->{attributes}->{xmlns} and $token->{attributes}->{xmlns}->{value} ne ($nsuri)) { $self->{parse_error}->(level => $self->{must_level}, type => 'bad namespace', token => $token); ## TODO: Error type documentation } } if ($self->{self_closing}) { pop @{$self->{open_elements}}; delete $self->{self_closing}; } else { } $token = $self->_get_next_token; next B; } } elsif ($token->{type} == END_TAG_TOKEN) { ## NOTE: "using the rules for secondary insertion mode" then "continue" # } elsif ($token->{type} == END_OF_FILE_TOKEN) { ## NOTE: "using the rules for secondary insertion mode" then "continue" # ## TODO: ... } else { die "$0: $token->{type}: Unknown token type"; } } if ($self->{insertion_mode} & HEAD_IMS) { if ($token->{type} == CHARACTER_TOKEN) { if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { unless ($self->{insertion_mode} == BEFORE_HEAD_IM) { $self->{open_elements}->[-1]->[0]->manakai_append_text ($1); } else { ## Ignore the token. 
$token = $self->_get_next_token; next B; } unless (length $token->{data}) { $token = $self->_get_next_token; next B; } } if ($self->{insertion_mode} == BEFORE_HEAD_IM) { ## As if
$self->{head_element} = $self->{document}->create_element_ns ($HTML_NS, [undef, 'head']); $self->{head_element}->set_user_data (manakai_source_line => $token->{line}) if defined $token->{line}; $self->{head_element}->set_user_data (manakai_source_column => $token->{column}) if defined $token->{column}; $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element}); push @{$self->{open_elements}}, [$self->{head_element}, $el_category->{head}]; ## Reprocess in the "in head" insertion mode... pop @{$self->{open_elements}}; ## Reprocess in the "after head" insertion mode... } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { ## As if pop @{$self->{open_elements}}; $self->{parse_error}->(level => $self->{must_level}, type => 'in noscript:#character', token => $token); ## Reprocess in the "in head" insertion mode... ## As if pop @{$self->{open_elements}}; ## Reprocess in the "after head" insertion mode... } elsif ($self->{insertion_mode} == IN_HEAD_IM) { pop @{$self->{open_elements}}; ## Reprocess in the "after head" insertion mode... 
} else { } ## "after head" insertion mode ## As if { my $el; $el = $self->{document}->create_element_ns ($HTML_NS, [undef, 'body']); $el->set_user_data (manakai_source_line => $token->{line}) if defined $token->{line}; $el->set_user_data (manakai_source_column => $token->{column}) if defined $token->{column}; $self->{open_elements}->[-1]->[0]->append_child ($el); push @{$self->{open_elements}}, [$el, $el_category->{'body'} || 0]; } $self->{insertion_mode} = IN_BODY_IM; ## reprocess next B; } elsif ($token->{type} == START_TAG_TOKEN) { if ($token->{tag_name} eq 'head') { if ($self->{insertion_mode} == BEFORE_HEAD_IM) { $self->{head_element} = $self->{document}->create_element_ns ($HTML_NS, [undef, $token->{tag_name}]); for my $attr_name (keys %{ $token->{attributes}}) { my $attr_t = $token->{attributes}->{$attr_name}; my $attr = $self->{document}->create_attribute_ns (undef, [undef, $attr_name]); $attr->value ($attr_t->{value}); $attr->set_user_data (manakai_source_line => $attr_t->{line}); $attr->set_user_data (manakai_source_column => $attr_t->{column}); $self->{head_element}->set_attribute_node_ns ($attr); } $self->{head_element}->set_user_data (manakai_source_line => $token->{line}) if defined $token->{line}; $self->{head_element}->set_user_data (manakai_source_column => $token->{column}) if defined $token->{column}; $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element}); push @{$self->{open_elements}}, [$self->{head_element}, $el_category->{head}]; $self->{insertion_mode} = IN_HEAD_IM; $token = $self->_get_next_token; next B; } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) { $self->{parse_error}->(level => $self->{must_level}, type => 'after head:head', token => $token); ## TODO: error type ## Ignore the token $token = $self->_get_next_token; next B; } else { $self->{parse_error}->(level => $self->{must_level}, type => 'in head:head', token => $token); # or in head noscript ## Ignore the token $token = $self->_get_next_token; next B; } } 
elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) { ## As if $self->{head_element} = $self->{document}->create_element_ns ($HTML_NS, [undef, 'head']); $self->{head_element}->set_user_data (manakai_source_line => $token->{line}) if defined $token->{line}; $self->{head_element}->set_user_data (manakai_source_column => $token->{column}) if defined $token->{column}; $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element}); push @{$self->{open_elements}}, [$self->{head_element}, $el_category->{head}]; $self->{insertion_mode} = IN_HEAD_IM; ## Reprocess in the "in head" insertion mode... } else { } if ($token->{tag_name} eq 'base') { if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { ## As if pop @{$self->{open_elements}}; $self->{parse_error}->(level => $self->{must_level}, type => 'in noscript:base', token => $token); $self->{insertion_mode} = IN_HEAD_IM; ## Reprocess in the "in head" insertion mode... } else { } ## NOTE: There is a "as if in head" code clone. if ($self->{insertion_mode} == AFTER_HEAD_IM) { $self->{parse_error}->(level => $self->{must_level}, type => 'after head:'.$token->{tag_name}, token => $token); push @{$self->{open_elements}}, [$self->{head_element}, $el_category->{head}]; } else { } { my $el; $el = $self->{document}->create_element_ns ($HTML_NS, [undef, $token->{tag_name}]); for my $attr_name (keys %{ $token->{attributes}}) { my $attr_t = $token->{attributes}->{$attr_name}; my $attr = $self->{document}->create_attribute_ns (undef, [undef, $attr_name]); $attr->value ($attr_t->{value}); $attr->set_user_data (manakai_source_line => $attr_t->{line}); $attr->set_user_data (manakai_source_column => $attr_t->{column}); $el->set_attribute_node_ns ($attr); } $el->set_user_data (manakai_source_line => $token->{line}) if defined $token->{line}; $el->set_user_data (manakai_source_column => $token->{column}) if defined $token->{column}; $self->{open_elements}->[-1]->[0]->append_child ($el); push @{$self->{open_elements}}, [$el, 
$el_category->{$token->{tag_name}} || 0]; } pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec. pop @{$self->{open_elements}} # if $self->{insertion_mode} == AFTER_HEAD_IM; $token = $self->_get_next_token; next B; } elsif ($token->{tag_name} eq 'link') { ## NOTE: There is a "as if in head" code clone. if ($self->{insertion_mode} == AFTER_HEAD_IM) { $self->{parse_error}->(level => $self->{must_level}, type => 'after head:'.$token->{tag_name}, token => $token); push @{$self->{open_elements}}, [$self->{head_element}, $el_category->{head}]; } else { } { my $el; $el = $self->{document}->create_element_ns ($HTML_NS, [undef, $token->{tag_name}]); for my $attr_name (keys %{ $token->{attributes}}) { my $attr_t = $token->{attributes}->{$attr_name}; my $attr = $self->{document}->create_attribute_ns (undef, [undef, $attr_name]); $attr->value ($attr_t->{value}); $attr->set_user_data (manakai_source_line => $attr_t->{line}); $attr->set_user_data (manakai_source_column => $attr_t->{column}); $el->set_attribute_node_ns ($attr); } $el->set_user_data (manakai_source_line => $token->{line}) if defined $token->{line}; $el->set_user_data (manakai_source_column => $token->{column}) if defined $token->{column}; $self->{open_elements}->[-1]->[0]->append_child ($el); push @{$self->{open_elements}}, [$el, $el_category->{$token->{tag_name}} || 0]; } pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec. pop @{$self->{open_elements}} # if $self->{insertion_mode} == AFTER_HEAD_IM; delete $self->{self_closing}; $token = $self->_get_next_token; next B; } elsif ($token->{tag_name} eq 'meta') { ## NOTE: There is a "as if in head" code clone. 
if ($self->{insertion_mode} == AFTER_HEAD_IM) { $self->{parse_error}->(level => $self->{must_level}, type => 'after head:'.$token->{tag_name}, token => $token); push @{$self->{open_elements}}, [$self->{head_element}, $el_category->{head}]; } else { } { my $el; $el = $self->{document}->create_element_ns ($HTML_NS, [undef, $token->{tag_name}]); for my $attr_name (keys %{ $token->{attributes}}) { my $attr_t = $token->{attributes}->{$attr_name}; my $attr = $self->{document}->create_attribute_ns (undef, [undef, $attr_name]); $attr->value ($attr_t->{value}); $attr->set_user_data (manakai_source_line => $attr_t->{line}); $attr->set_user_data (manakai_source_column => $attr_t->{column}); $el->set_attribute_node_ns ($attr); } $el->set_user_data (manakai_source_line => $token->{line}) if defined $token->{line}; $el->set_user_data (manakai_source_column => $token->{column}) if defined $token->{column}; $self->{open_elements}->[-1]->[0]->append_child ($el); push @{$self->{open_elements}}, [$el, $el_category->{$token->{tag_name}} || 0]; } my $meta_el = pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec. unless ($self->{confident}) { if ($token->{attributes}->{charset}) { ## NOTE: Whether the encoding is supported or not is handled ## in the {change_encoding} callback. $self->{change_encoding} ->($self, $token->{attributes}->{charset}->{value}, $token); $meta_el->[0]->get_attribute_node_ns (undef, 'charset') ->set_user_data (manakai_has_reference => $token->{attributes}->{charset} ->{has_reference}); } elsif ($token->{attributes}->{content}) { if ($token->{attributes}->{content}->{value} =~ /\A[^;]*;[\x09-\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt] [\x09-\x0D\x20]*= [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'| ([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) { ## NOTE: Whether the encoding is supported or not is handled ## in the {change_encoding} callback. $self->{change_encoding} ->($self, defined $1 ? $1 : defined $2 ? 
$2 : $3, $token); $meta_el->[0]->get_attribute_node_ns (undef, 'content') ->set_user_data (manakai_has_reference => $token->{attributes}->{content} ->{has_reference}); } else { } } } else { if ($token->{attributes}->{charset}) { $meta_el->[0]->get_attribute_node_ns (undef, 'charset') ->set_user_data (manakai_has_reference => $token->{attributes}->{charset} ->{has_reference}); } if ($token->{attributes}->{content}) { $meta_el->[0]->get_attribute_node_ns (undef, 'content') ->set_user_data (manakai_has_reference => $token->{attributes}->{content} ->{has_reference}); } } pop @{$self->{open_elements}} # if $self->{insertion_mode} == AFTER_HEAD_IM; delete $self->{self_closing}; $token = $self->_get_next_token; next B; } elsif ($token->{tag_name} eq 'title') { if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { ## As if pop @{$self->{open_elements}}; $self->{parse_error}->(level => $self->{must_level}, type => 'in noscript:title', token => $token); $self->{insertion_mode} = IN_HEAD_IM; ## Reprocess in the "in head" insertion mode... } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) { $self->{parse_error}->(level => $self->{must_level}, type => 'after head:'.$token->{tag_name}, token => $token); push @{$self->{open_elements}}, [$self->{head_element}, $el_category->{head}]; } else { } ## NOTE: There is a "as if in head" code clone. my $parent = defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0]; $parse_rcdata->(RCDATA_CONTENT_MODEL); pop @{$self->{open_elements}} # if $self->{insertion_mode} == AFTER_HEAD_IM; next B; } elsif ($token->{tag_name} eq 'style') { ## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and ## insertion mode IN_HEAD_IM) ## NOTE: There is a "as if in head" code clone. 
if ($self->{insertion_mode} == AFTER_HEAD_IM) { $self->{parse_error}->(level => $self->{must_level}, type => 'after head:'.$token->{tag_name}, token => $token); push @{$self->{open_elements}}, [$self->{head_element}, $el_category->{head}]; } else { } $parse_rcdata->(CDATA_CONTENT_MODEL); pop @{$self->{open_elements}} # if $self->{insertion_mode} == AFTER_HEAD_IM; next B; } elsif ($token->{tag_name} eq 'noscript') { if ($self->{insertion_mode} == IN_HEAD_IM) { ## NOTE: and scripting is disalbed { my $el; $el = $self->{document}->create_element_ns ($HTML_NS, [undef, $token->{tag_name}]); for my $attr_name (keys %{ $token->{attributes}}) { my $attr_t = $token->{attributes}->{$attr_name}; my $attr = $self->{document}->create_attribute_ns (undef, [undef, $attr_name]); $attr->value ($attr_t->{value}); $attr->set_user_data (manakai_source_line => $attr_t->{line}); $attr->set_user_data (manakai_source_column => $attr_t->{column}); $el->set_attribute_node_ns ($attr); } $el->set_user_data (manakai_source_line => $token->{line}) if defined $token->{line}; $el->set_user_data (manakai_source_column => $token->{column}) if defined $token->{column}; $self->{open_elements}->[-1]->[0]->append_child ($el); push @{$self->{open_elements}}, [$el, $el_category->{$token->{tag_name}} || 0]; } $self->{insertion_mode} = IN_HEAD_NOSCRIPT_IM; $token = $self->_get_next_token; next B; } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { $self->{parse_error}->(level => $self->{must_level}, type => 'in noscript:noscript', token => $token); ## Ignore the token $token = $self->_get_next_token; next B; } else { # } } elsif ($token->{tag_name} eq 'script') { if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { ## As if pop @{$self->{open_elements}}; $self->{parse_error}->(level => $self->{must_level}, type => 'in noscript:script', token => $token); $self->{insertion_mode} = IN_HEAD_IM; ## Reprocess in the "in head" insertion mode... 
} elsif ($self->{insertion_mode} == AFTER_HEAD_IM) { $self->{parse_error}->(level => $self->{must_level}, type => 'after head:'.$token->{tag_name}, token => $token); push @{$self->{open_elements}}, [$self->{head_element}, $el_category->{head}]; } else { } ## NOTE: There is a "as if in head" code clone. $script_start_tag->(); pop @{$self->{open_elements}} # if $self->{insertion_mode} == AFTER_HEAD_IM; next B; } elsif ($token->{tag_name} eq 'body' or $token->{tag_name} eq 'frameset') { if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { ## As if pop @{$self->{open_elements}}; $self->{parse_error}->(level => $self->{must_level}, type => 'in noscript:'.$token->{tag_name}, token => $token); ## Reprocess in the "in head" insertion mode... ## As if pop @{$self->{open_elements}}; ## Reprocess in the "after head" insertion mode... } elsif ($self->{insertion_mode} == IN_HEAD_IM) { pop @{$self->{open_elements}}; ## Reprocess in the "after head" insertion mode... } else { } ## "after head" insertion mode { my $el; $el = $self->{document}->create_element_ns ($HTML_NS, [undef, $token->{tag_name}]); for my $attr_name (keys %{ $token->{attributes}}) { my $attr_t = $token->{attributes}->{$attr_name}; my $attr = $self->{document}->create_attribute_ns (undef, [undef, $attr_name]); $attr->value ($attr_t->{value}); $attr->set_user_data (manakai_source_line => $attr_t->{line}); $attr->set_user_data (manakai_source_column => $attr_t->{column}); $el->set_attribute_node_ns ($attr); } $el->set_user_data (manakai_source_line => $token->{line}) if defined $token->{line}; $el->set_user_data (manakai_source_column => $token->{column}) if defined $token->{column}; $self->{open_elements}->[-1]->[0]->append_child ($el); push @{$self->{open_elements}}, [$el, $el_category->{$token->{tag_name}} || 0]; } if ($token->{tag_name} eq 'body') { $self->{insertion_mode} = IN_BODY_IM; } elsif ($token->{tag_name} eq 'frameset') { $self->{insertion_mode} = IN_FRAMESET_IM; } else { die "$0: tag name: 
$self->{tag_name}"; } $token = $self->_get_next_token; next B; } else { # } if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { ## As if pop @{$self->{open_elements}}; $self->{parse_error}->(level => $self->{must_level}, type => 'in noscript:/'.$token->{tag_name}, token => $token); ## Reprocess in the "in head" insertion mode... ## As if pop @{$self->{open_elements}}; ## Reprocess in the "after head" insertion mode... } elsif ($self->{insertion_mode} == IN_HEAD_IM) { ## As if pop @{$self->{open_elements}}; ## Reprocess in the "after head" insertion mode... } else { } ## "after head" insertion mode ## As if { my $el; $el = $self->{document}->create_element_ns ($HTML_NS, [undef, 'body']); $el->set_user_data (manakai_source_line => $token->{line}) if defined $token->{line}; $el->set_user_data (manakai_source_column => $token->{column}) if defined $token->{column}; $self->{open_elements}->[-1]->[0]->append_child ($el); push @{$self->{open_elements}}, [$el, $el_category->{'body'} || 0]; } $self->{insertion_mode} = IN_BODY_IM; ## reprocess next B; } elsif ($token->{type} == END_TAG_TOKEN) { if ($token->{tag_name} eq 'head') { if ($self->{insertion_mode} == BEFORE_HEAD_IM) { ## As if $self->{head_element} = $self->{document}->create_element_ns ($HTML_NS, [undef, 'head']); $self->{head_element}->set_user_data (manakai_source_line => $token->{line}) if defined $token->{line}; $self->{head_element}->set_user_data (manakai_source_column => $token->{column}) if defined $token->{column}; $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element}); push @{$self->{open_elements}}, [$self->{head_element}, $el_category->{head}]; ## Reprocess in the "in head" insertion mode... 
pop @{$self->{open_elements}}; $self->{insertion_mode} = AFTER_HEAD_IM; $token = $self->_get_next_token; next B; } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { ## As if pop @{$self->{open_elements}}; $self->{parse_error}->(level => $self->{must_level}, type => 'in noscript:/head', token => $token); ## Reprocess in the "in head" insertion mode... pop @{$self->{open_elements}}; $self->{insertion_mode} = AFTER_HEAD_IM; $token = $self->_get_next_token; next B; } elsif ($self->{insertion_mode} == IN_HEAD_IM) { pop @{$self->{open_elements}}; $self->{insertion_mode} = AFTER_HEAD_IM; $token = $self->_get_next_token; next B; } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) { $self->{parse_error}->(level => $self->{must_level}, type => 'unmatched end tag:head', token => $token); ## Ignore the token $token = $self->_get_next_token; next B; } else { die "$0: $self->{insertion_mode}: Unknown insertion mode"; } } elsif ($token->{tag_name} eq 'noscript') { if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { pop @{$self->{open_elements}}; $self->{insertion_mode} = IN_HEAD_IM; $token = $self->_get_next_token; next B; } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM or $self->{insertion_mode} == AFTER_HEAD_IM) { $self->{parse_error}->(level => $self->{must_level}, type => 'unmatched end tag:noscript', token => $token); ## Ignore the token ## ISSUE: An issue in the spec. $token = $self->_get_next_token; next B; } else { # } } elsif ({ body => 1, html => 1, }->{$token->{tag_name}}) { if ($self->{insertion_mode} == BEFORE_HEAD_IM or $self->{insertion_mode} == IN_HEAD_IM or $self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { $self->{parse_error}->(level => $self->{must_level}, type => 'unmatched end tag:'.$token->{tag_name}, token => $token); ## Ignore the token $token = $self->_get_next_token; next B; } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) { $self->{parse_error}->(level => $self->{must_level}, type => 'unmatched end tag:' . 
$token->{tag_name}, token => $token); ## Ignore the token $token = $self->_get_next_token; next B; } else { die "$0: $self->{insertion_mode}: Unknown insertion mode"; } } elsif ($token->{tag_name} eq 'p') { $self->{parse_error}->(level => $self->{must_level}, type => 'unmatched end tag:p', token => $token); ## Ignore the token $token = $self->_get_next_token; next B; } elsif ($token->{tag_name} eq 'br') { if ($self->{insertion_mode} == BEFORE_HEAD_IM) { ## (before head) as if , (in head) as if $self->{head_element} = $self->{document}->create_element_ns ($HTML_NS, [undef, 'head']); $self->{head_element}->set_user_data (manakai_source_line => $token->{line}) if defined $token->{line}; $self->{head_element}->set_user_data (manakai_source_column => $token->{column}) if defined $token->{column}; $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element}); $self->{insertion_mode} = AFTER_HEAD_IM; ## Reprocess in the "after head" insertion mode... } elsif ($self->{insertion_mode} == IN_HEAD_IM) { ## As if pop @{$self->{open_elements}}; $self->{insertion_mode} = AFTER_HEAD_IM; ## Reprocess in the "after head" insertion mode... } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { ## ISSUE: Two parse errors for pop @{$self->{open_elements}}; $self->{insertion_mode} = IN_HEAD_IM; ## Reprocess in the "in head" insertion mode... ## As if pop @{$self->{open_elements}}; $self->{insertion_mode} = AFTER_HEAD_IM; ## Reprocess in the "after head" insertion mode... } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) { # } else { die "$0: $self->{insertion_mode}: Unknown insertion mode"; } ## ISSUE: does not agree with IE7 - it doesn't ignore . 
$self->{parse_error}->(level => $self->{must_level}, type => 'unmatched end tag:br', token => $token); ## Ignore the token $token = $self->_get_next_token; next B; } else { $self->{parse_error}->(level => $self->{must_level}, type => 'unmatched end tag:'.$token->{tag_name}, token => $token); ## Ignore the token $token = $self->_get_next_token; next B; } if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { ## As if pop @{$self->{open_elements}}; $self->{parse_error}->(level => $self->{must_level}, type => 'in noscript:/'.$token->{tag_name}, token => $token); ## Reprocess in the "in head" insertion mode... ## As if pop @{$self->{open_elements}}; ## Reprocess in the "after head" insertion mode... } elsif ($self->{insertion_mode} == IN_HEAD_IM) { ## As if pop @{$self->{open_elements}}; ## Reprocess in the "after head" insertion mode... } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) { ## ISSUE: This case cannot be reached? $self->{parse_error}->(level => $self->{must_level}, type => 'unmatched end tag:'.$token->{tag_name}, token => $token); ## Ignore the token ## ISSUE: An issue in the spec. 
$token = $self->_get_next_token; next B; } else { } ## "after head" insertion mode ## As if { my $el; $el = $self->{document}->create_element_ns ($HTML_NS, [undef, 'body']); $el->set_user_data (manakai_source_line => $token->{line}) if defined $token->{line}; $el->set_user_data (manakai_source_column => $token->{column}) if defined $token->{column}; $self->{open_elements}->[-1]->[0]->append_child ($el); push @{$self->{open_elements}}, [$el, $el_category->{'body'} || 0]; } $self->{insertion_mode} = IN_BODY_IM; ## reprocess next B; } elsif ($token->{type} == END_OF_FILE_TOKEN) { if ($self->{insertion_mode} == BEFORE_HEAD_IM) { ## NOTE: As if $self->{head_element} = $self->{document}->create_element_ns ($HTML_NS, [undef, 'head']); $self->{head_element}->set_user_data (manakai_source_line => $token->{line}) if defined $token->{line}; $self->{head_element}->set_user_data (manakai_source_column => $token->{column}) if defined $token->{column}; $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element}); #push @{$self->{open_elements}}, # [$self->{head_element}, $el_category->{head}]; #$self->{insertion_mode} = IN_HEAD_IM; ## NOTE: Reprocess. ## NOTE: As if #pop @{$self->{open_elements}}; #$self->{insertion_mode} = IN_AFTER_HEAD_IM; ## NOTE: Reprocess. # } elsif ($self->{insertion_mode} == IN_HEAD_IM) { ## NOTE: As if pop @{$self->{open_elements}}; #$self->{insertion_mode} = IN_AFTER_HEAD_IM; ## NOTE: Reprocess. # } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { $self->{parse_error}->(level => $self->{must_level}, type => 'in noscript:#eof', token => $token); ## As if pop @{$self->{open_elements}}; #$self->{insertion_mode} = IN_HEAD_IM; ## NOTE: Reprocess. ## NOTE: As if pop @{$self->{open_elements}}; #$self->{insertion_mode} = IN_AFTER_HEAD_IM; ## NOTE: Reprocess. 
# } else { # } ## NOTE: As if { my $el; $el = $self->{document}->create_element_ns ($HTML_NS, [undef, 'body']); $el->set_user_data (manakai_source_line => $token->{line}) if defined $token->{line}; $el->set_user_data (manakai_source_column => $token->{column}) if defined $token->{column}; $self->{open_elements}->[-1]->[0]->append_child ($el); push @{$self->{open_elements}}, [$el, $el_category->{'body'} || 0]; } $self->{insertion_mode} = IN_BODY_IM; ## NOTE: Reprocess. next B; } else { die "$0: $token->{type}: Unknown token type"; } ## ISSUE: An issue in the spec. } elsif ($self->{insertion_mode} & BODY_IMS) { if ($token->{type} == CHARACTER_TOKEN) { ## NOTE: There is a code clone of "character in body". $reconstruct_active_formatting_elements->($insert_to_current); $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data}); $token = $self->_get_next_token; next B; } elsif ($token->{type} == START_TAG_TOKEN) { if ({ caption => 1, col => 1, colgroup => 1, tbody => 1, td => 1, tfoot => 1, th => 1, thead => 1, tr => 1, }->{$token->{tag_name}}) { if ($self->{insertion_mode} == IN_CELL_IM) { ## have an element in table scope for (reverse 0..$#{$self->{open_elements}}) { my $node = $self->{open_elements}->[$_]; if ($node->[1] & TABLE_CELL_EL) { ## Close the cell $token->{self_closing} = $self->{self_closing}; unshift @{$self->{token}}, $token; delete $self->{self_closing}; #