/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src

Diff of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory | Revision Log | View Patch Patch

-revision 1.204 by wakaba,
Sun Oct  5 05:59:35 2008 UTC
+revision 1.205 by wakaba,
Mon Oct 13 06:18:31 2008 UTC
 Line 918 
 sub IN_FOREIGN_CONTENT_IM () { 0b1000000
      ## NOTE: "in foreign content" insertion mode is special; it is combined
      ## with the secondary insertion mode.  In this parser, they are stored
      ## together in the bit-or'ed form.
+ sub IN_CDATA_RCDATA_IM () { 0b1000000000000 }
+     ## NOTE: "in CDATA/RCDATA" insertion mode is also special; it is
+     ## combined with the original insertion mode.  In this parser,
+     ## they are stored together in the bit-or'ed form.
  ## NOTE: "initial" and "before html" insertion modes have no constants.
-Line 3975 
 sub _tree_construction_main ($) {
+Line 3979 
 sub _tree_construction_main ($) {
      ## Step 1
      my $start_tag_name = $token->{tag_name};
-     my $el;
+     !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
-     !!!create-element ($el, $HTML_NS, $start_tag_name, $token->{attributes}, $token);
      ## Step 2
-     $insert->($el);
-     ## Step 3
      $self->{content_model} = $content_model_flag; # CDATA or RCDATA
      delete $self->{escape}; # MUST
-     ## Step 4
+     ## Step 3, 4
-     my $text = '';
+     $self->{insertion_mode} |= IN_CDATA_RCDATA_IM;
-     !!!nack ('t40.1');
-     !!!next-token;
-     while ($token->{type} == CHARACTER_TOKEN) { # or until stop tokenizing
-       !!!cp ('t40');
-       $text .= $token->{data};
-       !!!next-token;
-     }
-     ## Step 5
+     !!!nack ('t40.1');
-     if (length $text) {
-       !!!cp ('t41');
-       my $text = $self->{document}->create_text_node ($text);
-       $el->append_child ($text);
-     }
-     ## Step 6
-     $self->{content_model} = PCDATA_CONTENT_MODEL;
-     ## Step 7
-     if ($token->{type} == END_TAG_TOKEN and
-         $token->{tag_name} eq $start_tag_name) {
-       !!!cp ('t42');
-       ## Ignore the token
-     } else {
-       ## NOTE: An end-of-file token.
-       if ($content_model_flag == CDATA_CONTENT_MODEL) {
-         !!!cp ('t43');
-         !!!parse-error (type => 'in CDATA:#eof', token => $token);
-       } elsif ($content_model_flag == RCDATA_CONTENT_MODEL) {
-         !!!cp ('t44');
-         !!!parse-error (type => 'in RCDATA:#eof', token => $token);
-       } else {
-         die "$0: $content_model_flag in parse_rcdata";
-       }
-     }
      !!!next-token;
    }; # $parse_rcdata
    my $script_start_tag = sub () {
+     ## Step 1
      my $script_el;
      !!!create-element ($script_el, $HTML_NS, 'script', $token->{attributes}, $token);
+     ## Step 2
      ## TODO: mark as "parser-inserted"
+     ## Step 3
+     ## TODO: Mark as "already executed", if ...
+     ## Step 4
+     $insert->($script_el);
+     ## ISSUE: $script_el is not put into the stack
+     push @{$self->{open_elements}}, [$script_el, $el_category->{script}];
+     ## Step 5
      $self->{content_model} = CDATA_CONTENT_MODEL;
      delete $self->{escape}; # MUST
-     my $text = '';
-     !!!nack ('t45.1');
-     !!!next-token;
-     while ($token->{type} == CHARACTER_TOKEN) {
-       !!!cp ('t45');
-       $text .= $token->{data};
-       !!!next-token;
-     } # stop if non-character token or tokenizer stops tokenising
-     if (length $text) {
-       !!!cp ('t46');
-       $script_el->manakai_append_text ($text);
-     }
-     $self->{content_model} = PCDATA_CONTENT_MODEL;
-     if ($token->{type} == END_TAG_TOKEN and
+     ## Step 6-7
-         $token->{tag_name} eq 'script') {
+     $self->{insertion_mode} |= IN_CDATA_RCDATA_IM;
-       !!!cp ('t47');
-       ## Ignore the token
-     } else {
-       !!!cp ('t48');
-       !!!parse-error (type => 'in CDATA:#eof', token => $token);
-       ## ISSUE: And ignore?
-       ## TODO: mark as "already executed"
-     }
-     if (defined $self->{inner_html_node}) {
-       !!!cp ('t49');
-       ## TODO: mark as "already executed"
-     } else {
-       !!!cp ('t50');
-       ## TODO: $old_insertion_point = current insertion point
-       ## TODO: insertion point = just before the next input character
-       $insert->($script_el);
+     !!!nack ('t40.2');
-       ## TODO: insertion point = $old_insertion_point (might be "undefined")
-       ## TODO: if there is a script that will execute as soon as the parser resume, then...
-     }
      !!!next-token;
    }; # $script_start_tag
-Line 4424 
 sub _tree_construction_main ($) {
+Line 4367 
 sub _tree_construction_main ($) {
        }
        !!!next-token;
        next B;
+     } elsif ($self->{insertion_mode} & IN_CDATA_RCDATA_IM) {
+       if ($token->{type} == CHARACTER_TOKEN) {
+         $token->{data} =~ s/^\x0A// if $self->{ignore_newline};
+         delete $self->{ignore_newline};
+         if (length $token->{data}) {
+           !!!cp ('t43');
+           $self->{open_elements}->[-1]->[0]->manakai_append_text
+               ($token->{data});
+         } else {
+           !!!cp ('t43.1');
+         }
+         !!!next-token;
+         next B;
+       } elsif ($token->{type} == END_TAG_TOKEN) {
+         delete $self->{ignore_newline};
+         if ($token->{tag_name} eq 'script') {
+           !!!cp ('t50');
+           ## Para 1-2
+           my $script = pop @{$self->{open_elements}};
+           ## Para 3
+           $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM;
+           ## Para 4
+           ## TODO: $old_insertion_point = $current_insertion_point;
+           ## TODO: $current_insertion_point = just before $self->{nc};
+           ## Para 5
+           ## TODO: Run the $script->[0].
+           ## Para 6
+           ## TODO: $current_insertion_point = $old_insertion_point;
+           ## Para 7
+           ## TODO: if ($pending_external_script) {
+             ## TODO: ...
+           ## TODO: }
+           !!!next-token;
+           next B;
+         } else {
+           !!!cp ('t42');
+           pop @{$self->{open_elements}};
+           $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM;
+           !!!next-token;
+           next B;
+         }
+       } elsif ($token->{type} == END_OF_FILE_TOKEN) {
+         delete $self->{ignore_newline};
+         !!!cp ('t44');
+         !!!parse-error (type => 'not closed',
+                         text => $self->{open_elements}->[-1]->[0]
+                             ->manakai_local_name,
+                         token => $token);
+         #if ($self->{open_elements}->[-1]->[1] & SCRIPT_EL) {
+         #  ## TODO: Mark as "already executed"
+         #}
+         pop @{$self->{open_elements}};
+         $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM;
+         ## Reprocess.
+         next B;
+       } else {
+         die "$0: $token->{type}: In CDATA/RCDATA: Unknown token type";
+       }
      } elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
        if ($token->{type} == CHARACTER_TOKEN) {
          !!!cp ('t87.1');
-Line 4825 
 sub _tree_construction_main ($) {
+Line 4841 
 sub _tree_construction_main ($) {
          ## NOTE: There is an "as if in head" code clone.
            $parse_rcdata->(RCDATA_CONTENT_MODEL);
-           pop @{$self->{open_elements}} # <head>
+           ## ISSUE: A spec bug [Bug 6038]
-               if $self->{insertion_mode} == AFTER_HEAD_IM;
+           splice @{$self->{open_elements}}, -2, 1, () # <head>
+               if ($self->{insertion_mode} & AFTER_HEAD_IM) == AFTER_HEAD_IM;
            next B;
          } elsif ($token->{tag_name} eq 'style' or
                   $token->{tag_name} eq 'noframes') {
-Line 4844 
 sub _tree_construction_main ($) {
+Line 4861 
 sub _tree_construction_main ($) {
              !!!cp ('t115');
            }
            $parse_rcdata->(CDATA_CONTENT_MODEL);
-           pop @{$self->{open_elements}} # <head>
+           ## ISSUE: A spec bug [Bug 6038]
-               if $self->{insertion_mode} == AFTER_HEAD_IM;
+           splice @{$self->{open_elements}}, -2, 1, () # <head>
+               if ($self->{insertion_mode} & AFTER_HEAD_IM) == AFTER_HEAD_IM;
            next B;
-             } elsif ($token->{tag_name} eq 'noscript') {
+         } elsif ($token->{tag_name} eq 'noscript') {
                if ($self->{insertion_mode} == IN_HEAD_IM) {
                  !!!cp ('t116');
                ## NOTE: and scripting is disabled
-Line 4891 
 sub _tree_construction_main ($) {
+Line 4909 
 sub _tree_construction_main ($) {
          ## NOTE: There is an "as if in head" code clone.
            $script_start_tag->();
-           pop @{$self->{open_elements}} # <head>
+           ## ISSUE: A spec bug  [Bug 6038]
-               if $self->{insertion_mode} == AFTER_HEAD_IM;
+           splice @{$self->{open_elements}}, -2, 1 # <head>
+               if ($self->{insertion_mode} & AFTER_HEAD_IM) == AFTER_HEAD_IM;
            next B;
          } elsif ($token->{tag_name} eq 'body' or
                   $token->{tag_name} eq 'frameset') {
-Line 7400 
 sub _tree_construction_main ($) {
+Line 7419 
 sub _tree_construction_main ($) {
            next B;
          }
        } elsif ($token->{tag_name} eq 'textarea') {
-         my $tag_name = $token->{tag_name};
+         ## Step 1
-         my $el;
+         !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
-         !!!create-element ($el, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
+         ## Step 2
          ## TODO: $self->{form_element} if defined
+         ## Step 3
+         $self->{ignore_newline} = 1;
+         ## Step 4
+         ## ISSUE: This step is wrong. (r2302 enbugged)
+         ## Step 5
          $self->{content_model} = RCDATA_CONTENT_MODEL;
          delete $self->{escape}; # MUST
-         $insert->($el);
+         ## Step 6-7
+         $self->{insertion_mode} |= IN_CDATA_RCDATA_IM;
-         my $text = '';
          !!!nack ('t392.1');
          !!!next-token;
-         if ($token->{type} == CHARACTER_TOKEN) {
-           $token->{data} =~ s/^\x0A//;
-           unless (length $token->{data}) {
-             !!!cp ('t392');
-             !!!next-token;
-           } else {
-             !!!cp ('t393');
-           }
-         } else {
-           !!!cp ('t394');
-         }
-         while ($token->{type} == CHARACTER_TOKEN) {
-           !!!cp ('t395');
-           $text .= $token->{data};
-           !!!next-token;
-         }
-         if (length $text) {
-           !!!cp ('t396');
-           $el->manakai_append_text ($text);
-         }
-         $self->{content_model} = PCDATA_CONTENT_MODEL;
-         if ($token->{type} == END_TAG_TOKEN and
-             $token->{tag_name} eq $tag_name) {
-           !!!cp ('t397');
-           ## Ignore the token
-         } else {
-           !!!cp ('t398');
-           !!!parse-error (type => 'in RCDATA:#eof', token => $token);
-         }
-         !!!next-token;
          next B;
        } elsif ($token->{tag_name} eq 'optgroup' or
                 $token->{tag_name} eq 'option') {

 Legend:



Removed from v.1.204
 


changed lines


 
Added in v.1.205
 Legend:



Removed from v.1.204
 


changed lines


 
Added in v.1.205
-Removed from v.1.204
+Added in v.1.205

admin@suikawiki.org	ViewVC Help
Powered by ViewVC 1.1.24