/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src

Diff of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory | Revision Log | View Patch Patch

-revision 1.27 by wakaba,
Sun Jun 24 14:24:21 2007 UTC
+revision 1.41 by wakaba,
Sat Jul 21 06:04:07 2007 UTC
 Line 7 
 our $VERSION=do{my @r=(q$Revision$=~/\d+
  ## doc.write ('');
  ## alert (doc.compatMode);
+ ## ISSUE: HTML5 revision 967 says that the encoding layer MUST NOT
+ ## strip the BOM and that the HTML layer MUST ignore it.  Whether we
+ ## can do that is not yet clear.
+ ## Is "{U+FEFF}..." in UTF-16BE/UTF-16LE three or four characters?
+ ## What about "{U+FEFF}..." in GB18030?
  my $permitted_slash_tag_name = {
    base => 1,
    link => 1,
-Line 144 
 sub new ($) {
+Line 150 
 sub new ($) {
    return $self;
  } # new
+ sub CM_ENTITY () { 0b001 } # bit flag: "&" is treated as markup (entity) in data
+ sub CM_LIMITED_MARKUP () { 0b010 } # bit flag: "<" is treated as markup in data (limited)
+ sub CM_FULL_MARKUP () { 0b100 } # bit flag: "<" is treated as markup in data (any)
+ sub PLAINTEXT_CONTENT_MODEL () { 0 } # no flag set: neither "&" nor "<" is markup
+ sub CDATA_CONTENT_MODEL () { CM_LIMITED_MARKUP } # limited "<" markup only, no entities
+ sub RCDATA_CONTENT_MODEL () { CM_ENTITY | CM_LIMITED_MARKUP } # entities plus limited "<" markup
+ sub PCDATA_CONTENT_MODEL () { CM_ENTITY | CM_FULL_MARKUP } # entities plus full "<" markup
  ## Implementations MUST act as if state machine in the spec
  sub _initialize_tokenizer ($) {
    my $self = shift;
    $self->{state} = 'data'; # MUST
-   $self->{content_model_flag} = 'PCDATA'; # be
+   $self->{content_model} = PCDATA_CONTENT_MODEL; # be
    undef $self->{current_token}; # start tag, end tag, comment, or DOCTYPE
    undef $self->{current_attribute};
    undef $self->{last_emitted_start_tag_name};
-Line 188 
 sub _get_next_token ($) {
+Line 203 
 sub _get_next_token ($) {
    A: {
      if ($self->{state} eq 'data') {
        if ($self->{next_input_character} == 0x0026) { # &
-         if ($self->{content_model_flag} eq 'PCDATA' or
+         if ($self->{content_model} & CM_ENTITY) { # PCDATA | RCDATA
-             $self->{content_model_flag} eq 'RCDATA') {
            $self->{state} = 'entity data';
            !!!next-input-character;
            redo A;
-Line 197 
 sub _get_next_token ($) {
+Line 211 
 sub _get_next_token ($) {
            #
          }
        } elsif ($self->{next_input_character} == 0x002D) { # -
-         if ($self->{content_model_flag} eq 'RCDATA' or
+         if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
-             $self->{content_model_flag} eq 'CDATA') {
            unless ($self->{escape}) {
              if ($self->{prev_input_character}->[0] == 0x002D and # -
                  $self->{prev_input_character}->[1] == 0x0021 and # !
-Line 210 
 sub _get_next_token ($) {
+Line 223 
 sub _get_next_token ($) {
          #
        } elsif ($self->{next_input_character} == 0x003C) { # <
-         if ($self->{content_model_flag} eq 'PCDATA' or
+         if ($self->{content_model} & CM_FULL_MARKUP or # PCDATA
-             (($self->{content_model_flag} eq 'CDATA' or
+             (($self->{content_model} & CM_LIMITED_MARKUP) and # CDATA | RCDATA
-               $self->{content_model_flag} eq 'RCDATA') and
               not $self->{escape})) {
            $self->{state} = 'tag open';
            !!!next-input-character;
-Line 222 
 sub _get_next_token ($) {
+Line 234 
 sub _get_next_token ($) {
          }
        } elsif ($self->{next_input_character} == 0x003E) { # >
          if ($self->{escape} and
-             ($self->{content_model_flag} eq 'RCDATA' or
+             ($self->{content_model} & CM_LIMITED_MARKUP)) { # RCDATA | CDATA
-              $self->{content_model_flag} eq 'CDATA')) {
            if ($self->{prev_input_character}->[0] == 0x002D and # -
                $self->{prev_input_character}->[1] == 0x002D) { # -
              delete $self->{escape};
-Line 260 
 sub _get_next_token ($) {
+Line 271 
 sub _get_next_token ($) {
        redo A;
      } elsif ($self->{state} eq 'tag open') {
-       if ($self->{content_model_flag} eq 'RCDATA' or
+       if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
-           $self->{content_model_flag} eq 'CDATA') {
          if ($self->{next_input_character} == 0x002F) { # /
            !!!next-input-character;
            $self->{state} = 'close tag open';
-Line 274 
 sub _get_next_token ($) {
+Line 284 
 sub _get_next_token ($) {
            redo A;
          }
-       } elsif ($self->{content_model_flag} eq 'PCDATA') {
+       } elsif ($self->{content_model} & CM_FULL_MARKUP) { # PCDATA
          if ($self->{next_input_character} == 0x0021) { # !
            $self->{state} = 'markup declaration open';
            !!!next-input-character;
-Line 321 
 sub _get_next_token ($) {
+Line 331 
 sub _get_next_token ($) {
            redo A;
          }
        } else {
-         die "$0: $self->{content_model_flag}: Unknown content model flag";
+         die "$0: $self->{content_model} in tag open";
        }
      } elsif ($self->{state} eq 'close tag open') {
-       if ($self->{content_model_flag} eq 'RCDATA' or
+       if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
-           $self->{content_model_flag} eq 'CDATA') {
          if (defined $self->{last_emitted_start_tag_name}) {
+           ## NOTE: <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>
            my @next_char;
            TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
              push @next_char, $self->{next_input_character};
-Line 418 
 sub _get_next_token ($) {
+Line 428 
 sub _get_next_token ($) {
          redo A;
        } elsif ($self->{next_input_character} == 0x003E) { # >
          if ($self->{current_token}->{type} eq 'start tag') {
+           $self->{current_token}->{first_start_tag}
+               = not defined $self->{last_emitted_start_tag_name};
            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
          } elsif ($self->{current_token}->{type} eq 'end tag') {
-           $self->{content_model_flag} = 'PCDATA'; # MUST
+           $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
            if ($self->{current_token}->{attributes}) {
              !!!parse-error (type => 'end tag attribute');
            }
-Line 443 
 sub _get_next_token ($) {
+Line 455 
 sub _get_next_token ($) {
        } elsif ($self->{next_input_character} == -1) {
          !!!parse-error (type => 'unclosed tag');
          if ($self->{current_token}->{type} eq 'start tag') {
+           $self->{current_token}->{first_start_tag}
+               = not defined $self->{last_emitted_start_tag_name};
            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
          } elsif ($self->{current_token}->{type} eq 'end tag') {
-           $self->{content_model_flag} = 'PCDATA'; # MUST
+           $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
            if ($self->{current_token}->{attributes}) {
              !!!parse-error (type => 'end tag attribute');
            }
-Line 489 
 sub _get_next_token ($) {
+Line 503 
 sub _get_next_token ($) {
          redo A;
        } elsif ($self->{next_input_character} == 0x003E) { # >
          if ($self->{current_token}->{type} eq 'start tag') {
+           $self->{current_token}->{first_start_tag}
+               = not defined $self->{last_emitted_start_tag_name};
            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
          } elsif ($self->{current_token}->{type} eq 'end tag') {
-           $self->{content_model_flag} = 'PCDATA'; # MUST
+           $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
            if ($self->{current_token}->{attributes}) {
              !!!parse-error (type => 'end tag attribute');
            }
-Line 527 
 sub _get_next_token ($) {
+Line 543 
 sub _get_next_token ($) {
        } elsif ($self->{next_input_character} == -1) {
          !!!parse-error (type => 'unclosed tag');
          if ($self->{current_token}->{type} eq 'start tag') {
+           $self->{current_token}->{first_start_tag}
+               = not defined $self->{last_emitted_start_tag_name};
            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
          } elsif ($self->{current_token}->{type} eq 'end tag') {
-           $self->{content_model_flag} = 'PCDATA'; # MUST
+           $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
            if ($self->{current_token}->{attributes}) {
              !!!parse-error (type => 'end tag attribute');
            }
-Line 553 
 sub _get_next_token ($) {
+Line 571 
 sub _get_next_token ($) {
        my $before_leave = sub {
          if (exists $self->{current_token}->{attributes} # start tag or end tag
              ->{$self->{current_attribute}->{name}}) { # MUST
-           !!!parse-error (type => 'dupulicate attribute');
+           !!!parse-error (type => 'duplicate attribute:'.$self->{current_attribute}->{name});
            ## Discard $self->{current_attribute} # MUST
          } else {
            $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
-Line 578 
 sub _get_next_token ($) {
+Line 596 
 sub _get_next_token ($) {
        } elsif ($self->{next_input_character} == 0x003E) { # >
          $before_leave->();
          if ($self->{current_token}->{type} eq 'start tag') {
+           $self->{current_token}->{first_start_tag}
+               = not defined $self->{last_emitted_start_tag_name};
            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
          } elsif ($self->{current_token}->{type} eq 'end tag') {
-           $self->{content_model_flag} = 'PCDATA'; # MUST
+           $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
            if ($self->{current_token}->{attributes}) {
              !!!parse-error (type => 'end tag attribute');
            }
-Line 617 
 sub _get_next_token ($) {
+Line 637 
 sub _get_next_token ($) {
          !!!parse-error (type => 'unclosed tag');
          $before_leave->();
          if ($self->{current_token}->{type} eq 'start tag') {
+           $self->{current_token}->{first_start_tag}
+               = not defined $self->{last_emitted_start_tag_name};
            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
          } elsif ($self->{current_token}->{type} eq 'end tag') {
-           $self->{content_model_flag} = 'PCDATA'; # MUST
+           $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
            if ($self->{current_token}->{attributes}) {
              !!!parse-error (type => 'end tag attribute');
            }
-Line 653 
 sub _get_next_token ($) {
+Line 675 
 sub _get_next_token ($) {
          redo A;
        } elsif ($self->{next_input_character} == 0x003E) { # >
          if ($self->{current_token}->{type} eq 'start tag') {
+           $self->{current_token}->{first_start_tag}
+               = not defined $self->{last_emitted_start_tag_name};
            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
          } elsif ($self->{current_token}->{type} eq 'end tag') {
-           $self->{content_model_flag} = 'PCDATA'; # MUST
+           $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
            if ($self->{current_token}->{attributes}) {
              !!!parse-error (type => 'end tag attribute');
            }
-Line 684 
 sub _get_next_token ($) {
+Line 708 
 sub _get_next_token ($) {
            #
          } else {
            !!!parse-error (type => 'nestc');
+           ## TODO: Different error type for <aa / bb> than <aa/>
          }
          $self->{state} = 'before attribute name';
          # next-input-character is already done
-Line 691 
 sub _get_next_token ($) {
+Line 716 
 sub _get_next_token ($) {
        } elsif ($self->{next_input_character} == -1) {
          !!!parse-error (type => 'unclosed tag');
          if ($self->{current_token}->{type} eq 'start tag') {
+           $self->{current_token}->{first_start_tag}
+               = not defined $self->{last_emitted_start_tag_name};
            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
          } elsif ($self->{current_token}->{type} eq 'end tag') {
-           $self->{content_model_flag} = 'PCDATA'; # MUST
+           $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
            if ($self->{current_token}->{attributes}) {
              !!!parse-error (type => 'end tag attribute');
            }
-Line 736 
 sub _get_next_token ($) {
+Line 763 
 sub _get_next_token ($) {
          redo A;
        } elsif ($self->{next_input_character} == 0x003E) { # >
          if ($self->{current_token}->{type} eq 'start tag') {
+           $self->{current_token}->{first_start_tag}
+               = not defined $self->{last_emitted_start_tag_name};
            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
          } elsif ($self->{current_token}->{type} eq 'end tag') {
-           $self->{content_model_flag} = 'PCDATA'; # MUST
+           $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
            if ($self->{current_token}->{attributes}) {
              !!!parse-error (type => 'end tag attribute');
            }
-Line 754 
 sub _get_next_token ($) {
+Line 783 
 sub _get_next_token ($) {
        } elsif ($self->{next_input_character} == -1) {
          !!!parse-error (type => 'unclosed tag');
          if ($self->{current_token}->{type} eq 'start tag') {
+           $self->{current_token}->{first_start_tag}
+               = not defined $self->{last_emitted_start_tag_name};
            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
          } elsif ($self->{current_token}->{type} eq 'end tag') {
-           $self->{content_model_flag} = 'PCDATA'; # MUST
+           $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
            if ($self->{current_token}->{attributes}) {
              !!!parse-error (type => 'end tag attribute');
            }
-Line 788 
 sub _get_next_token ($) {
+Line 819 
 sub _get_next_token ($) {
        } elsif ($self->{next_input_character} == -1) {
          !!!parse-error (type => 'unclosed attribute value');
          if ($self->{current_token}->{type} eq 'start tag') {
+           $self->{current_token}->{first_start_tag}
+               = not defined $self->{last_emitted_start_tag_name};
            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
          } elsif ($self->{current_token}->{type} eq 'end tag') {
-           $self->{content_model_flag} = 'PCDATA'; # MUST
+           $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
            if ($self->{current_token}->{attributes}) {
              !!!parse-error (type => 'end tag attribute');
            }
-Line 822 
 sub _get_next_token ($) {
+Line 855 
 sub _get_next_token ($) {
        } elsif ($self->{next_input_character} == -1) {
          !!!parse-error (type => 'unclosed attribute value');
          if ($self->{current_token}->{type} eq 'start tag') {
+           $self->{current_token}->{first_start_tag}
+               = not defined $self->{last_emitted_start_tag_name};
            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
          } elsif ($self->{current_token}->{type} eq 'end tag') {
-           $self->{content_model_flag} = 'PCDATA'; # MUST
+           $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
            if ($self->{current_token}->{attributes}) {
              !!!parse-error (type => 'end tag attribute');
            }
-Line 859 
 sub _get_next_token ($) {
+Line 894 
 sub _get_next_token ($) {
          redo A;
        } elsif ($self->{next_input_character} == 0x003E) { # >
          if ($self->{current_token}->{type} eq 'start tag') {
+           $self->{current_token}->{first_start_tag}
+               = not defined $self->{last_emitted_start_tag_name};
            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
          } elsif ($self->{current_token}->{type} eq 'end tag') {
-           $self->{content_model_flag} = 'PCDATA'; # MUST
+           $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
            if ($self->{current_token}->{attributes}) {
              !!!parse-error (type => 'end tag attribute');
            }
-Line 877 
 sub _get_next_token ($) {
+Line 914 
 sub _get_next_token ($) {
        } elsif ($self->{next_input_character} == -1) {
          !!!parse-error (type => 'unclosed tag');
          if ($self->{current_token}->{type} eq 'start tag') {
+           $self->{current_token}->{first_start_tag}
+               = not defined $self->{last_emitted_start_tag_name};
            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
          } elsif ($self->{current_token}->{type} eq 'end tag') {
-           $self->{content_model_flag} = 'PCDATA'; # MUST
+           $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
            if ($self->{current_token}->{attributes}) {
              !!!parse-error (type => 'end tag attribute');
            }
-Line 990 
 sub _get_next_token ($) {
+Line 1029 
 sub _get_next_token ($) {
          }
        }
-       !!!parse-error (type => 'bogus comment open');
+       !!!parse-error (type => 'bogus comment');
        $self->{next_input_character} = shift @next_char;
        !!!back-next-input-character (@next_char);
        $self->{state} = 'bogus comment';
-Line 1049 
 sub _get_next_token ($) {
+Line 1088 
 sub _get_next_token ($) {
          redo A;
        } else {
          $self->{current_token}->{data} # comment
-             .= chr ($self->{next_input_character});
+             .= '-' . chr ($self->{next_input_character});
          $self->{state} = 'comment';
          !!!next-input-character;
          redo A;
-Line 1459 
 sub _get_next_token ($) {
+Line 1498 
 sub _get_next_token ($) {
          redo A;
        } else {
-         !!!parse-error (type => 'string after PUBLIC literal');
+         !!!parse-error (type => 'string after SYSTEM');
          $self->{state} = 'bogus DOCTYPE';
          !!!next-input-character;
          redo A;
-Line 1608 
 sub _tokenize_attempt_to_consume_an_enti
+Line 1647 
 sub _tokenize_attempt_to_consume_an_enti
            redo X;
          } elsif (not defined $code) { # no hexadecimal digit
            !!!parse-error (type => 'bare hcro');
+           !!!back-next-input-character ($x_char, $self->{next_input_character});
            $self->{next_input_character} = 0x0023; # #
-           !!!back-next-input-character ($x_char);
            return undef;
          } elsif ($self->{next_input_character} == 0x003B) { # ;
            !!!next-input-character;
-Line 1627 
 sub _tokenize_attempt_to_consume_an_enti
+Line 1666 
 sub _tokenize_attempt_to_consume_an_enti
            !!!parse-error (type => 'CR character reference');
            $code = 0x000A;
          } elsif (0x80 <= $code and $code <= 0x9F) {
-           !!!parse-error (type => sprintf 'c1 entity:U+%04X', $code);
+           !!!parse-error (type => sprintf 'C1 character reference:U+%04X', $code);
            $code = $c1_entity_char->{$code};
          }
-Line 1662 
 sub _tokenize_attempt_to_consume_an_enti
+Line 1701 
 sub _tokenize_attempt_to_consume_an_enti
          !!!parse-error (type => 'CR character reference');
          $code = 0x000A;
        } elsif (0x80 <= $code and $code <= 0x9F) {
-         !!!parse-error (type => sprintf 'c1 entity:U+%04X', $code);
+         !!!parse-error (type => sprintf 'C1 character reference:U+%04X', $code);
          $code = $c1_entity_char->{$code};
        }
-Line 1681 
 sub _tokenize_attempt_to_consume_an_enti
+Line 1720 
 sub _tokenize_attempt_to_consume_an_enti
      !!!next-input-character;
      my $value = $entity_name;
-     my $match;
+     my $match = 0;
      require Whatpm::_NamedEntityList;
      our $EntityChar;
-Line 1701 
 sub _tokenize_attempt_to_consume_an_enti
+Line 1740 
 sub _tokenize_attempt_to_consume_an_enti
            $match = 1;
            !!!next-input-character;
            last;
-         } elsif (not $in_attr) {
+         } else {
            $value = $EntityChar->{$entity_name};
            $match = -1;
-         } else {
+           !!!next-input-character;
-           $value .= chr $self->{next_input_character};
          }
        } else {
          $value .= chr $self->{next_input_character};
+         $match *= 2;
+         !!!next-input-character;
        }
-       !!!next-input-character;
      }
      if ($match > 0) {
        return {type => 'character', data => $value};
      } elsif ($match < 0) {
-       !!!parse-error (type => 'refc');
+       !!!parse-error (type => 'no refc');
-       return {type => 'character', data => $value};
+       if ($in_attr and $match < -1) {
+         return {type => 'character', data => '&'.$entity_name};
+       } else {
+         return {type => 'character', data => $value};
+       }
      } else {
        !!!parse-error (type => 'bare ero');
        ## NOTE: No characters are consumed in the spec.
-Line 1982 
 sub _tree_construction_root_element ($)
+Line 2025 
 sub _tree_construction_root_element ($)
        my $root_element; !!!create-element ($root_element, 'html');
        $self->{document}->append_child ($root_element);
        push @{$self->{open_elements}}, [$root_element, 'html'];
-       #$phase = 'main';
        ## reprocess
        #redo B;
-       return;
+       return; ## Go to the main phase.
    } # B
  } # _tree_construction_root_element
-Line 2001 
 sub _reset_insertion_mode ($) {
+Line 2043 
 sub _reset_insertion_mode ($) {
      ## Step 3
      S3: {
-       $last = 1 if $self->{open_elements}->[0]->[0] eq $node->[0];
+       ## ISSUE: Oops! "If node is the first node in the stack of open
-       if (defined $self->{inner_html_node}) {
+       ## elements, then set last to true. If the context element of the
-         if ($self->{inner_html_node}->[1] eq 'td' or
+       ## HTML fragment parsing algorithm is neither a td element nor a
-             $self->{inner_html_node}->[1] eq 'th') {
+       ## th element, then set node to the context element. (fragment case)":
-           #
+       ## The second "if" is in the scope of the first "if"!?
-         } else {
+       if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
-           $node = $self->{inner_html_node};
+         $last = 1;
+         if (defined $self->{inner_html_node}) {
+           if ($self->{inner_html_node}->[1] eq 'td' or
+               $self->{inner_html_node}->[1] eq 'th') {
+             #
+           } else {
+             $node = $self->{inner_html_node};
+           }
          }
        }
-Line 2054 
 sub _reset_insertion_mode ($) {
+Line 2103 
 sub _reset_insertion_mode ($) {
  sub _tree_construction_main ($) {
    my $self = shift;
-   my $phase = 'main';
+   my $previous_insertion_mode;
    my $active_formatting_elements = [];
-Line 2150 
 sub _tree_construction_main ($) {
+Line 2199 
 sub _tree_construction_main ($) {
      $insert->($el); # /context node/->append_child ($el)
      ## Step 3
-     $self->{content_model_flag} = $content_model_flag; # CDATA or RCDATA
+     $self->{content_model} = $content_model_flag; # CDATA or RCDATA
      delete $self->{escape}; # MUST
      ## Step 4
-Line 2168 
 sub _tree_construction_main ($) {
+Line 2217 
 sub _tree_construction_main ($) {
      }
      ## Step 6
-     $self->{content_model_flag} = 'PCDATA';
+     $self->{content_model} = PCDATA_CONTENT_MODEL;
      ## Step 7
      if ($token->{type} eq 'end tag' and $token->{tag_name} eq $start_tag_name) {
        ## Ignore the token
+     } elsif ($content_model_flag == CDATA_CONTENT_MODEL) {
+       !!!parse-error (type => 'in CDATA:#'.$token->{type});
+     } elsif ($content_model_flag == RCDATA_CONTENT_MODEL) {
+       !!!parse-error (type => 'in RCDATA:#'.$token->{type});
      } else {
-       !!!parse-error (type => 'in '.$content_model_flag.':#'.$token->{type});
+       die "$0: $content_model_flag in parse_rcdata";
      }
      !!!next-token;
    }; # $parse_rcdata
-Line 2185 
 sub _tree_construction_main ($) {
+Line 2238 
 sub _tree_construction_main ($) {
      !!!create-element ($script_el, 'script', $token->{attributes});
      ## TODO: mark as "parser-inserted"
-     $self->{content_model_flag} = 'CDATA';
+     $self->{content_model} = CDATA_CONTENT_MODEL;
      delete $self->{escape}; # MUST
      my $text = '';
-Line 2198 
 sub _tree_construction_main ($) {
+Line 2251 
 sub _tree_construction_main ($) {
        $script_el->manakai_append_text ($text);
      }
-     $self->{content_model_flag} = 'PCDATA';
+     $self->{content_model} = PCDATA_CONTENT_MODEL;
      if ($token->{type} eq 'end tag' and
          $token->{tag_name} eq 'script') {
-Line 2452 
 sub _tree_construction_main ($) {
+Line 2505 
 sub _tree_construction_main ($) {
          return;
        } elsif ($token->{tag_name} eq 'style') {
          ## NOTE: This is an "as if in head" code clone
-         $parse_rcdata->('CDATA', $insert);
+         $parse_rcdata->(CDATA_CONTENT_MODEL, $insert);
          return;
        } elsif ({
-                 base => 1, link => 1, meta => 1,
+                 base => 1, link => 1,
                 }->{$token->{tag_name}}) {
          ## NOTE: This is an "as if in head" code clone, only "-t" differs
          !!!insert-element-t ($token->{tag_name}, $token->{attributes});
          pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
          !!!next-token;
-         ## TODO: Extracting |charset| from |meta|.
+         return;
+       } elsif ($token->{tag_name} eq 'meta') {
+         ## NOTE: This is an "as if in head" code clone, only "-t" differs
+         !!!insert-element-t ($token->{tag_name}, $token->{attributes});
+         pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
+         unless ($self->{confident}) {
+           my $charset;
+           if ($token->{attributes}->{charset}) { ## TODO: And if supported
+             $charset = $token->{attributes}->{charset}->{value};
+           }
+           if ($token->{attributes}->{'http-equiv'}) {
+             ## ISSUE: Algorithm name in the spec was incorrect so that not linked to the definition.
+             if ($token->{attributes}->{'http-equiv'}->{value}
+                 =~ /\A[^;]*;[\x09-\x0D\x20]*charset[\x09-\x0D\x20]*=
+                     [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
+                     ([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) {
+               $charset = defined $1 ? $1 : defined $2 ? $2 : $3;
+             } ## TODO: And if supported
+           }
+           ## TODO: Change the encoding
+         }
+         !!!next-token;
          return;
        } elsif ($token->{tag_name} eq 'title') {
          !!!parse-error (type => 'in body:title');
          ## NOTE: This is an "as if in head" code clone
-         $parse_rcdata->('RCDATA', $insert);
+         $parse_rcdata->(RCDATA_CONTENT_MODEL, sub {
+           if (defined $self->{head_element}) {
+             $self->{head_element}->append_child ($_[0]);
+           } else {
+             $insert->($_[0]);
+           }
+         });
          return;
        } elsif ($token->{tag_name} eq 'body') {
          !!!parse-error (type => 'in body:body');
-Line 2569 
 sub _tree_construction_main ($) {
+Line 2651 
 sub _tree_construction_main ($) {
              if ($i != -1) {
                !!!parse-error (type => 'end tag missing:'.
                                $self->{open_elements}->[-1]->[1]);
-               ## TODO: test
              }
              splice @{$self->{open_elements}}, $i;
              last LI;
-Line 2617 
 sub _tree_construction_main ($) {
+Line 2698 
 sub _tree_construction_main ($) {
              if ($i != -1) {
                !!!parse-error (type => 'end tag missing:'.
                                $self->{open_elements}->[-1]->[1]);
-               ## TODO: test
              }
              splice @{$self->{open_elements}}, $i;
              last LI;
-Line 2658 
 sub _tree_construction_main ($) {
+Line 2738 
 sub _tree_construction_main ($) {
          !!!insert-element-t ($token->{tag_name}, $token->{attributes});
-         $self->{content_model_flag} = 'PLAINTEXT';
+         $self->{content_model} = PLAINTEXT_CONTENT_MODEL;
          !!!next-token;
          return;
-Line 2761 
 sub _tree_construction_main ($) {
+Line 2841 
 sub _tree_construction_main ($) {
          INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
            my $node = $self->{open_elements}->[$_];
            if ($node->[1] eq 'nobr') {
+             !!!parse-error (type => 'not closed:nobr');
              !!!back-token;
              $token = {type => 'end tag', tag_name => 'nobr'};
              return;
-Line 2812 
 sub _tree_construction_main ($) {
+Line 2893 
 sub _tree_construction_main ($) {
          return;
        } elsif ($token->{tag_name} eq 'xmp') {
          $reconstruct_active_formatting_elements->($insert_to_current);
-         $parse_rcdata->('CDATA', $insert);
+         $parse_rcdata->(CDATA_CONTENT_MODEL, $insert);
          return;
        } elsif ($token->{tag_name} eq 'table') {
          ## has a p element in scope
-Line 2844 
 sub _tree_construction_main ($) {
+Line 2925 
 sub _tree_construction_main ($) {
            !!!parse-error (type => 'image');
            $token->{tag_name} = 'img';
          }
+         ## NOTE: There is an "as if <br>" code clone.
          $reconstruct_active_formatting_elements->($insert_to_current);
          !!!insert-element-t ($token->{tag_name}, $token->{attributes});
-Line 2927 
 sub _tree_construction_main ($) {
+Line 3009 
 sub _tree_construction_main ($) {
          !!!create-element ($el, $token->{tag_name}, $token->{attributes});
          ## TODO: $self->{form_element} if defined
-         $self->{content_model_flag} = 'RCDATA';
+         $self->{content_model} = RCDATA_CONTENT_MODEL;
          delete $self->{escape}; # MUST
          $insert->($el);
-Line 2948 
 sub _tree_construction_main ($) {
+Line 3030 
 sub _tree_construction_main ($) {
            $el->manakai_append_text ($text);
          }
-         $self->{content_model_flag} = 'PCDATA';
+         $self->{content_model} = PCDATA_CONTENT_MODEL;
          if ($token->{type} eq 'end tag' and
              $token->{tag_name} eq $tag_name) {
-Line 2964 
 sub _tree_construction_main ($) {
+Line 3046 
 sub _tree_construction_main ($) {
                  noframes => 1,
                  noscript => 0, ## TODO: 1 if scripting is enabled
                 }->{$token->{tag_name}}) {
-         $parse_rcdata->('CDATA', $insert);
+         $parse_rcdata->(CDATA_CONTENT_MODEL, $insert);
          return;
        } elsif ($token->{tag_name} eq 'select') {
          $reconstruct_active_formatting_elements->($insert_to_current);
-Line 3002 
 sub _tree_construction_main ($) {
+Line 3084 
 sub _tree_construction_main ($) {
              unless ({
                         dd => 1, dt => 1, li => 1, p => 1, td => 1,
                         th => 1, tr => 1, body => 1, html => 1,
+                      tbody => 1, tfoot => 1, thead => 1,
                      }->{$_->[1]}) {
                !!!parse-error (type => 'not closed:'.$_->[1]);
              }
-Line 3051 
 sub _tree_construction_main ($) {
+Line 3134 
 sub _tree_construction_main ($) {
                   li => ($token->{tag_name} ne 'li'),
                   p => ($token->{tag_name} ne 'p'),
                   td => 1, th => 1, tr => 1,
+                  tbody => 1, tfoot=> 1, thead => 1,
                  }->{$self->{open_elements}->[-1]->[1]}) {
                !!!back-token;
                $token = {type => 'end tag',
-Line 3068 
 sub _tree_construction_main ($) {
+Line 3152 
 sub _tree_construction_main ($) {
          } # INSCOPE
          if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
-           !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
+           if (defined $i) {
+             !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
+           } else {
+             !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
+           }
          }
-         splice @{$self->{open_elements}}, $i if defined $i;
+         if (defined $i) {
+           splice @{$self->{open_elements}}, $i;
+         } elsif ($token->{tag_name} eq 'p') {
+           ## As if <p>, then reprocess the current token
+           my $el;
+           !!!create-element ($el, 'p');
+           $insert->($el);
+         }
          $clear_up_to_marker->()
            if {
              button => 1, marquee => 1, object => 1,
-Line 3087 
 sub _tree_construction_main ($) {
+Line 3182 
 sub _tree_construction_main ($) {
              if ({
                   dd => 1, dt => 1, li => 1, p => 1,
                   td => 1, th => 1, tr => 1,
+                  tbody => 1, tfoot=> 1, thead => 1,
                  }->{$self->{open_elements}->[-1]->[1]}) {
                !!!back-token;
                $token = {type => 'end tag',
-Line 3125 
 sub _tree_construction_main ($) {
+Line 3221 
 sub _tree_construction_main ($) {
              if ({
                   dd => 1, dt => 1, li => 1, p => 1,
                   td => 1, th => 1, tr => 1,
+                  tbody => 1, tfoot=> 1, thead => 1,
                  }->{$self->{open_elements}->[-1]->[1]}) {
                !!!back-token;
                $token = {type => 'end tag',
-Line 3155 
 sub _tree_construction_main ($) {
+Line 3252 
 sub _tree_construction_main ($) {
                  strong => 1, tt => 1, u => 1,
                 }->{$token->{tag_name}}) {
          $formatting_end_tag->($token->{tag_name});
- ## TODO: <http://html5.org/tools/web-apps-tracker?from=883&to=884>
+         return;
+       } elsif ($token->{tag_name} eq 'br') {
+         !!!parse-error (type => 'unmatched end tag:br');
+         ## As if <br>
+         $reconstruct_active_formatting_elements->($insert_to_current);
+         my $el;
+         !!!create-element ($el, 'br');
+         $insert->($el);
+         ## Ignore the token.
+         !!!next-token;
          return;
        } elsif ({
                  caption => 1, col => 1, colgroup => 1, frame => 1,
                  frameset => 1, head => 1, option => 1, optgroup => 1,
                  tbody => 1, td => 1, tfoot => 1, th => 1,
                  thead => 1, tr => 1,
-                 area => 1, basefont => 1, bgsound => 1, br => 1,
+                 area => 1, basefont => 1, bgsound => 1,
                  embed => 1, hr => 1, iframe => 1, image => 1,
                  img => 1, input => 1, isindex => 1, noembed => 1,
                  noframes => 1, param => 1, select => 1, spacer => 1,
-Line 3189 
 sub _tree_construction_main ($) {
+Line 3298 
 sub _tree_construction_main ($) {
              if ({
                   dd => 1, dt => 1, li => 1, p => 1,
                   td => 1, th => 1, tr => 1,
+                  tbody => 1, tfoot=> 1, thead => 1,
                  }->{$self->{open_elements}->[-1]->[1]}) {
                !!!back-token;
                $token = {type => 'end tag',
-Line 3232 
 sub _tree_construction_main ($) {
+Line 3342 
 sub _tree_construction_main ($) {
    }; # $in_body
    B: {
-     if ($phase eq 'main') {
+     if ($token->{type} eq 'DOCTYPE') {
-       if ($token->{type} eq 'DOCTYPE') {
+       !!!parse-error (type => 'DOCTYPE in the middle');
-         !!!parse-error (type => 'in html:#DOCTYPE');
+       ## Ignore the token
-         ## Ignore the token
+       ## Stay in the phase
-         ## Stay in the phase
+       !!!next-token;
-         !!!next-token;
+       redo B;
-         redo B;
+     } elsif ($token->{type} eq 'end-of-file') {
-       } elsif ($token->{type} eq 'start tag' and
+       if ($token->{insertion_mode} ne 'trailing end') {
-                $token->{tag_name} eq 'html') {
-         ## TODO: unless it is the first start tag token, parse-error
-         my $top_el = $self->{open_elements}->[0]->[0];
-         for my $attr_name (keys %{$token->{attributes}}) {
-           unless ($top_el->has_attribute_ns (undef, $attr_name)) {
-             $top_el->set_attribute_ns
-               (undef, [undef, $attr_name],
-                $token->{attributes}->{$attr_name}->{value});
-           }
-         }
-         !!!next-token;
-         redo B;
-       } elsif ($token->{type} eq 'end-of-file') {
          ## Generate implied end tags
          if ({
               dd => 1, dt => 1, li => 1, p => 1, td => 1, th => 1, tr => 1,
+              tbody => 1, tfoot=> 1, thead => 1,
              }->{$self->{open_elements}->[-1]->[1]}) {
            !!!back-token;
            $token = {type => 'end tag', tag_name => $self->{open_elements}->[-1]->[1]};
-Line 3271 
 sub _tree_construction_main ($) {
+Line 3369 
 sub _tree_construction_main ($) {
            !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
          }
-         ## Stop parsing
-         last B;
          ## ISSUE: There is an issue in the spec.
+       }
+       ## Stop parsing
+       last B;
+     } elsif ($token->{type} eq 'start tag' and
+              $token->{tag_name} eq 'html') {
+       if ($self->{insertion_mode} eq 'trailing end') {
+         ## Turn into the main phase
+         !!!parse-error (type => 'after html:html');
+         $self->{insertion_mode} = $previous_insertion_mode;
+       }
+ ## ISSUE: "aa<html>" is not a parse error.
+ ## ISSUE: "<html>" in fragment is not a parse error.
+       unless ($token->{first_start_tag}) {
+         !!!parse-error (type => 'not first start tag');
+       }
+       my $top_el = $self->{open_elements}->[0]->[0];
+       for my $attr_name (keys %{$token->{attributes}}) {
+         unless ($top_el->has_attribute_ns (undef, $attr_name)) {
+           $top_el->set_attribute_ns
+             (undef, [undef, $attr_name],
+              $token->{attributes}->{$attr_name}->{value});
+         }
+       }
+       !!!next-token;
+       redo B;
+     } elsif ($token->{type} eq 'comment') {
+       my $comment = $self->{document}->create_comment ($token->{data});
+       if ($self->{insertion_mode} eq 'trailing end') {
+         $self->{document}->append_child ($comment);
+       } elsif ($self->{insertion_mode} eq 'after body') {
+         $self->{open_elements}->[0]->[0]->append_child ($comment);
        } else {
-         if ($self->{insertion_mode} eq 'before head') {
+         $self->{open_elements}->[-1]->[0]->append_child ($comment);
+       }
+       !!!next-token;
+       redo B;
+     } elsif ($self->{insertion_mode} eq 'before head') {
            if ($token->{type} eq 'character') {
              if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
                $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
-Line 3292 
 sub _tree_construction_main ($) {
+Line 3424 
 sub _tree_construction_main ($) {
              $self->{insertion_mode} = 'in head';
              ## reprocess
              redo B;
-           } elsif ($token->{type} eq 'comment') {
-             my $comment = $self->{document}->create_comment ($token->{data});
-             $self->{open_elements}->[-1]->[0]->append_child ($comment);
-             !!!next-token;
-             redo B;
            } elsif ($token->{type} eq 'start tag') {
              my $attr = $token->{tag_name} eq 'head' ? $token->{attributes} : {};
              !!!create-element ($self->{head_element}, 'head', $attr);
-Line 3315 
 sub _tree_construction_main ($) {
+Line 3442 
 sub _tree_construction_main ($) {
              }
              redo B;
            } elsif ($token->{type} eq 'end tag') {
-             if ({head => 1, body => 1, html => 1}->{$token->{tag_name}}) {
+             if ({
+                  head => 1, body => 1, html => 1,
+                  p => 1, br => 1,
+                 }->{$token->{tag_name}}) {
                ## As if <head>
                !!!create-element ($self->{head_element}, 'head');
                $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
-Line 3345 
 sub _tree_construction_main ($) {
+Line 3475 
 sub _tree_construction_main ($) {
              }
              #
-           } elsif ($token->{type} eq 'comment') {
-             my $comment = $self->{document}->create_comment ($token->{data});
-             $self->{open_elements}->[-1]->[0]->append_child ($comment);
-             !!!next-token;
-             redo B;
            } elsif ($token->{type} eq 'start tag') {
              if ({base => ($self->{insertion_mode} eq 'in head' or
                            $self->{insertion_mode} eq 'after head'),
-                  link => 1, meta => 1}->{$token->{tag_name}}) {
+                  link => 1}->{$token->{tag_name}}) {
                ## NOTE: There is an "as if in head" code clone.
                if ($self->{insertion_mode} eq 'after head') {
                  !!!parse-error (type => 'after head:'.$token->{tag_name});
-Line 3361 
 sub _tree_construction_main ($) {
+Line 3486 
 sub _tree_construction_main ($) {
                }
                !!!insert-element ($token->{tag_name}, $token->{attributes});
                pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
+               pop @{$self->{open_elements}}
+                   if $self->{insertion_mode} eq 'after head';
+               !!!next-token;
+               redo B;
+             } elsif ($token->{tag_name} eq 'meta') {
+               ## NOTE: There is an "as if in head" code clone.
+               if ($self->{insertion_mode} eq 'after head') {
+                 !!!parse-error (type => 'after head:'.$token->{tag_name});
+                 push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
+               }
+               !!!insert-element ($token->{tag_name}, $token->{attributes});
+               pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
+               unless ($self->{confident}) {
+                 my $charset;
+                 if ($token->{attributes}->{charset}) { ## TODO: And if supported
+                   $charset = $token->{attributes}->{charset}->{value};
+                 }
+                 if ($token->{attributes}->{'http-equiv'}) {
+                   ## ISSUE: The algorithm name in the spec was incorrect, so it was not linked to its definition.
+                   if ($token->{attributes}->{'http-equiv'}->{value}
+                       =~ /\A[^;]*;[\x09-\x0D\x20]*charset[\x09-\x0D\x20]*=
+                           [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
+                           ([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) {
+                     $charset = defined $1 ? $1 : defined $2 ? $2 : $3;
+                   } ## TODO: And if supported
+                 }
+                 ## TODO: Change the encoding
+               }
                ## TODO: Extracting |charset| from |meta|.
                pop @{$self->{open_elements}}
                    if $self->{insertion_mode} eq 'after head';
-Line 3373 
 sub _tree_construction_main ($) {
+Line 3528 
 sub _tree_construction_main ($) {
                  !!!parse-error (type => 'after head:'.$token->{tag_name});
                  push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
                }
-               $parse_rcdata->('RCDATA', $insert_to_current);
+               my $parent = defined $self->{head_element} ? $self->{head_element}
+                   : $self->{open_elements}->[-1]->[0];
+               $parse_rcdata->(RCDATA_CONTENT_MODEL,
+                               sub { $parent->append_child ($_[0]) });
                pop @{$self->{open_elements}}
                    if $self->{insertion_mode} eq 'after head';
                redo B;
-Line 3385 
 sub _tree_construction_main ($) {
+Line 3543 
 sub _tree_construction_main ($) {
                  !!!parse-error (type => 'after head:'.$token->{tag_name});
                  push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
                }
-               $parse_rcdata->('CDATA', $insert_to_current);
+               $parse_rcdata->(CDATA_CONTENT_MODEL, $insert_to_current);
                pop @{$self->{open_elements}}
                    if $self->{insertion_mode} eq 'after head';
                redo B;
-Line 3397 
 sub _tree_construction_main ($) {
+Line 3555 
 sub _tree_construction_main ($) {
                  !!!next-token;
                  redo B;
                } elsif ($self->{insertion_mode} eq 'in head noscript') {
-                 !!!parse-error (type => 'noscript in noscript');
+                 !!!parse-error (type => 'in noscript:noscript');
                  ## Ignore the token
+                 !!!next-token;
                  redo B;
                } else {
                  #
-Line 3449 
 sub _tree_construction_main ($) {
+Line 3608 
 sub _tree_construction_main ($) {
                !!!next-token;
                redo B;
              } elsif ($self->{insertion_mode} eq 'in head' and
-                      ($token->{tag_name} eq 'body' or
+                      {
-                       $token->{tag_name} eq 'html')) {
+                       body => 1, html => 1,
+                       p => 1, br => 1,
+                      }->{$token->{tag_name}}) {
+               #
+             } elsif ($self->{insertion_mode} eq 'in head noscript' and
+                      {
+                       p => 1, br => 1,
+                      }->{$token->{tag_name}}) {
                #
              } elsif ($self->{insertion_mode} ne 'after head') {
                !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
-Line 3489 
 sub _tree_construction_main ($) {
+Line 3655 
 sub _tree_construction_main ($) {
              !!!next-token;
              redo B;
-           } elsif ($token->{type} eq 'comment') {
-             ## NOTE: There is a code clone of "comment in body".
-             my $comment = $self->{document}->create_comment ($token->{data});
-             $self->{open_elements}->[-1]->[0]->append_child ($comment);
-             !!!next-token;
-             redo B;
            } else {
              $in_body->($insert_to_current);
              redo B;
-Line 3558 
 sub _tree_construction_main ($) {
+Line 3718 
 sub _tree_construction_main ($) {
              !!!next-token;
              redo B;
-           } elsif ($token->{type} eq 'comment') {
-             my $comment = $self->{document}->create_comment ($token->{data});
-             $self->{open_elements}->[-1]->[0]->append_child ($comment);
-             !!!next-token;
-             redo B;
            } elsif ($token->{type} eq 'start tag') {
              if ({
                   caption => 1,
-Line 3634 
 sub _tree_construction_main ($) {
+Line 3789 
 sub _tree_construction_main ($) {
                if ({
                     dd => 1, dt => 1, li => 1, p => 1,
                     td => 1, th => 1, tr => 1,
+                    tbody => 1, tfoot=> 1, thead => 1,
                    }->{$self->{open_elements}->[-1]->[1]}) {
                  !!!back-token; # <table>
                  $token = {type => 'end tag', tag_name => 'table'};
-Line 3682 
 sub _tree_construction_main ($) {
+Line 3838 
 sub _tree_construction_main ($) {
                if ({
                     dd => 1, dt => 1, li => 1, p => 1,
                     td => 1, th => 1, tr => 1,
+                    tbody => 1, tfoot=> 1, thead => 1,
                    }->{$self->{open_elements}->[-1]->[1]}) {
                  !!!back-token;
                  $token = {type => 'end tag',
-Line 3727 
 sub _tree_construction_main ($) {
+Line 3884 
 sub _tree_construction_main ($) {
              !!!next-token;
              redo B;
-           } elsif ($token->{type} eq 'comment') {
-             ## NOTE: This is a code clone of "comment in body".
-             my $comment = $self->{document}->create_comment ($token->{data});
-             $self->{open_elements}->[-1]->[0]->append_child ($comment);
-             !!!next-token;
-             redo B;
            } elsif ($token->{type} eq 'start tag') {
              if ({
                   caption => 1, col => 1, colgroup => 1, tbody => 1,
-Line 3765 
 sub _tree_construction_main ($) {
+Line 3916 
 sub _tree_construction_main ($) {
                if ({
                     dd => 1, dt => 1, li => 1, p => 1,
                     td => 1, th => 1, tr => 1,
+                    tbody => 1, tfoot=> 1, thead => 1,
                    }->{$self->{open_elements}->[-1]->[1]}) {
                  !!!back-token; # <?>
                  $token = {type => 'end tag', tag_name => 'caption'};
-Line 3815 
 sub _tree_construction_main ($) {
+Line 3967 
 sub _tree_construction_main ($) {
                if ({
                     dd => 1, dt => 1, li => 1, p => 1,
                     td => 1, th => 1, tr => 1,
+                    tbody => 1, tfoot=> 1, thead => 1,
                    }->{$self->{open_elements}->[-1]->[1]}) {
                  !!!back-token;
                  $token = {type => 'end tag',
-Line 3862 
 sub _tree_construction_main ($) {
+Line 4015 
 sub _tree_construction_main ($) {
                if ({
                     dd => 1, dt => 1, li => 1, p => 1,
                     td => 1, th => 1, tr => 1,
+                    tbody => 1, tfoot=> 1, thead => 1,
                    }->{$self->{open_elements}->[-1]->[1]}) {
                  !!!back-token; # </table>
                  $token = {type => 'end tag', tag_name => 'caption'};
-Line 3890 
 sub _tree_construction_main ($) {
+Line 4044 
 sub _tree_construction_main ($) {
                       }->{$token->{tag_name}}) {
                !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
                ## Ignore the token
+               !!!next-token;
                redo B;
              } else {
                #
-Line 3911 
 sub _tree_construction_main ($) {
+Line 4066 
 sub _tree_construction_main ($) {
              }
              #
-           } elsif ($token->{type} eq 'comment') {
-             my $comment = $self->{document}->create_comment ($token->{data});
-             $self->{open_elements}->[-1]->[0]->append_child ($comment);
-             !!!next-token;
-             redo B;
            } elsif ($token->{type} eq 'start tag') {
              if ($token->{tag_name} eq 'col') {
                !!!insert-element ($token->{tag_name}, $token->{attributes});
-Line 4021 
 sub _tree_construction_main ($) {
+Line 4171 
 sub _tree_construction_main ($) {
              !!!next-token;
              redo B;
-           } elsif ($token->{type} eq 'comment') {
-             ## Copied from 'in table'
-             my $comment = $self->{document}->create_comment ($token->{data});
-             $self->{open_elements}->[-1]->[0]->append_child ($comment);
-             !!!next-token;
-             redo B;
            } elsif ($token->{type} eq 'start tag') {
              if ({
                   tr => 1,
-Line 4127 
 sub _tree_construction_main ($) {
+Line 4271 
 sub _tree_construction_main ($) {
                if ({
                     dd => 1, dt => 1, li => 1, p => 1,
                     td => 1, th => 1, tr => 1,
+                    tbody => 1, tfoot=> 1, thead => 1,
                    }->{$self->{open_elements}->[-1]->[1]}) {
                  !!!back-token; # <table>
                  $token = {type => 'end tag', tag_name => 'table'};
-Line 4305 
 sub _tree_construction_main ($) {
+Line 4450 
 sub _tree_construction_main ($) {
              !!!next-token;
              redo B;
-           } elsif ($token->{type} eq 'comment') {
-             ## Copied from 'in table'
-             my $comment = $self->{document}->create_comment ($token->{data});
-             $self->{open_elements}->[-1]->[0]->append_child ($comment);
-             !!!next-token;
-             redo B;
            } elsif ($token->{type} eq 'start tag') {
              if ($token->{tag_name} eq 'th' or
                  $token->{tag_name} eq 'td') {
-Line 4395 
 sub _tree_construction_main ($) {
+Line 4534 
 sub _tree_construction_main ($) {
                if ({
                     dd => 1, dt => 1, li => 1, p => 1,
                     td => 1, th => 1, tr => 1,
+                    tbody => 1, tfoot=> 1, thead => 1,
                    }->{$self->{open_elements}->[-1]->[1]}) {
                  !!!back-token; # <table>
                  $token = {type => 'end tag', tag_name => 'table'};
-Line 4569 
 sub _tree_construction_main ($) {
+Line 4709 
 sub _tree_construction_main ($) {
              !!!next-token;
              redo B;
-           } elsif ($token->{type} eq 'comment') {
-             ## NOTE: This is a code clone of "comment in body".
-             my $comment = $self->{document}->create_comment ($token->{data});
-             $self->{open_elements}->[-1]->[0]->append_child ($comment);
-             !!!next-token;
-             redo B;
            } elsif ($token->{type} eq 'start tag') {
              if ({
                   caption => 1, col => 1, colgroup => 1,
-Line 4636 
 sub _tree_construction_main ($) {
+Line 4770 
 sub _tree_construction_main ($) {
                     td => ($token->{tag_name} eq 'th'),
                     th => ($token->{tag_name} eq 'td'),
                     tr => 1,
+                    tbody => 1, tfoot=> 1, thead => 1,
                    }->{$self->{open_elements}->[-1]->[1]}) {
                  !!!back-token;
                  $token = {type => 'end tag',
-Line 4710 
 sub _tree_construction_main ($) {
+Line 4845 
 sub _tree_construction_main ($) {
              $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
              !!!next-token;
              redo B;
-           } elsif ($token->{type} eq 'comment') {
-             my $comment = $self->{document}->create_comment ($token->{data});
-             $self->{open_elements}->[-1]->[0]->append_child ($comment);
-             !!!next-token;
-             redo B;
            } elsif ($token->{type} eq 'start tag') {
              if ($token->{tag_name} eq 'option') {
                if ($self->{open_elements}->[-1]->[1] eq 'option') {
-Line 4887 
 sub _tree_construction_main ($) {
+Line 5017 
 sub _tree_construction_main ($) {
          } elsif ($self->{insertion_mode} eq 'after body') {
            if ($token->{type} eq 'character') {
              if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
+               my $data = $1;
                ## As if in body
                $reconstruct_active_formatting_elements->($insert_to_current);
-               $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
+               $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
                unless (length $token->{data}) {
                  !!!next-token;
-Line 4899 
 sub _tree_construction_main ($) {
+Line 5030 
 sub _tree_construction_main ($) {
              }
              #
-             !!!parse-error (type => 'after body:#'.$token->{type});
+             !!!parse-error (type => 'after body:#character');
-           } elsif ($token->{type} eq 'comment') {
-             my $comment = $self->{document}->create_comment ($token->{data});
-             $self->{open_elements}->[0]->[0]->append_child ($comment);
-             !!!next-token;
-             redo B;
            } elsif ($token->{type} eq 'start tag') {
              !!!parse-error (type => 'after body:'.$token->{tag_name});
              #
-Line 4916 
 sub _tree_construction_main ($) {
+Line 5042 
 sub _tree_construction_main ($) {
                  !!!next-token;
                  redo B;
                } else {
-                 $phase = 'trailing end';
+                 $previous_insertion_mode = $self->{insertion_mode};
+                 $self->{insertion_mode} = 'trailing end';
                  !!!next-token;
                  redo B;
                }
-Line 4924 
 sub _tree_construction_main ($) {
+Line 5051 
 sub _tree_construction_main ($) {
                !!!parse-error (type => 'after body:/'.$token->{tag_name});
              }
            } else {
-             !!!parse-error (type => 'after body:#'.$token->{type});
+             die "$0: $token->{type}: Unknown token type";
            }
            $self->{insertion_mode} = 'in body';
            ## reprocess
            redo B;
-         } elsif ($self->{insertion_mode} eq 'in frameset') {
+     } elsif ($self->{insertion_mode} eq 'in frameset') {
-           if ($token->{type} eq 'character') {
+       if ($token->{type} eq 'character') {
-             if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
+         if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
-               $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
+           $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
-               unless (length $token->{data}) {
-                 !!!next-token;
-                 redo B;
-               }
-             }
-             #
+           unless (length $token->{data}) {
-           } elsif ($token->{type} eq 'comment') {
-             my $comment = $self->{document}->create_comment ($token->{data});
-             $self->{open_elements}->[-1]->[0]->append_child ($comment);
              !!!next-token;
              redo B;
-           } elsif ($token->{type} eq 'start tag') {
-             if ($token->{tag_name} eq 'frameset') {
-               !!!insert-element ($token->{tag_name}, $token->{attributes});
-               !!!next-token;
-               redo B;
-             } elsif ($token->{tag_name} eq 'frame') {
-               !!!insert-element ($token->{tag_name}, $token->{attributes});
-               pop @{$self->{open_elements}};
-               !!!next-token;
-               redo B;
-             } elsif ($token->{tag_name} eq 'noframes') {
-               $in_body->($insert_to_current);
-               redo B;
-             } else {
-               #
-             }
-           } elsif ($token->{type} eq 'end tag') {
-             if ($token->{tag_name} eq 'frameset') {
-               if ($self->{open_elements}->[-1]->[1] eq 'html' and
-                   @{$self->{open_elements}} == 1) {
-                 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
-                 ## Ignore the token
-                 !!!next-token;
-               } else {
-                 pop @{$self->{open_elements}};
-                 !!!next-token;
-               }
-               ## if not inner_html and
-               if ($self->{open_elements}->[-1]->[1] ne 'frameset') {
-                 $self->{insertion_mode} = 'after frameset';
-               }
-               redo B;
-             } else {
-               #
-             }
-           } else {
-             #
            }
+         }
-           if (defined $token->{tag_name}) {
-             !!!parse-error (type => 'in frameset:'.$token->{tag_name});
+         !!!parse-error (type => 'in frameset:#character');
+         ## Ignore the token
+         !!!next-token;
+         redo B;
+       } elsif ($token->{type} eq 'start tag') {
+         if ($token->{tag_name} eq 'frameset') {
+           !!!insert-element ($token->{tag_name}, $token->{attributes});
+           !!!next-token;
+           redo B;
+         } elsif ($token->{tag_name} eq 'frame') {
+           !!!insert-element ($token->{tag_name}, $token->{attributes});
+           pop @{$self->{open_elements}};
+           !!!next-token;
+           redo B;
+         } elsif ($token->{tag_name} eq 'noframes') {
+           $in_body->($insert_to_current);
+           redo B;
+         } else {
+           !!!parse-error (type => 'in frameset:'.$token->{tag_name});
+           ## Ignore the token
+           !!!next-token;
+           redo B;
+         }
+       } elsif ($token->{type} eq 'end tag') {
+         if ($token->{tag_name} eq 'frameset') {
+           if ($self->{open_elements}->[-1]->[1] eq 'html' and
+               @{$self->{open_elements}} == 1) {
+             !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
+             ## Ignore the token
+             !!!next-token;
            } else {
-             !!!parse-error (type => 'in frameset:#'.$token->{type});
+             pop @{$self->{open_elements}};
+             !!!next-token;
+           }
+           if (not defined $self->{inner_html_node} and
+               $self->{open_elements}->[-1]->[1] ne 'frameset') {
+             $self->{insertion_mode} = 'after frameset';
            }
+           redo B;
+         } else {
+           !!!parse-error (type => 'in frameset:/'.$token->{tag_name});
            ## Ignore the token
            !!!next-token;
            redo B;
-         } elsif ($self->{insertion_mode} eq 'after frameset') {
+         }
-           if ($token->{type} eq 'character') {
+       } else {
+         die "$0: $token->{type}: Unknown token type";
+       }
+     } elsif ($self->{insertion_mode} eq 'after frameset') {
+       if ($token->{type} eq 'character') {
              if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
-               $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
+               $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
                unless (length $token->{data}) {
                  !!!next-token;
-Line 5006 
 sub _tree_construction_main ($) {
+Line 5128 
 sub _tree_construction_main ($) {
                }
              }
-             #
+             if ($token->{data} =~ s/^[^\x09\x0A\x0B\x0C\x20]+//) {
-           } elsif ($token->{type} eq 'comment') {
+               !!!parse-error (type => 'after frameset:#character');
-             my $comment = $self->{document}->create_comment ($token->{data});
-             $self->{open_elements}->[-1]->[0]->append_child ($comment);
+               ## Ignore the token.
-             !!!next-token;
+               if (length $token->{data}) {
-             redo B;
+                 ## reprocess the rest of characters
-           } elsif ($token->{type} eq 'start tag') {
+               } else {
-             if ($token->{tag_name} eq 'noframes') {
+                 !!!next-token;
-               $in_body->($insert_to_current);
+               }
-               redo B;
-             } else {
-               #
-             }
-           } elsif ($token->{type} eq 'end tag') {
-             if ($token->{tag_name} eq 'html') {
-               $phase = 'trailing end';
-               !!!next-token;
                redo B;
-             } else {
-               #
              }
-           } else {
-             #
+         die qq[$0: Character "$token->{data}"];
-           }
+       } elsif ($token->{type} eq 'start tag') {
+         if ($token->{tag_name} eq 'noframes') {
-           if (defined $token->{tag_name}) {
+           $in_body->($insert_to_current);
-             !!!parse-error (type => 'after frameset:'.$token->{tag_name});
+           redo B;
-           } else {
+         } else {
-             !!!parse-error (type => 'after frameset:#'.$token->{type});
+           !!!parse-error (type => 'after frameset:'.$token->{tag_name});
-           }
            ## Ignore the token
            !!!next-token;
            redo B;
+         }
-           ## ISSUE: An issue in spec there
+       } elsif ($token->{type} eq 'end tag') {
+         if ($token->{tag_name} eq 'html') {
+           $previous_insertion_mode = $self->{insertion_mode};
+           $self->{insertion_mode} = 'trailing end';
+           !!!next-token;
+           redo B;
          } else {
-           die "$0: $self->{insertion_mode}: Unknown insertion mode";
+           !!!parse-error (type => 'after frameset:/'.$token->{tag_name});
+           ## Ignore the token
+           !!!next-token;
+           redo B;
          }
+       } else {
+         die "$0: $token->{type}: Unknown token type";
        }
-     } elsif ($phase eq 'trailing end') {
+       ## ISSUE: An issue in spec here
+     } elsif ($self->{insertion_mode} eq 'trailing end') {
        ## states in the main stage is preserved yet # MUST
-       if ($token->{type} eq 'DOCTYPE') {
+       if ($token->{type} eq 'character') {
-         !!!parse-error (type => 'after html:#DOCTYPE');
-         ## Ignore the token
-         !!!next-token;
-         redo B;
-       } elsif ($token->{type} eq 'comment') {
-         my $comment = $self->{document}->create_comment ($token->{data});
-         $self->{document}->append_child ($comment);
-         !!!next-token;
-         redo B;
-       } elsif ($token->{type} eq 'character') {
          if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
            my $data = $1;
            ## As if in the main phase.
            ## NOTE: The insertion mode in the main phase
            ## just before the phase has been changed to the trailing
            ## end phase is either "after body" or "after frameset".
-           $reconstruct_active_formatting_elements->($insert_to_current)
+           $reconstruct_active_formatting_elements->($insert_to_current);
-             if $phase eq 'main';
            $self->{open_elements}->[-1]->[0]->manakai_append_text ($data);
-Line 5077 
 sub _tree_construction_main ($) {
+Line 5189 
 sub _tree_construction_main ($) {
          }
          !!!parse-error (type => 'after html:#character');
-         $phase = 'main';
+         $self->{insertion_mode} = $previous_insertion_mode;
          ## reprocess
          redo B;
-       } elsif ($token->{type} eq 'start tag' or
+       } elsif ($token->{type} eq 'start tag') {
-                $token->{type} eq 'end tag') {
          !!!parse-error (type => 'after html:'.$token->{tag_name});
-         $phase = 'main';
+         $self->{insertion_mode} = $previous_insertion_mode;
+         ## reprocess
+         redo B;
+       } elsif ($token->{type} eq 'end tag') {
+         !!!parse-error (type => 'after html:/'.$token->{tag_name});
+         $self->{insertion_mode} = $previous_insertion_mode;
          ## reprocess
          redo B;
-       } elsif ($token->{type} eq 'end-of-file') {
-         ## Stop parsing
-         last B;
        } else {
          die "$0: $token->{type}: Unknown token";
        }
+     } else {
+       die "$0: $self->{insertion_mode}: Unknown insertion mode";
      }
    } # B
-Line 5179 
 sub set_inner_html ($$$) {
+Line 5294 
 sub set_inner_html ($$$) {
      ## Step 2
      my $node_ln = $node->local_name;
-     $p->{content_model_flag} = {
+     $p->{content_model} = {
-       title => 'RCDATA',
+       title => RCDATA_CONTENT_MODEL,
-       textarea => 'RCDATA',
+       textarea => RCDATA_CONTENT_MODEL,
-       style => 'CDATA',
+       style => CDATA_CONTENT_MODEL,
-       script => 'CDATA',
+       script => CDATA_CONTENT_MODEL,
-       xmp => 'CDATA',
+       xmp => CDATA_CONTENT_MODEL,
-       iframe => 'CDATA',
+       iframe => CDATA_CONTENT_MODEL,
-       noembed => 'CDATA',
+       noembed => CDATA_CONTENT_MODEL,
-       noframes => 'CDATA',
+       noframes => CDATA_CONTENT_MODEL,
-       noscript => 'CDATA',
+       noscript => CDATA_CONTENT_MODEL,
-       plaintext => 'PLAINTEXT',
+       plaintext => PLAINTEXT_CONTENT_MODEL,
-     }->{$node_ln} || 'PCDATA';
+     }->{$node_ln};
-        ## ISSUE: What is "the name of the element"? local name?
+     $p->{content_model} = PCDATA_CONTENT_MODEL
+         unless defined $p->{content_model};
+         ## ISSUE: What is "the name of the element"? local name?
      $p->{inner_html_node} = [$node, $node_ln];

 Legend:
 - Removed from v.1.27
   Changed lines
 + Added in v.1.41

admin@suikawiki.org	ViewVC Help
Powered by ViewVC 1.1.24