whatpm/Whatpm/HTML.pm.src

package Whatpm::HTML;
use strict;
our $VERSION=do{my @r=(q$Revision: 1.196 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
use Error qw(:try);

## NOTE: This module does not check all HTML5 parse errors; character
## encoding related parse errors are expected to be handled by the
## relevant modules.
## Parse errors for control characters that are not allowed in HTML5
## documents, for surrogate code points, and for noncharacter code
## points, as well as U+FFFD substitutions for characters whose code
## point is higher than U+10FFFF, may be detected by combining the
## parser with the checker implemented by
## Whatpm::Charset::UnicodeChecker (for a usage example, see
## |t/HTML-tree.t| in the Whatpm package or the
## WebHACC::Language::HTML module in the WebHACC package).

## ISSUE:
## var doc = implementation.createDocument (null, null, null);
## doc.write ('');
## alert (doc.compatMode);

require IO::Handle;

## Namespace URIs used as keys and values by the lookup tables in this
## file (|$el_category_f|, |$foreign_attr_xname|).
my ($HTML_NS, $MML_NS, $SVG_NS, $XLINK_NS, $XML_NS, $XMLNS_NS) = (
  'http://www.w3.org/1999/xhtml',
  'http://www.w3.org/1998/Math/MathML',
  'http://www.w3.org/2000/svg',
  'http://www.w3.org/1999/xlink',
  'http://www.w3.org/XML/1998/namespace',
  'http://www.w3.org/2000/xmlns/',
);

## Element category flags.  Every category owns exactly one bit, so
## categories can be combined with |\|| and tested with |&|.
sub A_EL () { 1 << 0 }
sub ADDRESS_EL () { 1 << 1 }
sub BODY_EL () { 1 << 2 }
sub BUTTON_EL () { 1 << 3 }
sub CAPTION_EL () { 1 << 4 }
sub DD_EL () { 1 << 5 }
sub DIV_EL () { 1 << 6 }
sub DT_EL () { 1 << 7 }
sub FORM_EL () { 1 << 8 }
sub FORMATTING_EL () { 1 << 9 }
sub FRAMESET_EL () { 1 << 10 }
sub HEADING_EL () { 1 << 11 }
sub HTML_EL () { 1 << 12 }
sub LI_EL () { 1 << 13 }
sub NOBR_EL () { 1 << 14 }
sub OPTION_EL () { 1 << 15 }
sub OPTGROUP_EL () { 1 << 16 }
sub P_EL () { 1 << 17 }
sub SELECT_EL () { 1 << 18 }
sub TABLE_EL () { 1 << 19 }
sub TABLE_CELL_EL () { 1 << 20 }
sub TABLE_ROW_EL () { 1 << 21 }
sub TABLE_ROW_GROUP_EL () { 1 << 22 }
sub MISC_SCOPING_EL () { 1 << 23 }
sub MISC_SPECIAL_EL () { 1 << 24 }
sub FOREIGN_EL () { 1 << 25 }
sub FOREIGN_FLOW_CONTENT_EL () { 1 << 26 }
sub MML_AXML_EL () { 1 << 27 }
sub RUBY_EL () { 1 << 28 }
sub RUBY_COMPONENT_EL () { 1 << 29 }

## Union of the table, table row and table row group categories.
sub TABLE_ROWS_EL () {
  TABLE_EL
    | TABLE_ROW_EL
    | TABLE_ROW_GROUP_EL
}

## NOTE: Used in the "generate implied end tags" algorithm.
## NOTE: One implementation of "generate implied end tags" uses a
## modified version of END_TAG_OPTIONAL_EL (search for the algorithm
## name).
sub END_TAG_OPTIONAL_EL () {
  DD_EL
    | DT_EL
    | LI_EL
    | OPTION_EL
    | OPTGROUP_EL
    | P_EL
    | RUBY_COMPONENT_EL
}

## NOTE: Used in the </body> and EOF algorithms.
sub ALL_END_TAG_OPTIONAL_EL () {
  ## List item and paragraph categories
  DD_EL | DT_EL | LI_EL | P_EL |
  ## Document structure and table categories
  BODY_EL | HTML_EL | TABLE_CELL_EL | TABLE_ROW_EL | TABLE_ROW_GROUP_EL
}

## Union of the categories collected under the name "scoping".
sub SCOPING_EL () {
  BUTTON_EL
    | CAPTION_EL
    | HTML_EL
    | TABLE_EL
    | TABLE_CELL_EL
    | MISC_SCOPING_EL
}

## Union of the |html| and |table| categories.
sub TABLE_SCOPING_EL () { HTML_EL | TABLE_EL }

## Union of the |html| and table row group categories.
sub TABLE_ROWS_SCOPING_EL () { HTML_EL | TABLE_ROW_GROUP_EL }

## Union of the |html| and table row categories.
sub TABLE_ROW_SCOPING_EL () { HTML_EL | TABLE_ROW_EL }

## Union of the categories collected under the name "special".
sub SPECIAL_EL () {
  ADDRESS_EL | BODY_EL | DIV_EL |
  DD_EL | DT_EL | LI_EL | P_EL |
  FORM_EL | FRAMESET_EL | HEADING_EL |
  OPTION_EL | OPTGROUP_EL | SELECT_EL |
  TABLE_ROW_EL | TABLE_ROW_GROUP_EL |
  MISC_SPECIAL_EL
}

## Category flags of HTML elements, keyed by element name.  The table is
## filled one category at a time.
my $el_category = {
  ## Elements that have a category flag of their own.  |a| and |nobr|
  ## additionally carry the formatting flag.
  a => A_EL | FORMATTING_EL,
  address => ADDRESS_EL,
  body => BODY_EL,
  button => BUTTON_EL,
  caption => CAPTION_EL,
  dd => DD_EL,
  div => DIV_EL,
  dt => DT_EL,
  form => FORM_EL,
  frameset => FRAMESET_EL,
  html => HTML_EL,
  li => LI_EL,
  nobr => NOBR_EL | FORMATTING_EL,
  optgroup => OPTGROUP_EL,
  option => OPTION_EL,
  p => P_EL,
  ruby => RUBY_EL,
  select => SELECT_EL,
  table => TABLE_EL,
  tr => TABLE_ROW_EL,
};

$el_category->{$_} = FORMATTING_EL
    for (qw(b big em font i s small strike strong tt u));

$el_category->{$_} = HEADING_EL
    for (qw(h1 h2 h3 h4 h5 h6));

$el_category->{$_} = MISC_SCOPING_EL
    for (qw(applet marquee object));

$el_category->{$_} = RUBY_COMPONENT_EL
    for (qw(rp rt));

$el_category->{$_} = TABLE_CELL_EL
    for (qw(td th));

$el_category->{$_} = TABLE_ROW_GROUP_EL
    for (qw(tbody tfoot thead));

## NOTE: |image| is deliberately absent; it is commented out in the spec.
$el_category->{$_} = MISC_SPECIAL_EL
    for (qw(
      area article aside base basefont bgsound blockquote br
      center col colgroup command datagrid details dialog dir dl
      embed eventsource fieldset figure footer frame
      head header hr iframe img input isindex
      link listing menu meta nav noembed noframes noscript
      ol param plaintext pre script section spacer style
      textarea title ul wbr
    ));

## Category flags of non-HTML elements, keyed by namespace URI and then
## by element local name.
## NOTE: In addition, FOREIGN_EL is set to non-HTML elements.
my $el_category_f = {
  $MML_NS => {
    'annotation-xml' => MML_AXML_EL,
    (map { ($_ => FOREIGN_FLOW_CONTENT_EL) } qw(mi mo mn ms mtext)),
  },
  $SVG_NS => {
    (map { ($_ => FOREIGN_FLOW_CONTENT_EL) } qw(foreignObject desc title)),
  },
};

## Maps an all-lowercase SVG attribute name to its mixed-case spelling.
## Each key is the lowercase form of its value, so the table is derived
## from the list of mixed-case names.
my $svg_attr_name = {
  map { (lc ($_) => $_) } qw(
    attributeName attributeType
    baseFrequency baseProfile
    calcMode clipPathUnits contentScriptType contentStyleType
    diffuseConstant
    edgeMode externalResourcesRequired
    filterRes filterUnits
    glyphRef gradientTransform gradientUnits
    kernelMatrix kernelUnitLength keyPoints keySplines keyTimes
    lengthAdjust limitingConeAngle
    markerHeight markerUnits markerWidth maskContentUnits maskUnits
    numOctaves
    pathLength patternContentUnits patternTransform patternUnits
    pointsAtX pointsAtY pointsAtZ
    preserveAlpha preserveAspectRatio primitiveUnits
    refX refY repeatCount repeatDur
    requiredExtensions requiredFeatures
    specularConstant specularExponent spreadMethod startOffset
    stdDeviation stitchTiles surfaceScale systemLanguage
    tableValues targetX targetY textLength
    viewBox viewTarget
    xChannelSelector
    yChannelSelector
    zoomAndPan
  )
};

## Maps a qualified attribute name, as written on a foreign element, to
## |[namespace URI, [prefix, local name]]|.
my $foreign_attr_xname = {
  (map { ("xlink:$_" => [$XLINK_NS, ['xlink', $_]]) }
       qw(actuate arcrole href role show title type)),
  (map { ("xml:$_" => [$XML_NS, ['xml', $_]]) }
       qw(base lang space)),
  'xmlns' => [$XMLNS_NS, [undef, 'xmlns']],
  'xmlns:xlink' => [$XMLNS_NS, ['xmlns', 'xlink']],
};

## ISSUE: xmlns:xlink="non-xlink-ns" is not an error.

## Replacement table for code points, keyed by the original code point;
## the value is the code point to use instead.  Presumably consulted when
## a numeric character reference is resolved (the consumer is outside
## this part of the file).
##
## 0x0D is replaced by LF.  0x80-0x9F are replaced by the code points
## listed below (e.g. 0x80 becomes U+20AC); entries with no replacement
## of their own become U+FFFD.
my $charref_map = {
  0x0D => 0x000A,
  0x80 => 0x20AC,
  0x81 => 0xFFFD,
  0x82 => 0x201A,
  0x83 => 0x0192,
  0x84 => 0x201E,
  0x85 => 0x2026,
  0x86 => 0x2020,
  0x87 => 0x2021,
  0x88 => 0x02C6,
  0x89 => 0x2030,
  0x8A => 0x0160,
  0x8B => 0x2039,
  0x8C => 0x0152,
  0x8D => 0xFFFD,
  0x8E => 0x017D,
  0x8F => 0xFFFD,
  0x90 => 0xFFFD,
  0x91 => 0x2018,
  0x92 => 0x2019,
  0x93 => 0x201C,
  0x94 => 0x201D,
  0x95 => 0x2022,
  0x96 => 0x2013,
  0x97 => 0x2014,
  0x98 => 0x02DC,
  0x99 => 0x2122,
  0x9A => 0x0161,
  0x9B => 0x203A,
  0x9C => 0x0153,
  0x9D => 0xFFFD,
  0x9E => 0x017E,
  0x9F => 0x0178,
}; # $charref_map

## Control characters, surrogates and noncharacters all become U+FFFD.
$charref_map->{$_} = 0xFFFD
    for 0x0000..0x0008, 0x000B, 0x000E..0x001F, 0x007F,
        0xD800..0xDFFF,
        ## The contiguous noncharacter block is U+FDD0..U+FDEF (32 code
        ## points); the range used to stop at U+FDDF.
        0xFDD0..0xFDEF,
        ## The last two code points of each of the 17 planes
        ## (U+FFFE, U+FFFF, U+1FFFE, ..., U+10FFFF).
        (map { (($_ << 16) | 0xFFFE, ($_ << 16) | 0xFFFF) } 0x00..0x10);

## TODO: Invoke the reset algorithm when a resettable element is
## created (cf. HTML5 revision 2259).

## Parses a string of bytes by opening it as an in-memory file handle and
## delegating to |parse_byte_stream|.
##
## Arguments (after the invocant):
##   $charset_name - passed through to |parse_byte_stream|
##   $_[0]         - the byte string, or a reference to it
##   remaining     - passed through unchanged (document, error callback,
##                   wrapper callback)
##
## Returns what |parse_byte_stream| returns.  Dies if the in-memory
## handle cannot be opened, instead of continuing with an unopened
## handle.
sub parse_byte_string ($$$$;$) {
  my $self = shift;
  my $charset_name = shift;

  ## Take a reference to the caller's string rather than copying it.
  my $bytes_ref = ref $_[0] ? $_[0] : \($_[0]);
  open my $input, '<', $bytes_ref
      or die "parse_byte_string: cannot open in-memory byte stream: $!";

  return $self->parse_byte_stream ($charset_name, $input, @_[1..$#_]);
} # parse_byte_string

## Parses a byte stream into a document, choosing a character encoding
## first.
##
## Arguments (after the invocant, which may be an object or a class name;
## a class name is instantiated via |new|):
##   $charset_name - encoding label known from outside the document;
##                   may be undef
##   $byte_stream  - byte handle; |getc| is called on it while sniffing
##   $doc          - document, handed on to |parse_char_stream|
##   $onerror      - optional error callback; the default warns
##   $get_wrapper  - optional code reference applied to the decode handle
##                   before parsing; the default returns it unchanged
##
## Returns what |parse_char_stream| returns.  If
## |Whatpm::HTML::RestartParser| is thrown during parsing (see
## |{change_encoding}| below), |parse_char_stream| is run a second time
## with the decode handle created by |{change_encoding}|.
sub parse_byte_stream ($$$$;$$) {
  # my ($self, $charset_name, $byte_stream, $doc, $onerror, $get_wrapper) = @_;
  my $self = ref $_[0] ? shift : shift->new;
  my $charset_name = shift;
  my $byte_stream = $_[0];

  my $onerror = $_[2] || sub {
    my (%opt) = @_;
    warn "Parse error ($opt{type})\n";
  };
  $self->{parse_error} = $onerror; # replaced later by parse_char_stream

  my $get_wrapper = $_[3] || sub ($) {
    return $_[0]; # $_[0] = byte stream handle, returned = arg to char handle
  };

  ## HTML5 encoding sniffing algorithm
  require Message::Charset::Info;
  my $charset;
  my $buffer;
  my ($char_stream, $e_status);

  SNIFFING: {
    ## NOTE: By setting |allow_fallback| option true when the
    ## |get_decode_handle| method is invoked, we ignore what the HTML5
    ## spec requires, i.e. unsupported encoding should be ignored.
      ## TODO: We should not do this unless the parser is invoked
      ## in the conformance checking mode, in which this behavior
      ## would be useful.

    ## Step 1
    if (defined $charset_name) {
      $charset = Message::Charset::Info->get_by_html_name ($charset_name);
          ## TODO: Is this ok?  Transfer protocol's parameter should be
          ## interpreted in its semantics?

      ($char_stream, $e_status) = $charset->get_decode_handle
          ($byte_stream, allow_error_reporting => 1,
           allow_fallback => 1);
      if ($char_stream) {
        $self->{confident} = 1;
        last SNIFFING;
      } else {
        !!!parse-error (type => 'charset:not supported',
                        layer => 'encode',
                        line => 1, column => 1,
                        value => $charset_name,
                        level => $self->{level}->{uncertain});
      }
    }

    ## Step 2
    ## Read at most 1024 bytes ahead for the checks below.
    my $byte_buffer = '';
    for (1..1024) {
      my $char = $byte_stream->getc;
      last unless defined $char;
      $byte_buffer .= $char;
    } ## TODO: timeout

    ## Step 3
    ## A byte order mark at the start decides the encoding.
    if ($byte_buffer =~ /^\xFE\xFF/) {
      $charset = Message::Charset::Info->get_by_html_name ('utf-16be');
      ($char_stream, $e_status) = $charset->get_decode_handle
          ($byte_stream, allow_error_reporting => 1,
           allow_fallback => 1, byte_buffer => \$byte_buffer);
      $self->{confident} = 1;
      last SNIFFING;
    } elsif ($byte_buffer =~ /^\xFF\xFE/) {
      $charset = Message::Charset::Info->get_by_html_name ('utf-16le');
      ($char_stream, $e_status) = $charset->get_decode_handle
          ($byte_stream, allow_error_reporting => 1,
           allow_fallback => 1, byte_buffer => \$byte_buffer);
      $self->{confident} = 1;
      last SNIFFING;
    } elsif ($byte_buffer =~ /^\xEF\xBB\xBF/) {
      $charset = Message::Charset::Info->get_by_html_name ('utf-8');
      ($char_stream, $e_status) = $charset->get_decode_handle
          ($byte_stream, allow_error_reporting => 1,
           allow_fallback => 1, byte_buffer => \$byte_buffer);
      $self->{confident} = 1;
      last SNIFFING;
    }

    ## Step 4
    ## TODO: <meta charset>

    ## Step 5
    ## TODO: from history

    ## Step 6
    ## Guess the encoding from the bytes read so far.
    require Whatpm::Charset::UniversalCharDet;
    $charset_name = Whatpm::Charset::UniversalCharDet->detect_byte_string
        ($byte_buffer);
    if (defined $charset_name) {
      $charset = Message::Charset::Info->get_by_html_name ($charset_name);

      ## ISSUE: Unsupported encoding is not ignored according to the spec.
      require Whatpm::Charset::DecodeHandle;
      $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
          ($byte_stream);
      ($char_stream, $e_status) = $charset->get_decode_handle
          ($buffer, allow_error_reporting => 1,
           allow_fallback => 1, byte_buffer => \$byte_buffer);
      if ($char_stream) {
        $buffer->{buffer} = $byte_buffer;
        !!!parse-error (type => 'sniffing:chardet',
                        text => $charset_name,
                        level => $self->{level}->{info},
                        layer => 'encode',
                        line => 1, column => 1);
        $self->{confident} = 0;
        last SNIFFING;
      }
    }

    ## Step 7: default
    ## TODO: Make this configurable.
    $charset = Message::Charset::Info->get_by_html_name ('windows-1252');
        ## NOTE: We choose |windows-1252| here, since |utf-8| should be 
        ## detectable in the step 6.
    require Whatpm::Charset::DecodeHandle;
    $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
        ($byte_stream);
    ($char_stream, $e_status)
        = $charset->get_decode_handle ($buffer,
                                       allow_error_reporting => 1,
                                       allow_fallback => 1,
                                       byte_buffer => \$byte_buffer);
    $buffer->{buffer} = $byte_buffer;
    !!!parse-error (type => 'sniffing:default',
                    text => 'windows-1252',
                    level => $self->{level}->{info},
                    line => 1, column => 1,
                    layer => 'encode');
    $self->{confident} = 0;
  } # SNIFFING

  ## Record the chosen encoding and report how the decoder was obtained,
  ## based on the status flags returned by |get_decode_handle|.
  if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
    $self->{input_encoding} = $charset->get_iana_name; ## TODO: Should we set actual charset decoder's encoding name?
    !!!parse-error (type => 'chardecode:fallback',
                    #text => $self->{input_encoding},
                    level => $self->{level}->{uncertain},
                    line => 1, column => 1,
                    layer => 'encode');
  } elsif (not ($e_status &
                Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL ())) {
    $self->{input_encoding} = $charset->get_iana_name;
    !!!parse-error (type => 'chardecode:no error',
                    text => $self->{input_encoding},
                    level => $self->{level}->{uncertain},
                    line => 1, column => 1,
                    layer => 'encode');
  } else {
    $self->{input_encoding} = $charset->get_iana_name;
  }

  ## Hook taking (parser, encoding label, token).  It overwrites the
  ## enclosing |$charset_name|, |$charset|, |$char_stream| and |$e_status|,
  ## which the restart handler at the end of this method reads.
  $self->{change_encoding} = sub {
    my $self = shift;
    $charset_name = shift;
    my $token = shift;

    $charset = Message::Charset::Info->get_by_html_name ($charset_name);
    ($char_stream, $e_status) = $charset->get_decode_handle
        ($byte_stream, allow_error_reporting => 1, allow_fallback => 1,
         byte_buffer => \ $buffer->{buffer});
    
    if ($char_stream) { # if supported
      ## "Change the encoding" algorithm:

      ## Step 1    
      if ($charset->{category} &
          Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) {
        $charset = Message::Charset::Info->get_by_html_name ('utf-8');
        ($char_stream, $e_status) = $charset->get_decode_handle
            ($byte_stream,
             byte_buffer => \ $buffer->{buffer});
      }
      $charset_name = $charset->get_iana_name;
      
      ## Step 2
      if (defined $self->{input_encoding} and
          $self->{input_encoding} eq $charset_name) {
        !!!parse-error (type => 'charset label:matching',
                        text => $charset_name,
                        level => $self->{level}->{info});
        $self->{confident} = 1;
        return;
      }

      !!!parse-error (type => 'charset label detected',
                      text => $self->{input_encoding},
                      value => $charset_name,
                      level => $self->{level}->{warn},
                      token => $token);
      
      ## Step 3
      # if (can) {
        ## change the encoding on the fly.
        #$self->{confident} = 1;
        #return;
      # }
      
      ## Step 4
      throw Whatpm::HTML::RestartParser ();
    }
  }; # $self->{change_encoding}

  ## Error callback installed on the (wrapped) decode handle.  Reports the
  ## error at the current position and, when an |octets| reference is
  ## supplied, overwrites the referenced value with U+FFFD.
  my $char_onerror = sub {
    my (undef, $type, %opt) = @_;
    !!!parse-error (layer => 'encode',
                    line => $self->{line}, column => $self->{column} + 1,
                    %opt, type => $type);
    if ($opt{octets}) {
      ${$opt{octets}} = "\x{FFFD}"; # replacement character
    }
  };

  my $wrapped_char_stream = $get_wrapper->($char_stream);
  $wrapped_char_stream->onerror ($char_onerror);

  my @args = ($_[1], $_[2]); # $doc, $onerror - $get_wrapper = undef;
  my $return;
  try {
    $return = $self->parse_char_stream ($wrapped_char_stream, @args);  
  } catch Whatpm::HTML::RestartParser with {
    ## NOTE: Invoked after {change_encoding}.

    if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
      $self->{input_encoding} = $charset->get_iana_name; ## TODO: Should we set actual charset decoder's encoding name?
      !!!parse-error (type => 'chardecode:fallback',
                      level => $self->{level}->{uncertain},
                      #text => $self->{input_encoding},
                      line => 1, column => 1,
                      layer => 'encode');
    } elsif (not ($e_status &
                  Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL ())) {
      $self->{input_encoding} = $charset->get_iana_name;
      !!!parse-error (type => 'chardecode:no error',
                      text => $self->{input_encoding},
                      level => $self->{level}->{uncertain},
                      line => 1, column => 1,
                      layer => 'encode');
    } else {
      $self->{input_encoding} = $charset->get_iana_name;
    }
    $self->{confident} = 1;

    ## Parse again with the decode handle created by |{change_encoding}|.
    $wrapped_char_stream = $get_wrapper->($char_stream);
    $wrapped_char_stream->onerror ($char_onerror);

    $return = $self->parse_char_stream ($wrapped_char_stream, @args);
  };
  return $return;
} # parse_byte_stream

## NOTE: The HTML5 spec says that the encoding layer MUST NOT strip the
## BOM and that the HTML layer MUST ignore it.  However, we do strip the
## BOM in the encoding layer, and the HTML layer does not ignore any
## U+FEFF, because the core part of our HTML parser expects a string of
## characters, not a string of bytes, code units, or anything else that
## might contain a BOM.  Therefore, any parser interface that accepts a
## string of bytes, such as |parse_byte_string| in this module, must
## ensure that it strips the BOM and never strips any ZWNBSP.

## Parses a string of characters by wrapping it in a
## |Whatpm::Charset::DecodeHandle::CharString| handle and delegating to
## |parse_char_stream|.
##
## Arguments (after the invocant):
##   $_[0]     - the character string, or a reference to it
##   remaining - passed through unchanged (document, error callback,
##               wrapper callback)
##
## Returns what |parse_char_stream| returns.
sub parse_char_string ($$$;$$) {
  #my ($self, $s, $doc, $onerror, $get_wrapper) = @_;
  my $self = shift;
  require Whatpm::Charset::DecodeHandle;

  ## Refer to the caller's string instead of copying it.
  my $string_ref = ref $_[0] ? $_[0] : \($_[0]);
  my $char_stream
      = Whatpm::Charset::DecodeHandle::CharString->new ($string_ref);

  return $self->parse_char_stream ($char_stream, @_[1..$#_]);
} # parse_char_string

## NOTE: |parse_string| is an alias kept for backward compatibility.
*parse_string = \&parse_char_string;

## Parses a character stream into a document.
##
## Arguments (after the invocant, which may be an object or a class name;
## a class name is instantiated via |new|):
##   $input       - character handle; |read|, |manakai_read_until| and
##                  |onerror| are called on it
##   $doc         - document; its existing child nodes are removed first
##   $onerror     - optional error callback; the default warns with the
##                  line and column
##   $get_wrapper - optional code reference; when given, |$input| is
##                  replaced by what it returns for |$input|
##
## Returns the document.
sub parse_char_stream ($$$;$$) {
  my $self = ref $_[0] ? shift : shift->new;
  my $input = $_[0];
  $self->{document} = $_[1];
  @{$self->{document}->child_nodes} = ();

  ## NOTE: |set_inner_html| copies most of this method's code

  $self->{confident} = 1 unless exists $self->{confident};
  $self->{document}->input_encoding ($self->{input_encoding})
      if defined $self->{input_encoding};
## TODO: |{input_encoding}| is needless?

  $self->{line_prev} = $self->{line} = 1;
  $self->{column_prev} = -1;
  $self->{column} = 0;
  ## |{set_nc}|: takes the parser as its argument and stores the code
  ## point of the next input character in |{nc}| (-1 at the end of the
  ## input), updating the line and column counters.
  $self->{set_nc} = sub {
    my $self = shift;

    my $char = '';
    if (defined $self->{next_nc}) {
      ## A character read ahead while handling CR is consumed first.
      $char = $self->{next_nc};
      delete $self->{next_nc};
      $self->{nc} = ord $char;
    } else {
      $self->{char_buffer} = '';
      $self->{char_buffer_pos} = 0;

      ## Try to read a run of characters other than NULL, LF and CR;
      ## the first one read is returned without further processing.
      my $count = $input->manakai_read_until
         ($self->{char_buffer}, qr/[^\x00\x0A\x0D]/, $self->{char_buffer_pos});
      if ($count) {
        $self->{line_prev} = $self->{line};
        $self->{column_prev} = $self->{column};
        $self->{column}++;
        $self->{nc}
            = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
        return;
      }

      if ($input->read ($char, 1)) {
        $self->{nc} = ord $char;
      } else {
        $self->{nc} = -1;
        return;
      }
    }

    ($self->{line_prev}, $self->{column_prev})
        = ($self->{line}, $self->{column});
    $self->{column}++;
    
    if ($self->{nc} == 0x000A) { # LF
      !!!cp ('j1');
      $self->{line}++;
      $self->{column} = 0;
    } elsif ($self->{nc} == 0x000D) { # CR
      !!!cp ('j2');
## TODO: support for abort/streaming
      ## CR and CR LF are both reported as a single LF.  A character
      ## following CR that is not LF is kept in |{next_nc}|.
      my $next = '';
      if ($input->read ($next, 1) and $next ne "\x0A") {
        $self->{next_nc} = $next;
      }
      $self->{nc} = 0x000A; # LF # MUST
      $self->{line}++;
      $self->{column} = 0;
    } elsif ($self->{nc} == 0x0000) { # NULL
      !!!cp ('j4');
      !!!parse-error (type => 'NULL');
      $self->{nc} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };

  ## |{read_until}|: stores in |$_[0]|, at offset |$_[2]| (default 0), a
  ## run of characters that are neither in the character class body
  ## |$_[1]| nor NULL, LF or CR.  Returns the number of characters
  ## taken; takes nothing while a read-ahead character is pending.
  $self->{read_until} = sub {
    #my ($scalar, $specials_range, $offset) = @_;
    return 0 if defined $self->{next_nc};

    my $pattern = qr/[^$_[1]\x00\x0A\x0D]/;
    my $offset = $_[2] || 0;

    if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
      ## Serve from what is left in |{char_buffer}| first.
      pos ($self->{char_buffer}) = $self->{char_buffer_pos};
      if ($self->{char_buffer} =~ /\G(?>$pattern)+/) {
        substr ($_[0], $offset)
            = substr ($self->{char_buffer}, $-[0], $+[0] - $-[0]);
        my $count = $+[0] - $-[0];
        if ($count) {
          $self->{column} += $count;
          $self->{char_buffer_pos} += $count;
          $self->{line_prev} = $self->{line};
          $self->{column_prev} = $self->{column} - 1;
          $self->{nc} = -1;
        }
        return $count;
      } else {
        return 0;
      }
    } else {
      ## Otherwise read directly from the input handle.
      my $count = $input->manakai_read_until ($_[0], $pattern, $_[2]);
      if ($count) {
        $self->{column} += $count;
        $self->{line_prev} = $self->{line};
        $self->{column_prev} = $self->{column} - 1;
        $self->{nc} = -1;
      }
      return $count;
    }
  }; # $self->{read_until}

  my $onerror = $_[2] || sub {
    my (%opt) = @_;
    my $line = $opt{token} ? $opt{token}->{line} : $opt{line};
    my $column = $opt{token} ? $opt{token}->{column} : $opt{column};
    warn "Parse error ($opt{type}) at line $line column $column\n";
  };
  ## The current line and column are supplied first, so that the same
  ## keys given by the reporter take precedence.
  $self->{parse_error} = sub {
    $onerror->(line => $self->{line}, column => $self->{column}, @_);
  };

  my $char_onerror = sub {
    my (undef, $type, %opt) = @_;
    !!!parse-error (layer => 'encode',
                    line => $self->{line}, column => $self->{column} + 1,
                    %opt, type => $type);
  }; # $char_onerror

  ## With a wrapper, the wrapped handle always gets |$char_onerror|;
  ## otherwise an error handler already set on |$input| is kept.
  if ($_[3]) {
    $input = $_[3]->($input);
    $input->onerror ($char_onerror);
  } else {
    $input->onerror ($char_onerror) unless defined $input->onerror;
  }

  $self->_initialize_tokenizer;
  $self->_initialize_tree_constructor;
  $self->_construct_tree;
  $self->_terminate_tree_constructor;

  ## NOTE(review): |{read_until}| also closes over |$self| and is not
  ## removed here, so a reference cycle appears to remain - confirm.
  delete $self->{parse_error}; # remove loop

  return $self->{document};
} # parse_char_stream

## Creates a parser object.
##
## The object starts with the table of error level codes in |{level}| and
## with default hooks:
##   {set_nc}                      - sets |{nc}| to -1
##   {parse_error}                 - does nothing
##   {change_encoding}             - does nothing
##   {application_cache_selection} - does nothing
##
## Returns the new object, blessed into |$class|.
sub new ($) {
  my $class = shift;
  my $self = bless {
    level => {must => 'm',
              should => 's',
              warn => 'w',
              info => 'i',
              uncertain => 'u'},
  }, $class;

  ## The closure is stored inside the object it refers to.  Holding the
  ## object through a weak reference avoids a reference cycle that would
  ## keep every parser object alive forever.
  require Scalar::Util;
  my $weak_self = $self;
  Scalar::Util::weaken ($weak_self);
  $self->{set_nc} = sub {
    $weak_self->{nc} = -1;
  };
  $self->{parse_error} = sub {
    # 
  };
  $self->{change_encoding} = sub {
    # if ($_[0] is a supported encoding) {
    #   run "change the encoding" algorithm;
    #   throw Whatpm::HTML::RestartParser (charset => $new_encoding);
    # }
  };
  $self->{application_cache_selection} = sub {
    #
  };
  return $self;
} # new

## Content model flags: which kinds of markup are recognized in data.
sub CM_ENTITY () { 1 << 0 } # & markup in data
sub CM_LIMITED_MARKUP () { 1 << 1 } # < markup in data (limited)
sub CM_FULL_MARKUP () { 1 << 2 } # < markup in data (any)

## Content models, expressed as combinations of the flags above.
sub PLAINTEXT_CONTENT_MODEL () { 0 }
sub CDATA_CONTENT_MODEL () { CM_LIMITED_MARKUP }
sub RCDATA_CONTENT_MODEL () { CM_LIMITED_MARKUP | CM_ENTITY }
sub PCDATA_CONTENT_MODEL () { CM_FULL_MARKUP | CM_ENTITY }

## Tokenizer state numbers.  The numbers 1 and 12 are not assigned;
## the constants that used to carry them are kept below as comments.
use constant {
  DATA_STATE => 0,
  # ENTITY_DATA_STATE => 1,
  TAG_OPEN_STATE => 2,
  CLOSE_TAG_OPEN_STATE => 3,
  TAG_NAME_STATE => 4,
  BEFORE_ATTRIBUTE_NAME_STATE => 5,
  ATTRIBUTE_NAME_STATE => 6,
  AFTER_ATTRIBUTE_NAME_STATE => 7,
  BEFORE_ATTRIBUTE_VALUE_STATE => 8,
  ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE => 9,
  ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE => 10,
  ATTRIBUTE_VALUE_UNQUOTED_STATE => 11,
  # ENTITY_IN_ATTRIBUTE_VALUE_STATE => 12,
  MARKUP_DECLARATION_OPEN_STATE => 13,
  COMMENT_START_STATE => 14,
  COMMENT_START_DASH_STATE => 15,
  COMMENT_STATE => 16,
  COMMENT_END_STATE => 17,
  COMMENT_END_DASH_STATE => 18,
  BOGUS_COMMENT_STATE => 19,
  DOCTYPE_STATE => 20,
  BEFORE_DOCTYPE_NAME_STATE => 21,
  DOCTYPE_NAME_STATE => 22,
  AFTER_DOCTYPE_NAME_STATE => 23,
  BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE => 24,
  DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE => 25,
  DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE => 26,
  AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE => 27,
  BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE => 28,
  DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE => 29,
  DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE => 30,
  AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE => 31,
  BOGUS_DOCTYPE_STATE => 32,
  AFTER_ATTRIBUTE_VALUE_QUOTED_STATE => 33,
  SELF_CLOSING_START_TAG_STATE => 34,
  CDATA_SECTION_STATE => 35,

  ## Parts of the "markup declaration open state" in the spec.
  MD_HYPHEN_STATE => 36,
  MD_DOCTYPE_STATE => 37,
  MD_CDATA_STATE => 38,

  ## Part of the "close tag open state" in the spec.
  CDATA_RCDATA_CLOSE_TAG_STATE => 39,

  ## Parts of the "CDATA section state" in the spec.
  CDATA_SECTION_MSE1_STATE => 40,
  CDATA_SECTION_MSE2_STATE => 41,

  ## Parts of the "after DOCTYPE name state" in the spec.
  PUBLIC_STATE => 42,
  SYSTEM_STATE => 43,

  ## NOTE: The "entity data state", the "entity in attribute value
  ## state", and the "consume a character reference" algorithm are
  ## jointly implemented by the following six states.
  ENTITY_STATE => 44,
  ENTITY_HASH_STATE => 45,
  NCR_NUM_STATE => 46,
  HEXREF_X_STATE => 47,
  HEXREF_HEX_STATE => 48,
  ENTITY_NAME_STATE => 49,

  ## The "data state" in the spec.
  PCDATA_STATE => 50,
};

## Token type numbers, stored in the |type| member of a token.
use constant {
  DOCTYPE_TOKEN => 1,
  COMMENT_TOKEN => 2,
  START_TAG_TOKEN => 3,
  END_TAG_TOKEN => 4,
  END_OF_FILE_TOKEN => 5,
  CHARACTER_TOKEN => 6,
};

## Bits from which the insertion mode values below are composed.
use constant {
  AFTER_HTML_IMS => 0b100,
  HEAD_IMS       => 0b1000,
  BODY_IMS       => 0b10000,
  BODY_TABLE_IMS => 0b100000,
  TABLE_IMS      => 0b1000000,
  ROW_IMS        => 0b10000000,
  BODY_AFTER_IMS => 0b100000000,
  FRAME_IMS      => 0b1000000000,
  SELECT_IMS     => 0b10000000000,

  ## NOTE: The "in foreign content" insertion mode is special; it is
  ## combined with the secondary insertion mode.  In this parser, the
  ## two are stored together in the bit-or'ed form.
  IN_FOREIGN_CONTENT_IM => 0b100000000000,
};

## NOTE: The "initial" and "before html" insertion modes have no
## constants.

use constant {
  ## "after after body" insertion mode.
  AFTER_HTML_BODY_IM => AFTER_HTML_IMS | BODY_AFTER_IMS,

  ## "after after frameset" insertion mode.
  AFTER_HTML_FRAMESET_IM => AFTER_HTML_IMS | FRAME_IMS,

  IN_HEAD_IM => HEAD_IMS | 0b00,
  IN_HEAD_NOSCRIPT_IM => HEAD_IMS | 0b01,
  AFTER_HEAD_IM => HEAD_IMS | 0b10,
  BEFORE_HEAD_IM => HEAD_IMS | 0b11,
  IN_BODY_IM => BODY_IMS,
  IN_CELL_IM => BODY_IMS | BODY_TABLE_IMS | 0b01,
  IN_CAPTION_IM => BODY_IMS | BODY_TABLE_IMS | 0b10,
  IN_ROW_IM => TABLE_IMS | ROW_IMS | 0b01,
  IN_TABLE_BODY_IM => TABLE_IMS | ROW_IMS | 0b10,
  IN_TABLE_IM => TABLE_IMS,
  AFTER_BODY_IM => BODY_AFTER_IMS,
  IN_FRAMESET_IM => FRAME_IMS | 0b01,
  AFTER_FRAMESET_IM => FRAME_IMS | 0b10,
  IN_SELECT_IM => SELECT_IMS | 0b01,
  IN_SELECT_IN_TABLE_IM => SELECT_IMS | 0b10,
  IN_COLUMN_GROUP_IM => 0b10,
};

## Implementations MUST act as if they used the state machine in the spec.

## Resets the tokenizer members of the parser object to their initial
## values and reads the first input character into |->{nc}|.
##
## Members marked "initialized when used" are left alone here; the
## states that need them set them up themselves.
sub _initialize_tokenizer ($) {
  my $self = shift;
  $self->{state} = DATA_STATE; # MUST
  #$self->{s_kwd}; # state keyword - initialized when used
  #$self->{entity__value}; # initialized when used
  #$self->{entity__match}; # initialized when used
  $self->{content_model} = PCDATA_CONTENT_MODEL; # initial content model
  undef $self->{ct}; # current token
  undef $self->{ca}; # current attribute
  undef $self->{last_stag_name}; # last emitted start tag name
  #$self->{prev_state}; # initialized when used
  delete $self->{self_closing};
  ## The escape flag is set when "<!--" is seen in RCDATA or CDATA
  ## content and is only cleared again by a matching "-->".  Clear it
  ## here so that a flag left over from an earlier run on the same
  ## parser object does not affect how "&" and "<" are handled in the
  ## data state.
  delete $self->{escape};
  $self->{char_buffer} = '';
  $self->{char_buffer_pos} = 0;
  $self->{nc} = -1; # next input character
  #$self->{next_nc}
  !!!next-input-character;
  $self->{token} = []; # queue of tokens waiting to be returned
} # _initialize_tokenizer

## A token has:
##   ->{type} == DOCTYPE_TOKEN, START_TAG_TOKEN, END_TAG_TOKEN, COMMENT_TOKEN,
##       CHARACTER_TOKEN, or END_OF_FILE_TOKEN
##   ->{name} (DOCTYPE_TOKEN)
##   ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN)
##   ->{pubid} (DOCTYPE_TOKEN)
##   ->{sysid} (DOCTYPE_TOKEN)
##   ->{quirks} == 1 or 0 (DOCTYPE_TOKEN): "force-quirks" flag
##   ->{attributes} isa HASH (START_TAG_TOKEN, END_TAG_TOKEN)
##        ->{name}
##        ->{value}
##        ->{has_reference} == 1 or 0
##   ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN)
## NOTE: The "self-closing flag" is held as |$self->{self_closing}|.
##     A token's own |->{self_closing}| member is used to save the value
##     of |$self->{self_closing}| while the token is pushed back to the
##     stack.

## Emitted token MUST immediately be handled by the tree construction state.

## Before each step, UA MAY check to see if either one of the scripts in
## "list of scripts that will execute as soon as possible" or the first
## script in the "list of scripts that will execute asynchronously",
## has completed loading.  If one has, then it MUST be executed
## and removed from the list.

## TODO: Polytheistic slash SHOULD NOT be used. (Applied only to atheists.)
## (This requirement was dropped from HTML5 spec, unfortunately.)

## Lookup table of the code points the tokenizer treats as white space.
## U+000B LINE TABULATION (VT) and U+000D CARRIAGE RETURN (CR) are not
## members of the table.
my $is_space = {
  map { ($_ => 1) } (
    0x0009, # CHARACTER TABULATION (HT)
    0x000A, # LINE FEED (LF)
    0x000C, # FORM FEED (FF)
    0x0020, # SPACE (SP)
  )
};

sub _get_next_token ($) {
  my $self = shift;

  if ($self->{self_closing}) {
    !!!parse-error (type => 'nestc', token => $self->{ct});
    ## NOTE: The |self_closing| flag is only set by start tag token.
    ## In addition, when a start tag token is emitted, it is always set to
    ## |ct|.
    delete $self->{self_closing};
  }

  if (@{$self->{token}}) {
    $self->{self_closing} = $self->{token}->[0]->{self_closing};
    return shift @{$self->{token}};
  }

  A: {
    if ($self->{state} == PCDATA_STATE) {
      ## NOTE: Same as |DATA_STATE|, but only for |PCDATA| content model.

      if ($self->{nc} == 0x0026) { # &
        !!!cp (0.1);
        ## NOTE: In the spec, the tokenizer is switched to the 
        ## "entity data state".  In this implementation, the tokenizer
        ## is switched to the |ENTITY_STATE|, which is an implementation
        ## of the "consume a character reference" algorithm.
        $self->{entity_add} = -1;
        $self->{prev_state} = DATA_STATE;
        $self->{state} = ENTITY_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x003C) { # <
        !!!cp (0.2);
        $self->{state} = TAG_OPEN_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == -1) {
        !!!cp (0.3);
        !!!emit ({type => END_OF_FILE_TOKEN,
                  line => $self->{line}, column => $self->{column}});
        last A; ## TODO: ok?
      } else {
        !!!cp (0.4);
        #
      }

      # Anything else
      my $token = {type => CHARACTER_TOKEN,
                   data => chr $self->{nc},
                   line => $self->{line}, column => $self->{column},
                  };
      $self->{read_until}->($token->{data}, q[<&], length $token->{data});

      ## Stay in the state.
      !!!next-input-character;
      !!!emit ($token);
      redo A;
    } elsif ($self->{state} == DATA_STATE) {
      $self->{s_kwd} = '' unless defined $self->{s_kwd};
      if ($self->{nc} == 0x0026) { # &
        $self->{s_kwd} = '';
        if ($self->{content_model} & CM_ENTITY and # PCDATA | RCDATA
            not $self->{escape}) {
          !!!cp (1);
          ## NOTE: In the spec, the tokenizer is switched to the 
          ## "entity data state".  In this implementation, the tokenizer
          ## is switched to the |ENTITY_STATE|, which is an implementation
          ## of the "consume a character reference" algorithm.
          $self->{entity_add} = -1;
          $self->{prev_state} = DATA_STATE;
          $self->{state} = ENTITY_STATE;
          !!!next-input-character;
          redo A;
        } else {
          !!!cp (2);
          #
        }
      } elsif ($self->{nc} == 0x002D) { # -
        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
          $self->{s_kwd} .= '-';
          
          if ($self->{s_kwd} eq '<!--') {
            !!!cp (3);
            $self->{escape} = 1; # unless $self->{escape};
            $self->{s_kwd} = '--';
            #
          } elsif ($self->{s_kwd} eq '---') {
            !!!cp (4);
            $self->{s_kwd} = '--';
            #
          } else {
            !!!cp (5);
            #
          }
        }
        
        #
      } elsif ($self->{nc} == 0x0021) { # !
        if (length $self->{s_kwd}) {
          !!!cp (5.1);
          $self->{s_kwd} .= '!';
          #
        } else {
          !!!cp (5.2);
          #$self->{s_kwd} = '';
          #
        }
        #
      } elsif ($self->{nc} == 0x003C) { # <
        if ($self->{content_model} & CM_FULL_MARKUP or # PCDATA
            (($self->{content_model} & CM_LIMITED_MARKUP) and # CDATA | RCDATA
             not $self->{escape})) {
          !!!cp (6);
          $self->{state} = TAG_OPEN_STATE;
          !!!next-input-character;
          redo A;
        } else {
          !!!cp (7);
          $self->{s_kwd} = '';
          #
        }
      } elsif ($self->{nc} == 0x003E) { # >
        if ($self->{escape} and
            ($self->{content_model} & CM_LIMITED_MARKUP)) { # RCDATA | CDATA
          if ($self->{s_kwd} eq '--') {
            !!!cp (8);
            delete $self->{escape};
          } else {
            !!!cp (9);
          }
        } else {
          !!!cp (10);
        }
        
        $self->{s_kwd} = '';
        #
      } elsif ($self->{nc} == -1) {
        !!!cp (11);
        $self->{s_kwd} = '';
        !!!emit ({type => END_OF_FILE_TOKEN,
                  line => $self->{line}, column => $self->{column}});
        last A; ## TODO: ok?
      } else {
        !!!cp (12);
        $self->{s_kwd} = '';
        #
      }

      # Anything else
      my $token = {type => CHARACTER_TOKEN,
                   data => chr $self->{nc},
                   line => $self->{line}, column => $self->{column},
                  };
      if ($self->{read_until}->($token->{data}, q[-!<>&],
                                length $token->{data})) {
        $self->{s_kwd} = '';
      }

      ## Stay in the data state.
      if ($self->{content_model} == PCDATA_CONTENT_MODEL) {
        !!!cp (13);
        $self->{state} = PCDATA_STATE;
      } else {
        !!!cp (14);
        ## Stay in the state.
      }
      !!!next-input-character;
      !!!emit ($token);
      redo A;
    } elsif ($self->{state} == TAG_OPEN_STATE) {
      if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
        if ($self->{nc} == 0x002F) { # /
          !!!cp (15);
          !!!next-input-character;
          $self->{state} = CLOSE_TAG_OPEN_STATE;
          redo A;
        } elsif ($self->{nc} == 0x0021) { # !
          !!!cp (15.1);
          $self->{s_kwd} = '<' unless $self->{escape};
          #
        } else {
          !!!cp (16);
          #
        }

        ## reconsume
        $self->{state} = DATA_STATE;
        !!!emit ({type => CHARACTER_TOKEN, data => '<',
                  line => $self->{line_prev},
                  column => $self->{column_prev},
                 });
        redo A;
      } elsif ($self->{content_model} & CM_FULL_MARKUP) { # PCDATA
        if ($self->{nc} == 0x0021) { # !
          !!!cp (17);
          $self->{state} = MARKUP_DECLARATION_OPEN_STATE;
          !!!next-input-character;
          redo A;
        } elsif ($self->{nc} == 0x002F) { # /
          !!!cp (18);
          $self->{state} = CLOSE_TAG_OPEN_STATE;
          !!!next-input-character;
          redo A;
        } elsif (0x0041 <= $self->{nc} and
                 $self->{nc} <= 0x005A) { # A..Z
          !!!cp (19);
          $self->{ct}
            = {type => START_TAG_TOKEN,
               tag_name => chr ($self->{nc} + 0x0020),
               line => $self->{line_prev},
               column => $self->{column_prev}};
          $self->{state} = TAG_NAME_STATE;
          !!!next-input-character;
          redo A;
        } elsif (0x0061 <= $self->{nc} and
                 $self->{nc} <= 0x007A) { # a..z
          !!!cp (20);
          $self->{ct} = {type => START_TAG_TOKEN,
                                    tag_name => chr ($self->{nc}),
                                    line => $self->{line_prev},
                                    column => $self->{column_prev}};
          $self->{state} = TAG_NAME_STATE;
          !!!next-input-character;
          redo A;
        } elsif ($self->{nc} == 0x003E) { # >
          !!!cp (21);
          !!!parse-error (type => 'empty start tag',
                          line => $self->{line_prev},
                          column => $self->{column_prev});
          $self->{state} = DATA_STATE;
          !!!next-input-character;

          !!!emit ({type => CHARACTER_TOKEN, data => '<>',
                    line => $self->{line_prev},
                    column => $self->{column_prev},
                   });

          redo A;
        } elsif ($self->{nc} == 0x003F) { # ?
          !!!cp (22);
          !!!parse-error (type => 'pio',
                          line => $self->{line_prev},
                          column => $self->{column_prev});
          $self->{state} = BOGUS_COMMENT_STATE;
          $self->{ct} = {type => COMMENT_TOKEN, data => '',
                                    line => $self->{line_prev},
                                    column => $self->{column_prev},
                                   };
          ## $self->{nc} is intentionally left as is
          redo A;
        } else {
          !!!cp (23);
          !!!parse-error (type => 'bare stago',
                          line => $self->{line_prev},
                          column => $self->{column_prev});
          $self->{state} = DATA_STATE;
          ## reconsume

          !!!emit ({type => CHARACTER_TOKEN, data => '<',
                    line => $self->{line_prev},
                    column => $self->{column_prev},
                   });

          redo A;
        }
      } else {
        die "$0: $self->{content_model} in tag open";
      }
    } elsif ($self->{state} == CLOSE_TAG_OPEN_STATE) {
      ## NOTE: The "close tag open state" in the spec is implemented as
      ## |CLOSE_TAG_OPEN_STATE| and |CDATA_RCDATA_CLOSE_TAG_STATE|.

      my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"
      if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
        if (defined $self->{last_stag_name}) {
          $self->{state} = CDATA_RCDATA_CLOSE_TAG_STATE;
          $self->{s_kwd} = '';
          ## Reconsume.
          redo A;
        } else {
          ## No start tag token has ever been emitted
          ## NOTE: See <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>.
          !!!cp (28);
          $self->{state} = DATA_STATE;
          ## Reconsume.
          !!!emit ({type => CHARACTER_TOKEN, data => '</',
                    line => $l, column => $c,
                   });
          redo A;
        }
      }

      if (0x0041 <= $self->{nc} and
          $self->{nc} <= 0x005A) { # A..Z
        !!!cp (29);
        $self->{ct}
            = {type => END_TAG_TOKEN,
               tag_name => chr ($self->{nc} + 0x0020),
               line => $l, column => $c};
        $self->{state} = TAG_NAME_STATE;
        !!!next-input-character;
        redo A;
      } elsif (0x0061 <= $self->{nc} and
               $self->{nc} <= 0x007A) { # a..z
        !!!cp (30);
        $self->{ct} = {type => END_TAG_TOKEN,
                                  tag_name => chr ($self->{nc}),
                                  line => $l, column => $c};
        $self->{state} = TAG_NAME_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x003E) { # >
        !!!cp (31);
        !!!parse-error (type => 'empty end tag',
                        line => $self->{line_prev}, ## "<" in "</>"
                        column => $self->{column_prev} - 1);
        $self->{state} = DATA_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == -1) {
        !!!cp (32);
        !!!parse-error (type => 'bare etago');
        $self->{state} = DATA_STATE;
        # reconsume

        !!!emit ({type => CHARACTER_TOKEN, data => '</',
                  line => $l, column => $c,
                 });

        redo A;
      } else {
        !!!cp (33);
        !!!parse-error (type => 'bogus end tag');
        $self->{state} = BOGUS_COMMENT_STATE;
        $self->{ct} = {type => COMMENT_TOKEN, data => '',
                                  line => $self->{line_prev}, # "<" of "</"
                                  column => $self->{column_prev} - 1,
                                 };
        ## NOTE: $self->{nc} is intentionally left as is.
        ## Although the "anything else" case of the spec not explicitly
        ## states that the next input character is to be reconsumed,
        ## it will be included to the |data| of the comment token
        ## generated from the bogus end tag, as defined in the
        ## "bogus comment state" entry.
        redo A;
      }
    } elsif ($self->{state} == CDATA_RCDATA_CLOSE_TAG_STATE) {
      my $ch = substr $self->{last_stag_name}, length $self->{s_kwd}, 1;
      if (length $ch) {
        my $CH = $ch;
        $ch =~ tr/a-z/A-Z/;
        my $nch = chr $self->{nc};
        if ($nch eq $ch or $nch eq $CH) {
          !!!cp (24);
          ## Stay in the state.
          $self->{s_kwd} .= $nch;
          !!!next-input-character;
          redo A;
        } else {
          !!!cp (25);
          $self->{state} = DATA_STATE;
          ## Reconsume.
          !!!emit ({type => CHARACTER_TOKEN,
                    data => '</' . $self->{s_kwd},
                    line => $self->{line_prev},
                    column => $self->{column_prev} - 1 - length $self->{s_kwd},
                   });
          redo A;
        }
      } else { # after "<{tag-name}"
        unless ($is_space->{$self->{nc}} or
                {
                 0x003E => 1, # >
                 0x002F => 1, # /
                 -1 => 1, # EOF
                }->{$self->{nc}}) {
          !!!cp (26);
          ## Reconsume.
          $self->{state} = DATA_STATE;
          !!!emit ({type => CHARACTER_TOKEN,
                    data => '</' . $self->{s_kwd},
                    line => $self->{line_prev},
                    column => $self->{column_prev} - 1 - length $self->{s_kwd},
                   });
          redo A;
        } else {
          !!!cp (27);
          $self->{ct}
              = {type => END_TAG_TOKEN,
                 tag_name => $self->{last_stag_name},
                 line => $self->{line_prev},
                 column => $self->{column_prev} - 1 - length $self->{s_kwd}};
          $self->{state} = TAG_NAME_STATE;
          ## Reconsume.
          redo A;
        }
      }
    } elsif ($self->{state} == TAG_NAME_STATE) {
      if ($is_space->{$self->{nc}}) {
        !!!cp (34);
        $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x003E) { # >
        if ($self->{ct}->{type} == START_TAG_TOKEN) {
          !!!cp (35);
          $self->{last_stag_name} = $self->{ct}->{tag_name};
        } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
          $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
          #if ($self->{ct}->{attributes}) {
          #  ## NOTE: This should never be reached.
          #  !!! cp (36);
          #  !!! parse-error (type => 'end tag attribute');
          #} else {
            !!!cp (37);
          #}
        } else {
          die "$0: $self->{ct}->{type}: Unknown token type";
        }
        $self->{state} = DATA_STATE;
        !!!next-input-character;

        !!!emit ($self->{ct}); # start tag or end tag

        redo A;
      } elsif (0x0041 <= $self->{nc} and
               $self->{nc} <= 0x005A) { # A..Z
        !!!cp (38);
        $self->{ct}->{tag_name} .= chr ($self->{nc} + 0x0020);
          # start tag or end tag
        ## Stay in this state
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == -1) {
        !!!parse-error (type => 'unclosed tag');
        if ($self->{ct}->{type} == START_TAG_TOKEN) {
          !!!cp (39);
          $self->{last_stag_name} = $self->{ct}->{tag_name};
        } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
          $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
          #if ($self->{ct}->{attributes}) {
          #  ## NOTE: This state should never be reached.
          #  !!! cp (40);
          #  !!! parse-error (type => 'end tag attribute');
          #} else {
            !!!cp (41);
          #}
        } else {
          die "$0: $self->{ct}->{type}: Unknown token type";
        }
        $self->{state} = DATA_STATE;
        # reconsume

        !!!emit ($self->{ct}); # start tag or end tag

        redo A;
      } elsif ($self->{nc} == 0x002F) { # /
        !!!cp (42);
        $self->{state} = SELF_CLOSING_START_TAG_STATE;
        !!!next-input-character;
        redo A;
      } else {
        !!!cp (44);
        $self->{ct}->{tag_name} .= chr $self->{nc};
          # start tag or end tag
        ## Stay in the state
        !!!next-input-character;
        redo A;
      }
    } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {
      if ($is_space->{$self->{nc}}) {
        !!!cp (45);
        ## Stay in the state
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x003E) { # >
        if ($self->{ct}->{type} == START_TAG_TOKEN) {
          !!!cp (46);
          $self->{last_stag_name} = $self->{ct}->{tag_name};
        } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
          $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
          if ($self->{ct}->{attributes}) {
            !!!cp (47);
            !!!parse-error (type => 'end tag attribute');
          } else {
            !!!cp (48);
          }
        } else {
          die "$0: $self->{ct}->{type}: Unknown token type";
        }
        $self->{state} = DATA_STATE;
        !!!next-input-character;

        !!!emit ($self->{ct}); # start tag or end tag

        redo A;
      } elsif (0x0041 <= $self->{nc} and
               $self->{nc} <= 0x005A) { # A..Z
        !!!cp (49);
        $self->{ca}
            = {name => chr ($self->{nc} + 0x0020),
               value => '',
               line => $self->{line}, column => $self->{column}};
        $self->{state} = ATTRIBUTE_NAME_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x002F) { # /
        !!!cp (50);
        $self->{state} = SELF_CLOSING_START_TAG_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == -1) {
        !!!parse-error (type => 'unclosed tag');
        if ($self->{ct}->{type} == START_TAG_TOKEN) {
          !!!cp (52);
          $self->{last_stag_name} = $self->{ct}->{tag_name};
        } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
          $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
          if ($self->{ct}->{attributes}) {
            !!!cp (53);
            !!!parse-error (type => 'end tag attribute');
          } else {
            !!!cp (54);
          }
        } else {
          die "$0: $self->{ct}->{type}: Unknown token type";
        }
        $self->{state} = DATA_STATE;
        # reconsume

        !!!emit ($self->{ct}); # start tag or end tag

        redo A;
      } else {
        if ({
             0x0022 => 1, # "
             0x0027 => 1, # '
             0x003D => 1, # =
            }->{$self->{nc}}) {
          !!!cp (55);
          !!!parse-error (type => 'bad attribute name');
        } else {
          !!!cp (56);
        }
        $self->{ca}
            = {name => chr ($self->{nc}),
               value => '',
               line => $self->{line}, column => $self->{column}};
        $self->{state} = ATTRIBUTE_NAME_STATE;
        !!!next-input-character;
        redo A;
      }
    } elsif ($self->{state} == ATTRIBUTE_NAME_STATE) {
      my $before_leave = sub {
        if (exists $self->{ct}->{attributes} # start tag or end tag
            ->{$self->{ca}->{name}}) { # MUST
          !!!cp (57);
          !!!parse-error (type => 'duplicate attribute', text => $self->{ca}->{name}, line => $self->{ca}->{line}, column => $self->{ca}->{column});
          ## Discard $self->{ca} # MUST
        } else {
          !!!cp (58);
          $self->{ct}->{attributes}->{$self->{ca}->{name}}
            = $self->{ca};
        }
      }; # $before_leave

      if ($is_space->{$self->{nc}}) {
        !!!cp (59);
        $before_leave->();
        $self->{state} = AFTER_ATTRIBUTE_NAME_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x003D) { # =
        !!!cp (60);
        $before_leave->();
        $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x003E) { # >
        $before_leave->();
        if ($self->{ct}->{type} == START_TAG_TOKEN) {
          !!!cp (61);
          $self->{last_stag_name} = $self->{ct}->{tag_name};
        } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
          !!!cp (62);
          $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
          if ($self->{ct}->{attributes}) {
            !!!parse-error (type => 'end tag attribute');
          }
        } else {
          die "$0: $self->{ct}->{type}: Unknown token type";
        }
        $self->{state} = DATA_STATE;
        !!!next-input-character;

        !!!emit ($self->{ct}); # start tag or end tag

        redo A;
      } elsif (0x0041 <= $self->{nc} and
               $self->{nc} <= 0x005A) { # A..Z
        !!!cp (63);
        $self->{ca}->{name} .= chr ($self->{nc} + 0x0020);
        ## Stay in the state
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x002F) { # /
        !!!cp (64);
        $before_leave->();
        $self->{state} = SELF_CLOSING_START_TAG_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == -1) {
        !!!parse-error (type => 'unclosed tag');
        $before_leave->();
        if ($self->{ct}->{type} == START_TAG_TOKEN) {
          !!!cp (66);
          $self->{last_stag_name} = $self->{ct}->{tag_name};
        } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
          $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
          if ($self->{ct}->{attributes}) {
            !!!cp (67);
            !!!parse-error (type => 'end tag attribute');
          } else {
            ## NOTE: This state should never be reached.
            !!!cp (68);
          }
        } else {
          die "$0: $self->{ct}->{type}: Unknown token type";
        }
        $self->{state} = DATA_STATE;
        # reconsume

        !!!emit ($self->{ct}); # start tag or end tag

        redo A;
      } else {
        if ($self->{nc} == 0x0022 or # "
            $self->{nc} == 0x0027) { # '
          !!!cp (69);
          !!!parse-error (type => 'bad attribute name');
        } else {
          !!!cp (70);
        }
        $self->{ca}->{name} .= chr ($self->{nc});
        ## Stay in the state
        !!!next-input-character;
        redo A;
      }
    } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {
      if ($is_space->{$self->{nc}}) {
        !!!cp (71);
        ## Stay in the state
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x003D) { # =
        !!!cp (72);
        $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x003E) { # >
        if ($self->{ct}->{type} == START_TAG_TOKEN) {
          !!!cp (73);
          $self->{last_stag_name} = $self->{ct}->{tag_name};
        } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
          $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
          if ($self->{ct}->{attributes}) {
            !!!cp (74);
            !!!parse-error (type => 'end tag attribute');
          } else {
            ## NOTE: This state should never be reached.
            !!!cp (75);
          }
        } else {
          die "$0: $self->{ct}->{type}: Unknown token type";
        }
        $self->{state} = DATA_STATE;
        !!!next-input-character;

        !!!emit ($self->{ct}); # start tag or end tag

        redo A;
      } elsif (0x0041 <= $self->{nc} and
               $self->{nc} <= 0x005A) { # A..Z
        !!!cp (76);
        $self->{ca}
            = {name => chr ($self->{nc} + 0x0020),
               value => '',
               line => $self->{line}, column => $self->{column}};
        $self->{state} = ATTRIBUTE_NAME_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x002F) { # /
        !!!cp (77);
        $self->{state} = SELF_CLOSING_START_TAG_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == -1) {
        !!!parse-error (type => 'unclosed tag');
        if ($self->{ct}->{type} == START_TAG_TOKEN) {
          !!!cp (79);
          $self->{last_stag_name} = $self->{ct}->{tag_name};
        } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
          $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
          if ($self->{ct}->{attributes}) {
            !!!cp (80);
            !!!parse-error (type => 'end tag attribute');
          } else {
            ## NOTE: This state should never be reached.
            !!!cp (81);
          }
        } else {
          die "$0: $self->{ct}->{type}: Unknown token type";
        }
        $self->{state} = DATA_STATE;
        # reconsume

        !!!emit ($self->{ct}); # start tag or end tag

        redo A;
      } else {
        if ($self->{nc} == 0x0022 or # "
            $self->{nc} == 0x0027) { # '
          !!!cp (78);
          !!!parse-error (type => 'bad attribute name');
        } else {
          !!!cp (82);
        }
        $self->{ca}
            = {name => chr ($self->{nc}),
               value => '',
               line => $self->{line}, column => $self->{column}};
        $self->{state} = ATTRIBUTE_NAME_STATE;
        !!!next-input-character;
        redo A;        
      }
    } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {
      if ($is_space->{$self->{nc}}) {
        !!!cp (83);
        ## Stay in the state
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x0022) { # "
        !!!cp (84);
        $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x0026) { # &
        !!!cp (85);
        $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
        ## reconsume
        redo A;
      } elsif ($self->{nc} == 0x0027) { # '
        !!!cp (86);
        $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x003E) { # >
        !!!parse-error (type => 'empty unquoted attribute value');
        if ($self->{ct}->{type} == START_TAG_TOKEN) {
          !!!cp (87);
          $self->{last_stag_name} = $self->{ct}->{tag_name};
        } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
          $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
          if ($self->{ct}->{attributes}) {
            !!!cp (88);
            !!!parse-error (type => 'end tag attribute');
          } else {
            ## NOTE: This state should never be reached.
            !!!cp (89);
          }
        } else {
          die "$0: $self->{ct}->{type}: Unknown token type";
        }
        $self->{state} = DATA_STATE;
        !!!next-input-character;

        !!!emit ($self->{ct}); # start tag or end tag

        redo A;
      } elsif ($self->{nc} == -1) {
        !!!parse-error (type => 'unclosed tag');
        if ($self->{ct}->{type} == START_TAG_TOKEN) {
          !!!cp (90);
          $self->{last_stag_name} = $self->{ct}->{tag_name};
        } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
          $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
          if ($self->{ct}->{attributes}) {
            !!!cp (91);
            !!!parse-error (type => 'end tag attribute');
          } else {
            ## NOTE: This state should never be reached.
            !!!cp (92);
          }
        } else {
          die "$0: $self->{ct}->{type}: Unknown token type";
        }
        $self->{state} = DATA_STATE;
        ## reconsume

        !!!emit ($self->{ct}); # start tag or end tag

        redo A;
      } else {
        if ($self->{nc} == 0x003D) { # =
          !!!cp (93);
          !!!parse-error (type => 'bad attribute value');
        } else {
          !!!cp (94);
        }
        $self->{ca}->{value} .= chr ($self->{nc});
        $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
        !!!next-input-character;
        redo A;
      }
    } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
      if ($self->{nc} == 0x0022) { # "
        !!!cp (95);
        $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x0026) { # &
        !!!cp (96);
        ## NOTE: In the spec, the tokenizer is switched to the 
        ## "entity in attribute value state".  In this implementation, the
        ## tokenizer is switched to the |ENTITY_STATE|, which is an
        ## implementation of the "consume a character reference" algorithm.
        $self->{prev_state} = $self->{state};
        $self->{entity_add} = 0x0022; # "
        $self->{state} = ENTITY_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == -1) {
        !!!parse-error (type => 'unclosed attribute value');
        if ($self->{ct}->{type} == START_TAG_TOKEN) {
          !!!cp (97);
          $self->{last_stag_name} = $self->{ct}->{tag_name};
        } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
          $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
          if ($self->{ct}->{attributes}) {
            !!!cp (98);
            !!!parse-error (type => 'end tag attribute');
          } else {
            ## NOTE: This state should never be reached.
            !!!cp (99);
          }
        } else {
          die "$0: $self->{ct}->{type}: Unknown token type";
        }
        $self->{state} = DATA_STATE;
        ## reconsume

        !!!emit ($self->{ct}); # start tag or end tag

        redo A;
      } else {
        !!!cp (100);
        $self->{ca}->{value} .= chr ($self->{nc});
        $self->{read_until}->($self->{ca}->{value},
                              q["&],
                              length $self->{ca}->{value});

        ## Stay in the state
        !!!next-input-character;
        redo A;
      }
    } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
      if ($self->{nc} == 0x0027) { # '
        !!!cp (101);
        $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x0026) { # &
        !!!cp (102);
        ## NOTE: In the spec, the tokenizer is switched to the 
        ## "entity in attribute value state".  In this implementation, the
        ## tokenizer is switched to the |ENTITY_STATE|, which is an
        ## implementation of the "consume a character reference" algorithm.
        $self->{entity_add} = 0x0027; # '
        $self->{prev_state} = $self->{state};
        $self->{state} = ENTITY_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == -1) {
        !!!parse-error (type => 'unclosed attribute value');
        if ($self->{ct}->{type} == START_TAG_TOKEN) {
          !!!cp (103);
          $self->{last_stag_name} = $self->{ct}->{tag_name};
        } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
          $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
          if ($self->{ct}->{attributes}) {
            !!!cp (104);
            !!!parse-error (type => 'end tag attribute');
          } else {
            ## NOTE: This state should never be reached.
            !!!cp (105);
          }
        } else {
          die "$0: $self->{ct}->{type}: Unknown token type";
        }
        $self->{state} = DATA_STATE;
        ## reconsume

        !!!emit ($self->{ct}); # start tag or end tag

        redo A;
      } else {
        !!!cp (106);
        $self->{ca}->{value} .= chr ($self->{nc});
        $self->{read_until}->($self->{ca}->{value},
                              q['&],
                              length $self->{ca}->{value});

        ## Stay in the state
        !!!next-input-character;
        redo A;
      }
    } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {
      if ($is_space->{$self->{nc}}) {
        !!!cp (107);
        $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x0026) { # &
        !!!cp (108);
        ## NOTE: In the spec, the tokenizer is switched to the 
        ## "entity in attribute value state".  In this implementation, the
        ## tokenizer is switched to the |ENTITY_STATE|, which is an
        ## implementation of the "consume a character reference" algorithm.
        $self->{entity_add} = -1;
        $self->{prev_state} = $self->{state};
        $self->{state} = ENTITY_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x003E) { # >
        if ($self->{ct}->{type} == START_TAG_TOKEN) {
          !!!cp (109);
          $self->{last_stag_name} = $self->{ct}->{tag_name};
        } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
          $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
          if ($self->{ct}->{attributes}) {
            !!!cp (110);
            !!!parse-error (type => 'end tag attribute');
          } else {
            ## NOTE: This state should never be reached.
            !!!cp (111);
          }
        } else {
          die "$0: $self->{ct}->{type}: Unknown token type";
        }
        $self->{state} = DATA_STATE;
        !!!next-input-character;

        !!!emit ($self->{ct}); # start tag or end tag

        redo A;
      } elsif ($self->{nc} == -1) {
        !!!parse-error (type => 'unclosed tag');
        if ($self->{ct}->{type} == START_TAG_TOKEN) {
          !!!cp (112);
          $self->{last_stag_name} = $self->{ct}->{tag_name};
        } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
          $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
          if ($self->{ct}->{attributes}) {
            !!!cp (113);
            !!!parse-error (type => 'end tag attribute');
          } else {
            ## NOTE: This state should never be reached.
            !!!cp (114);
          }
        } else {
          die "$0: $self->{ct}->{type}: Unknown token type";
        }
        $self->{state} = DATA_STATE;
        ## reconsume

        !!!emit ($self->{ct}); # start tag or end tag

        redo A;
      } else {
        if ({
             0x0022 => 1, # "
             0x0027 => 1, # '
             0x003D => 1, # =
            }->{$self->{nc}}) {
          !!!cp (115);
          !!!parse-error (type => 'bad attribute value');
        } else {
          !!!cp (116);
        }
        $self->{ca}->{value} .= chr ($self->{nc});
        $self->{read_until}->($self->{ca}->{value},
                              q["'=& >],
                              length $self->{ca}->{value});

        ## Stay in the state
        !!!next-input-character;
        redo A;
      }
    } elsif ($self->{state} == AFTER_ATTRIBUTE_VALUE_QUOTED_STATE) {
      if ($is_space->{$self->{nc}}) {
        !!!cp (118);
        $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x003E) { # >
        if ($self->{ct}->{type} == START_TAG_TOKEN) {
          !!!cp (119);
          $self->{last_stag_name} = $self->{ct}->{tag_name};
        } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
          $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
          if ($self->{ct}->{attributes}) {
            !!!cp (120);
            !!!parse-error (type => 'end tag attribute');
          } else {
            ## NOTE: This state should never be reached.
            !!!cp (121);
          }
        } else {
          die "$0: $self->{ct}->{type}: Unknown token type";
        }
        $self->{state} = DATA_STATE;
        !!!next-input-character;

        !!!emit ($self->{ct}); # start tag or end tag

        redo A;
      } elsif ($self->{nc} == 0x002F) { # /
        !!!cp (122);
        $self->{state} = SELF_CLOSING_START_TAG_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == -1) {
        !!!parse-error (type => 'unclosed tag'); 
        if ($self->{ct}->{type} == START_TAG_TOKEN) {
          !!!cp (122.3);
          $self->{last_stag_name} = $self->{ct}->{tag_name};
        } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
          if ($self->{ct}->{attributes}) {
            !!!cp (122.1);
            !!!parse-error (type => 'end tag attribute');
          } else {
            ## NOTE: This state should never be reached.
            !!!cp (122.2);
          }
        } else {
          die "$0: $self->{ct}->{type}: Unknown token type";
        }
        $self->{state} = DATA_STATE;
        ## Reconsume.
        !!!emit ($self->{ct}); # start tag or end tag
        redo A;
      } else {
        !!!cp ('124.1');
        !!!parse-error (type => 'no space between attributes');
        $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
        ## reconsume
        redo A;
      }
    } elsif ($self->{state} == SELF_CLOSING_START_TAG_STATE) {
      if ($self->{nc} == 0x003E) { # >
        if ($self->{ct}->{type} == END_TAG_TOKEN) {
          !!!cp ('124.2');
          !!!parse-error (type => 'nestc', token => $self->{ct});
          ## TODO: Different type than slash in start tag
          $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
          if ($self->{ct}->{attributes}) {
            !!!cp ('124.4');
            !!!parse-error (type => 'end tag attribute');
          } else {
            !!!cp ('124.5');
          }
          ## TODO: Test |<title></title/>|
        } else {
          !!!cp ('124.3');
          $self->{self_closing} = 1;
        }

        $self->{state} = DATA_STATE;
        !!!next-input-character;

        !!!emit ($self->{ct}); # start tag or end tag

        redo A;
      } elsif ($self->{nc} == -1) {
        !!!parse-error (type => 'unclosed tag');
        if ($self->{ct}->{type} == START_TAG_TOKEN) {
          !!!cp (124.7);
          $self->{last_stag_name} = $self->{ct}->{tag_name};
        } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
          if ($self->{ct}->{attributes}) {
            !!!cp (124.5);
            !!!parse-error (type => 'end tag attribute');
          } else {
            ## NOTE: This state should never be reached.
            !!!cp (124.6);
          }
        } else {
          die "$0: $self->{ct}->{type}: Unknown token type";
        }
        $self->{state} = DATA_STATE;
        ## Reconsume.
        !!!emit ($self->{ct}); # start tag or end tag
        redo A;
      } else {
        !!!cp ('124.4');
        !!!parse-error (type => 'nestc');
        ## TODO: This error type is wrong.
        $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
        ## Reconsume.
        redo A;
      }
    } elsif ($self->{state} == BOGUS_COMMENT_STATE) {
      ## (only happens in the PCDATA state)

      ## NOTE: Unlike the spec's "bogus comment state", this implementation
      ## consumes characters on a one-by-one basis.
      
      if ($self->{nc} == 0x003E) { # >
        !!!cp (124);
        $self->{state} = DATA_STATE;
        !!!next-input-character;

        !!!emit ($self->{ct}); # comment
        redo A;
      } elsif ($self->{nc} == -1) { 
        !!!cp (125);
        $self->{state} = DATA_STATE;
        ## reconsume

        !!!emit ($self->{ct}); # comment
        redo A;
      } else {
        !!!cp (126);
        $self->{ct}->{data} .= chr ($self->{nc}); # comment
        $self->{read_until}->($self->{ct}->{data},
                              q[>],
                              length $self->{ct}->{data});

        ## Stay in the state.
        !!!next-input-character;
        redo A;
      }
    } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {
      ## (only happens in the PCDATA state)
      
      if ($self->{nc} == 0x002D) { # -
        !!!cp (133);
        $self->{state} = MD_HYPHEN_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x0044 or # D
               $self->{nc} == 0x0064) { # d
        ## ASCII case-insensitive.
        !!!cp (130);
        $self->{state} = MD_DOCTYPE_STATE;
        $self->{s_kwd} = chr $self->{nc};
        !!!next-input-character;
        redo A;
      } elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and
               $self->{open_elements}->[-1]->[1] & FOREIGN_EL and
               $self->{nc} == 0x005B) { # [
        !!!cp (135.4);                
        $self->{state} = MD_CDATA_STATE;
        $self->{s_kwd} = '[';
        !!!next-input-character;
        redo A;
      } else {
        !!!cp (136);
      }

      !!!parse-error (type => 'bogus comment',
                      line => $self->{line_prev},
                      column => $self->{column_prev} - 1);
      ## Reconsume.
      $self->{state} = BOGUS_COMMENT_STATE;
      $self->{ct} = {type => COMMENT_TOKEN, data => '',
                                line => $self->{line_prev},
                                column => $self->{column_prev} - 1,
                               };
      redo A;
    } elsif ($self->{state} == MD_HYPHEN_STATE) {
      if ($self->{nc} == 0x002D) { # -
        !!!cp (127);
        $self->{ct} = {type => COMMENT_TOKEN, data => '',
                                  line => $self->{line_prev},
                                  column => $self->{column_prev} - 2,
                                 };
        $self->{state} = COMMENT_START_STATE;
        !!!next-input-character;
        redo A;
      } else {
        !!!cp (128);
        !!!parse-error (type => 'bogus comment',
                        line => $self->{line_prev},
                        column => $self->{column_prev} - 2);
        $self->{state} = BOGUS_COMMENT_STATE;
        ## Reconsume.
        $self->{ct} = {type => COMMENT_TOKEN,
                                  data => '-',
                                  line => $self->{line_prev},
                                  column => $self->{column_prev} - 2,
                                 };
        redo A;
      }
    } elsif ($self->{state} == MD_DOCTYPE_STATE) {
      ## ASCII case-insensitive.
      if ($self->{nc} == [
            undef,
            0x004F, # O
            0x0043, # C
            0x0054, # T
            0x0059, # Y
            0x0050, # P
          ]->[length $self->{s_kwd}] or
          $self->{nc} == [
            undef,
            0x006F, # o
            0x0063, # c
            0x0074, # t
            0x0079, # y
            0x0070, # p
          ]->[length $self->{s_kwd}]) {
        !!!cp (131);
        ## Stay in the state.
        $self->{s_kwd} .= chr $self->{nc};
        !!!next-input-character;
        redo A;
      } elsif ((length $self->{s_kwd}) == 6 and
               ($self->{nc} == 0x0045 or # E
                $self->{nc} == 0x0065)) { # e
        !!!cp (129);
        $self->{state} = DOCTYPE_STATE;
        $self->{ct} = {type => DOCTYPE_TOKEN,
                                  quirks => 1,
                                  line => $self->{line_prev},
                                  column => $self->{column_prev} - 7,
                                 };
        !!!next-input-character;
        redo A;
      } else {
        !!!cp (132);        
        !!!parse-error (type => 'bogus comment',
                        line => $self->{line_prev},
                        column => $self->{column_prev} - 1 - length $self->{s_kwd});
        $self->{state} = BOGUS_COMMENT_STATE;
        ## Reconsume.
        $self->{ct} = {type => COMMENT_TOKEN,
                                  data => $self->{s_kwd},
                                  line => $self->{line_prev},
                                  column => $self->{column_prev} - 1 - length $self->{s_kwd},
                                 };
        redo A;
      }
    } elsif ($self->{state} == MD_CDATA_STATE) {
      if ($self->{nc} == {
            '[' => 0x0043, # C
            '[C' => 0x0044, # D
            '[CD' => 0x0041, # A
            '[CDA' => 0x0054, # T
            '[CDAT' => 0x0041, # A
          }->{$self->{s_kwd}}) {
        !!!cp (135.1);
        ## Stay in the state.
        $self->{s_kwd} .= chr $self->{nc};
        !!!next-input-character;
        redo A;
      } elsif ($self->{s_kwd} eq '[CDATA' and
               $self->{nc} == 0x005B) { # [
        !!!cp (135.2);
        $self->{ct} = {type => CHARACTER_TOKEN,
                                  data => '',
                                  line => $self->{line_prev},
                                  column => $self->{column_prev} - 7};
        $self->{state} = CDATA_SECTION_STATE;
        !!!next-input-character;
        redo A;
      } else {
        !!!cp (135.3);
        !!!parse-error (type => 'bogus comment',
                        line => $self->{line_prev},
                        column => $self->{column_prev} - 1 - length $self->{s_kwd});
        $self->{state} = BOGUS_COMMENT_STATE;
        ## Reconsume.
        $self->{ct} = {type => COMMENT_TOKEN,
                                  data => $self->{s_kwd},
                                  line => $self->{line_prev},
                                  column => $self->{column_prev} - 1 - length $self->{s_kwd},
                                 };
        redo A;
      }
    } elsif ($self->{state} == COMMENT_START_STATE) {
      if ($self->{nc} == 0x002D) { # -
        !!!cp (137);
        $self->{state} = COMMENT_START_DASH_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x003E) { # >
        !!!cp (138);
        !!!parse-error (type => 'bogus comment');
        $self->{state} = DATA_STATE;
        !!!next-input-character;

        !!!emit ($self->{ct}); # comment

        redo A;
      } elsif ($self->{nc} == -1) {
        !!!cp (139);
        !!!parse-error (type => 'unclosed comment');
        $self->{state} = DATA_STATE;
        ## reconsume

        !!!emit ($self->{ct}); # comment

        redo A;
      } else {
        !!!cp (140);
        $self->{ct}->{data} # comment
            .= chr ($self->{nc});
        $self->{state} = COMMENT_STATE;
        !!!next-input-character;
        redo A;
      }
    } elsif ($self->{state} == COMMENT_START_DASH_STATE) {
      if ($self->{nc} == 0x002D) { # -
        !!!cp (141);
        $self->{state} = COMMENT_END_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x003E) { # >
        !!!cp (142);
        !!!parse-error (type => 'bogus comment');
        $self->{state} = DATA_STATE;
        !!!next-input-character;

        !!!emit ($self->{ct}); # comment

        redo A;
      } elsif ($self->{nc} == -1) {
        !!!cp (143);
        !!!parse-error (type => 'unclosed comment');
        $self->{state} = DATA_STATE;
        ## reconsume

        !!!emit ($self->{ct}); # comment

        redo A;
      } else {
        !!!cp (144);
        $self->{ct}->{data} # comment
            .= '-' . chr ($self->{nc});
        $self->{state} = COMMENT_STATE;
        !!!next-input-character;
        redo A;
      }
    } elsif ($self->{state} == COMMENT_STATE) {
      if ($self->{nc} == 0x002D) { # -
        !!!cp (145);
        $self->{state} = COMMENT_END_DASH_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == -1) {
        !!!cp (146);
        !!!parse-error (type => 'unclosed comment');
        $self->{state} = DATA_STATE;
        ## reconsume

        !!!emit ($self->{ct}); # comment

        redo A;
      } else {
        !!!cp (147);
        $self->{ct}->{data} .= chr ($self->{nc}); # comment
        $self->{read_until}->($self->{ct}->{data},
                              q[-],
                              length $self->{ct}->{data});

        ## Stay in the state
        !!!next-input-character;
        redo A;
      }
    } elsif ($self->{state} == COMMENT_END_DASH_STATE) {
      if ($self->{nc} == 0x002D) { # -
        !!!cp (148);
        $self->{state} = COMMENT_END_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == -1) {
        !!!cp (149);
        !!!parse-error (type => 'unclosed comment');
        $self->{state} = DATA_STATE;
        ## reconsume

        !!!emit ($self->{ct}); # comment

        redo A;
      } else {
        !!!cp (150);
        $self->{ct}->{data} .= '-' . chr ($self->{nc}); # comment
        $self->{state} = COMMENT_STATE;
        !!!next-input-character;
        redo A;
      }
    } elsif ($self->{state} == COMMENT_END_STATE) {
      if ($self->{nc} == 0x003E) { # >
        !!!cp (151);
        $self->{state} = DATA_STATE;
        !!!next-input-character;

        !!!emit ($self->{ct}); # comment

        redo A;
      } elsif ($self->{nc} == 0x002D) { # -
        !!!cp (152);
        !!!parse-error (type => 'dash in comment',
                        line => $self->{line_prev},
                        column => $self->{column_prev});
        $self->{ct}->{data} .= '-'; # comment
        ## Stay in the state
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == -1) {
        !!!cp (153);
        !!!parse-error (type => 'unclosed comment');
        $self->{state} = DATA_STATE;
        ## reconsume

        !!!emit ($self->{ct}); # comment

        redo A;
      } else {
        !!!cp (154);
        !!!parse-error (type => 'dash in comment',
                        line => $self->{line_prev},
                        column => $self->{column_prev});
        $self->{ct}->{data} .= '--' . chr ($self->{nc}); # comment
        $self->{state} = COMMENT_STATE;
        !!!next-input-character;
        redo A;
      } 
    } elsif ($self->{state} == DOCTYPE_STATE) {
      if ($is_space->{$self->{nc}}) {
        !!!cp (155);
        $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
        !!!next-input-character;
        redo A;
      } else {
        !!!cp (156);
        !!!parse-error (type => 'no space before DOCTYPE name');
        $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
        ## reconsume
        redo A;
      }
    } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {
      if ($is_space->{$self->{nc}}) {
        !!!cp (157);
        ## Stay in the state
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x003E) { # >
        !!!cp (158);
        !!!parse-error (type => 'no DOCTYPE name');
        $self->{state} = DATA_STATE;
        !!!next-input-character;

        !!!emit ($self->{ct}); # DOCTYPE (quirks)

        redo A;
      } elsif ($self->{nc} == -1) {
        !!!cp (159);
        !!!parse-error (type => 'no DOCTYPE name');
        $self->{state} = DATA_STATE;
        ## reconsume

        !!!emit ($self->{ct}); # DOCTYPE (quirks)

        redo A;
      } else {
        !!!cp (160);
        $self->{ct}->{name} = chr $self->{nc};
        delete $self->{ct}->{quirks};
## ISSUE: "Set the token's name name to the" in the spec
        $self->{state} = DOCTYPE_NAME_STATE;
        !!!next-input-character;
        redo A;
      }
    } elsif ($self->{state} == DOCTYPE_NAME_STATE) {
## ISSUE: Redundant "First," in the spec.
      if ($is_space->{$self->{nc}}) {
        !!!cp (161);
        $self->{state} = AFTER_DOCTYPE_NAME_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x003E) { # >
        !!!cp (162);
        $self->{state} = DATA_STATE;
        !!!next-input-character;

        !!!emit ($self->{ct}); # DOCTYPE

        redo A;
      } elsif ($self->{nc} == -1) {
        !!!cp (163);
        !!!parse-error (type => 'unclosed DOCTYPE');
        $self->{state} = DATA_STATE;
        ## reconsume

        $self->{ct}->{quirks} = 1;
        !!!emit ($self->{ct}); # DOCTYPE

        redo A;
      } else {
        !!!cp (164);
        $self->{ct}->{name}
          .= chr ($self->{nc}); # DOCTYPE
        ## Stay in the state
        !!!next-input-character;
        redo A;
      }
    } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {
      if ($is_space->{$self->{nc}}) {
        !!!cp (165);
        ## Stay in the state
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x003E) { # >
        !!!cp (166);
        $self->{state} = DATA_STATE;
        !!!next-input-character;

        !!!emit ($self->{ct}); # DOCTYPE

        redo A;
      } elsif ($self->{nc} == -1) {
        !!!cp (167);
        !!!parse-error (type => 'unclosed DOCTYPE');
        $self->{state} = DATA_STATE;
        ## reconsume

        $self->{ct}->{quirks} = 1;
        !!!emit ($self->{ct}); # DOCTYPE

        redo A;
      } elsif ($self->{nc} == 0x0050 or # P
               $self->{nc} == 0x0070) { # p
        $self->{state} = PUBLIC_STATE;
        $self->{s_kwd} = chr $self->{nc};
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x0053 or # S
               $self->{nc} == 0x0073) { # s
        $self->{state} = SYSTEM_STATE;
        $self->{s_kwd} = chr $self->{nc};
        !!!next-input-character;
        redo A;
      } else {
        !!!cp (180);
        !!!parse-error (type => 'string after DOCTYPE name');
        $self->{ct}->{quirks} = 1;

        $self->{state} = BOGUS_DOCTYPE_STATE;
        !!!next-input-character;
        redo A;
      }
    } elsif ($self->{state} == PUBLIC_STATE) {
      ## ASCII case-insensitive
      if ($self->{nc} == [
            undef, 
            0x0055, # U
            0x0042, # B
            0x004C, # L
            0x0049, # I
          ]->[length $self->{s_kwd}] or
          $self->{nc} == [
            undef, 
            0x0075, # u
            0x0062, # b
            0x006C, # l
            0x0069, # i
          ]->[length $self->{s_kwd}]) {
        !!!cp (175);
        ## Stay in the state.
        $self->{s_kwd} .= chr $self->{nc};
        !!!next-input-character;
        redo A;
      } elsif ((length $self->{s_kwd}) == 5 and
               ($self->{nc} == 0x0043 or # C
                $self->{nc} == 0x0063)) { # c
        !!!cp (168);
        $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
        !!!next-input-character;
        redo A;
      } else {
        !!!cp (169);
        !!!parse-error (type => 'string after DOCTYPE name',
                        line => $self->{line_prev},
                        column => $self->{column_prev} + 1 - length $self->{s_kwd});
        $self->{ct}->{quirks} = 1;

        $self->{state} = BOGUS_DOCTYPE_STATE;
        ## Reconsume.
        redo A;
      }
    } elsif ($self->{state} == SYSTEM_STATE) {
      ## ASCII case-insensitive
      if ($self->{nc} == [
            undef, 
            0x0059, # Y
            0x0053, # S
            0x0054, # T
            0x0045, # E
          ]->[length $self->{s_kwd}] or
          $self->{nc} == [
            undef, 
            0x0079, # y
            0x0073, # s
            0x0074, # t
            0x0065, # e
          ]->[length $self->{s_kwd}]) {
        !!!cp (170);
        ## Stay in the state.
        $self->{s_kwd} .= chr $self->{nc};
        !!!next-input-character;
        redo A;
      } elsif ((length $self->{s_kwd}) == 5 and
               ($self->{nc} == 0x004D or # M
                $self->{nc} == 0x006D)) { # m
        !!!cp (171);
        $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
        !!!next-input-character;
        redo A;
      } else {
        !!!cp (172);
        !!!parse-error (type => 'string after DOCTYPE name',
                        line => $self->{line_prev},
                        column => $self->{column_prev} + 1 - length $self->{s_kwd});
        $self->{ct}->{quirks} = 1;

        $self->{state} = BOGUS_DOCTYPE_STATE;
        ## Reconsume.
        redo A;
      }
    } elsif ($self->{state} == BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
      if ($is_space->{$self->{nc}}) {
        !!!cp (181);
        ## Stay in the state
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} eq 0x0022) { # "
        !!!cp (182);
        $self->{ct}->{pubid} = ''; # DOCTYPE
        $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} eq 0x0027) { # '
        !!!cp (183);
        $self->{ct}->{pubid} = ''; # DOCTYPE
        $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} eq 0x003E) { # >
        !!!cp (184);
        !!!parse-error (type => 'no PUBLIC literal');

        $self->{state} = DATA_STATE;
        !!!next-input-character;

        $self->{ct}->{quirks} = 1;
        !!!emit ($self->{ct}); # DOCTYPE

        redo A;
      } elsif ($self->{nc} == -1) {
        !!!cp (185);
        !!!parse-error (type => 'unclosed DOCTYPE');

        $self->{state} = DATA_STATE;
        ## reconsume

        $self->{ct}->{quirks} = 1;
        !!!emit ($self->{ct}); # DOCTYPE

        redo A;
      } else {
        !!!cp (186);
        !!!parse-error (type => 'string after PUBLIC');
        $self->{ct}->{quirks} = 1;

        $self->{state} = BOGUS_DOCTYPE_STATE;
        !!!next-input-character;
        redo A;
      }
    } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE) {
      if ($self->{nc} == 0x0022) { # "
        !!!cp (187);
        $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x003E) { # >
        !!!cp (188);
        !!!parse-error (type => 'unclosed PUBLIC literal');

        $self->{state} = DATA_STATE;
        !!!next-input-character;

        $self->{ct}->{quirks} = 1;
        !!!emit ($self->{ct}); # DOCTYPE

        redo A;
      } elsif ($self->{nc} == -1) {
        !!!cp (189);
        !!!parse-error (type => 'unclosed PUBLIC literal');

        $self->{state} = DATA_STATE;
        ## reconsume

        $self->{ct}->{quirks} = 1;
        !!!emit ($self->{ct}); # DOCTYPE

        redo A;
      } else {
        !!!cp (190);
        $self->{ct}->{pubid} # DOCTYPE
            .= chr $self->{nc};
        $self->{read_until}->($self->{ct}->{pubid}, q[">],
                              length $self->{ct}->{pubid});

        ## Stay in the state
        !!!next-input-character;
        redo A;
      }
    } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE) {
      if ($self->{nc} == 0x0027) { # '
        !!!cp (191);
        $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x003E) { # >
        !!!cp (192);
        !!!parse-error (type => 'unclosed PUBLIC literal');

        $self->{state} = DATA_STATE;
        !!!next-input-character;

        $self->{ct}->{quirks} = 1;
        !!!emit ($self->{ct}); # DOCTYPE

        redo A;
      } elsif ($self->{nc} == -1) {
        !!!cp (193);
        !!!parse-error (type => 'unclosed PUBLIC literal');

        $self->{state} = DATA_STATE;
        ## reconsume

        $self->{ct}->{quirks} = 1;
        !!!emit ($self->{ct}); # DOCTYPE

        redo A;
      } else {
        !!!cp (194);
        $self->{ct}->{pubid} # DOCTYPE
            .= chr $self->{nc};
        $self->{read_until}->($self->{ct}->{pubid}, q['>],
                              length $self->{ct}->{pubid});

        ## Stay in the state
        !!!next-input-character;
        redo A;
      }
    } elsif ($self->{state} == AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
      if ($is_space->{$self->{nc}}) {
        !!!cp (195);
        ## Stay in the state
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x0022) { # "
        !!!cp (196);
        $self->{ct}->{sysid} = ''; # DOCTYPE
        $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x0027) { # '
        !!!cp (197);
        $self->{ct}->{sysid} = ''; # DOCTYPE
        $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x003E) { # >
        !!!cp (198);
        $self->{state} = DATA_STATE;
        !!!next-input-character;

        !!!emit ($self->{ct}); # DOCTYPE

        redo A;
      } elsif ($self->{nc} == -1) {
        !!!cp (199);
        !!!parse-error (type => 'unclosed DOCTYPE');

        $self->{state} = DATA_STATE;
        ## reconsume

        $self->{ct}->{quirks} = 1;
        !!!emit ($self->{ct}); # DOCTYPE

        redo A;
      } else {
        !!!cp (200);
        !!!parse-error (type => 'string after PUBLIC literal');
        $self->{ct}->{quirks} = 1;

        $self->{state} = BOGUS_DOCTYPE_STATE;
        !!!next-input-character;
        redo A;
      }
    } elsif ($self->{state} == BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
      if ($is_space->{$self->{nc}}) {
        !!!cp (201);
        ## Stay in the state
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x0022) { # "
        !!!cp (202);
        $self->{ct}->{sysid} = ''; # DOCTYPE
        $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x0027) { # '
        !!!cp (203);
        $self->{ct}->{sysid} = ''; # DOCTYPE
        $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x003E) { # >
        !!!cp (204);
        !!!parse-error (type => 'no SYSTEM literal');
        $self->{state} = DATA_STATE;
        !!!next-input-character;

        $self->{ct}->{quirks} = 1;
        !!!emit ($self->{ct}); # DOCTYPE

        redo A;
      } elsif ($self->{nc} == -1) {
        !!!cp (205);
        !!!parse-error (type => 'unclosed DOCTYPE');

        $self->{state} = DATA_STATE;
        ## reconsume

        $self->{ct}->{quirks} = 1;
        !!!emit ($self->{ct}); # DOCTYPE

        redo A;
      } else {
        !!!cp (206);
        !!!parse-error (type => 'string after SYSTEM');
        $self->{ct}->{quirks} = 1;

        $self->{state} = BOGUS_DOCTYPE_STATE;
        !!!next-input-character;
        redo A;
      }
    } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE) {
      if ($self->{nc} == 0x0022) { # "
        !!!cp (207);
        $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x003E) { # >
        !!!cp (208);
        !!!parse-error (type => 'unclosed SYSTEM literal');

        $self->{state} = DATA_STATE;
        !!!next-input-character;

        $self->{ct}->{quirks} = 1;
        !!!emit ($self->{ct}); # DOCTYPE

        redo A;
      } elsif ($self->{nc} == -1) {
        !!!cp (209);
        !!!parse-error (type => 'unclosed SYSTEM literal');

        $self->{state} = DATA_STATE;
        ## reconsume

        $self->{ct}->{quirks} = 1;
        !!!emit ($self->{ct}); # DOCTYPE

        redo A;
      } else {
        !!!cp (210);
        $self->{ct}->{sysid} # DOCTYPE
            .= chr $self->{nc};
        $self->{read_until}->($self->{ct}->{sysid}, q[">],
                              length $self->{ct}->{sysid});

        ## Stay in the state
        !!!next-input-character;
        redo A;
      }
    } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE) {
      if ($self->{nc} == 0x0027) { # '
        !!!cp (211);
        $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x003E) { # >
        !!!cp (212);
        !!!parse-error (type => 'unclosed SYSTEM literal');

        $self->{state} = DATA_STATE;
        !!!next-input-character;

        $self->{ct}->{quirks} = 1;
        !!!emit ($self->{ct}); # DOCTYPE

        redo A;
      } elsif ($self->{nc} == -1) {
        !!!cp (213);
        !!!parse-error (type => 'unclosed SYSTEM literal');

        $self->{state} = DATA_STATE;
        ## reconsume

        $self->{ct}->{quirks} = 1;
        !!!emit ($self->{ct}); # DOCTYPE

        redo A;
      } else {
        !!!cp (214);
        $self->{ct}->{sysid} # DOCTYPE
            .= chr $self->{nc};
        $self->{read_until}->($self->{ct}->{sysid}, q['>],
                              length $self->{ct}->{sysid});

        ## Stay in the state
        !!!next-input-character;
        redo A;
      }
    } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
      if ($is_space->{$self->{nc}}) {
        !!!cp (215);
        ## Stay in the state
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x003E) { # >
        !!!cp (216);
        $self->{state} = DATA_STATE;
        !!!next-input-character;

        !!!emit ($self->{ct}); # DOCTYPE

        redo A;
      } elsif ($self->{nc} == -1) {
        !!!cp (217);
        !!!parse-error (type => 'unclosed DOCTYPE');
        $self->{state} = DATA_STATE;
        ## reconsume

        $self->{ct}->{quirks} = 1;
        !!!emit ($self->{ct}); # DOCTYPE

        redo A;
      } else {
        !!!cp (218);
        !!!parse-error (type => 'string after SYSTEM literal');
        #$self->{ct}->{quirks} = 1;

        $self->{state} = BOGUS_DOCTYPE_STATE;
        !!!next-input-character;
        redo A;
      }
    } elsif ($self->{state} == BOGUS_DOCTYPE_STATE) {
      if ($self->{nc} == 0x003E) { # >
        !!!cp (219);
        $self->{state} = DATA_STATE;
        !!!next-input-character;

        !!!emit ($self->{ct}); # DOCTYPE

        redo A;
      } elsif ($self->{nc} == -1) {
        !!!cp (220);
        !!!parse-error (type => 'unclosed DOCTYPE');
        $self->{state} = DATA_STATE;
        ## reconsume

        !!!emit ($self->{ct}); # DOCTYPE

        redo A;
      } else {
        !!!cp (221);
        my $s = '';
        $self->{read_until}->($s, q[>], 0);

        ## Stay in the state
        !!!next-input-character;
        redo A;
      }
    } elsif ($self->{state} == CDATA_SECTION_STATE) {
      ## NOTE: "CDATA section state" in the state is jointly implemented
      ## by three states, |CDATA_SECTION_STATE|, |CDATA_SECTION_MSE1_STATE|,
      ## and |CDATA_SECTION_MSE2_STATE|.
      
      if ($self->{nc} == 0x005D) { # ]
        !!!cp (221.1);
        $self->{state} = CDATA_SECTION_MSE1_STATE;
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == -1) {
        $self->{state} = DATA_STATE;
        !!!next-input-character;
        if (length $self->{ct}->{data}) { # character
          !!!cp (221.2);
          !!!emit ($self->{ct}); # character
        } else {
          !!!cp (221.3);
          ## No token to emit. $self->{ct} is discarded.
        }        
        redo A;
      } else {
        !!!cp (221.4);
        $self->{ct}->{data} .= chr $self->{nc};
        $self->{read_until}->($self->{ct}->{data},
                              q<]>,
                              length $self->{ct}->{data});

        ## Stay in the state.
        !!!next-input-character;
        redo A;
      }

      ## ISSUE: "text tokens" in spec.
    } elsif ($self->{state} == CDATA_SECTION_MSE1_STATE) {
      if ($self->{nc} == 0x005D) { # ]
        !!!cp (221.5);
        $self->{state} = CDATA_SECTION_MSE2_STATE;
        !!!next-input-character;
        redo A;
      } else {
        !!!cp (221.6);
        $self->{ct}->{data} .= ']';
        $self->{state} = CDATA_SECTION_STATE;
        ## Reconsume.
        redo A;
      }
    } elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) {
      if ($self->{nc} == 0x003E) { # >
        $self->{state} = DATA_STATE;
        !!!next-input-character;
        if (length $self->{ct}->{data}) { # character
          !!!cp (221.7);
          !!!emit ($self->{ct}); # character
        } else {
          !!!cp (221.8);
          ## No token to emit. $self->{ct} is discarded.
        }
        redo A;
      } elsif ($self->{nc} == 0x005D) { # ]
        !!!cp (221.9); # character
        $self->{ct}->{data} .= ']'; ## Add first "]" of "]]]".
        ## Stay in the state.
        !!!next-input-character;
        redo A;
      } else {
        !!!cp (221.11);
        $self->{ct}->{data} .= ']]'; # character
        $self->{state} = CDATA_SECTION_STATE;
        ## Reconsume.
        redo A;
      }
    } elsif ($self->{state} == ENTITY_STATE) {
      if ($is_space->{$self->{nc}} or
          {
            0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
            $self->{entity_add} => 1,
          }->{$self->{nc}}) {
        !!!cp (1001);
        ## Don't consume
        ## No error
        ## Return nothing.
        #
      } elsif ($self->{nc} == 0x0023) { # #
        !!!cp (999);
        $self->{state} = ENTITY_HASH_STATE;
        $self->{s_kwd} = '#';
        !!!next-input-character;
        redo A;
      } elsif ((0x0041 <= $self->{nc} and
                $self->{nc} <= 0x005A) or # A..Z
               (0x0061 <= $self->{nc} and
                $self->{nc} <= 0x007A)) { # a..z
        !!!cp (998);
        require Whatpm::_NamedEntityList;
        $self->{state} = ENTITY_NAME_STATE;
        $self->{s_kwd} = chr $self->{nc};
        $self->{entity__value} = $self->{s_kwd};
        $self->{entity__match} = 0;
        !!!next-input-character;
        redo A;
      } else {
        !!!cp (1027);
        !!!parse-error (type => 'bare ero');
        ## Return nothing.
        #
      }

      ## NOTE: No character is consumed by the "consume a character
      ## reference" algorithm.  In other word, there is an "&" character
      ## that does not introduce a character reference, which would be
      ## appended to the parent element or the attribute value in later
      ## process of the tokenizer.

      if ($self->{prev_state} == DATA_STATE) {
        !!!cp (997);
        $self->{state} = $self->{prev_state};
        ## Reconsume.
        !!!emit ({type => CHARACTER_TOKEN, data => '&',
                  line => $self->{line_prev},
                  column => $self->{column_prev},
                 });
        redo A;
      } else {
        !!!cp (996);
        $self->{ca}->{value} .= '&';
        $self->{state} = $self->{prev_state};
        ## Reconsume.
        redo A;
      }
    } elsif ($self->{state} == ENTITY_HASH_STATE) {
      if ($self->{nc} == 0x0078 or # x
          $self->{nc} == 0x0058) { # X
        !!!cp (995);
        $self->{state} = HEXREF_X_STATE;
        $self->{s_kwd} .= chr $self->{nc};
        !!!next-input-character;
        redo A;
      } elsif (0x0030 <= $self->{nc} and
               $self->{nc} <= 0x0039) { # 0..9
        !!!cp (994);
        $self->{state} = NCR_NUM_STATE;
        $self->{s_kwd} = $self->{nc} - 0x0030;
        !!!next-input-character;
        redo A;
      } else {
        !!!parse-error (type => 'bare nero',
                        line => $self->{line_prev},
                        column => $self->{column_prev} - 1);

        ## NOTE: According to the spec algorithm, nothing is returned,
        ## and then "&#" is appended to the parent element or the attribute 
        ## value in the later processing.

        if ($self->{prev_state} == DATA_STATE) {
          !!!cp (1019);
          $self->{state} = $self->{prev_state};
          ## Reconsume.
          !!!emit ({type => CHARACTER_TOKEN,
                    data => '&#',
                    line => $self->{line_prev},
                    column => $self->{column_prev} - 1,
                   });
          redo A;
        } else {
          !!!cp (993);
          $self->{ca}->{value} .= '&#';
          $self->{state} = $self->{prev_state};
          ## Reconsume.
          redo A;
        }
      }
    } elsif ($self->{state} == NCR_NUM_STATE) {
      if (0x0030 <= $self->{nc} and 
          $self->{nc} <= 0x0039) { # 0..9
        !!!cp (1012);
        $self->{s_kwd} *= 10;
        $self->{s_kwd} += $self->{nc} - 0x0030;
        
        ## Stay in the state.
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x003B) { # ;
        !!!cp (1013);
        !!!next-input-character;
        #
      } else {
        !!!cp (1014);
        !!!parse-error (type => 'no refc');
        ## Reconsume.
        #
      }

      my $code = $self->{s_kwd};
      my $l = $self->{line_prev};
      my $c = $self->{column_prev};
      if ($charref_map->{$code}) {
        !!!cp (1015);
        !!!parse-error (type => 'invalid character reference',
                        text => (sprintf 'U+%04X', $code),
                        line => $l, column => $c);
        $code = $charref_map->{$code};
      } elsif ($code > 0x10FFFF) {
        !!!cp (1016);
        !!!parse-error (type => 'invalid character reference',
                        text => (sprintf 'U-%08X', $code),
                        line => $l, column => $c);
        $code = 0xFFFD;
      }

      if ($self->{prev_state} == DATA_STATE) {
        !!!cp (992);
        $self->{state} = $self->{prev_state};
        ## Reconsume.
        !!!emit ({type => CHARACTER_TOKEN, data => chr $code,
                  line => $l, column => $c,
                 });
        redo A;
      } else {
        !!!cp (991);
        $self->{ca}->{value} .= chr $code;
        $self->{ca}->{has_reference} = 1;
        $self->{state} = $self->{prev_state};
        ## Reconsume.
        redo A;
      }
    } elsif ($self->{state} == HEXREF_X_STATE) {
      if ((0x0030 <= $self->{nc} and $self->{nc} <= 0x0039) or
          (0x0041 <= $self->{nc} and $self->{nc} <= 0x0046) or
          (0x0061 <= $self->{nc} and $self->{nc} <= 0x0066)) {
        # 0..9, A..F, a..f
        !!!cp (990);
        $self->{state} = HEXREF_HEX_STATE;
        $self->{s_kwd} = 0;
        ## Reconsume.
        redo A;
      } else {
        !!!parse-error (type => 'bare hcro',
                        line => $self->{line_prev},
                        column => $self->{column_prev} - 2);

        ## NOTE: According to the spec algorithm, nothing is returned,
        ## and then "&#" followed by "X" or "x" is appended to the parent
        ## element or the attribute value in the later processing.

        if ($self->{prev_state} == DATA_STATE) {
          !!!cp (1005);
          $self->{state} = $self->{prev_state};
          ## Reconsume.
          !!!emit ({type => CHARACTER_TOKEN,
                    data => '&' . $self->{s_kwd},
                    line => $self->{line_prev},
                    column => $self->{column_prev} - length $self->{s_kwd},
                   });
          redo A;
        } else {
          !!!cp (989);
          $self->{ca}->{value} .= '&' . $self->{s_kwd};
          $self->{state} = $self->{prev_state};
          ## Reconsume.
          redo A;
        }
      }
    } elsif ($self->{state} == HEXREF_HEX_STATE) {
      if (0x0030 <= $self->{nc} and $self->{nc} <= 0x0039) {
        # 0..9
        !!!cp (1002);
        $self->{s_kwd} *= 0x10;
        $self->{s_kwd} += $self->{nc} - 0x0030;
        ## Stay in the state.
        !!!next-input-character;
        redo A;
      } elsif (0x0061 <= $self->{nc} and
               $self->{nc} <= 0x0066) { # a..f
        !!!cp (1003);
        $self->{s_kwd} *= 0x10;
        $self->{s_kwd} += $self->{nc} - 0x0060 + 9;
        ## Stay in the state.
        !!!next-input-character;
        redo A;
      } elsif (0x0041 <= $self->{nc} and
               $self->{nc} <= 0x0046) { # A..F
        !!!cp (1004);
        $self->{s_kwd} *= 0x10;
        $self->{s_kwd} += $self->{nc} - 0x0040 + 9;
        ## Stay in the state.
        !!!next-input-character;
        redo A;
      } elsif ($self->{nc} == 0x003B) { # ;
        !!!cp (1006);
        !!!next-input-character;
        #
      } else {
        !!!cp (1007);
        !!!parse-error (type => 'no refc',
                        line => $self->{line},
                        column => $self->{column});
        ## Reconsume.
        #
      }

      my $code = $self->{s_kwd};
      my $l = $self->{line_prev};
      my $c = $self->{column_prev};
      if ($charref_map->{$code}) {
        !!!cp (1008);
        !!!parse-error (type => 'invalid character reference',
                        text => (sprintf 'U+%04X', $code),
                        line => $l, column => $c);
        $code = $charref_map->{$code};
      } elsif ($code > 0x10FFFF) {
        !!!cp (1009);
        !!!parse-error (type => 'invalid character reference',
                        text => (sprintf 'U-%08X', $code),
                        line => $l, column => $c);
        $code = 0xFFFD;
      }

      if ($self->{prev_state} == DATA_STATE) {
        !!!cp (988);
        $self->{state} = $self->{prev_state};
        ## Reconsume.
        !!!emit ({type => CHARACTER_TOKEN, data => chr $code,
                  line => $l, column => $c,
                 });
        redo A;
      } else {
        !!!cp (987);
        $self->{ca}->{value} .= chr $code;
        $self->{ca}->{has_reference} = 1;
        $self->{state} = $self->{prev_state};
        ## Reconsume.
        redo A;
      }
    } elsif ($self->{state} == ENTITY_NAME_STATE) {
      if (length $self->{s_kwd} < 30 and
          ## NOTE: Some number greater than the maximum length of entity name
          ((0x0041 <= $self->{nc} and # a
            $self->{nc} <= 0x005A) or # x
           (0x0061 <= $self->{nc} and # a
            $self->{nc} <= 0x007A) or # z
           (0x0030 <= $self->{nc} and # 0
            $self->{nc} <= 0x0039) or # 9
           $self->{nc} == 0x003B)) { # ;
        our $EntityChar;
        $self->{s_kwd} .= chr $self->{nc};
        if (defined $EntityChar->{$self->{s_kwd}}) {
          if ($self->{nc} == 0x003B) { # ;
            !!!cp (1020);
            $self->{entity__value} = $EntityChar->{$self->{s_kwd}};
            $self->{entity__match} = 1;
            !!!next-input-character;
            #
          } else {
            !!!cp (1021);
            $self->{entity__value} = $EntityChar->{$self->{s_kwd}};
            $self->{entity__match} = -1;
            ## Stay in the state.
            !!!next-input-character;
            redo A;
          }
        } else {
          !!!cp (1022);
          $self->{entity__value} .= chr $self->{nc};
          $self->{entity__match} *= 2;
          ## Stay in the state.
          !!!next-input-character;
          redo A;
        }
      }

      my $data;
      my $has_ref;
      if ($self->{entity__match} > 0) {
        !!!cp (1023);
        $data = $self->{entity__value};
        $has_ref = 1;
        #
      } elsif ($self->{entity__match} < 0) {
        !!!parse-error (type => 'no refc');
        if ($self->{prev_state} != DATA_STATE and # in attribute
            $self->{entity__match} < -1) {
          !!!cp (1024);
          $data = '&' . $self->{s_kwd};
          #
        } else {
          !!!cp (1025);
          $data = $self->{entity__value};
          $has_ref = 1;
          #
        }
      } else {
        !!!cp (1026);
        !!!parse-error (type => 'bare ero',
                        line => $self->{line_prev},
                        column => $self->{column_prev} - length $self->{s_kwd});
        $data = '&' . $self->{s_kwd};
        #
      }
  
      ## NOTE: In these cases, when a character reference is found,
      ## it is consumed and a character token is returned, or, otherwise,
      ## nothing is consumed and returned, according to the spec algorithm.
      ## In this implementation, anything that has been examined by the
      ## tokenizer is appended to the parent element or the attribute value
      ## as string, either literal string when no character reference or
      ## entity-replaced string otherwise, in this stage, since any characters
      ## that would not be consumed are appended in the data state or in an
      ## appropriate attribute value state anyway.
 
      if ($self->{prev_state} == DATA_STATE) {
        !!!cp (986);
        $self->{state} = $self->{prev_state};
        ## Reconsume.
        !!!emit ({type => CHARACTER_TOKEN,
                  data => $data,
                  line => $self->{line_prev},
                  column => $self->{column_prev} + 1 - length $self->{s_kwd},
                 });
        redo A;
      } else {
        !!!cp (985);
        $self->{ca}->{value} .= $data;
        $self->{ca}->{has_reference} = 1 if $has_ref;
        $self->{state} = $self->{prev_state};
        ## Reconsume.
        redo A;
      }
    } else {
      die "$0: $self->{state}: Unknown state";
    }
  } # A   

  die "$0: _get_next_token: unexpected case";
} # _get_next_token

## Prepares the document object for the tree construction stage.
##
## NOTE: $self->{document} MUST be set before this method is invoked.
sub _initialize_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};

  ## Relax DOM error checking while the tree is being built.
  $doc->strict_error_checking (0);
  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on

  ## Flag the document as an HTML document. # MUST
  $doc->manakai_is_html (1);

  ## Record the initial source position (line 1, column 1) as user data.
  $doc->set_user_data (manakai_source_line => 1);
  $doc->set_user_data (manakai_source_column => 1);
} # _initialize_tree_constructor

## Restores the document object's strict error checking once the tree
## construction stage has finished.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};
  ## TODO: Turn mutation events on
  $doc->strict_error_checking (1);
} # _terminate_tree_constructor

## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?

{ # tree construction stage
  my $token;

## Entry point of the tree construction stage: reads the first token,
## resets the per-document tree construction state, and then runs the
## insertion modes in order.
sub _construct_tree ($) {
  my $self = shift;

  ## NOTE: The point at which an interactive UA makes $self->{document}
  ## available to the user, or at which it starts accepting user input,
  ## is not defined.

  ## NOTE: "Append a character" means: collect the character together
  ## with all consecutive characters that follow it, and insert a single
  ## Text node whose data is the concatenation of them. # MUST

  ## The first token is obtained before the state below is reset.
  !!!next-token;

  ## Reset the tree construction state.
  $self->{form_element} = undef;
  $self->{head_element} = undef;
  $self->{open_elements} = [];
  $self->{inner_html_node} = undef;

  ## NOTE: The "initial" insertion mode.
  $self->_tree_construction_initial; # MUST

  ## NOTE: The "before html" insertion mode.
  $self->_tree_construction_root_element;

  ## NOTE: The "before head" insertion mode and all later ones.
  $self->{insertion_mode} = BEFORE_HEAD_IM;
  $self->_tree_construction_main;
} # _construct_tree

## Implements the "initial" insertion mode of the tree construction
## stage.
##
## The current token (the lexical |$token| shared by the tree
## construction subroutines) is examined:
##
##   - DOCTYPE token: a DocumentType node is created and appended to
##     $self->{document}, the document's compat mode is chosen from the
##     token's name, public identifier, and system identifier, and the
##     next token is fetched.
##   - Start tag, end tag, or end-of-file token: a 'no DOCTYPE' parse
##     error is reported, the document is put into quirks mode, and the
##     token is left to be reprocessed by the caller.
##   - Character token: leading white space is dropped.  If nothing
##     else remains the next token is examined; otherwise the rest is
##     handled like the missing DOCTYPE case above.
##   - Comment token: a Comment node is appended to the document and
##     the next token is examined.
##
## The subroutine returns when the parser should switch to the
## "before html" insertion mode.
sub _tree_construction_initial ($) {
  my $self = shift;

  ## NOTE: "initial" insertion mode

  INITIAL: {
    if ($token->{type} == DOCTYPE_TOKEN) {
      ## NOTE: Conformance checkers MAY, instead of reporting "not HTML5"
      ## error, switch to a conformance checking mode for another 
      ## language.
      my $doctype_name = $token->{name};
      $doctype_name = '' unless defined $doctype_name;
      $doctype_name =~ tr/a-z/A-Z/; # ASCII case-insensitive
      if (not defined $token->{name} or # <!DOCTYPE>
          defined $token->{sysid}) {
        !!!cp ('t1');
        !!!parse-error (type => 'not HTML5', token => $token);
      } elsif ($doctype_name ne 'HTML') {
        !!!cp ('t2');
        !!!parse-error (type => 'not HTML5', token => $token);
      } elsif (defined $token->{pubid}) {
        if ($token->{pubid} eq 'XSLT-compat') {
          !!!cp ('t1.2');
          !!!parse-error (type => 'XSLT-compat', token => $token,
                          level => $self->{level}->{should});
        } else {
          !!!parse-error (type => 'not HTML5', token => $token);
        }
      } else {
        !!!cp ('t3');
        #
      }
      
      my $doctype = $self->{document}->create_document_type_definition
        ($token->{name}); ## ISSUE: If name is missing (e.g. <!DOCTYPE>)?
      ## NOTE: Default value for both |public_id| and |system_id| attributes
      ## are empty strings, so that we don't set any value in missing cases.
      $doctype->public_id ($token->{pubid}) if defined $token->{pubid};
      $doctype->system_id ($token->{sysid}) if defined $token->{sysid};
      ## NOTE: Other DocumentType attributes are null or empty lists.
      ## ISSUE: internalSubset = null??
      $self->{document}->append_child ($doctype);
      
      if ($token->{quirks} or $doctype_name ne 'HTML') {
        !!!cp ('t4');
        $self->{document}->manakai_compat_mode ('quirks');
      } elsif (defined $token->{pubid}) {
        ## Public identifiers are compared ASCII case-insensitively, so
        ## that the identifier is uppercased and compared with uppercase
        ## literals.
        my $pubid = $token->{pubid};
        $pubid =~ tr/a-z/A-Z/;
        ## Public identifier prefixes that trigger the quirks mode.
        my $prefix = [
          "+//SILMARIL//DTD HTML PRO V0R11 19970101//",
          "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
          "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
          "-//IETF//DTD HTML 2.0 LEVEL 1//",
          "-//IETF//DTD HTML 2.0 LEVEL 2//",
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//",
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//",
          "-//IETF//DTD HTML 2.0 STRICT//",
          "-//IETF//DTD HTML 2.0//",
          "-//IETF//DTD HTML 2.1E//",
          "-//IETF//DTD HTML 3.0//",
          "-//IETF//DTD HTML 3.2 FINAL//",
          "-//IETF//DTD HTML 3.2//",
          "-//IETF//DTD HTML 3//",
          "-//IETF//DTD HTML LEVEL 0//",
          "-//IETF//DTD HTML LEVEL 1//",
          "-//IETF//DTD HTML LEVEL 2//",
          "-//IETF//DTD HTML LEVEL 3//",
          "-//IETF//DTD HTML STRICT LEVEL 0//",
          "-//IETF//DTD HTML STRICT LEVEL 1//",
          "-//IETF//DTD HTML STRICT LEVEL 2//",
          "-//IETF//DTD HTML STRICT LEVEL 3//",
          "-//IETF//DTD HTML STRICT//",
          "-//IETF//DTD HTML//",
          "-//METRIUS//DTD METRIUS PRESENTATIONAL//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//",
          "-//NETSCAPE COMM. CORP.//DTD HTML//",
          "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED RELAXED 1.0//",
          "-//SOFTQUAD SOFTWARE//DTD HOTMETAL PRO 6.0::19990601::EXTENSIONS TO HTML 4.0//",
          "-//SOFTQUAD//DTD HOTMETAL PRO 4.0::19971010::EXTENSIONS TO HTML 4.0//",
          "-//SPYGLASS//DTD HTML 2.0 EXTENDED//",
          "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//",
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//",
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//",
          "-//W3C//DTD HTML 3 1995-03-24//",
          "-//W3C//DTD HTML 3.2 DRAFT//",
          "-//W3C//DTD HTML 3.2 FINAL//",
          "-//W3C//DTD HTML 3.2//",
          "-//W3C//DTD HTML 3.2S DRAFT//",
          "-//W3C//DTD HTML 4.0 FRAMESET//",
          "-//W3C//DTD HTML 4.0 TRANSITIONAL//",
          "-//W3C//DTD HTML EXPERIMENTAL 19960712//",
          "-//W3C//DTD HTML EXPERIMENTAL 970421//",
          "-//W3C//DTD W3 HTML//",
          "-//W3O//DTD W3 HTML 3.0//",
          "-//WEBTECHS//DTD MOZILLA HTML 2.0//",
          "-//WEBTECHS//DTD MOZILLA HTML//",
        ]; # $prefix
        my $match;
        for my $quirks_prefix (@$prefix) {
          ## NOTE: It is the public identifier, not the list itself,
          ## that has to be tested against each prefix.
          if (substr ($pubid, 0, length $quirks_prefix) eq $quirks_prefix) {
            $match = 1;
            last;
          }
        }
        if ($match or
            $pubid eq "-//W3O//DTD W3 HTML STRICT 3.0//EN//" or
            $pubid eq "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" or
            $pubid eq "HTML") {
          !!!cp ('t5');
          $self->{document}->manakai_compat_mode ('quirks');
        } elsif ($pubid =~ m[^-//W3C//DTD HTML 4\.01 FRAMESET//] or
                 $pubid =~ m[^-//W3C//DTD HTML 4\.01 TRANSITIONAL//]) {
          ## NOTE: For these public identifiers, the document is in the
          ## limited quirks mode if the system identifier is specified,
          ## or in the quirks mode if the system identifier is missing.
          if (defined $token->{sysid}) {
            !!!cp ('t6');
            $self->{document}->manakai_compat_mode ('limited quirks');
          } else {
            !!!cp ('t7');
            $self->{document}->manakai_compat_mode ('quirks');
          }
        } elsif ($pubid =~ m[^-//W3C//DTD XHTML 1\.0 FRAMESET//] or
                 $pubid =~ m[^-//W3C//DTD XHTML 1\.0 TRANSITIONAL//]) {
          !!!cp ('t8');
          $self->{document}->manakai_compat_mode ('limited quirks');
        } else {
          !!!cp ('t9');
        }
      } else {
        !!!cp ('t10');
      }
      if (defined $token->{sysid}) {
        ## System identifiers are also compared ASCII
        ## case-insensitively (lowercased here).
        my $sysid = $token->{sysid};
        $sysid =~ tr/A-Z/a-z/;
        if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
          ## NOTE: Ensure that |PUBLIC "(limited quirks)" "(quirks)"| is 
          ## marked as quirks.
          $self->{document}->manakai_compat_mode ('quirks');
          !!!cp ('t11');
        } else {
          !!!cp ('t12');
        }
      } else {
        !!!cp ('t13');
      }
      
      ## Go to the "before html" insertion mode.
      !!!next-token;
      return;
    } elsif ({
              START_TAG_TOKEN, 1,
              END_TAG_TOKEN, 1,
              END_OF_FILE_TOKEN, 1,
             }->{$token->{type}}) {
      !!!cp ('t14');
      !!!parse-error (type => 'no DOCTYPE', token => $token);
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the "before html" insertion mode.
      ## reprocess
      !!!ack-later;
      return;
    } elsif ($token->{type} == CHARACTER_TOKEN) {
      if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
        ## Ignore the token

        unless (length $token->{data}) {
          !!!cp ('t15');
          ## Stay in the insertion mode.
          !!!next-token;
          redo INITIAL;
        } else {
          !!!cp ('t16');
        }
      } else {
        !!!cp ('t17');
      }

      !!!parse-error (type => 'no DOCTYPE', token => $token);
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the "before html" insertion mode.
      ## reprocess
      return;
    } elsif ($token->{type} == COMMENT_TOKEN) {
      !!!cp ('t18');
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);
      
      ## Stay in the insertion mode.
      !!!next-token;
      redo INITIAL;
    } else {
      die "$0: $token->{type}: Unknown token type";
    }
  } # INITIAL

  die "$0: _tree_construction_initial: This should be never reached";
} # _tree_construction_initial

## Tree construction for the "before html" insertion mode.
##
## Processes the current token (|$token|) and, where a token is ignored,
## the tokens that follow it, until a root |html| element has been
## created, appended to the document and pushed onto the stack of open
## elements.  It then returns so that processing can continue in the
## "before head" insertion mode.
##
##   - DOCTYPE tokens are reported as parse errors and ignored.
##   - Comment tokens are appended to the document.
##   - Leading white space in character tokens is dropped.
##   - An |html| start tag creates the root element from the token
##     itself; any other token makes the root element implied and is
##     left to be reprocessed.
##
## The |application_cache_selection| callback is invoked exactly once
## per root element: with the value of the |manifest| attribute if the
## |html| start tag has one, with |undef| otherwise.
sub _tree_construction_root_element ($) {
  my $self = shift;

  ## NOTE: "before html" insertion mode.
  
  B: {
      if ($token->{type} == DOCTYPE_TOKEN) {
        !!!cp ('t19');
        !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
        ## Ignore the token
        ## Stay in the insertion mode.
        !!!next-token;
        redo B;
      } elsif ($token->{type} == COMMENT_TOKEN) {
        !!!cp ('t20');
        my $comment = $self->{document}->create_comment ($token->{data});
        $self->{document}->append_child ($comment);
        ## Stay in the insertion mode.
        !!!next-token;
        redo B;
      } elsif ($token->{type} == CHARACTER_TOKEN) {
        if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
          ## Ignore the token.

          unless (length $token->{data}) {
            !!!cp ('t21');
            ## Stay in the insertion mode.
            !!!next-token;
            redo B;
          } else {
            !!!cp ('t22');
          }
        } else {
          !!!cp ('t23');
        }

        ## NOTE: The application cache selection algorithm is run (with
        ## no manifest) by the common code after this token dispatch.
        ## It must not be run here as well, or the callback would be
        ## invoked twice for a single implied root element.

        #
      } elsif ($token->{type} == START_TAG_TOKEN) {
        if ($token->{tag_name} eq 'html') {
          my $root_element;
          !!!create-element ($root_element, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
          $self->{document}->append_child ($root_element);
          push @{$self->{open_elements}},
              [$root_element, $el_category->{html}];

          if ($token->{attributes}->{manifest}) {
            !!!cp ('t24');
            $self->{application_cache_selection}
                ->($token->{attributes}->{manifest}->{value});
            ## ISSUE: Spec is unclear on relative references.
            ## According to Hixie (#whatwg 2008-03-19), it should be
            ## resolved against the base URI of the document in HTML
            ## or xml:base of the element in XHTML.
          } else {
            !!!cp ('t25');
            $self->{application_cache_selection}->(undef);
          }

          !!!nack ('t25c');

          !!!next-token;
          return; ## Go to the "before head" insertion mode.
        } else {
          !!!cp ('t25.1');
          #
        }
      } elsif ({
                END_TAG_TOKEN, 1,
                END_OF_FILE_TOKEN, 1,
               }->{$token->{type}}) {
        !!!cp ('t26');
        #
      } else {
        die "$0: $token->{type}: Unknown token type";
      }

    ## Any token that reaches this point implies the root element: create
    ## an attribute-less |html| element and leave the token to be
    ## reprocessed in the next insertion mode.
    my $root_element;
    !!!create-element ($root_element, $HTML_NS, 'html',, $token);
    $self->{document}->append_child ($root_element);
    push @{$self->{open_elements}}, [$root_element, $el_category->{html}];

    $self->{application_cache_selection}->(undef);

    ## NOTE: Reprocess the token.
    !!!ack-later;
    return; ## Go to the "before head" insertion mode.

    ## ISSUE: There is an issue in the spec
  } # B

  die "$0: _tree_construction_root_element: This should never be reached";
} # _tree_construction_root_element

## Resets |$self->{insertion_mode}| according to the stack of open
## elements ("reset the insertion mode appropriately").
##
## The stack is walked from the current node (the last entry) towards
## the first entry.  For each node the insertion mode is chosen from the
## node's category flags or its local name; the walk stops at the first
## node that determines a mode.  When the first entry of the stack is
## reached, |$self->{inner_html_node}| is used in its place; if no such
## node is defined the method dies, since that case is not expected to
## occur.
##
## Returns nothing; the only effect is the update of
## |$self->{insertion_mode}|.
sub _reset_insertion_mode ($) {
  my $self = shift;

    ## Insertion modes selected purely by the local name of the node.
    ## Built once per call rather than once per visited node.
    my $mode_by_local_name = {
      select => IN_SELECT_IM,
      ## NOTE: |option| and |optgroup| do not set
      ## insertion mode to "in select" by themselves.
      tr => IN_ROW_IM,
      tbody => IN_TABLE_BODY_IM,
      thead => IN_TABLE_BODY_IM,
      tfoot => IN_TABLE_BODY_IM,
      caption => IN_CAPTION_IM,
      colgroup => IN_COLUMN_GROUP_IM,
      table => IN_TABLE_IM,
      head => IN_BODY_IM, # not in head!
      body => IN_BODY_IM,
      frameset => IN_FRAMESET_IM,
    };

    ## Step 1
    my $last;
    
    ## Step 2
    my $i = -1;
    my $node = $self->{open_elements}->[$i];
    
    ## Step 3
    S3: {
      if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
        $last = 1;
        if (defined $self->{inner_html_node}) {
          !!!cp ('t28');
          $node = $self->{inner_html_node};
        } else {
          die "_reset_insertion_mode: t27";
        }
      }
      
      ## Step 4..14
      my $new_mode;
      if ($node->[1] & FOREIGN_EL) {
        !!!cp ('t28.1');
        ## NOTE: Strictly speaking, the line below only applies to MathML
        ## and SVG elements.  Currently the HTML syntax supports only
        ## MathML and SVG elements as foreigners.
        $new_mode = IN_BODY_IM | IN_FOREIGN_CONTENT_IM;
      } elsif ($node->[1] & TABLE_CELL_EL) {
        if ($last) {
          !!!cp ('t28.2');
          #
        } else {
          !!!cp ('t28.3');
          $new_mode = IN_CELL_IM;
        }
      } else {
        !!!cp ('t28.4');
        $new_mode = $mode_by_local_name->{$node->[0]->manakai_local_name};
      }

      ## NOTE: Assign and return as two separate statements so that the
      ## return does not depend on the numeric value of the mode being
      ## true.
      if (defined $new_mode) {
        $self->{insertion_mode} = $new_mode;
        return;
      }
      
      ## Step 15
      if ($node->[1] & HTML_EL) {
        unless (defined $self->{head_element}) {
          !!!cp ('t29');
          $self->{insertion_mode} = BEFORE_HEAD_IM;
        } else {
          ## ISSUE: Can this state be reached?
          !!!cp ('t30');
          $self->{insertion_mode} = AFTER_HEAD_IM;
        }
        return;
      } else {
        !!!cp ('t31');
      }
      
      ## Step 16
      if ($last) {
        $self->{insertion_mode} = IN_BODY_IM;
        return;
      }
      
      ## Step 17
      $i--;
      $node = $self->{open_elements}->[$i];
      
      ## Step 18
      redo S3;
    } # S3

  die "$0: _reset_insertion_mode: This line should never be reached";
} # _reset_insertion_mode

sub _tree_construction_main ($) {
  my $self = shift;

  my $active_formatting_elements = [];

  ## Reconstructs the active formatting elements.
  ##
  ## Takes one argument, |$insert|: a code reference that is called with
  ## each newly cloned element node and is responsible for inserting it
  ## into the tree.
  ##
  ## Entries of |$active_formatting_elements| are array references whose
  ## first item is either an element node or the string |#marker|; the
  ## second item is copied unchanged into the entry created for a clone.
  ##
  ## Starting from the last entry of the list, the code walks backwards
  ## until it finds a marker, an entry that is still in the stack of open
  ## elements, or the beginning of the list.  It then walks forwards
  ## again, and for each entry it clones the element (shallowly), passes
  ## the clone to |$insert|, pushes it onto the stack of open elements,
  ## and replaces the list entry by the new stack entry.
  my $reconstruct_active_formatting_elements = sub { # MUST
    my $insert = shift;

    ## Step 1: Nothing to do if the list is empty.
    return unless @$active_formatting_elements;

    ## Step 3: Start from the last entry of the list.
    my $i = -1;
    my $entry = $active_formatting_elements->[$i];

    ## Step 2: Nothing to do if the last entry is a marker or is still in
    ## the stack of open elements.
    return if $entry->[0] eq '#marker';
    for (@{$self->{open_elements}}) {
      if ($entry->[0] eq $_->[0]) {
        !!!cp ('t32');
        return;
      }
    }
    
    ## Walk backwards (|$i| is a negative index) to find where
    ## reconstruction has to start.
    S4: {
      ## Step 4: The beginning of the list has been reached; reconstruct
      ## from the current entry.
      last S4 if $active_formatting_elements->[0]->[0] eq $entry->[0];

      ## Step 5: Move to the previous entry.
      $i--;
      $entry = $active_formatting_elements->[$i];

      ## Step 6: Keep walking backwards unless the entry is a marker or
      ## is in the stack of open elements.
      if ($entry->[0] eq '#marker') {
        !!!cp ('t33_1');
        #
      } else {
        my $in_open_elements;
        OE: for (@{$self->{open_elements}}) {
          if ($entry->[0] eq $_->[0]) {
            !!!cp ('t33');
            $in_open_elements = 1;
            last OE;
          }
        }
        if ($in_open_elements) {
          !!!cp ('t34');
          #
        } else {
          ## NOTE: <!DOCTYPE HTML><p><b><i><u></p> <p>X
          !!!cp ('t35');
          redo S4;
        }
      }

      ## Step 7: The entry found must not be reconstructed itself; step
      ## forward to the entry after it.
      $i++;
      $entry = $active_formatting_elements->[$i];
    } # S4

    ## Walk forwards, recreating each entry up to the end of the list.
    S7: {
      ## Step 8: Shallow clone of the element; the second item of the
      ## entry is reused as is.
      my $clone = [$entry->[0]->clone_node (0), $entry->[1]];
    
      ## Step 9: Insert the clone and make it the current node.
      $insert->($clone->[0]);
      push @{$self->{open_elements}}, $clone;
      
      ## Step 10: Replace the list entry by the entry just pushed.
      $active_formatting_elements->[$i] = $self->{open_elements}->[-1];

      ## Step 11: Continue with the next entry unless the clone is now
      ## the last entry of the list.
      unless ($clone->[0] eq $active_formatting_elements->[-1]->[0]) {
        !!!cp ('t36');
        ## Step 7'
        $i++;
        $entry = $active_formatting_elements->[$i];
        
        redo S7;
      }

      !!!cp ('t37');
    } # S7
  }; # $reconstruct_active_formatting_elements

  ## Clears the list of active formatting elements up to the last marker:
  ## the last |#marker| entry and every entry after it are removed.  The
  ## list is left untouched if it contains no marker.
  my $clear_up_to_marker = sub {
    my $index = $#$active_formatting_elements;
    while ($index >= 0) {
      if ($active_formatting_elements->[$index]->[0] eq '#marker') {
        !!!cp ('t38');
        ## Two-argument |splice| drops everything from |$index| onwards,
        ## the marker included.
        splice @$active_formatting_elements, $index;
        return;
      }
      $index--;
    }

    !!!cp ('t39');
  }; # $clear_up_to_marker

  my $insert;

  ## Generic CDATA/RCDATA element parsing.
  ##
  ## Takes the content model flag to tokenize the element's content with
  ## (|CDATA_CONTENT_MODEL| or |RCDATA_CONTENT_MODEL|).  The current token
  ## (|$token|) supplies the tag name and attributes of the element.
  ##
  ## The element is created and inserted with |$insert|, all following
  ## character tokens are concatenated into a single text node child, the
  ## content model is switched back to PCDATA, and the token that ended
  ## the text is consumed.  A parse error is reported if that token is
  ## not the matching end tag.
  my $parse_rcdata = sub ($) {
    my $content_model_flag = shift;

    ## Step 1
    my $expected_end_tag = $token->{tag_name};
    my $element;
    !!!create-element ($element, $HTML_NS, $expected_end_tag, $token->{attributes}, $token);

    ## Step 2
    $insert->($element);

    ## Step 3
    $self->{content_model} = $content_model_flag; # CDATA or RCDATA
    delete $self->{escape}; # MUST

    ## Step 4
    my @chunks;
    !!!nack ('t40.1');
    !!!next-token;
    until ($token->{type} != CHARACTER_TOKEN) { # or until stop tokenizing
      !!!cp ('t40');
      push @chunks, $token->{data};
      !!!next-token;
    }
    my $content = join '', @chunks;

    ## Step 5
    if (length $content) {
      !!!cp ('t41');
      $element->append_child
          ($self->{document}->create_text_node ($content));
    }

    ## Step 6
    $self->{content_model} = PCDATA_CONTENT_MODEL;

    ## Step 7
    my $is_matching_end_tag = ($token->{type} == END_TAG_TOKEN and
                               $token->{tag_name} eq $expected_end_tag);
    if ($is_matching_end_tag) {
      !!!cp ('t42');
      ## Ignore the token
    } elsif ($content_model_flag == CDATA_CONTENT_MODEL) {
      ## NOTE: An end-of-file token.
      !!!cp ('t43');
      !!!parse-error (type => 'in CDATA:#eof', token => $token);
    } elsif ($content_model_flag == RCDATA_CONTENT_MODEL) {
      ## NOTE: An end-of-file token.
      !!!cp ('t44');
      !!!parse-error (type => 'in RCDATA:#eof', token => $token);
    } else {
      die "$0: $content_model_flag in parse_rcdata";
    }
    !!!next-token;
  }; # $parse_rcdata

  ## Processes a |script| start tag (the current token, |$token|).
  ##
  ## A |script| element is created from the token, the tokenizer is
  ## switched to the CDATA content model, and all following character
  ## tokens are appended to the element as text.  The content model is
  ## then restored to PCDATA and a parse error is reported unless the
  ## next token is the |script| end tag.  The element is inserted with
  ## |$insert| only when no |inner_html_node| is defined.  Finally the
  ## token that ended the script text is consumed.
  my $script_start_tag = sub () {
    my $script_el;
    !!!create-element ($script_el, $HTML_NS, 'script', $token->{attributes}, $token);
    ## TODO: mark as "parser-inserted"

    $self->{content_model} = CDATA_CONTENT_MODEL;
    delete $self->{escape}; # MUST
    
    ## Collect the script text; stop at the first non-character token (or
    ## when the tokenizer stops tokenising).
    my @chunks;
    !!!nack ('t45.1');
    !!!next-token;
    until ($token->{type} != CHARACTER_TOKEN) {
      !!!cp ('t45');
      push @chunks, $token->{data};
      !!!next-token;
    }
    my $source_text = join '', @chunks;
    if (length $source_text) {
      !!!cp ('t46');
      $script_el->manakai_append_text ($source_text);
    }
              
    $self->{content_model} = PCDATA_CONTENT_MODEL;

    my $is_script_end_tag = ($token->{type} == END_TAG_TOKEN and
                             $token->{tag_name} eq 'script');
    if ($is_script_end_tag) {
      !!!cp ('t47');
      ## Ignore the token
    } else {
      !!!cp ('t48');
      !!!parse-error (type => 'in CDATA:#eof', token => $token);
      ## ISSUE: And ignore?
      ## TODO: mark as "already executed"
    }
    
    if (not defined $self->{inner_html_node}) {
      !!!cp ('t50');
      ## TODO: $old_insertion_point = current insertion point
      ## TODO: insertion point = just before the next input character

      $insert->($script_el);
      
      ## TODO: insertion point = $old_insertion_point (might be "undefined")
      
      ## TODO: if there is a script that will execute as soon as the parser resume, then...
    } else {
      !!!cp ('t49');
      ## TODO: mark as "already executed"
    }
    
    !!!next-token;
  }; # $script_start_tag

  ## NOTE: $open_tables->[-1]->[0] is the "current table" element node.
  ## NOTE: $open_tables->[-1]->[1] is the "tainted" flag.
  my $open_tables = [[$self->{open_elements}->[0]->[0]]];

  ## Handles an end tag of a formatting element by running the adoption
  ## agency algorithm (AAA).
  ##
  ## Takes one argument, |$end_tag_token|: the end tag token to process.
  ## Its |tag_name| selects the formatting element, and the token is used
  ## as the source position of any parse error reported.
  ##
  ## The algorithm repeats (|redo FET|) until one of the following ends
  ## it, each of which consumes the next token before returning:
  ##
  ##   - no matching entry is found in the list of active formatting
  ##     elements after the last marker (parse error);
  ##   - the formatting element is in the stack of open elements but not
  ##     in scope (parse error);
  ##   - the formatting element is not in the stack of open elements at
  ##     all (parse error; its entry is removed from the list);
  ##   - there is no furthest block, in which case the stack is popped up
  ##     to and including the formatting element and its entry is removed
  ##     from the list.
  ##
  ## Otherwise the subtree is rearranged around the furthest block and
  ## the formatting element is replaced, both in the list and in the
  ## stack, by a clone that becomes the last child of the furthest block.
  my $formatting_end_tag = sub {
    my $end_tag_token = shift;
    my $tag_name = $end_tag_token->{tag_name};

    ## NOTE: The adoption agency algorithm (AAA).

    FET: {
      ## Step 1: Find the formatting element, i.e. the last entry after
      ## the last marker whose local name is the tag name.
      my $formatting_element;
      my $formatting_element_i_in_active;
      AFE: for (reverse 0..$#$active_formatting_elements) {
        if ($active_formatting_elements->[$_]->[0] eq '#marker') {
          !!!cp ('t52');
          last AFE;
        } elsif ($active_formatting_elements->[$_]->[0]->manakai_local_name
                     eq $tag_name) {
          !!!cp ('t51');
          $formatting_element = $active_formatting_elements->[$_];
          $formatting_element_i_in_active = $_;
          last AFE;
        }
      } # AFE
      unless (defined $formatting_element) {
        !!!cp ('t53');
        !!!parse-error (type => 'unmatched end tag', text => $tag_name, token => $end_tag_token);
        ## Ignore the token
        !!!next-token;
        return;
      }
      ## has an element in scope
      my $in_scope = 1;
      my $formatting_element_i_in_open;  
      INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
        my $node = $self->{open_elements}->[$_];
        if ($node->[0] eq $formatting_element->[0]) {
          if ($in_scope) {
            !!!cp ('t54');
            $formatting_element_i_in_open = $_;
            last INSCOPE;
          } else { # in open elements but not in scope
            !!!cp ('t55');
            !!!parse-error (type => 'unmatched end tag',
                            text => $tag_name,
                            token => $end_tag_token);
            ## Ignore the token
            !!!next-token;
            return;
          }
        } elsif ($node->[1] & SCOPING_EL) {
          !!!cp ('t56');
          $in_scope = 0;
        }
      } # INSCOPE
      unless (defined $formatting_element_i_in_open) {
        !!!cp ('t57');
        !!!parse-error (type => 'unmatched end tag',
                        text => $tag_name,
                        token => $end_tag_token);
        ## Remove the formatting element itself from the list.  It is not
        ## necessarily the last entry, so |pop| would remove the wrong
        ## one.
        splice @$active_formatting_elements,
            $formatting_element_i_in_active, 1;
        !!!next-token; ## TODO: ok?
        return;
      }
      if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
        !!!cp ('t58');
        !!!parse-error (type => 'not closed',
                        text => $self->{open_elements}->[-1]->[0]
                            ->manakai_local_name,
                        token => $end_tag_token);
      }
      
      ## Step 2: Find the furthest block.  The stack is scanned from the
      ## current node towards the formatting element, so the candidate
      ## kept is the one closest to the formatting element.
      my $furthest_block;
      my $furthest_block_i_in_open;
      OE: for (reverse 0..$#{$self->{open_elements}}) {
        my $node = $self->{open_elements}->[$_];
        if (not ($node->[1] & FORMATTING_EL) and 
            #not $phrasing_category->{$node->[1]} and
            ($node->[1] & SPECIAL_EL or
             $node->[1] & SCOPING_EL)) { ## Scoping is redundant, maybe
          !!!cp ('t59');
          $furthest_block = $node;
          $furthest_block_i_in_open = $_;
        } elsif ($node->[0] eq $formatting_element->[0]) {
          !!!cp ('t60');
          last OE;
        }
      } # OE
      
      ## Step 3: Without a furthest block, simply close the formatting
      ## element.
      unless (defined $furthest_block) { # MUST
        !!!cp ('t61');
        splice @{$self->{open_elements}}, $formatting_element_i_in_open;
        splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
        !!!next-token;
        return;
      }
      
      ## Step 4
      my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];
      
      ## Step 5: Detach the furthest block from its parent, if any.
      my $furthest_block_parent = $furthest_block->[0]->parent_node;
      if (defined $furthest_block_parent) {
        !!!cp ('t62');
        $furthest_block_parent->remove_child ($furthest_block->[0]);
      }
      
      ## Step 6: The bookmark is represented by the element of the entry
      ## that precedes the formatting element in the list.
      ## NOTE(review): If the formatting element is the first entry, the
      ## index below is -1, i.e. the last entry of the list.  Confirm that
      ## this is intended.
      my $bookmark_prev_el
        = $active_formatting_elements->[$formatting_element_i_in_active - 1]
          ->[0];
      
      ## Step 7
      my $node = $furthest_block;
      my $node_i_in_open = $furthest_block_i_in_open;
      my $last_node = $furthest_block;
      S7: {
        ## Step 1
        $node_i_in_open--;
        $node = $self->{open_elements}->[$node_i_in_open];
        
        ## Step 2: A node that is not in the list of active formatting
        ## elements is removed from the stack and skipped.
        my $node_i_in_active;
        S7S2: {
          for (reverse 0..$#$active_formatting_elements) {
            if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
              !!!cp ('t63');
              $node_i_in_active = $_;
              last S7S2;
            }
          }
          splice @{$self->{open_elements}}, $node_i_in_open, 1;
          redo S7;
        } # S7S2
        
        ## Step 3
        last S7 if $node->[0] eq $formatting_element->[0];
        
        ## Step 4
        if ($last_node->[0] eq $furthest_block->[0]) {
          !!!cp ('t64');
          $bookmark_prev_el = $node->[0];
        }
        
        ## Step 5: A node that already has children is replaced, in both
        ## the list and the stack, by a shallow clone.
        if ($node->[0]->has_child_nodes ()) {
          !!!cp ('t65');
          my $clone = [$node->[0]->clone_node (0), $node->[1]];
          $active_formatting_elements->[$node_i_in_active] = $clone;
          $self->{open_elements}->[$node_i_in_open] = $clone;
          $node = $clone;
        }
        
        ## Step 6
        $node->[0]->append_child ($last_node->[0]);
        
        ## Step 7
        $last_node = $node;
        
        ## Step 8
        redo S7;
      } # S7  
      
      ## Step 8: Insert the last node into the common ancestor, or foster
      ## parent it if the common ancestor is a table-structure element.
      if ($common_ancestor_node->[1] & TABLE_ROWS_EL) {
        my $foster_parent_element;
        my $next_sibling;
        OE: for (reverse 0..$#{$self->{open_elements}}) {
          if ($self->{open_elements}->[$_]->[1] & TABLE_EL) {
            my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
            if (defined $parent and $parent->node_type == 1) {
              !!!cp ('t65.1');
              $foster_parent_element = $parent;
              $next_sibling = $self->{open_elements}->[$_]->[0];
            } else {
              !!!cp ('t65.2');
              $foster_parent_element
                = $self->{open_elements}->[$_ - 1]->[0];
            }
            last OE;
          }
        } # OE
        $foster_parent_element = $self->{open_elements}->[0]->[0]
          unless defined $foster_parent_element;
        $foster_parent_element->insert_before ($last_node->[0], $next_sibling);
        $open_tables->[-1]->[1] = 1; # tainted
      } else {
        !!!cp ('t65.3');
        $common_ancestor_node->[0]->append_child ($last_node->[0]);
      }
      
      ## Step 9
      my $clone = [$formatting_element->[0]->clone_node (0),
                   $formatting_element->[1]];
      
      ## Step 10: Move all children of the furthest block into the clone.
      ## The child list is copied first because it changes while the
      ## children are being moved.
      my @cn = @{$furthest_block->[0]->child_nodes};
      $clone->[0]->append_child ($_) for @cn;
      
      ## Step 11
      $furthest_block->[0]->append_child ($clone->[0]);
      
      ## Step 12: In the list of active formatting elements, remove the
      ## formatting element and insert the clone at the bookmark, i.e.
      ## just after the entry for |$bookmark_prev_el|.
      my $i;
      AFE: for (reverse 0..$#$active_formatting_elements) {
        if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
          !!!cp ('t66');
          splice @$active_formatting_elements, $_, 1;
          ## The removal shifts the bookmark entry down by one.
          $i-- and last AFE if defined $i;
        } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
          !!!cp ('t67');
          $i = $_;
        }
      } # AFE
      splice @$active_formatting_elements, $i + 1, 0, $clone;
      
      ## Step 13: In the stack of open elements, remove the formatting
      ## element and insert the clone immediately below the furthest
      ## block.
      undef $i;
      OE: for (reverse 0..$#{$self->{open_elements}}) {
        if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
          !!!cp ('t68');
          splice @{$self->{open_elements}}, $_, 1;
          ## The removal shifts the furthest block down by one.
          $i-- and last OE if defined $i;
        } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
          !!!cp ('t69');
          $i = $_;
        }
      } # OE
      ## NOTE: The clone is inserted (replacement length 0); it must not
      ## overwrite the entry, if any, that follows the furthest block.
      splice @{$self->{open_elements}}, $i + 1, 0, $clone;
      
      ## Step 14
      redo FET;
    } # FET
  }; # $formatting_end_tag

  ## Default insertion procedure: appends the given node as the last
  ## child of the current node (the last entry in the stack of open
  ## elements).  It is also installed as the initial value of |$insert|.
  $insert = my $insert_to_current = sub {
    my ($child) = @_;
    my $current_node = $self->{open_elements}->[-1]->[0];
    $current_node->append_child ($child);
  }; # $insert_to_current

  ## Insertion procedure with foster parenting.
  ##
  ## Takes the node to insert.  Unless the current node is flagged as
  ## |TABLE_ROWS_EL|, the node is simply appended to the current node.
  ## Otherwise the last |TABLE_EL| entry in the stack of open elements is
  ## looked up:
  ##
  ##   - if its node has a parent with node type 1, the new node is
  ##     inserted into that parent just before the table;
  ##   - if not, the new node is appended to the node of the entry that
  ##     precedes the table in the stack;
  ##   - if the stack has no such entry, the new node is appended to the
  ##     node of the first entry of the stack.
  ##
  ## Whenever foster parenting takes place, the current table is marked
  ## as tainted.
  my $insert_to_foster = sub {
    my ($new_child) = @_;
    my $stack = $self->{open_elements};

    if (not ($stack->[-1]->[1] & TABLE_ROWS_EL)) {
      !!!cp ('t72');
      $stack->[-1]->[0]->append_child ($new_child);
    } else {
      # MUST
      my ($foster_parent, $ref_child);
      for my $idx (reverse 0 .. $#$stack) {
        next unless $stack->[$idx]->[1] & TABLE_EL;

        my $table = $stack->[$idx]->[0];
        my $table_parent = $table->parent_node;
        if (defined $table_parent and $table_parent->node_type == 1) {
          !!!cp ('t70');
          $foster_parent = $table_parent;
          $ref_child = $table;
        } else {
          !!!cp ('t71');
          $foster_parent = $stack->[$idx - 1]->[0];
        }
        last;
      }
      $foster_parent = $stack->[0]->[0] unless defined $foster_parent;

      ## NOTE: With an undefined reference child this call is expected to
      ## append the new node.
      $foster_parent->insert_before ($new_child, $ref_child);
      $open_tables->[-1]->[1] = 1; # tainted
    }
  }; # $insert_to_foster

  B: while (1) {
    if ($token->{type} == DOCTYPE_TOKEN) {
      !!!cp ('t73');
      !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
      ## Ignore the token
      ## Stay in the phase
      !!!next-token;
      next B;
    } elsif ($token->{type} == START_TAG_TOKEN and
             $token->{tag_name} eq 'html') {
      if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
        !!!cp ('t79');
        !!!parse-error (type => 'after html', text => 'html', token => $token);
        $self->{insertion_mode} = AFTER_BODY_IM;
      } elsif ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
        !!!cp ('t80');
        !!!parse-error (type => 'after html', text => 'html', token => $token);
        $self->{insertion_mode} = AFTER_FRAMESET_IM;
      } else {
        !!!cp ('t81');
      }

      !!!cp ('t82');
      !!!parse-error (type => 'not first start tag', token => $token);
      my $top_el = $self->{open_elements}->[0]->[0];
      for my $attr_name (keys %{$token->{attributes}}) {
        unless ($top_el->has_attribute_ns (undef, $attr_name)) {
          !!!cp ('t84');
          $top_el->set_attribute_ns
            (undef, [undef, $attr_name], 
             $token->{attributes}->{$attr_name}->{value});
        }
      }
      !!!nack ('t84.1');
      !!!next-token;
      next B;
    } elsif ($token->{type} == COMMENT_TOKEN) {
      my $comment = $self->{document}->create_comment ($token->{data});
      if ($self->{insertion_mode} & AFTER_HTML_IMS) {
        !!!cp ('t85');
        $self->{document}->append_child ($comment);
      } elsif ($self->{insertion_mode} == AFTER_BODY_IM) {
        !!!cp ('t86');
        $self->{open_elements}->[0]->[0]->append_child ($comment);
      } else {
        !!!cp ('t87');
        $self->{open_elements}->[-1]->[0]->append_child ($comment);
      }
      !!!next-token;
      next B;
    } elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
      if ($token->{type} == CHARACTER_TOKEN) {
        !!!cp ('t87.1');
        $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
        !!!next-token;
        next B;
      } elsif ($token->{type} == START_TAG_TOKEN) {
        if ((not {mglyph => 1, malignmark => 1}->{$token->{tag_name}} and
             $self->{open_elements}->[-1]->[1] & FOREIGN_FLOW_CONTENT_EL) or
            not ($self->{open_elements}->[-1]->[1] & FOREIGN_EL) or
            ($token->{tag_name} eq 'svg' and
             $self->{open_elements}->[-1]->[1] & MML_AXML_EL)) {
          ## NOTE: "using the rules for secondary insertion mode"then"continue"
          !!!cp ('t87.2');
          #
        } elsif ({
                  b => 1, big => 1, blockquote => 1, body => 1, br => 1,
                  center => 1, code => 1, dd => 1, div => 1, dl => 1, dt => 1,
                  em => 1, embed => 1, font => 1, h1 => 1, h2 => 1, h3 => 1,
                  h4 => 1, h5 => 1, h6 => 1, head => 1, hr => 1, i => 1,
                  img => 1, li => 1, listing => 1, menu => 1, meta => 1,
                  nobr => 1, ol => 1, p => 1, pre => 1, ruby => 1, s => 1,
                  small => 1, span => 1, strong => 1, strike => 1, sub => 1,
                  sup => 1, table => 1, tt => 1, u => 1, ul => 1, var => 1,
                 }->{$token->{tag_name}}) {
          !!!cp ('t87.2');
          !!!parse-error (type => 'not closed',
                          text => $self->{open_elements}->[-1]->[0]
                              ->manakai_local_name,
                          token => $token);

          pop @{$self->{open_elements}}
              while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;

          $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
          ## Reprocess.
          next B;
        } else {
          my $nsuri = $self->{open_elements}->[-1]->[0]->namespace_uri;
          my $tag_name = $token->{tag_name};
          if ($nsuri eq $SVG_NS) {
            $tag_name = {
               altglyph => 'altGlyph',
               altglyphdef => 'altGlyphDef',
               altglyphitem => 'altGlyphItem',
               animatecolor => 'animateColor',
               animatemotion => 'animateMotion',
               animatetransform => 'animateTransform',
               clippath => 'clipPath',
               feblend => 'feBlend',
               fecolormatrix => 'feColorMatrix',
               fecomponenttransfer => 'feComponentTransfer',
               fecomposite => 'feComposite',
               feconvolvematrix => 'feConvolveMatrix',
               fediffuselighting => 'feDiffuseLighting',
               fedisplacementmap => 'feDisplacementMap',
               fedistantlight => 'feDistantLight',
               feflood => 'feFlood',
               fefunca => 'feFuncA',
               fefuncb => 'feFuncB',
               fefuncg => 'feFuncG',
               fefuncr => 'feFuncR',
               fegaussianblur => 'feGaussianBlur',
               feimage => 'feImage',
               femerge => 'feMerge',
               femergenode => 'feMergeNode',
               femorphology => 'feMorphology',
               feoffset => 'feOffset',
               fepointlight => 'fePointLight',
               fespecularlighting => 'feSpecularLighting',
               fespotlight => 'feSpotLight',
               fetile => 'feTile',
               feturbulence => 'feTurbulence',
               foreignobject => 'foreignObject',
               glyphref => 'glyphRef',
               lineargradient => 'linearGradient',
               radialgradient => 'radialGradient',
               #solidcolor => 'solidColor', ## NOTE: Commented in spec (SVG1.2)
               textpath => 'textPath',  
            }->{$tag_name} || $tag_name;
          }

          ## "adjust SVG attributes" (SVG only) - done in insert-element-f

          ## "adjust foreign attributes" - done in insert-element-f

          !!!insert-element-f ($nsuri, $tag_name, $token->{attributes}, $token);

          if ($self->{self_closing}) {
            pop @{$self->{open_elements}};
            !!!ack ('t87.3');
          } else {
            !!!cp ('t87.4');
          }

          !!!next-token;
          next B;
        }
      } elsif ($token->{type} == END_TAG_TOKEN) {
        ## NOTE: "using the rules for secondary insertion mode" then "continue"
        !!!cp ('t87.5');
        #
      } elsif ($token->{type} == END_OF_FILE_TOKEN) {
        !!!cp ('t87.6');
        !!!parse-error (type => 'not closed',
                        text => $self->{open_elements}->[-1]->[0]
                            ->manakai_local_name,
                        token => $token);

        pop @{$self->{open_elements}}
            while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;

        $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
        ## Reprocess.
        next B;
      } else {
        die "$0: $token->{type}: Unknown token type";        
      }
    }

    if ($self->{insertion_mode} & HEAD_IMS) {
      if ($token->{type} == CHARACTER_TOKEN) {
        if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
          unless ($self->{insertion_mode} == BEFORE_HEAD_IM) {
            !!!cp ('t88.2');
            $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
            #
          } else {
            !!!cp ('t88.1');
            ## Ignore the token.
            #
          }
          unless (length $token->{data}) {
            !!!cp ('t88');
            !!!next-token;
            next B;
          }
## TODO: set $token->{column} appropriately
        }

        if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
          !!!cp ('t89');
          ## As if <head>
          !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
          $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
          push @{$self->{open_elements}},
              [$self->{head_element}, $el_category->{head}];

          ## Reprocess in the "in head" insertion mode...
          pop @{$self->{open_elements}};

          ## Reprocess in the "after head" insertion mode...
        } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
          !!!cp ('t90');
          ## As if </noscript>
          pop @{$self->{open_elements}};
          !!!parse-error (type => 'in noscript:#text', token => $token);
          
          ## Reprocess in the "in head" insertion mode...
          ## As if </head>
          pop @{$self->{open_elements}};

          ## Reprocess in the "after head" insertion mode...
        } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
          !!!cp ('t91');
          pop @{$self->{open_elements}};

          ## Reprocess in the "after head" insertion mode...
        } else {
          !!!cp ('t92');
        }

        ## "after head" insertion mode
        ## As if <body>
        !!!insert-element ('body',, $token);
        $self->{insertion_mode} = IN_BODY_IM;
        ## reprocess
        next B;
      } elsif ($token->{type} == START_TAG_TOKEN) {
        if ($token->{tag_name} eq 'head') {
          if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
            !!!cp ('t93');
            !!!create-element ($self->{head_element}, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
            $self->{open_elements}->[-1]->[0]->append_child
                ($self->{head_element});
            push @{$self->{open_elements}},
                [$self->{head_element}, $el_category->{head}];
            $self->{insertion_mode} = IN_HEAD_IM;
            !!!nack ('t93.1');
            !!!next-token;
            next B;
          } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
            !!!cp ('t93.2');
            !!!parse-error (type => 'after head', text => 'head',
                            token => $token);
            ## Ignore the token
            !!!nack ('t93.3');
            !!!next-token;
            next B;
          } else {
            !!!cp ('t95');
            !!!parse-error (type => 'in head:head',
                            token => $token); # or in head noscript
            ## Ignore the token
            !!!nack ('t95.1');
            !!!next-token;
            next B;
          }
        } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
          !!!cp ('t96');
          ## As if <head>
          !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
          $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
          push @{$self->{open_elements}},
              [$self->{head_element}, $el_category->{head}];

          $self->{insertion_mode} = IN_HEAD_IM;
          ## Reprocess in the "in head" insertion mode...
        } else {
          !!!cp ('t97');
        }

            if ($token->{tag_name} eq 'base') {
              if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
                !!!cp ('t98');
                ## As if </noscript>
                pop @{$self->{open_elements}};
                !!!parse-error (type => 'in noscript', text => 'base',
                                token => $token);
              
                $self->{insertion_mode} = IN_HEAD_IM;
                ## Reprocess in the "in head" insertion mode...
              } else {
                !!!cp ('t99');
              }

              ## NOTE: There is a "as if in head" code clone.
              if ($self->{insertion_mode} == AFTER_HEAD_IM) {
                !!!cp ('t100');
                !!!parse-error (type => 'after head',
                                text => $token->{tag_name}, token => $token);
                push @{$self->{open_elements}},
                    [$self->{head_element}, $el_category->{head}];
              } else {
                !!!cp ('t101');
              }
              !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
              pop @{$self->{open_elements}};
              pop @{$self->{open_elements}} # <head>
                  if $self->{insertion_mode} == AFTER_HEAD_IM;
              !!!nack ('t101.1');
              !!!next-token;
              next B;
        } elsif ($token->{tag_name} eq 'link') {
          ## NOTE: There is a "as if in head" code clone.
          if ($self->{insertion_mode} == AFTER_HEAD_IM) {
            !!!cp ('t102');
            !!!parse-error (type => 'after head',
                            text => $token->{tag_name}, token => $token);
            push @{$self->{open_elements}},
                [$self->{head_element}, $el_category->{head}];
          } else {
            !!!cp ('t103');
          }
          !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
          pop @{$self->{open_elements}};
          pop @{$self->{open_elements}} # <head>
              if $self->{insertion_mode} == AFTER_HEAD_IM;
          !!!ack ('t103.1');
          !!!next-token;
          next B;
        } elsif ($token->{tag_name} eq 'command' or
                 $token->{tag_name} eq 'eventsource') {
          if ($self->{insertion_mode} == IN_HEAD_IM) {
            ## NOTE: If the insertion mode at the time of the emission
            ## of the token was "before head", $self->{insertion_mode}
            ## is already changed to |IN_HEAD_IM|.

            ## NOTE: There is a "as if in head" code clone.
            !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
            pop @{$self->{open_elements}};
            pop @{$self->{open_elements}} # <head>
                if $self->{insertion_mode} == AFTER_HEAD_IM;
            !!!ack ('t103.2');
            !!!next-token;
            next B;
          } else {
            ## NOTE: "in head noscript" or "after head" insertion mode
            ## - in these cases, these tags are treated as same as
            ## normal in-body tags.
            !!!cp ('t103.3');
            #
          }
            } elsif ($token->{tag_name} eq 'meta') {
              ## NOTE: There is a "as if in head" code clone.
              if ($self->{insertion_mode} == AFTER_HEAD_IM) {
                !!!cp ('t104');
                !!!parse-error (type => 'after head',
                                text => $token->{tag_name}, token => $token);
                push @{$self->{open_elements}},
                    [$self->{head_element}, $el_category->{head}];
              } else {
                !!!cp ('t105');
              }
              !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
              my $meta_el = pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.

              unless ($self->{confident}) {
                if ($token->{attributes}->{charset}) {
                  !!!cp ('t106');
                  ## NOTE: Whether the encoding is supported or not is handled
                  ## in the {change_encoding} callback.
                  $self->{change_encoding}
                      ->($self, $token->{attributes}->{charset}->{value},
                         $token);
                  
                  $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
                      ->set_user_data (manakai_has_reference =>
                                           $token->{attributes}->{charset}
                                               ->{has_reference});
                } elsif ($token->{attributes}->{content}) {
                  if ($token->{attributes}->{content}->{value}
                      =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
                          [\x09\x0A\x0C\x0D\x20]*=
                          [\x09\x0A\x0C\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
                          ([^"'\x09\x0A\x0C\x0D\x20]
                           [^\x09\x0A\x0C\x0D\x20\x3B]*))/x) {
                    !!!cp ('t107');
                    ## NOTE: Whether the encoding is supported or not is handled
                    ## in the {change_encoding} callback.
                    $self->{change_encoding}
                        ->($self, defined $1 ? $1 : defined $2 ? $2 : $3,
                           $token);
                    $meta_el->[0]->get_attribute_node_ns (undef, 'content')
                        ->set_user_data (manakai_has_reference =>
                                             $token->{attributes}->{content}
                                                   ->{has_reference});
                  } else {
                    !!!cp ('t108');
                  }
                }
              } else {
                if ($token->{attributes}->{charset}) {
                  !!!cp ('t109');
                  $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
                      ->set_user_data (manakai_has_reference =>
                                           $token->{attributes}->{charset}
                                               ->{has_reference});
                }
                if ($token->{attributes}->{content}) {
                  !!!cp ('t110');
                  $meta_el->[0]->get_attribute_node_ns (undef, 'content')
                      ->set_user_data (manakai_has_reference =>
                                           $token->{attributes}->{content}
                                               ->{has_reference});
                }
              }

              pop @{$self->{open_elements}} # <head>
                  if $self->{insertion_mode} == AFTER_HEAD_IM;
              !!!ack ('t110.1');
              !!!next-token;
              next B;
            } elsif ($token->{tag_name} eq 'title') {
              if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
                !!!cp ('t111');
                ## As if </noscript>
                pop @{$self->{open_elements}};
                !!!parse-error (type => 'in noscript', text => 'title',
                                token => $token);
              
                $self->{insertion_mode} = IN_HEAD_IM;
                ## Reprocess in the "in head" insertion mode...
              } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
                !!!cp ('t112');
                !!!parse-error (type => 'after head',
                                text => $token->{tag_name}, token => $token);
                push @{$self->{open_elements}},
                    [$self->{head_element}, $el_category->{head}];
              } else {
                !!!cp ('t113');
              }

              ## NOTE: There is a "as if in head" code clone.
              my $parent = defined $self->{head_element} ? $self->{head_element}
                  : $self->{open_elements}->[-1]->[0];
              $parse_rcdata->(RCDATA_CONTENT_MODEL);
              pop @{$self->{open_elements}} # <head>
                  if $self->{insertion_mode} == AFTER_HEAD_IM;
              next B;
            } elsif ($token->{tag_name} eq 'style' or
                     $token->{tag_name} eq 'noframes') {
              ## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and
              ## insertion mode IN_HEAD_IM)
              ## NOTE: There is a "as if in head" code clone.
              if ($self->{insertion_mode} == AFTER_HEAD_IM) {
                !!!cp ('t114');
                !!!parse-error (type => 'after head',
                                text => $token->{tag_name}, token => $token);
                push @{$self->{open_elements}},
                    [$self->{head_element}, $el_category->{head}];
              } else {
                !!!cp ('t115');
              }
              $parse_rcdata->(CDATA_CONTENT_MODEL);
              pop @{$self->{open_elements}} # <head>
                  if $self->{insertion_mode} == AFTER_HEAD_IM;
              next B;
            } elsif ($token->{tag_name} eq 'noscript') {
              if ($self->{insertion_mode} == IN_HEAD_IM) {
                !!!cp ('t116');
                ## NOTE: and scripting is disabled
                !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
                $self->{insertion_mode} = IN_HEAD_NOSCRIPT_IM;
                !!!nack ('t116.1');
                !!!next-token;
                next B;
              } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
                !!!cp ('t117');
                !!!parse-error (type => 'in noscript', text => 'noscript',
                                token => $token);
                ## Ignore the token
                !!!nack ('t117.1');
                !!!next-token;
                next B;
              } else {
                !!!cp ('t118');
                #
              }
            } elsif ($token->{tag_name} eq 'script') {
              if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
                !!!cp ('t119');
                ## As if </noscript>
                pop @{$self->{open_elements}};
                !!!parse-error (type => 'in noscript', text => 'script',
                                token => $token);
              
                $self->{insertion_mode} = IN_HEAD_IM;
                ## Reprocess in the "in head" insertion mode...
              } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
                !!!cp ('t120');
                !!!parse-error (type => 'after head',
                                text => $token->{tag_name}, token => $token);
                push @{$self->{open_elements}},
                    [$self->{head_element}, $el_category->{head}];
              } else {
                !!!cp ('t121');
              }

              ## NOTE: There is a "as if in head" code clone.
              $script_start_tag->();
              pop @{$self->{open_elements}} # <head>
                  if $self->{insertion_mode} == AFTER_HEAD_IM;
              next B;
            } elsif ($token->{tag_name} eq 'body' or
                     $token->{tag_name} eq 'frameset') {
              if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
                !!!cp ('t122');
                ## As if </noscript>
                pop @{$self->{open_elements}};
                !!!parse-error (type => 'in noscript',
                                text => $token->{tag_name}, token => $token);
                
                ## Reprocess in the "in head" insertion mode...
                ## As if </head>
                pop @{$self->{open_elements}};
                
                ## Reprocess in the "after head" insertion mode...
              } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
                !!!cp ('t124');
                pop @{$self->{open_elements}};
                
                ## Reprocess in the "after head" insertion mode...
              } else {
                !!!cp ('t125');
              }

              ## "after head" insertion mode
              !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
              if ($token->{tag_name} eq 'body') {
                !!!cp ('t126');
                $self->{insertion_mode} = IN_BODY_IM;
              } elsif ($token->{tag_name} eq 'frameset') {
                !!!cp ('t127');
                $self->{insertion_mode} = IN_FRAMESET_IM;
              } else {
                die "$0: tag name: $self->{tag_name}";
              }
              !!!nack ('t127.1');
              !!!next-token;
              next B;
            } else {
              !!!cp ('t128');
              #
            }

            if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
              !!!cp ('t129');
              ## As if </noscript>
              pop @{$self->{open_elements}};
              !!!parse-error (type => 'in noscript:/',
                              text => $token->{tag_name}, token => $token);
              
              ## Reprocess in the "in head" insertion mode...
              ## As if </head>
              pop @{$self->{open_elements}};

              ## Reprocess in the "after head" insertion mode...
            } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
              !!!cp ('t130');
              ## As if </head>
              pop @{$self->{open_elements}};

              ## Reprocess in the "after head" insertion mode...
            } else {
              !!!cp ('t131');
            }

            ## "after head" insertion mode
            ## As if <body>
            !!!insert-element ('body',, $token);
            $self->{insertion_mode} = IN_BODY_IM;
            ## reprocess
            !!!ack-later;
            next B;
          } elsif ($token->{type} == END_TAG_TOKEN) {
            if ($token->{tag_name} eq 'head') {
              if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
                !!!cp ('t132');
                ## As if <head>
                !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
                $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
                push @{$self->{open_elements}},
                    [$self->{head_element}, $el_category->{head}];

                ## Reprocess in the "in head" insertion mode...
                pop @{$self->{open_elements}};
                $self->{insertion_mode} = AFTER_HEAD_IM;
                !!!next-token;
                next B;
              } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
                !!!cp ('t133');
                ## As if </noscript>
                pop @{$self->{open_elements}};
                !!!parse-error (type => 'in noscript:/',
                                text => 'head', token => $token);
                
                ## Reprocess in the "in head" insertion mode...
                pop @{$self->{open_elements}};
                $self->{insertion_mode} = AFTER_HEAD_IM;
                !!!next-token;
                next B;
              } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
                !!!cp ('t134');
                pop @{$self->{open_elements}};
                $self->{insertion_mode} = AFTER_HEAD_IM;
                !!!next-token;
                next B;
              } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
                !!!cp ('t134.1');
                !!!parse-error (type => 'unmatched end tag', text => 'head',
                                token => $token);
                ## Ignore the token
                !!!next-token;
                next B;
              } else {
                die "$0: $self->{insertion_mode}: Unknown insertion mode";
              }
            } elsif ($token->{tag_name} eq 'noscript') {
              if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
                !!!cp ('t136');
                pop @{$self->{open_elements}};
                $self->{insertion_mode} = IN_HEAD_IM;
                !!!next-token;
                next B;
              } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM or
                       $self->{insertion_mode} == AFTER_HEAD_IM) {
                !!!cp ('t137');
                !!!parse-error (type => 'unmatched end tag',
                                text => 'noscript', token => $token);
                ## Ignore the token ## ISSUE: An issue in the spec.
                !!!next-token;
                next B;
              } else {
                !!!cp ('t138');
                #
              }
            } elsif ({
                      body => 1, html => 1,
                     }->{$token->{tag_name}}) {
              if ($self->{insertion_mode} == BEFORE_HEAD_IM or
                  $self->{insertion_mode} == IN_HEAD_IM or
                  $self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
                !!!cp ('t140');
                !!!parse-error (type => 'unmatched end tag',
                                text => $token->{tag_name}, token => $token);
                ## Ignore the token
                !!!next-token;
                next B;
              } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
                !!!cp ('t140.1');
                !!!parse-error (type => 'unmatched end tag',
                                text => $token->{tag_name}, token => $token);
                ## Ignore the token
                !!!next-token;
                next B;
              } else {
                die "$0: $self->{insertion_mode}: Unknown insertion mode";
              }
            } elsif ($token->{tag_name} eq 'p') {
              !!!cp ('t142');
              !!!parse-error (type => 'unmatched end tag',
                              text => $token->{tag_name}, token => $token);
              ## Ignore the token
              !!!next-token;
              next B;
            } elsif ($token->{tag_name} eq 'br') {
              if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
                !!!cp ('t142.2');
                ## (before head) as if <head>, (in head) as if </head>
                !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
                $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
                $self->{insertion_mode} = AFTER_HEAD_IM;
  
                ## Reprocess in the "after head" insertion mode...
              } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
                !!!cp ('t143.2');
                ## As if </head>
                pop @{$self->{open_elements}};
                $self->{insertion_mode} = AFTER_HEAD_IM;
  
                ## Reprocess in the "after head" insertion mode...
              } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
                !!!cp ('t143.3');
                ## ISSUE: Two parse errors for <head><noscript></br>
                !!!parse-error (type => 'unmatched end tag',
                                text => 'br', token => $token);
                ## As if </noscript>
                pop @{$self->{open_elements}};
                $self->{insertion_mode} = IN_HEAD_IM;

                ## Reprocess in the "in head" insertion mode...
                ## As if </head>
                pop @{$self->{open_elements}};
                $self->{insertion_mode} = AFTER_HEAD_IM;

                ## Reprocess in the "after head" insertion mode...
              } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
                !!!cp ('t143.4');
                #
              } else {
                die "$0: $self->{insertion_mode}: Unknown insertion mode";
              }

              ## ISSUE: does not agree with IE7 - it doesn't ignore </br>.
              !!!parse-error (type => 'unmatched end tag',
                              text => 'br', token => $token);
              ## Ignore the token
              !!!next-token;
              next B;
            } else {
              !!!cp ('t145');
              !!!parse-error (type => 'unmatched end tag',
                              text => $token->{tag_name}, token => $token);
              ## Ignore the token
              !!!next-token;
              next B;
            }

            if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
              !!!cp ('t146');
              ## As if </noscript>
              pop @{$self->{open_elements}};
              !!!parse-error (type => 'in noscript:/',
                              text => $token->{tag_name}, token => $token);
              
              ## Reprocess in the "in head" insertion mode...
              ## As if </head>
              pop @{$self->{open_elements}};

              ## Reprocess in the "after head" insertion mode...
            } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
              !!!cp ('t147');
              ## As if </head>
              pop @{$self->{open_elements}};

              ## Reprocess in the "after head" insertion mode...
            } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
## ISSUE: This case cannot be reached?
              !!!cp ('t148');
              !!!parse-error (type => 'unmatched end tag',
                              text => $token->{tag_name}, token => $token);
              ## Ignore the token ## ISSUE: An issue in the spec.
              !!!next-token;
              next B;
            } else {
              !!!cp ('t149');
            }

            ## "after head" insertion mode
            ## As if <body>
            !!!insert-element ('body',, $token);
            $self->{insertion_mode} = IN_BODY_IM;
            ## reprocess
            next B;
      } elsif ($token->{type} == END_OF_FILE_TOKEN) {
        if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
          !!!cp ('t149.1');

          ## NOTE: As if <head>
          !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
          $self->{open_elements}->[-1]->[0]->append_child
              ($self->{head_element});
          #push @{$self->{open_elements}},
          #    [$self->{head_element}, $el_category->{head}];
          #$self->{insertion_mode} = IN_HEAD_IM;
          ## NOTE: Reprocess.

          ## NOTE: As if </head>
          #pop @{$self->{open_elements}};
          #$self->{insertion_mode} = AFTER_HEAD_IM;
          ## NOTE: Reprocess.
          
          #
        } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
          !!!cp ('t149.2');

          ## NOTE: As if </head>
          pop @{$self->{open_elements}};
          #$self->{insertion_mode} = AFTER_HEAD_IM;
          ## NOTE: Reprocess.

          #
        } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
          !!!cp ('t149.3');

          !!!parse-error (type => 'in noscript:#eof', token => $token);

          ## As if </noscript>
          pop @{$self->{open_elements}};
          #$self->{insertion_mode} = IN_HEAD_IM;
          ## NOTE: Reprocess.

          ## NOTE: As if </head>
          pop @{$self->{open_elements}};
          #$self->{insertion_mode} = AFTER_HEAD_IM;
          ## NOTE: Reprocess.

          #
        } else {
          !!!cp ('t149.4');
          #
        }

        ## NOTE: As if <body>
        !!!insert-element ('body',, $token);
        $self->{insertion_mode} = IN_BODY_IM;
        ## NOTE: Reprocess.
        next B;
      } else {
        die "$0: $token->{type}: Unknown token type";
      }

          ## ISSUE: An issue in the spec.
    } elsif ($self->{insertion_mode} & BODY_IMS) {
          if ($token->{type} == CHARACTER_TOKEN) {
            !!!cp ('t150');
            ## NOTE: There is a code clone of "character in body".
            $reconstruct_active_formatting_elements->($insert_to_current);
            
            $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});

            !!!next-token;
            next B;
          } elsif ($token->{type} == START_TAG_TOKEN) {
            if ({
                 caption => 1, col => 1, colgroup => 1, tbody => 1,
                 td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
                }->{$token->{tag_name}}) {
              if ($self->{insertion_mode} == IN_CELL_IM) {
                ## have an element in table scope
                for (reverse 0..$#{$self->{open_elements}}) {
                  my $node = $self->{open_elements}->[$_];
                  if ($node->[1] & TABLE_CELL_EL) {
                    !!!cp ('t151');

                    ## Close the cell
                    !!!back-token; # <x>
                    $token = {type => END_TAG_TOKEN,
                              tag_name => $node->[0]->manakai_local_name,
                              line => $token->{line},
                              column => $token->{column}};
                    next B;
                  } elsif ($node->[1] & TABLE_SCOPING_EL) {
                    !!!cp ('t152');
                    ## ISSUE: This case can never be reached, maybe.
                    last;
                  }
                }

                !!!cp ('t153');
                !!!parse-error (type => 'start tag not allowed',
                    text => $token->{tag_name}, token => $token);
                ## Ignore the token
                !!!nack ('t153.1');
                !!!next-token;
                next B;
              } elsif ($self->{insertion_mode} == IN_CAPTION_IM) {
                !!!parse-error (type => 'not closed', text => 'caption',
                                token => $token);
                
                ## NOTE: As if </caption>.
                ## have a table element in table scope
                my $i;
                INSCOPE: {
                  for (reverse 0..$#{$self->{open_elements}}) {
                    my $node = $self->{open_elements}->[$_];
                    if ($node->[1] & CAPTION_EL) {
                      !!!cp ('t155');
                      $i = $_;
                      last INSCOPE;
                    } elsif ($node->[1] & TABLE_SCOPING_EL) {
                      !!!cp ('t156');
                      last;
                    }
                  }

                  !!!cp ('t157');
                  !!!parse-error (type => 'start tag not allowed',
                                  text => $token->{tag_name}, token => $token);
                  ## Ignore the token
                  !!!nack ('t157.1');
                  !!!next-token;
                  next B;
                } # INSCOPE
                
                ## generate implied end tags
                while ($self->{open_elements}->[-1]->[1]
                           & END_TAG_OPTIONAL_EL) {
                  !!!cp ('t158');
                  pop @{$self->{open_elements}};
                }

                unless ($self->{open_elements}->[-1]->[1] & CAPTION_EL) {
                  !!!cp ('t159');
                  !!!parse-error (type => 'not closed',
                                  text => $self->{open_elements}->[-1]->[0]
                                      ->manakai_local_name,
                                  token => $token);
                } else {
                  !!!cp ('t160');
                }
                
                splice @{$self->{open_elements}}, $i;
                
                $clear_up_to_marker->();
                
                $self->{insertion_mode} = IN_TABLE_IM;
                
                ## reprocess
                !!!ack-later;
                next B;
              } else {
                !!!cp ('t161');
                #
              }
            } else {
              !!!cp ('t162');
              #
            }
          } elsif ($token->{type} == END_TAG_TOKEN) {
            if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
              if ($self->{insertion_mode} == IN_CELL_IM) {
                ## have an element in table scope
                my $i;
                INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
                  my $node = $self->{open_elements}->[$_];
                  if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
                    !!!cp ('t163');
                    $i = $_;
                    last INSCOPE;
                  } elsif ($node->[1] & TABLE_SCOPING_EL) {
                    !!!cp ('t164');
                    last INSCOPE;
                  }
                } # INSCOPE
                  unless (defined $i) {
                    !!!cp ('t165');
                    !!!parse-error (type => 'unmatched end tag',
                                    text => $token->{tag_name},
                                    token => $token);
                    ## Ignore the token
                    !!!next-token;
                    next B;
                  }
                
                ## generate implied end tags
                while ($self->{open_elements}->[-1]->[1]
                           & END_TAG_OPTIONAL_EL) {
                  !!!cp ('t166');
                  pop @{$self->{open_elements}};
                }

                if ($self->{open_elements}->[-1]->[0]->manakai_local_name
                        ne $token->{tag_name}) {
                  !!!cp ('t167');
                  !!!parse-error (type => 'not closed',
                                  text => $self->{open_elements}->[-1]->[0]
                                      ->manakai_local_name,
                                  token => $token);
                } else {
                  !!!cp ('t168');
                }
                
                splice @{$self->{open_elements}}, $i;
                
                $clear_up_to_marker->();
                
                $self->{insertion_mode} = IN_ROW_IM;
                
                !!!next-token;
                next B;
              } elsif ($self->{insertion_mode} == IN_CAPTION_IM) {
                !!!cp ('t169');
                !!!parse-error (type => 'unmatched end tag',
                                text => $token->{tag_name}, token => $token);
                ## Ignore the token
                !!!next-token;
                next B;
              } else {
                !!!cp ('t170');
                #
              }
            } elsif ($token->{tag_name} eq 'caption') {
              if ($self->{insertion_mode} == IN_CAPTION_IM) {
                ## have a table element in table scope
                my $i;
                INSCOPE: {
                  for (reverse 0..$#{$self->{open_elements}}) {
                    my $node = $self->{open_elements}->[$_];
                    if ($node->[1] & CAPTION_EL) {
                      !!!cp ('t171');
                      $i = $_;
                      last INSCOPE;
                    } elsif ($node->[1] & TABLE_SCOPING_EL) {
                      !!!cp ('t172');
                      last;
                    }
                  }

                  !!!cp ('t173');
                  !!!parse-error (type => 'unmatched end tag',
                                  text => $token->{tag_name}, token => $token);
                  ## Ignore the token
                  !!!next-token;
                  next B;
                } # INSCOPE
                
                ## generate implied end tags
                while ($self->{open_elements}->[-1]->[1]
                           & END_TAG_OPTIONAL_EL) {
                  !!!cp ('t174');
                  pop @{$self->{open_elements}};
                }
                
                unless ($self->{open_elements}->[-1]->[1] & CAPTION_EL) {
                  !!!cp ('t175');
                  !!!parse-error (type => 'not closed',
                                  text => $self->{open_elements}->[-1]->[0]
                                      ->manakai_local_name,
                                  token => $token);
                } else {
                  !!!cp ('t176');
                }
                
                splice @{$self->{open_elements}}, $i;
                
                $clear_up_to_marker->();
                
                $self->{insertion_mode} = IN_TABLE_IM;
                
                !!!next-token;
                next B;
              } elsif ($self->{insertion_mode} == IN_CELL_IM) {
                !!!cp ('t177');
                !!!parse-error (type => 'unmatched end tag',
                                text => $token->{tag_name}, token => $token);
                ## Ignore the token
                !!!next-token;
                next B;
              } else {
                !!!cp ('t178');
                #
              }
            } elsif ({
                      table => 1, tbody => 1, tfoot => 1, 
                      thead => 1, tr => 1,
                     }->{$token->{tag_name}} and
                     $self->{insertion_mode} == IN_CELL_IM) {
              ## have an element in table scope
              my $i;
              my $tn;
              INSCOPE: {
                for (reverse 0..$#{$self->{open_elements}}) {
                  my $node = $self->{open_elements}->[$_];
                  if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
                    !!!cp ('t179');
                    $i = $_;

                    ## Close the cell
                    !!!back-token; # </x>
                    $token = {type => END_TAG_TOKEN, tag_name => $tn,
                              line => $token->{line},
                              column => $token->{column}};
                    next B;
                  } elsif ($node->[1] & TABLE_CELL_EL) {
                    !!!cp ('t180');
                    $tn = $node->[0]->manakai_local_name;
                    ## NOTE: There is exactly one |td| or |th| element
                    ## in scope in the stack of open elements by definition.
                  } elsif ($node->[1] & TABLE_SCOPING_EL) {
                    ## ISSUE: Can this be reached?
                    !!!cp ('t181');
                    last;
                  }
                }

                !!!cp ('t182');
                !!!parse-error (type => 'unmatched end tag',
                    text => $token->{tag_name}, token => $token);
                ## Ignore the token
                !!!next-token;
                next B;
              } # INSCOPE
            } elsif ($token->{tag_name} eq 'table' and
                     $self->{insertion_mode} == IN_CAPTION_IM) {
              !!!parse-error (type => 'not closed', text => 'caption',
                              token => $token);

              ## As if </caption>
              ## have a table element in table scope
              my $i;
              INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
                my $node = $self->{open_elements}->[$_];
                if ($node->[1] & CAPTION_EL) {
                  !!!cp ('t184');
                  $i = $_;
                  last INSCOPE;
                } elsif ($node->[1] & TABLE_SCOPING_EL) {
                  !!!cp ('t185');
                  last INSCOPE;
                }
              } # INSCOPE
              unless (defined $i) {
                !!!cp ('t186');
                !!!parse-error (type => 'unmatched end tag',
                                text => 'caption', token => $token);
                ## Ignore the token
                !!!next-token;
                next B;
              }
              
              ## generate implied end tags
              while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
                !!!cp ('t187');
                pop @{$self->{open_elements}};
              }

              unless ($self->{open_elements}->[-1]->[1] & CAPTION_EL) {
                !!!cp ('t188');
                !!!parse-error (type => 'not closed',
                                text => $self->{open_elements}->[-1]->[0]
                                    ->manakai_local_name,
                                token => $token);
              } else {
                !!!cp ('t189');
              }

              splice @{$self->{open_elements}}, $i;

              $clear_up_to_marker->();

              $self->{insertion_mode} = IN_TABLE_IM;

              ## reprocess
              next B;
            } elsif ({
                      body => 1, col => 1, colgroup => 1, html => 1,
                     }->{$token->{tag_name}}) {
              if ($self->{insertion_mode} & BODY_TABLE_IMS) {
                !!!cp ('t190');
                !!!parse-error (type => 'unmatched end tag',
                                text => $token->{tag_name}, token => $token);
                ## Ignore the token
                !!!next-token;
                next B;
              } else {
                !!!cp ('t191');
                #
              }
            } elsif ({
                      tbody => 1, tfoot => 1,
                      thead => 1, tr => 1,
                     }->{$token->{tag_name}} and
                     $self->{insertion_mode} == IN_CAPTION_IM) {
              !!!cp ('t192');
              !!!parse-error (type => 'unmatched end tag',
                              text => $token->{tag_name}, token => $token);
              ## Ignore the token
              !!!next-token;
              next B;
            } else {
              !!!cp ('t193');
              #
            }
      } elsif ($token->{type} == END_OF_FILE_TOKEN) {
        for my $entry (@{$self->{open_elements}}) {
          unless ($entry->[1] & ALL_END_TAG_OPTIONAL_EL) {
            !!!cp ('t75');
            !!!parse-error (type => 'in body:#eof', token => $token);
            last;
          }
        }

        ## Stop parsing.
        last B;
      } else {
        die "$0: $token->{type}: Unknown token type";
      }

      $insert = $insert_to_current;
      #
    } elsif ($self->{insertion_mode} & TABLE_IMS) {
      if ($token->{type} == CHARACTER_TOKEN) {
        if (not $open_tables->[-1]->[1] and # tainted
            $token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
          $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
              
          unless (length $token->{data}) {
            !!!cp ('t194');
            !!!next-token;
            next B;
          } else {
            !!!cp ('t195');
          }
        }

        !!!parse-error (type => 'in table:#text', token => $token);

            ## As if in body, but insert into foster parent element
            ## ISSUE: Spec says that "whenever a node would be inserted
            ## into the current node" while characters might not
            ## result in a new Text node.
            $reconstruct_active_formatting_elements->($insert_to_foster);
            
            if ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) {
              # MUST
              my $foster_parent_element;
              my $next_sibling;
              my $prev_sibling;
              OE: for (reverse 0..$#{$self->{open_elements}}) {
                if ($self->{open_elements}->[$_]->[1] & TABLE_EL) {
                  my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
                  if (defined $parent and $parent->node_type == 1) {
                    !!!cp ('t196');
                    $foster_parent_element = $parent;
                    $next_sibling = $self->{open_elements}->[$_]->[0];
                    $prev_sibling = $next_sibling->previous_sibling;
                  } else {
                    !!!cp ('t197');
                    $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
                    $prev_sibling = $foster_parent_element->last_child;
                  }
                  last OE;
                }
              } # OE
              $foster_parent_element = $self->{open_elements}->[0]->[0] and
              $prev_sibling = $foster_parent_element->last_child
                unless defined $foster_parent_element;
              if (defined $prev_sibling and
                  $prev_sibling->node_type == 3) {
                !!!cp ('t198');
                $prev_sibling->manakai_append_text ($token->{data});
              } else {
                !!!cp ('t199');
                $foster_parent_element->insert_before
                  ($self->{document}->create_text_node ($token->{data}),
                   $next_sibling);
              }
          $open_tables->[-1]->[1] = 1; # tainted
        } else {
          !!!cp ('t200');
          $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
        }
            
        !!!next-token;
        next B;
      } elsif ($token->{type} == START_TAG_TOKEN) {
        if ({
             tr => ($self->{insertion_mode} != IN_ROW_IM),
             th => 1, td => 1,
            }->{$token->{tag_name}}) {
          if ($self->{insertion_mode} == IN_TABLE_IM) {
            ## Clear back to table context
            while (not ($self->{open_elements}->[-1]->[1]
                            & TABLE_SCOPING_EL)) {
              !!!cp ('t201');
              pop @{$self->{open_elements}};
            }
            
            !!!insert-element ('tbody',, $token);
            $self->{insertion_mode} = IN_TABLE_BODY_IM;
            ## reprocess in the "in table body" insertion mode...
          }
          
          if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
            unless ($token->{tag_name} eq 'tr') {
              !!!cp ('t202');
              !!!parse-error (type => 'missing start tag:tr', token => $token);
            }
                
            ## Clear back to table body context
            while (not ($self->{open_elements}->[-1]->[1]
                            & TABLE_ROWS_SCOPING_EL)) {
              !!!cp ('t203');
              ## ISSUE: Can this case be reached?
              pop @{$self->{open_elements}};
            }
                
                $self->{insertion_mode} = IN_ROW_IM;
                if ($token->{tag_name} eq 'tr') {
                  !!!cp ('t204');
                  !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
                  !!!nack ('t204');
                  !!!next-token;
                  next B;
                } else {
                  !!!cp ('t205');
                  !!!insert-element ('tr',, $token);
                  ## reprocess in the "in row" insertion mode
                }
              } else {
                !!!cp ('t206');
              }

              ## Clear back to table row context
              while (not ($self->{open_elements}->[-1]->[1]
                              & TABLE_ROW_SCOPING_EL)) {
                !!!cp ('t207');
                pop @{$self->{open_elements}};
              }
              
              !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
              $self->{insertion_mode} = IN_CELL_IM;

              push @$active_formatting_elements, ['#marker', ''];
              
              !!!nack ('t207.1');
              !!!next-token;
              next B;
            } elsif ({
                      caption => 1, col => 1, colgroup => 1,
                      tbody => 1, tfoot => 1, thead => 1,
                      tr => 1, # $self->{insertion_mode} == IN_ROW_IM
                     }->{$token->{tag_name}}) {
              if ($self->{insertion_mode} == IN_ROW_IM) {
                ## As if </tr>
                ## have an element in table scope
                my $i;
                INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
                  my $node = $self->{open_elements}->[$_];
                  if ($node->[1] & TABLE_ROW_EL) {
                    !!!cp ('t208');
                    $i = $_;
                    last INSCOPE;
                  } elsif ($node->[1] & TABLE_SCOPING_EL) {
                    !!!cp ('t209');
                    last INSCOPE;
                  }
                } # INSCOPE
                ## No |tr| element was found in table scope, so the implied
                ## </tr> cannot be processed: report the error and drop the
                ## start tag that triggered it.
                unless (defined $i) {
                  !!!cp ('t210');
                  ## TODO: This error type is wrong (the token being ignored
                  ## is a start tag, not an end tag).
                  !!!parse-error (type => 'unmatched end tag',
                                  text => $token->{tag_name}, token => $token);
                  ## Ignore the token
                  !!!nack ('t210.1');
                  !!!next-token;
                  next B;
                }
                
                ## Clear back to table row context
                while (not ($self->{open_elements}->[-1]->[1]
                                & TABLE_ROW_SCOPING_EL)) {
                  !!!cp ('t211');
                  ## ISSUE: Can this case be reached?
                  pop @{$self->{open_elements}};
                }
                
                pop @{$self->{open_elements}}; # tr
                $self->{insertion_mode} = IN_TABLE_BODY_IM;
                if ($token->{tag_name} eq 'tr') {
                  !!!cp ('t212');
                  ## reprocess
                  !!!ack-later;
                  next B;
                } else {
                  !!!cp ('t213');
                  ## reprocess in the "in table body" insertion mode...
                }
              }

              if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
                ## have an element in table scope
                my $i;
                INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
                  my $node = $self->{open_elements}->[$_];
                  if ($node->[1] & TABLE_ROW_GROUP_EL) {
                    !!!cp ('t214');
                    $i = $_;
                    last INSCOPE;
                  } elsif ($node->[1] & TABLE_SCOPING_EL) {
                    !!!cp ('t215');
                    last INSCOPE;
                  }
                } # INSCOPE
                unless (defined $i) {
                  !!!cp ('t216');
## TODO: This error type is wrong.
                  !!!parse-error (type => 'unmatched end tag',
                                  text => $token->{tag_name}, token => $token);
                  ## Ignore the token
                  !!!nack ('t216.1');
                  !!!next-token;
                  next B;
                }

                ## Clear back to table body context
                while (not ($self->{open_elements}->[-1]->[1]
                                & TABLE_ROWS_SCOPING_EL)) {
                  !!!cp ('t217');
                  ## ISSUE: Can this state be reached?
                  pop @{$self->{open_elements}};
                }
                
                ## As if <{current node}>
                ## have an element in table scope
                ## true by definition
                
                ## Clear back to table body context
                ## nop by definition
                
                pop @{$self->{open_elements}};
                $self->{insertion_mode} = IN_TABLE_IM;
                ## reprocess in "in table" insertion mode...
              } else {
                !!!cp ('t218');
              }

              if ($token->{tag_name} eq 'col') {
                ## Clear back to table context
                while (not ($self->{open_elements}->[-1]->[1]
                                & TABLE_SCOPING_EL)) {
                  !!!cp ('t219');
                  ## ISSUE: Can this state be reached?
                  pop @{$self->{open_elements}};
                }
                
                !!!insert-element ('colgroup',, $token);
                $self->{insertion_mode} = IN_COLUMN_GROUP_IM;
                ## reprocess
                !!!ack-later;
                next B;
              } elsif ({
                        caption => 1,
                        colgroup => 1,
                        tbody => 1, tfoot => 1, thead => 1,
                       }->{$token->{tag_name}}) {
                ## Clear back to table context
                while (not ($self->{open_elements}->[-1]->[1]
                                & TABLE_SCOPING_EL)) {
                  !!!cp ('t220');
                  ## ISSUE: Can this state be reached?
                  pop @{$self->{open_elements}};
                }
                
                push @$active_formatting_elements, ['#marker', '']
                    if $token->{tag_name} eq 'caption';
                
                !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
                $self->{insertion_mode} = {
                                           caption => IN_CAPTION_IM,
                                           colgroup => IN_COLUMN_GROUP_IM,
                                           tbody => IN_TABLE_BODY_IM,
                                           tfoot => IN_TABLE_BODY_IM,
                                           thead => IN_TABLE_BODY_IM,
                                          }->{$token->{tag_name}};
                !!!next-token;
                !!!nack ('t220.1');
                next B;
              } else {
                die "$0: in table: <>: $token->{tag_name}";
              }
            } elsif ($token->{tag_name} eq 'table') {
              !!!parse-error (type => 'not closed',
                              text => $self->{open_elements}->[-1]->[0]
                                  ->manakai_local_name,
                              token => $token);

              ## As if </table>
              ## have a table element in table scope
              my $i;
              INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
                my $node = $self->{open_elements}->[$_];
                if ($node->[1] & TABLE_EL) {
                  !!!cp ('t221');
                  $i = $_;
                  last INSCOPE;
                } elsif ($node->[1] & TABLE_SCOPING_EL) {
                  !!!cp ('t222');
                  last INSCOPE;
                }
              } # INSCOPE
              unless (defined $i) {
                !!!cp ('t223');
## TODO: The following is wrong, maybe.
                !!!parse-error (type => 'unmatched end tag', text => 'table',
                                token => $token);
                ## Ignore tokens </table><table>
                !!!nack ('t223.1');
                !!!next-token;
                next B;
              }
              
## TODO: The following steps have been removed from the latest spec.
              ## generate implied end tags
              while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
                !!!cp ('t224');
                pop @{$self->{open_elements}};
              }

              unless ($self->{open_elements}->[-1]->[1] & TABLE_EL) {
                !!!cp ('t225');
                ## NOTE: |<table><tr><table>|
                !!!parse-error (type => 'not closed',
                                text => $self->{open_elements}->[-1]->[0]
                                    ->manakai_local_name,
                                token => $token);
              } else {
                !!!cp ('t226');
              }

              splice @{$self->{open_elements}}, $i;
              pop @{$open_tables};

              $self->_reset_insertion_mode; 

          ## reprocess
          !!!ack-later;
          next B;
        } elsif ($token->{tag_name} eq 'style') {
          if (not $open_tables->[-1]->[1]) { # tainted
            !!!cp ('t227.8');
            ## NOTE: This is a "as if in head" code clone.
            $parse_rcdata->(CDATA_CONTENT_MODEL);
            next B;
          } else {
            !!!cp ('t227.7');
            #
          }
        } elsif ($token->{tag_name} eq 'script') {
          if (not $open_tables->[-1]->[1]) { # tainted
            !!!cp ('t227.6');
            ## NOTE: This is a "as if in head" code clone.
            $script_start_tag->();
            next B;
          } else {
            !!!cp ('t227.5');
            #
          }
        } elsif ($token->{tag_name} eq 'input') {
          if (not $open_tables->[-1]->[1]) { # tainted
            if ($token->{attributes}->{type}) { ## TODO: case
              my $type = lc $token->{attributes}->{type}->{value};
              if ($type eq 'hidden') {
                !!!cp ('t227.3');
                !!!parse-error (type => 'in table',
                                text => $token->{tag_name}, token => $token);

                !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);

                ## TODO: form element pointer

                pop @{$self->{open_elements}};

                !!!next-token;
                !!!ack ('t227.2.1');
                next B;
              } else {
                !!!cp ('t227.2');
                #
              }
            } else {
              !!!cp ('t227.1');
              #
            }
          } else {
            !!!cp ('t227.4');
            #
          }
        } else {
          !!!cp ('t227');
          #
        }

        !!!parse-error (type => 'in table', text => $token->{tag_name},
                        token => $token);

        $insert = $insert_to_foster;
        #
      } elsif ($token->{type} == END_TAG_TOKEN) {
            if ($token->{tag_name} eq 'tr' and
                $self->{insertion_mode} == IN_ROW_IM) {
              ## have an element in table scope
              my $i;
              INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
                my $node = $self->{open_elements}->[$_];
                if ($node->[1] & TABLE_ROW_EL) {
                  !!!cp ('t228');
                  $i = $_;
                  last INSCOPE;
                } elsif ($node->[1] & TABLE_SCOPING_EL) {
                  !!!cp ('t229');
                  last INSCOPE;
                }
              } # INSCOPE
              unless (defined $i) {
                !!!cp ('t230');
                !!!parse-error (type => 'unmatched end tag',
                                text => $token->{tag_name}, token => $token);
                ## Ignore the token
                !!!nack ('t230.1');
                !!!next-token;
                next B;
              } else {
                !!!cp ('t232');
              }

              ## Clear back to table row context
              while (not ($self->{open_elements}->[-1]->[1]
                              & TABLE_ROW_SCOPING_EL)) {
                !!!cp ('t231');
## ISSUE: Can this state be reached?
                pop @{$self->{open_elements}};
              }

              pop @{$self->{open_elements}}; # tr
              $self->{insertion_mode} = IN_TABLE_BODY_IM;
              !!!next-token;
              !!!nack ('t231.1');
              next B;
            } elsif ($token->{tag_name} eq 'table') {
              if ($self->{insertion_mode} == IN_ROW_IM) {
                ## As if </tr>
                ## have an element in table scope
                my $i;
                INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
                  my $node = $self->{open_elements}->[$_];
                  if ($node->[1] & TABLE_ROW_EL) {
                    !!!cp ('t233');
                    $i = $_;
                    last INSCOPE;
                  } elsif ($node->[1] & TABLE_SCOPING_EL) {
                    !!!cp ('t234');
                    last INSCOPE;
                  }
                } # INSCOPE
                ## No |tr| element was found in table scope, so the implied
                ## </tr> cannot be processed: report the error and drop the
                ## </table> token.
                unless (defined $i) {
                  !!!cp ('t235');
                  ## Report the tag name of the ignored token (not the
                  ## numeric token type), as the other scope checks of
                  ## this </table> branch do.
                  ## NOTE(review): the analogous check for </tbody> etc. in
                  ## the "in row" mode reports |text => 'tr'| instead;
                  ## confirm which form is preferred.
                  !!!parse-error (type => 'unmatched end tag',
                                  text => $token->{tag_name}, token => $token);
                  ## Ignore the token
                  !!!nack ('t236.1');
                  !!!next-token;
                  next B;
                }
                
                ## Clear back to table row context
                while (not ($self->{open_elements}->[-1]->[1]
                                & TABLE_ROW_SCOPING_EL)) {
                  !!!cp ('t236');
## ISSUE: Can this state be reached?
                  pop @{$self->{open_elements}};
                }
                
                pop @{$self->{open_elements}}; # tr
                $self->{insertion_mode} = IN_TABLE_BODY_IM;
                ## reprocess in the "in table body" insertion mode...
              }

              if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
                ## have an element in table scope
                my $i;
                INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
                  my $node = $self->{open_elements}->[$_];
                  if ($node->[1] & TABLE_ROW_GROUP_EL) {
                    !!!cp ('t237');
                    $i = $_;
                    last INSCOPE;
                  } elsif ($node->[1] & TABLE_SCOPING_EL) {
                    !!!cp ('t238');
                    last INSCOPE;
                  }
                } # INSCOPE
                unless (defined $i) {
                  !!!cp ('t239');
                  !!!parse-error (type => 'unmatched end tag',
                                  text => $token->{tag_name}, token => $token);
                  ## Ignore the token
                  !!!nack ('t239.1');
                  !!!next-token;
                  next B;
                }
                
                ## Clear back to table body context
                while (not ($self->{open_elements}->[-1]->[1]
                                & TABLE_ROWS_SCOPING_EL)) {
                  !!!cp ('t240');
                  pop @{$self->{open_elements}};
                }
                
                ## As if <{current node}>
                ## have an element in table scope
                ## true by definition
                
                ## Clear back to table body context
                ## nop by definition
                
                pop @{$self->{open_elements}};
                $self->{insertion_mode} = IN_TABLE_IM;
                ## reprocess in the "in table" insertion mode...
              }

              ## NOTE: </table> in the "in table" insertion mode.
              ## When you edit the code fragment below, please ensure that
              ## the code for <table> in the "in table" insertion mode
              ## is synced with it.

              ## have a table element in table scope
              my $i;
              INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
                my $node = $self->{open_elements}->[$_];
                if ($node->[1] & TABLE_EL) {
                  !!!cp ('t241');
                  $i = $_;
                  last INSCOPE;
                } elsif ($node->[1] & TABLE_SCOPING_EL) {
                  !!!cp ('t242');
                  last INSCOPE;
                }
              } # INSCOPE
              unless (defined $i) {
                !!!cp ('t243');
                !!!parse-error (type => 'unmatched end tag',
                                text => $token->{tag_name}, token => $token);
                ## Ignore the token
                !!!nack ('t243.1');
                !!!next-token;
                next B;
              }
                
              splice @{$self->{open_elements}}, $i;
              pop @{$open_tables};
              
              $self->_reset_insertion_mode;
              
              !!!next-token;
              next B;
            } elsif ({
                      tbody => 1, tfoot => 1, thead => 1,
                     }->{$token->{tag_name}} and
                     $self->{insertion_mode} & ROW_IMS) {
              if ($self->{insertion_mode} == IN_ROW_IM) {
                ## have an element in table scope
                my $i;
                INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
                  my $node = $self->{open_elements}->[$_];
                  if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
                    !!!cp ('t247');
                    $i = $_;
                    last INSCOPE;
                  } elsif ($node->[1] & TABLE_SCOPING_EL) {
                    !!!cp ('t248');
                    last INSCOPE;
                  }
                } # INSCOPE
                  unless (defined $i) {
                    !!!cp ('t249');
                    !!!parse-error (type => 'unmatched end tag',
                                    text => $token->{tag_name}, token => $token);
                    ## Ignore the token
                    !!!nack ('t249.1');
                    !!!next-token;
                    next B;
                  }
                
                ## As if </tr>
                ## have an element in table scope
                ## Search the stack of open elements from the current node
                ## downwards for a |tr|, stopping at the first table scoping
                ## element.  A dedicated variable is used so that the |$i|
                ## declared for the preceding scope check is not masked by a
                ## second |my $i| in the same lexical scope.
                my $tr_index;
                INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
                  my $node = $self->{open_elements}->[$_];
                  if ($node->[1] & TABLE_ROW_EL) {
                    !!!cp ('t250');
                    $tr_index = $_;
                    last INSCOPE;
                  } elsif ($node->[1] & TABLE_SCOPING_EL) {
                    !!!cp ('t251');
                    last INSCOPE;
                  }
                } # INSCOPE
                ## No |tr| in table scope: the implied </tr> is unmatched.
                unless (defined $tr_index) {
                  !!!cp ('t252');
                  !!!parse-error (type => 'unmatched end tag',
                                  text => 'tr', token => $token);
                  ## Ignore the token
                  !!!nack ('t252.1');
                  !!!next-token;
                  next B;
                }
                
                ## Clear back to table row context
                while (not ($self->{open_elements}->[-1]->[1]
                                & TABLE_ROW_SCOPING_EL)) {
                  !!!cp ('t253');
## ISSUE: Can this case be reached?
                  pop @{$self->{open_elements}};
                }
                
                pop @{$self->{open_elements}}; # tr
                $self->{insertion_mode} = IN_TABLE_BODY_IM;
                ## reprocess in the "in table body" insertion mode...
              }

              ## have an element in table scope
              my $i;
              INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
                my $node = $self->{open_elements}->[$_];
                if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
                  !!!cp ('t254');
                  $i = $_;
                  last INSCOPE;
                } elsif ($node->[1] & TABLE_SCOPING_EL) {
                  !!!cp ('t255');
                  last INSCOPE;
                }
              } # INSCOPE
              unless (defined $i) {
                !!!cp ('t256');
                !!!parse-error (type => 'unmatched end tag',
                                text => $token->{tag_name}, token => $token);
                ## Ignore the token
                !!!nack ('t256.1');
                !!!next-token;
                next B;
              }

              ## Clear back to table body context
              while (not ($self->{open_elements}->[-1]->[1]
                              & TABLE_ROWS_SCOPING_EL)) {
                !!!cp ('t257');
## ISSUE: Can this case be reached?
                pop @{$self->{open_elements}};
              }

              pop @{$self->{open_elements}};
              $self->{insertion_mode} = IN_TABLE_IM;
              !!!nack ('t257.1');
              !!!next-token;
              next B;
            } elsif ({
                      body => 1, caption => 1, col => 1, colgroup => 1,
                      html => 1, td => 1, th => 1,
                      tr => 1, # $self->{insertion_mode} == IN_ROW_IM
                      tbody => 1, tfoot => 1, thead => 1, # $self->{insertion_mode} == IN_TABLE_IM
                     }->{$token->{tag_name}}) {
          !!!cp ('t258');
          !!!parse-error (type => 'unmatched end tag',
                          text => $token->{tag_name}, token => $token);
          ## Ignore the token
          !!!nack ('t258.1');
           !!!next-token;
          next B;
        } else {
          !!!cp ('t259');
          !!!parse-error (type => 'in table:/',
                          text => $token->{tag_name}, token => $token);

          $insert = $insert_to_foster;
          #
        }
      } elsif ($token->{type} == END_OF_FILE_TOKEN) {
        unless ($self->{open_elements}->[-1]->[1] & HTML_EL and
                @{$self->{open_elements}} == 1) { # redundant, maybe
          !!!parse-error (type => 'in body:#eof', token => $token);
          !!!cp ('t259.1');
          #
        } else {
          !!!cp ('t259.2');
          #
        }

        ## Stop parsing
        last B;
      } else {
        die "$0: $token->{type}: Unknown token type";
      }
    } elsif ($self->{insertion_mode} == IN_COLUMN_GROUP_IM) {
          if ($token->{type} == CHARACTER_TOKEN) {
            if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
              $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
              unless (length $token->{data}) {
                !!!cp ('t260');
                !!!next-token;
                next B;
              }
            }
            
            !!!cp ('t261');
            #
          } elsif ($token->{type} == START_TAG_TOKEN) {
            if ($token->{tag_name} eq 'col') {
              !!!cp ('t262');
              !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
              pop @{$self->{open_elements}};
              !!!ack ('t262.1');
              !!!next-token;
              next B;
            } else { 
              !!!cp ('t263');
              #
            }
          } elsif ($token->{type} == END_TAG_TOKEN) {
            if ($token->{tag_name} eq 'colgroup') {
              if ($self->{open_elements}->[-1]->[1] & HTML_EL) {
                !!!cp ('t264');
                !!!parse-error (type => 'unmatched end tag',
                                text => 'colgroup', token => $token);
                ## Ignore the token
                !!!next-token;
                next B;
              } else {
                !!!cp ('t265');
                pop @{$self->{open_elements}}; # colgroup
                $self->{insertion_mode} = IN_TABLE_IM;
                !!!next-token;
                next B;             
              }
            } elsif ($token->{tag_name} eq 'col') {
              !!!cp ('t266');
              !!!parse-error (type => 'unmatched end tag',
                              text => 'col', token => $token);
              ## Ignore the token
              !!!next-token;
              next B;
            } else {
              !!!cp ('t267');
              # 
            }
      } elsif ($token->{type} == END_OF_FILE_TOKEN) {
        if ($self->{open_elements}->[-1]->[1] & HTML_EL and
            @{$self->{open_elements}} == 1) { # redundant, maybe
          !!!cp ('t270.2');
          ## Stop parsing.
          last B;
        } else {
          ## NOTE: As if </colgroup>.
          !!!cp ('t270.1');
          pop @{$self->{open_elements}}; # colgroup
          $self->{insertion_mode} = IN_TABLE_IM;
          ## Reprocess.
          next B;
        }
      } else {
        die "$0: $token->{type}: Unknown token type";
      }

          ## As if </colgroup>
          if ($self->{open_elements}->[-1]->[1] & HTML_EL) {
            !!!cp ('t269');
## TODO: Wrong error type?
            !!!parse-error (type => 'unmatched end tag',
                            text => 'colgroup', token => $token);
            ## Ignore the token
            !!!nack ('t269.1');
            !!!next-token;
            next B;
          } else {
            !!!cp ('t270');
            pop @{$self->{open_elements}}; # colgroup
            $self->{insertion_mode} = IN_TABLE_IM;
            !!!ack-later;
            ## reprocess
            next B;
          }
    } elsif ($self->{insertion_mode} & SELECT_IMS) {
      if ($token->{type} == CHARACTER_TOKEN) {
        !!!cp ('t271');
        $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
        !!!next-token;
        next B;
      } elsif ($token->{type} == START_TAG_TOKEN) {
        if ($token->{tag_name} eq 'option') {
          if ($self->{open_elements}->[-1]->[1] & OPTION_EL) {
            !!!cp ('t272');
            ## As if </option>
            pop @{$self->{open_elements}};
          } else {
            !!!cp ('t273');
          }

          !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
          !!!nack ('t273.1');
          !!!next-token;
          next B;
        } elsif ($token->{tag_name} eq 'optgroup') {
          if ($self->{open_elements}->[-1]->[1] & OPTION_EL) {
            !!!cp ('t274');
            ## As if </option>
            pop @{$self->{open_elements}};
          } else {
            !!!cp ('t275');
          }

          if ($self->{open_elements}->[-1]->[1] & OPTGROUP_EL) {
            !!!cp ('t276');
            ## As if </optgroup>
            pop @{$self->{open_elements}};
          } else {
            !!!cp ('t277');
          }

          !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
          !!!nack ('t277.1');
          !!!next-token;
          next B;
        } elsif ({
                   select => 1, input => 1, textarea => 1,
                 }->{$token->{tag_name}} or
                 ($self->{insertion_mode} == IN_SELECT_IN_TABLE_IM and
                  {
                   caption => 1, table => 1,
                   tbody => 1, tfoot => 1, thead => 1,
                   tr => 1, td => 1, th => 1,
                  }->{$token->{tag_name}})) {
          ## TODO: The type below is not good - <select> is replaced by </select>
          !!!parse-error (type => 'not closed', text => 'select',
                          token => $token);
          ## NOTE: As if the token were </select> (<select> case) or
          ## as if there were </select> (otherwise).
          ## have an element in table scope
          my $i;
          INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
            my $node = $self->{open_elements}->[$_];
            if ($node->[1] & SELECT_EL) {
              !!!cp ('t278');
              $i = $_;
              last INSCOPE;
            } elsif ($node->[1] & TABLE_SCOPING_EL) {
              !!!cp ('t279');
              last INSCOPE;
            }
          } # INSCOPE
          unless (defined $i) {
            !!!cp ('t280');
            !!!parse-error (type => 'unmatched end tag',
                            text => 'select', token => $token);
            ## Ignore the token
            !!!nack ('t280.1');
            !!!next-token;
            next B;
          }
              
          !!!cp ('t281');
          splice @{$self->{open_elements}}, $i;

          $self->_reset_insertion_mode;

          if ($token->{tag_name} eq 'select') {
            !!!nack ('t281.2');
            !!!next-token;
            next B;
          } else {
            !!!cp ('t281.1');
            !!!ack-later;
            ## Reprocess the token.
            next B;
          }
        } else {
          !!!cp ('t282');
          !!!parse-error (type => 'in select',
                          text => $token->{tag_name}, token => $token);
          ## Ignore the token
          !!!nack ('t282.1');
          !!!next-token;
          next B;
        }
      } elsif ($token->{type} == END_TAG_TOKEN) {
        if ($token->{tag_name} eq 'optgroup') {
          if ($self->{open_elements}->[-1]->[1] & OPTION_EL and
              $self->{open_elements}->[-2]->[1] & OPTGROUP_EL) {
            !!!cp ('t283');
            ## As if </option>
            splice @{$self->{open_elements}}, -2;
          } elsif ($self->{open_elements}->[-1]->[1] & OPTGROUP_EL) {
            !!!cp ('t284');
            pop @{$self->{open_elements}};
          } else {
            !!!cp ('t285');
            !!!parse-error (type => 'unmatched end tag',
                            text => $token->{tag_name}, token => $token);
            ## Ignore the token
          }
          !!!nack ('t285.1');
          !!!next-token;
          next B;
        } elsif ($token->{tag_name} eq 'option') {
          if ($self->{open_elements}->[-1]->[1] & OPTION_EL) {
            !!!cp ('t286');
            pop @{$self->{open_elements}};
          } else {
            !!!cp ('t287');
            !!!parse-error (type => 'unmatched end tag',
                            text => $token->{tag_name}, token => $token);
            ## Ignore the token
          }
          !!!nack ('t287.1');
          !!!next-token;
          next B;
        } elsif ($token->{tag_name} eq 'select') {
          ## have an element in table scope
          my $i;
          INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
            my $node = $self->{open_elements}->[$_];
            if ($node->[1] & SELECT_EL) {
              !!!cp ('t288');
              $i = $_;
              last INSCOPE;
            } elsif ($node->[1] & TABLE_SCOPING_EL) {
              !!!cp ('t289');
              last INSCOPE;
            }
          } # INSCOPE
          unless (defined $i) {
            !!!cp ('t290');
            !!!parse-error (type => 'unmatched end tag',
                            text => $token->{tag_name}, token => $token);
            ## Ignore the token
            !!!nack ('t290.1');
            !!!next-token;
            next B;
          }
              
          !!!cp ('t291');
          splice @{$self->{open_elements}}, $i;

          $self->_reset_insertion_mode;

          !!!nack ('t291.1');
          !!!next-token;
          next B;
        } elsif ($self->{insertion_mode} == IN_SELECT_IN_TABLE_IM and
                 {
                  caption => 1, table => 1, tbody => 1,
                  tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
                 }->{$token->{tag_name}}) {
## TODO: The following is wrong?
          !!!parse-error (type => 'unmatched end tag',
                          text => $token->{tag_name}, token => $token);
              
          ## have an element in table scope
          my $i;
          INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
            my $node = $self->{open_elements}->[$_];
            if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
              !!!cp ('t292');
              $i = $_;
              last INSCOPE;
            } elsif ($node->[1] & TABLE_SCOPING_EL) {
              !!!cp ('t293');
              last INSCOPE;
            }
          } # INSCOPE
          unless (defined $i) {
            !!!cp ('t294');
            ## Ignore the token
            !!!nack ('t294.1');
            !!!next-token;
            next B;
          }
              
          ## As if </select>
          ## have an element in table scope
          undef $i;
          INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
            my $node = $self->{open_elements}->[$_];
            if ($node->[1] & SELECT_EL) {
              !!!cp ('t295');
              $i = $_;
              last INSCOPE;
            } elsif ($node->[1] & TABLE_SCOPING_EL) {
## ISSUE: Can this state be reached?
              !!!cp ('t296');
              last INSCOPE;
            }
          } # INSCOPE
          unless (defined $i) {
            !!!cp ('t297');
## TODO: The following error type is correct?
            !!!parse-error (type => 'unmatched end tag',
                            text => 'select', token => $token);
            ## Ignore the </select> token
            !!!nack ('t297.1');
            !!!next-token; ## TODO: ok?
            next B;
          }
              
          !!!cp ('t298');
          splice @{$self->{open_elements}}, $i;

          $self->_reset_insertion_mode;

          !!!ack-later;
          ## reprocess
          next B;
        } else {
          !!!cp ('t299');
          !!!parse-error (type => 'in select:/',
                          text => $token->{tag_name}, token => $token);
          ## Ignore the token
          !!!nack ('t299.3');
          !!!next-token;
          next B;
        }
      } elsif ($token->{type} == END_OF_FILE_TOKEN) {
        unless ($self->{open_elements}->[-1]->[1] & HTML_EL and
                @{$self->{open_elements}} == 1) { # redundant, maybe
          !!!cp ('t299.1');
          !!!parse-error (type => 'in body:#eof', token => $token);
        } else {
          !!!cp ('t299.2');
        }

        ## Stop parsing.
        last B;
      } else {
        die "$0: $token->{type}: Unknown token type";
      }
    } elsif ($self->{insertion_mode} & BODY_AFTER_IMS) {
      if ($token->{type} == CHARACTER_TOKEN) {
        if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
          my $data = $1;
          ## As if in body
          $reconstruct_active_formatting_elements->($insert_to_current);
              
          $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
          
          unless (length $token->{data}) {
            !!!cp ('t300');
            !!!next-token;
            next B;
          }
        }
        
        if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
          !!!cp ('t301');
          !!!parse-error (type => 'after html:#text', token => $token);
          #
        } else {
          !!!cp ('t302');
          ## "after body" insertion mode
          !!!parse-error (type => 'after body:#text', token => $token);
          #
        }

        $self->{insertion_mode} = IN_BODY_IM;
        ## reprocess
        next B;
      } elsif ($token->{type} == START_TAG_TOKEN) {
        if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
          !!!cp ('t303');
          !!!parse-error (type => 'after html',
                          text => $token->{tag_name}, token => $token);
          #
        } else {
          !!!cp ('t304');
          ## "after body" insertion mode
          !!!parse-error (type => 'after body',
                          text => $token->{tag_name}, token => $token);
          #
        }

        $self->{insertion_mode} = IN_BODY_IM;
        !!!ack-later;
        ## reprocess
        next B;
      } elsif ($token->{type} == END_TAG_TOKEN) {
        if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
          !!!cp ('t305');
          !!!parse-error (type => 'after html:/',
                          text => $token->{tag_name}, token => $token);
          
          $self->{insertion_mode} = IN_BODY_IM;
          ## Reprocess.
          next B;
        } else {
          !!!cp ('t306');
        }

        ## "after body" insertion mode
        if ($token->{tag_name} eq 'html') {
          if (defined $self->{inner_html_node}) {
            !!!cp ('t307');
            !!!parse-error (type => 'unmatched end tag',
                            text => 'html', token => $token);
            ## Ignore the token
            !!!next-token;
            next B;
          } else {
            !!!cp ('t308');
            $self->{insertion_mode} = AFTER_HTML_BODY_IM;
            !!!next-token;
            next B;
          }
        } else {
          !!!cp ('t309');
          !!!parse-error (type => 'after body:/',
                          text => $token->{tag_name}, token => $token);

          $self->{insertion_mode} = IN_BODY_IM;
          ## reprocess
          next B;
        }
      } elsif ($token->{type} == END_OF_FILE_TOKEN) {
        !!!cp ('t309.2');
        ## Stop parsing
        last B;
      } else {
        die "$0: $token->{type}: Unknown token type";
      }
    } elsif ($self->{insertion_mode} & FRAME_IMS) {
      if ($token->{type} == CHARACTER_TOKEN) {
        if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
          $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
          
          unless (length $token->{data}) {
            !!!cp ('t310');
            !!!next-token;
            next B;
          }
        }
        
        if ($token->{data} =~ s/^[^\x09\x0A\x0C\x20]+//) {
          if ($self->{insertion_mode} == IN_FRAMESET_IM) {
            !!!cp ('t311');
            !!!parse-error (type => 'in frameset:#text', token => $token);
          } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
            !!!cp ('t312');
            !!!parse-error (type => 'after frameset:#text', token => $token);
          } else { # "after after frameset"
            !!!cp ('t313');
            !!!parse-error (type => 'after html:#text', token => $token);
          }
          
          ## Ignore the token.
          if (length $token->{data}) {
            !!!cp ('t314');
            ## reprocess the rest of characters
          } else {
            !!!cp ('t315');
            !!!next-token;
          }
          next B;
        }
        
        die qq[$0: Character "$token->{data}"];
      } elsif ($token->{type} == START_TAG_TOKEN) {
        if ($token->{tag_name} eq 'frameset' and
            $self->{insertion_mode} == IN_FRAMESET_IM) {
          !!!cp ('t318');
          !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
          !!!nack ('t318.1');
          !!!next-token;
          next B;
        } elsif ($token->{tag_name} eq 'frame' and
                 $self->{insertion_mode} == IN_FRAMESET_IM) {
          !!!cp ('t319');
          !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
          pop @{$self->{open_elements}};
          !!!ack ('t319.1');
          !!!next-token;
          next B;
        } elsif ($token->{tag_name} eq 'noframes') {
          !!!cp ('t320');
          ## NOTE: As if in head.
          $parse_rcdata->(CDATA_CONTENT_MODEL);
          next B;

          ## NOTE: |<!DOCTYPE HTML><frameset></frameset></html><noframes></noframes>|
          ## has no parse error.
        } else {
          if ($self->{insertion_mode} == IN_FRAMESET_IM) {
            !!!cp ('t321');
            !!!parse-error (type => 'in frameset',
                            text => $token->{tag_name}, token => $token);
          } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
            !!!cp ('t322');
            !!!parse-error (type => 'after frameset',
                            text => $token->{tag_name}, token => $token);
          } else { # "after after frameset"
            !!!cp ('t322.2');
            !!!parse-error (type => 'after after frameset',
                            text => $token->{tag_name}, token => $token);
          }
          ## Ignore the token
          !!!nack ('t322.1');
          !!!next-token;
          next B;
        }
      } elsif ($token->{type} == END_TAG_TOKEN) {
        if ($token->{tag_name} eq 'frameset' and
            $self->{insertion_mode} == IN_FRAMESET_IM) {
          if ($self->{open_elements}->[-1]->[1] & HTML_EL and
              @{$self->{open_elements}} == 1) {
            !!!cp ('t325');
            !!!parse-error (type => 'unmatched end tag',
                            text => $token->{tag_name}, token => $token);
            ## Ignore the token
            !!!next-token;
          } else {
            !!!cp ('t326');
            pop @{$self->{open_elements}};
            !!!next-token;
          }

          if (not defined $self->{inner_html_node} and
              not ($self->{open_elements}->[-1]->[1] & FRAMESET_EL)) {
            !!!cp ('t327');
            $self->{insertion_mode} = AFTER_FRAMESET_IM;
          } else {
            !!!cp ('t328');
          }
          next B;
        } elsif ($token->{tag_name} eq 'html' and
                 $self->{insertion_mode} == AFTER_FRAMESET_IM) {
          !!!cp ('t329');
          $self->{insertion_mode} = AFTER_HTML_FRAMESET_IM;
          !!!next-token;
          next B;
        } else {
          if ($self->{insertion_mode} == IN_FRAMESET_IM) {
            !!!cp ('t330');
            !!!parse-error (type => 'in frameset:/',
                            text => $token->{tag_name}, token => $token);
          } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
            !!!cp ('t330.1');
            !!!parse-error (type => 'after frameset:/',
                            text => $token->{tag_name}, token => $token);
          } else { # "after after frameset"
            !!!cp ('t331');
            !!!parse-error (type => 'after after frameset:/',
                            text => $token->{tag_name}, token => $token);
          }
          ## Ignore the token
          !!!next-token;
          next B;
        }
      } elsif ($token->{type} == END_OF_FILE_TOKEN) {
        unless ($self->{open_elements}->[-1]->[1] & HTML_EL and
                @{$self->{open_elements}} == 1) { # redundant, maybe
          !!!cp ('t331.1');
          !!!parse-error (type => 'in body:#eof', token => $token);
        } else {
          !!!cp ('t331.2');
        }
        
        ## Stop parsing
        last B;
      } else {
        die "$0: $token->{type}: Unknown token type";
      }

      ## ISSUE: An issue in spec here
    } else {
      die "$0: $self->{insertion_mode}: Unknown insertion mode";
    }

    ## "in body" insertion mode
    if ($token->{type} == START_TAG_TOKEN) {
      if ($token->{tag_name} eq 'script') {
        !!!cp ('t332');
        ## NOTE: This is an "as if in head" code clone
        $script_start_tag->();
        next B;
      } elsif ($token->{tag_name} eq 'style') {
        !!!cp ('t333');
        ## NOTE: This is an "as if in head" code clone
        $parse_rcdata->(CDATA_CONTENT_MODEL);
        next B;
      } elsif ({
                base => 1, command => 1, eventsource => 1, link => 1,
               }->{$token->{tag_name}}) {
        !!!cp ('t334');
        ## NOTE: This is an "as if in head" code clone, only "-t" differs
        !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
        pop @{$self->{open_elements}};
        !!!ack ('t334.1');
        !!!next-token;
        next B;
      } elsif ($token->{tag_name} eq 'meta') {
        ## NOTE: This is an "as if in head" code clone, only "-t" differs
        !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
        my $meta_el = pop @{$self->{open_elements}};

        unless ($self->{confident}) {
          if ($token->{attributes}->{charset}) {
            !!!cp ('t335');
            ## NOTE: Whether the encoding is supported or not is handled
            ## in the {change_encoding} callback.
            $self->{change_encoding}
                ->($self, $token->{attributes}->{charset}->{value}, $token);
            
            $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
                ->set_user_data (manakai_has_reference =>
                                     $token->{attributes}->{charset}
                                         ->{has_reference});
          } elsif ($token->{attributes}->{content}) {
            if ($token->{attributes}->{content}->{value}
                =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
                    [\x09\x0A\x0C\x0D\x20]*=
                    [\x09\x0A\x0C\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
                    ([^"'\x09\x0A\x0C\x0D\x20][^\x09\x0A\x0C\x0D\x20\x3B]*))
                   /x) {
              !!!cp ('t336');
              ## NOTE: Whether the encoding is supported or not is handled
              ## in the {change_encoding} callback.
              $self->{change_encoding}
                  ->($self, defined $1 ? $1 : defined $2 ? $2 : $3, $token);
              $meta_el->[0]->get_attribute_node_ns (undef, 'content')
                  ->set_user_data (manakai_has_reference =>
                                       $token->{attributes}->{content}
                                             ->{has_reference});
            }
          }
        } else {
          if ($token->{attributes}->{charset}) {
            !!!cp ('t337');
            $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
                ->set_user_data (manakai_has_reference =>
                                     $token->{attributes}->{charset}
                                         ->{has_reference});
          }
          if ($token->{attributes}->{content}) {
            !!!cp ('t338');
            $meta_el->[0]->get_attribute_node_ns (undef, 'content')
                ->set_user_data (manakai_has_reference =>
                                     $token->{attributes}->{content}
                                         ->{has_reference});
          }
        }

        !!!ack ('t338.1');
        !!!next-token;
        next B;
      } elsif ($token->{tag_name} eq 'title') {
        !!!cp ('t341');
        ## NOTE: This is an "as if in head" code clone
        $parse_rcdata->(RCDATA_CONTENT_MODEL);
        next B;
      } elsif ($token->{tag_name} eq 'body') {
        !!!parse-error (type => 'in body', text => 'body', token => $token);
              
        if (@{$self->{open_elements}} == 1 or
            not ($self->{open_elements}->[1]->[1] & BODY_EL)) {
          !!!cp ('t342');
          ## Ignore the token
        } else {
          my $body_el = $self->{open_elements}->[1]->[0];
          for my $attr_name (keys %{$token->{attributes}}) {
            unless ($body_el->has_attribute_ns (undef, $attr_name)) {
              !!!cp ('t343');
              $body_el->set_attribute_ns
                (undef, [undef, $attr_name],
                 $token->{attributes}->{$attr_name}->{value});
            }
          }
        }
        !!!nack ('t343.1');
        !!!next-token;
        next B;
      } elsif ({
                ## NOTE: Start tags for non-phrasing flow content elements

                ## NOTE: The normal one
                address => 1, article => 1, aside => 1, blockquote => 1,
                center => 1, datagrid => 1, details => 1, dialog => 1,
                dir => 1, div => 1, dl => 1, fieldset => 1, figure => 1,
                footer => 1, h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1,
                h6 => 1, header => 1, menu => 1, nav => 1, ol => 1, p => 1, 
                section => 1, ul => 1,
                ## NOTE: As normal, but drops leading newline
                pre => 1, listing => 1,
                ## NOTE: As normal, but interacts with the form element pointer
                form => 1,
                
                table => 1,
                hr => 1,
               }->{$token->{tag_name}}) {
        if ($token->{tag_name} eq 'form' and defined $self->{form_element}) {
          !!!cp ('t350');
          !!!parse-error (type => 'in form:form', token => $token);
          ## Ignore the token
          !!!nack ('t350.1');
          !!!next-token;
          next B;
        }

        ## has a p element in scope
        INSCOPE: for (reverse @{$self->{open_elements}}) {
          if ($_->[1] & P_EL) {
            !!!cp ('t344');
            !!!back-token; # <form>
            $token = {type => END_TAG_TOKEN, tag_name => 'p',
                      line => $token->{line}, column => $token->{column}};
            next B;
          } elsif ($_->[1] & SCOPING_EL) {
            !!!cp ('t345');
            last INSCOPE;
          }
        } # INSCOPE
          
        !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
        if ($token->{tag_name} eq 'pre' or $token->{tag_name} eq 'listing') {
          !!!nack ('t346.1');
          !!!next-token;
          if ($token->{type} == CHARACTER_TOKEN) {
            $token->{data} =~ s/^\x0A//;
            unless (length $token->{data}) {
              !!!cp ('t346');
              !!!next-token;
            } else {
              !!!cp ('t349');
            }
          } else {
            !!!cp ('t348');
          }
        } elsif ($token->{tag_name} eq 'form') {
          !!!cp ('t347.1');
          $self->{form_element} = $self->{open_elements}->[-1]->[0];

          !!!nack ('t347.2');
          !!!next-token;
        } elsif ($token->{tag_name} eq 'table') {
          !!!cp ('t382');
          push @{$open_tables}, [$self->{open_elements}->[-1]->[0]];
          
          $self->{insertion_mode} = IN_TABLE_IM;

          !!!nack ('t382.1');
          !!!next-token;
        } elsif ($token->{tag_name} eq 'hr') {
          !!!cp ('t386');
          pop @{$self->{open_elements}};
        
          !!!nack ('t386.1');
          !!!next-token;
        } else {
          !!!nack ('t347.1');
          !!!next-token;
        }
        next B;
      } elsif ($token->{tag_name} eq 'li') {
        ## NOTE: As normal, but imply </li> when there's another <li> ...

        ## NOTE: Special, Scope (<li><foo><li> == <li><foo><li/></foo></li>)
          ## Interpreted as <li><foo/></li><li/> (non-conforming)
          ## blockquote (O9.27), center (O), dd (Fx3, O, S3.1.2, IE7),
          ## dt (Fx, O, S, IE), dl (O), fieldset (O, S, IE), form (Fx, O, S),
          ## hn (O), pre (O), applet (O, S), button (O, S), marquee (Fx, O, S),
          ## object (Fx)
          ## Generate non-tree (non-conforming)
          ## basefont (IE7 (where basefont is non-void)), center (IE),
          ## form (IE), hn (IE)
        ## address, div, p (<li><foo><li> == <li><foo/></li><li/>)
          ## Interpreted as <li><foo><li/></foo></li> (non-conforming)
          ## div (Fx, S)

        my $non_optional;
        my $i = -1;

        ## 1.
        for my $node (reverse @{$self->{open_elements}}) {
          if ($node->[1] & LI_EL) {
            ## 2. (a) As if </li>
            {
              ## If no </li> - not applied
              #

              ## Otherwise

              ## 1. generate implied end tags, except for </li>
              #

              ## 2. If current node != "li", parse error
              if ($non_optional) {
                !!!parse-error (type => 'not closed',
                                text => $non_optional->[0]->manakai_local_name,
                                token => $token);
                !!!cp ('t355');
              } else {
                !!!cp ('t356');
              }

              ## 3. Pop
              splice @{$self->{open_elements}}, $i;
            }

            last; ## 2. (b) goto 5.
          } elsif (
                   ## NOTE: not "formatting" and not "phrasing"
                   ($node->[1] & SPECIAL_EL or
                    $node->[1] & SCOPING_EL) and
                   ## NOTE: "li", "dt", and "dd" are in |SPECIAL_EL|.

                   (not $node->[1] & ADDRESS_EL) &
                   (not $node->[1] & DIV_EL) &
                   (not $node->[1] & P_EL)) {
            ## 3.
            !!!cp ('t357');
            last; ## goto 5.
          } elsif ($node->[1] & END_TAG_OPTIONAL_EL) {
            !!!cp ('t358');
            #
          } else {
            !!!cp ('t359');
            $non_optional ||= $node;
            #
          }
          ## 4.
          ## goto 2.
          $i--;
        }

        ## 5. (a) has a |p| element in scope
        INSCOPE: for (reverse @{$self->{open_elements}}) {
          if ($_->[1] & P_EL) {
            !!!cp ('t353');
            !!!back-token; # <x>
            $token = {type => END_TAG_TOKEN, tag_name => 'p',
                      line => $token->{line}, column => $token->{column}};
            next B;
          } elsif ($_->[1] & SCOPING_EL) {
            !!!cp ('t354');
            last INSCOPE;
          }
        } # INSCOPE

        ## 5. (b) insert
        !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
        !!!nack ('t359.1');
        !!!next-token;
        next B;
      } elsif ($token->{tag_name} eq 'dt' or
               $token->{tag_name} eq 'dd') {
        ## NOTE: As normal, but imply </dt> or </dd> when ...

        my $non_optional;
        my $i = -1;

        ## 1.
        for my $node (reverse @{$self->{open_elements}}) {
          if ($node->[1] & DT_EL or $node->[1] & DD_EL) {
            ## 2. (a) As if </dt> or </dd>
            {
              ## If no </dt> or </dd> - not applied
              #

              ## Otherwise

              ## 1. generate implied end tags, except for </dt> or </dd>
              #

              ## 2. If current node != "dt"|"dd", parse error
              if ($non_optional) {
                !!!parse-error (type => 'not closed',
                                text => $non_optional->[0]->manakai_local_name,
                                token => $token);
                !!!cp ('t355.1');
              } else {
                !!!cp ('t356.1');
              }

              ## 3. Pop
              splice @{$self->{open_elements}}, $i;
            }

            last; ## 2. (b) goto 5.
          } elsif (
                   ## NOTE: not "formatting" and not "phrasing"
                   ($node->[1] & SPECIAL_EL or
                    $node->[1] & SCOPING_EL) and
                   ## NOTE: "li", "dt", and "dd" are in |SPECIAL_EL|.

                   (not $node->[1] & ADDRESS_EL) &
                   (not $node->[1] & DIV_EL) &
                   (not $node->[1] & P_EL)) {
            ## 3.
            !!!cp ('t357.1');
            last; ## goto 5.
          } elsif ($node->[1] & END_TAG_OPTIONAL_EL) {
            !!!cp ('t358.1');
            #
          } else {
            !!!cp ('t359.1');
            $non_optional ||= $node;
            #
          }
          ## 4.
          ## goto 2.
          $i--;
        }

        ## 5. (a) has a |p| element in scope
        INSCOPE: for (reverse @{$self->{open_elements}}) {
          if ($_->[1] & P_EL) {
            !!!cp ('t353.1');
            !!!back-token; # <x>
            $token = {type => END_TAG_TOKEN, tag_name => 'p',
                      line => $token->{line}, column => $token->{column}};
            next B;
          } elsif ($_->[1] & SCOPING_EL) {
            !!!cp ('t354.1');
            last INSCOPE;
          }
        } # INSCOPE

        ## 5. (b) insert
        !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
        !!!nack ('t359.2');
        !!!next-token;
        next B;
      } elsif ($token->{tag_name} eq 'plaintext') {
        ## NOTE: As normal, but effectively ends parsing

        ## has a p element in scope
        INSCOPE: for (reverse @{$self->{open_elements}}) {
          if ($_->[1] & P_EL) {
            !!!cp ('t367');
            !!!back-token; # <plaintext>
            $token = {type => END_TAG_TOKEN, tag_name => 'p',
                      line => $token->{line}, column => $token->{column}};
            next B;
          } elsif ($_->[1] & SCOPING_EL) {
            !!!cp ('t368');
            last INSCOPE;
          }
        } # INSCOPE
          
        !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
          
        $self->{content_model} = PLAINTEXT_CONTENT_MODEL;
          
        !!!nack ('t368.1');
        !!!next-token;
        next B;
      } elsif ($token->{tag_name} eq 'a') {
        AFE: for my $i (reverse 0..$#$active_formatting_elements) {
          my $node = $active_formatting_elements->[$i];
          if ($node->[1] & A_EL) {
            !!!cp ('t371');
            !!!parse-error (type => 'in a:a', token => $token);
            
            !!!back-token; # <a>
            $token = {type => END_TAG_TOKEN, tag_name => 'a',
                      line => $token->{line}, column => $token->{column}};
            $formatting_end_tag->($token);
            
            AFE2: for (reverse 0..$#$active_formatting_elements) {
              if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
                !!!cp ('t372');
                splice @$active_formatting_elements, $_, 1;
                last AFE2;
              }
            } # AFE2
            OE: for (reverse 0..$#{$self->{open_elements}}) {
              if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
                !!!cp ('t373');
                splice @{$self->{open_elements}}, $_, 1;
                last OE;
              }
            } # OE
            last AFE;
          } elsif ($node->[0] eq '#marker') {
            !!!cp ('t374');
            last AFE;
          }
        } # AFE
          
        $reconstruct_active_formatting_elements->($insert_to_current);

        !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
        push @$active_formatting_elements, $self->{open_elements}->[-1];

        !!!nack ('t374.1');
        !!!next-token;
        next B;
      } elsif ($token->{tag_name} eq 'nobr') {
        $reconstruct_active_formatting_elements->($insert_to_current);

        ## has a |nobr| element in scope
        INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
          my $node = $self->{open_elements}->[$_];
          if ($node->[1] & NOBR_EL) {
            !!!cp ('t376');
            !!!parse-error (type => 'in nobr:nobr', token => $token);
            !!!back-token; # <nobr>
            $token = {type => END_TAG_TOKEN, tag_name => 'nobr',
                      line => $token->{line}, column => $token->{column}};
            next B;
          } elsif ($node->[1] & SCOPING_EL) {
            !!!cp ('t377');
            last INSCOPE;
          }
        } # INSCOPE
        
        !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
        push @$active_formatting_elements, $self->{open_elements}->[-1];
        
        !!!nack ('t377.1');
        !!!next-token;
        next B;
      } elsif ($token->{tag_name} eq 'button') {
        ## has a button element in scope
        INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
          my $node = $self->{open_elements}->[$_];
          if ($node->[1] & BUTTON_EL) {
            !!!cp ('t378');
            !!!parse-error (type => 'in button:button', token => $token);
            !!!back-token; # <button>
            $token = {type => END_TAG_TOKEN, tag_name => 'button',
                      line => $token->{line}, column => $token->{column}};
            next B;
          } elsif ($node->[1] & SCOPING_EL) {
            !!!cp ('t379');
            last INSCOPE;
          }
        } # INSCOPE
          
        $reconstruct_active_formatting_elements->($insert_to_current);
          
        !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);

        ## TODO: associate with $self->{form_element} if defined

        push @$active_formatting_elements, ['#marker', ''];

        !!!nack ('t379.1');
        !!!next-token;
        next B;
      } elsif ({
                xmp => 1,
                iframe => 1,
                noembed => 1,
                noframes => 1, ## NOTE: This is an "as if in head" code clone.
                noscript => 0, ## TODO: 1 if scripting is enabled
               }->{$token->{tag_name}}) {
        if ($token->{tag_name} eq 'xmp') {
          !!!cp ('t381');
          $reconstruct_active_formatting_elements->($insert_to_current);
        } else {
          !!!cp ('t399');
        }
        ## NOTE: There is an "as if in body" code clone.
        $parse_rcdata->(CDATA_CONTENT_MODEL);
        next B;
      } elsif ($token->{tag_name} eq 'isindex') {
        !!!parse-error (type => 'isindex', token => $token);
        
        if (defined $self->{form_element}) {
          !!!cp ('t389');
          ## Ignore the token
          !!!nack ('t389'); ## NOTE: Not acknowledged.
          !!!next-token;
          next B;
        } else {
          !!!ack ('t391.1');

          my $at = $token->{attributes};
          my $form_attrs;
          $form_attrs->{action} = $at->{action} if $at->{action};
          my $prompt_attr = $at->{prompt};
          $at->{name} = {name => 'name', value => 'isindex'};
          delete $at->{action};
          delete $at->{prompt};
          my @tokens = (
                        {type => START_TAG_TOKEN, tag_name => 'form',
                         attributes => $form_attrs,
                         line => $token->{line}, column => $token->{column}},
                        {type => START_TAG_TOKEN, tag_name => 'hr',
                         line => $token->{line}, column => $token->{column}},
                        {type => START_TAG_TOKEN, tag_name => 'p',
                         line => $token->{line}, column => $token->{column}},
                        {type => START_TAG_TOKEN, tag_name => 'label',
                         line => $token->{line}, column => $token->{column}},
                       );
          if ($prompt_attr) {
            !!!cp ('t390');
            push @tokens, {type => CHARACTER_TOKEN, data => $prompt_attr->{value},
                           #line => $token->{line}, column => $token->{column},
                          };
          } else {
            !!!cp ('t391');
            push @tokens, {type => CHARACTER_TOKEN,
                           data => 'This is a searchable index. Insert your search keywords here: ',
                           #line => $token->{line}, column => $token->{column},
                          }; # SHOULD
            ## TODO: make this configurable
          }
          push @tokens,
                        {type => START_TAG_TOKEN, tag_name => 'input', attributes => $at,
                         line => $token->{line}, column => $token->{column}},
                        #{type => CHARACTER_TOKEN, data => ''}, # SHOULD
                        {type => END_TAG_TOKEN, tag_name => 'label',
                         line => $token->{line}, column => $token->{column}},
                        {type => END_TAG_TOKEN, tag_name => 'p',
                         line => $token->{line}, column => $token->{column}},
                        {type => START_TAG_TOKEN, tag_name => 'hr',
                         line => $token->{line}, column => $token->{column}},
                        {type => END_TAG_TOKEN, tag_name => 'form',
                         line => $token->{line}, column => $token->{column}};
          !!!back-token (@tokens);
          !!!next-token;
          next B;
        }
      } elsif ($token->{tag_name} eq 'textarea') {
        my $tag_name = $token->{tag_name};
        my $el;
        !!!create-element ($el, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
        
        ## TODO: $self->{form_element} if defined
        $self->{content_model} = RCDATA_CONTENT_MODEL;
        delete $self->{escape}; # MUST
        
        $insert->($el);
        
        my $text = '';
        !!!nack ('t392.1');
        !!!next-token;
        if ($token->{type} == CHARACTER_TOKEN) {
          $token->{data} =~ s/^\x0A//;
          unless (length $token->{data}) {
            !!!cp ('t392');
            !!!next-token;
          } else {
            !!!cp ('t393');
          }
        } else {
          !!!cp ('t394');
        }
        while ($token->{type} == CHARACTER_TOKEN) {
          !!!cp ('t395');
          $text .= $token->{data};
          !!!next-token;
        }
        if (length $text) {
          !!!cp ('t396');
          $el->manakai_append_text ($text);
        }
        
        $self->{content_model} = PCDATA_CONTENT_MODEL;
        
        if ($token->{type} == END_TAG_TOKEN and
            $token->{tag_name} eq $tag_name) {
          !!!cp ('t397');
          ## Ignore the token
        } else {
          !!!cp ('t398');
          !!!parse-error (type => 'in RCDATA:#eof', token => $token);
        }
        !!!next-token;
        next B;
      } elsif ($token->{tag_name} eq 'rt' or
               $token->{tag_name} eq 'rp') {
        ## has a |ruby| element in scope
        INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
          my $node = $self->{open_elements}->[$_];
          if ($node->[1] & RUBY_EL) {
            !!!cp ('t398.1');
            ## generate implied end tags
            while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
              !!!cp ('t398.2');
              pop @{$self->{open_elements}};
            }
            unless ($self->{open_elements}->[-1]->[1] & RUBY_EL) {
              !!!cp ('t398.3');
              !!!parse-error (type => 'not closed',
                              text => $self->{open_elements}->[-1]->[0]
                                  ->manakai_local_name,
                              token => $token);
              pop @{$self->{open_elements}}
                  while not $self->{open_elements}->[-1]->[1] & RUBY_EL;
            }
            last INSCOPE;
          } elsif ($node->[1] & SCOPING_EL) {
            !!!cp ('t398.4');
            last INSCOPE;
          }
        } # INSCOPE

        !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);

        !!!nack ('t398.5');
        !!!next-token;
        redo B;
      } elsif ($token->{tag_name} eq 'math' or
               $token->{tag_name} eq 'svg') {
        $reconstruct_active_formatting_elements->($insert_to_current);

        ## "Adjust MathML attributes" ('math' only) - done in insert-element-f

        ## "adjust SVG attributes" ('svg' only) - done in insert-element-f

        ## "adjust foreign attributes" - done in insert-element-f
        
        !!!insert-element-f ($token->{tag_name} eq 'math' ? $MML_NS : $SVG_NS, $token->{tag_name}, $token->{attributes}, $token);
        
        if ($self->{self_closing}) {
          pop @{$self->{open_elements}};
          !!!ack ('t398.1');
        } else {
          !!!cp ('t398.2');
          $self->{insertion_mode} |= IN_FOREIGN_CONTENT_IM;
          ## NOTE: |<body><math><mi><svg>| -> "in foreign content" insertion
          ## mode, "in body" (not "in foreign content") secondary insertion
          ## mode, maybe.
        }

        !!!next-token;
        next B;
      } elsif ({
                caption => 1, col => 1, colgroup => 1, frame => 1,
                frameset => 1, head => 1, option => 1, optgroup => 1,
                tbody => 1, td => 1, tfoot => 1, th => 1,
                thead => 1, tr => 1,
               }->{$token->{tag_name}}) {
        !!!cp ('t401');
        !!!parse-error (type => 'in body',
                        text => $token->{tag_name}, token => $token);
        ## Ignore the token
        !!!nack ('t401.1'); ## NOTE: |<col/>| or |<frame/>| here is an error.
        !!!next-token;
        next B;
        
        ## ISSUE: An issue on HTML5 new elements in the spec.
      } else {
        if ($token->{tag_name} eq 'image') {
          !!!cp ('t384');
          !!!parse-error (type => 'image', token => $token);
          $token->{tag_name} = 'img';
        } else {
          !!!cp ('t385');
        }

        ## NOTE: There is an "as if <br>" code clone.
        $reconstruct_active_formatting_elements->($insert_to_current);
        
        !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);

        if ({
             applet => 1, marquee => 1, object => 1,
            }->{$token->{tag_name}}) {
          !!!cp ('t380');
          push @$active_formatting_elements, ['#marker', ''];
          !!!nack ('t380.1');
        } elsif ({
                  b => 1, big => 1, em => 1, font => 1, i => 1,
                  s => 1, small => 1, strike => 1,
                  strong => 1, tt => 1, u => 1,
                 }->{$token->{tag_name}}) {
          !!!cp ('t375');
          push @$active_formatting_elements, $self->{open_elements}->[-1];
          !!!nack ('t375.1');
        } elsif ($token->{tag_name} eq 'input') {
          !!!cp ('t388');
          ## TODO: associate with $self->{form_element} if defined
          pop @{$self->{open_elements}};
          !!!ack ('t388.2');
        } elsif ({
                  area => 1, basefont => 1, bgsound => 1, br => 1,
                  embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
                  #image => 1,
                 }->{$token->{tag_name}}) {
          !!!cp ('t388.1');
          pop @{$self->{open_elements}};
          !!!ack ('t388.3');
        } elsif ($token->{tag_name} eq 'select') {
          ## TODO: associate with $self->{form_element} if defined
        
          if ($self->{insertion_mode} & TABLE_IMS or
              $self->{insertion_mode} & BODY_TABLE_IMS or
              $self->{insertion_mode} == IN_COLUMN_GROUP_IM) {
            !!!cp ('t400.1');
            $self->{insertion_mode} = IN_SELECT_IN_TABLE_IM;
          } else {
            !!!cp ('t400.2');
            $self->{insertion_mode} = IN_SELECT_IM;
          }
          !!!nack ('t400.3');
        } else {
          !!!nack ('t402');
        }
        
        !!!next-token;
        next B;
      }
    } elsif ($token->{type} == END_TAG_TOKEN) {
      if ($token->{tag_name} eq 'body') {
        ## has a |body| element in scope
        my $i;
        INSCOPE: {
          for (reverse @{$self->{open_elements}}) {
            if ($_->[1] & BODY_EL) {
              !!!cp ('t405');
              $i = $_;
              last INSCOPE;
            } elsif ($_->[1] & SCOPING_EL) {
              !!!cp ('t405.1');
              last;
            }
          }

          !!!parse-error (type => 'start tag not allowed',
                          text => $token->{tag_name}, token => $token);
          ## NOTE: Ignore the token.
          !!!next-token;
          next B;
        } # INSCOPE

        for (@{$self->{open_elements}}) {
          unless ($_->[1] & ALL_END_TAG_OPTIONAL_EL) {
            !!!cp ('t403');
            !!!parse-error (type => 'not closed',
                            text => $_->[0]->manakai_local_name,
                            token => $token);
            last;
          } else {
            !!!cp ('t404');
          }
        }

        $self->{insertion_mode} = AFTER_BODY_IM;
        !!!next-token;
        next B;
      } elsif ($token->{tag_name} eq 'html') {
        ## TODO: Update this code.  It seems that the code below is not
        ## up-to-date, though it has the same effect as specified.
        if (@{$self->{open_elements}} > 1 and
            $self->{open_elements}->[1]->[1] & BODY_EL) {
          ## ISSUE: There is an issue in the spec.
          unless ($self->{open_elements}->[-1]->[1] & BODY_EL) {
            !!!cp ('t406');
            !!!parse-error (type => 'not closed',
                            text => $self->{open_elements}->[1]->[0]
                                ->manakai_local_name,
                            token => $token);
          } else {
            !!!cp ('t407');
          }
          $self->{insertion_mode} = AFTER_BODY_IM;
          ## reprocess
          next B;
        } else {
          !!!cp ('t408');
          !!!parse-error (type => 'unmatched end tag',
                          text => $token->{tag_name}, token => $token);
          ## Ignore the token
          !!!next-token;
          next B;
        }
      } elsif ({
                ## NOTE: End tags for non-phrasing flow content elements

                ## NOTE: The normal ones
                address => 1, article => 1, aside => 1, blockquote => 1,
                center => 1, datagrid => 1, details => 1, dialog => 1,
                dir => 1, div => 1, dl => 1, fieldset => 1, figure => 1,
                footer => 1, header => 1, listing => 1, menu => 1, nav => 1,
                ol => 1, pre => 1, section => 1, ul => 1,

                ## NOTE: As normal, but ... optional tags
                dd => 1, dt => 1, li => 1,

                applet => 1, button => 1, marquee => 1, object => 1,
               }->{$token->{tag_name}}) {
        ## NOTE: Code for <li> start tags includes "as if </li>" code.
        ## Code for <dt> or <dd> start tags includes "as if </dt> or
        ## </dd>" code.

        ## has an element in scope
        my $i;
        INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
          my $node = $self->{open_elements}->[$_];
          if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
            !!!cp ('t410');
            $i = $_;
            last INSCOPE;
          } elsif ($node->[1] & SCOPING_EL) {
            !!!cp ('t411');
            last INSCOPE;
          }
        } # INSCOPE

        unless (defined $i) { # has an element in scope
          !!!cp ('t413');
          !!!parse-error (type => 'unmatched end tag',
                          text => $token->{tag_name}, token => $token);
          ## NOTE: Ignore the token.
        } else {
          ## Step 1. generate implied end tags
          while ({
                  ## END_TAG_OPTIONAL_EL
                  dd => ($token->{tag_name} ne 'dd'),
                  dt => ($token->{tag_name} ne 'dt'),
                  li => ($token->{tag_name} ne 'li'),
                  option => 1,
                  optgroup => 1,
                  p => 1,
                  rt => 1,
                  rp => 1,
                 }->{$self->{open_elements}->[-1]->[0]->manakai_local_name}) {
            !!!cp ('t409');
            pop @{$self->{open_elements}};
          }

          ## Step 2.
          if ($self->{open_elements}->[-1]->[0]->manakai_local_name
                  ne $token->{tag_name}) {
            !!!cp ('t412');
            !!!parse-error (type => 'not closed',
                            text => $self->{open_elements}->[-1]->[0]
                                ->manakai_local_name,
                            token => $token);
          } else {
            !!!cp ('t414');
          }

          ## Step 3.
          splice @{$self->{open_elements}}, $i;

          ## Step 4.
          $clear_up_to_marker->()
              if {
                applet => 1, button => 1, marquee => 1, object => 1,
              }->{$token->{tag_name}};
        }
        !!!next-token;
        next B;
      } elsif ($token->{tag_name} eq 'form') {
        ## NOTE: As normal, but interacts with the form element pointer

        undef $self->{form_element};

        ## has an element in scope
        my $i;
        INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
          my $node = $self->{open_elements}->[$_];
          if ($node->[1] & FORM_EL) {
            !!!cp ('t418');
            $i = $_;
            last INSCOPE;
          } elsif ($node->[1] & SCOPING_EL) {
            !!!cp ('t419');
            last INSCOPE;
          }
        } # INSCOPE

        unless (defined $i) { # has an element in scope
          !!!cp ('t421');
          !!!parse-error (type => 'unmatched end tag',
                          text => $token->{tag_name}, token => $token);
          ## NOTE: Ignore the token.
        } else {
          ## Step 1. generate implied end tags
          while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
            !!!cp ('t417');
            pop @{$self->{open_elements}};
          }
          
          ## Step 2. 
          if ($self->{open_elements}->[-1]->[0]->manakai_local_name
                  ne $token->{tag_name}) {
            !!!cp ('t417.1');
            !!!parse-error (type => 'not closed',
                            text => $self->{open_elements}->[-1]->[0]
                                ->manakai_local_name,
                            token => $token);
          } else {
            !!!cp ('t420');
          }  
          
          ## Step 3.
          splice @{$self->{open_elements}}, $i;
        }

        !!!next-token;
        next B;
      } elsif ({
                ## NOTE: As normal, except acts as a closer for any ...
                h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
               }->{$token->{tag_name}}) {
        ## has an element in scope
        my $i;
        INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
          my $node = $self->{open_elements}->[$_];
          if ($node->[1] & HEADING_EL) {
            !!!cp ('t423');
            $i = $_;
            last INSCOPE;
          } elsif ($node->[1] & SCOPING_EL) {
            !!!cp ('t424');
            last INSCOPE;
          }
        } # INSCOPE

        unless (defined $i) { # has an element in scope
          !!!cp ('t425.1');
          !!!parse-error (type => 'unmatched end tag',
                          text => $token->{tag_name}, token => $token);
          ## NOTE: Ignore the token.
        } else {
          ## Step 1. generate implied end tags
          while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
            !!!cp ('t422');
            pop @{$self->{open_elements}};
          }
          
          ## Step 2.
          if ($self->{open_elements}->[-1]->[0]->manakai_local_name
                  ne $token->{tag_name}) {
            !!!cp ('t425');
            !!!parse-error (type => 'unmatched end tag',
                            text => $token->{tag_name}, token => $token);
          } else {
            !!!cp ('t426');
          }

          ## Step 3.
          splice @{$self->{open_elements}}, $i;
        }
        
        !!!next-token;
        next B;
      } elsif ($token->{tag_name} eq 'p') {
        ## NOTE: As normal, except </p> implies <p> and ...

        ## has an element in scope
        my $non_optional;
        my $i;
        INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
          my $node = $self->{open_elements}->[$_];
          if ($node->[1] & P_EL) {
            !!!cp ('t410.1');
            $i = $_;
            last INSCOPE;
          } elsif ($node->[1] & SCOPING_EL) {
            !!!cp ('t411.1');
            last INSCOPE;
          } elsif ($node->[1] & END_TAG_OPTIONAL_EL) {
            ## NOTE: |END_TAG_OPTIONAL_EL| includes "p"
            !!!cp ('t411.2');
            #
          } else {
            !!!cp ('t411.3');
            $non_optional ||= $node;
            #
          }
        } # INSCOPE

        if (defined $i) {
          ## 1. Generate implied end tags
          #

          ## 2. If current node != "p", parse error
          if ($non_optional) {
            !!!cp ('t412.1');
            !!!parse-error (type => 'not closed',
                            text => $non_optional->[0]->manakai_local_name,
                            token => $token);
          } else {
            !!!cp ('t414.1');
          }

          ## 3. Pop
          splice @{$self->{open_elements}}, $i;
        } else {
          !!!cp ('t413.1');
          !!!parse-error (type => 'unmatched end tag',
                          text => $token->{tag_name}, token => $token);

          !!!cp ('t415.1');
          ## As if <p>, then reprocess the current token
          my $el;
          !!!create-element ($el, $HTML_NS, 'p',, $token);
          $insert->($el);
          ## NOTE: Not inserted into |$self->{open_elements}|.
        }

        !!!next-token;
        next B;
      } elsif ({
                a => 1,
                b => 1, big => 1, em => 1, font => 1, i => 1,
                nobr => 1, s => 1, small => 1, strike => 1,
                strong => 1, tt => 1, u => 1,
               }->{$token->{tag_name}}) {
        !!!cp ('t427');
        $formatting_end_tag->($token);
        next B;
      } elsif ($token->{tag_name} eq 'br') {
        !!!cp ('t428');
        !!!parse-error (type => 'unmatched end tag',
                        text => 'br', token => $token);

        ## As if <br>
        $reconstruct_active_formatting_elements->($insert_to_current);
        
        my $el;
        !!!create-element ($el, $HTML_NS, 'br',, $token);
        $insert->($el);
        
        ## Ignore the token.
        !!!next-token;
        next B;
      } elsif ({
                caption => 1, col => 1, colgroup => 1, frame => 1,
                frameset => 1, head => 1, option => 1, optgroup => 1,
                tbody => 1, td => 1, tfoot => 1, th => 1,
                thead => 1, tr => 1,
                area => 1, basefont => 1, bgsound => 1,
                embed => 1, hr => 1, iframe => 1, image => 1,
                img => 1, input => 1, isindex => 1, noembed => 1,
                noframes => 1, param => 1, select => 1, spacer => 1,
                table => 1, textarea => 1, wbr => 1,
                noscript => 0, ## TODO: if scripting is enabled
               }->{$token->{tag_name}}) {
        !!!cp ('t429');
        !!!parse-error (type => 'unmatched end tag',
                        text => $token->{tag_name}, token => $token);
        ## Ignore the token
        !!!next-token;
        next B;
      } else {
        if ($token->{tag_name} eq 'sarcasm') {
          sleep 0.001; # take a deep breath
        }

        ## Step 1
        my $node_i = -1;
        my $node = $self->{open_elements}->[$node_i];

        ## Step 2
        S2: {
          if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
            ## Step 1
            ## generate implied end tags
            while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
              !!!cp ('t430');
              ## NOTE: |<ruby><rt></ruby>|.
              ## ISSUE: <ruby><rt></rt> will also take this code path,
              ## which seems wrong.
              pop @{$self->{open_elements}};
              $node_i++;
            }
        
            ## Step 2
            if ($self->{open_elements}->[-1]->[0]->manakai_local_name
                    ne $token->{tag_name}) {
              !!!cp ('t431');
              ## NOTE: <x><y></x>
              !!!parse-error (type => 'not closed',
                              text => $self->{open_elements}->[-1]->[0]
                                  ->manakai_local_name,
                              token => $token);
            } else {
              !!!cp ('t432');
            }
            
            ## Step 3
            splice @{$self->{open_elements}}, $node_i if $node_i < 0;

            !!!next-token;
            last S2;
          } else {
            ## Step 3
            if (not ($node->[1] & FORMATTING_EL) and
                #not $phrasing_category->{$node->[1]} and
                ($node->[1] & SPECIAL_EL or
                 $node->[1] & SCOPING_EL)) {
              !!!cp ('t433');
              !!!parse-error (type => 'unmatched end tag',
                              text => $token->{tag_name}, token => $token);
              ## Ignore the token
              !!!next-token;
              last S2;

              ## NOTE: |<span><dd></span>a|: In Safari 3.1.2 and Opera
              ## 9.27, "a" is a child of <dd> (conforming).  In
              ## Firefox 3.0.2, "a" is a child of <body>.  In WinIE 7,
              ## "a" is a child of both <body> and <dd>.
            }
            
            !!!cp ('t434');
          }
          
          ## Step 4
          $node_i--;
          $node = $self->{open_elements}->[$node_i];
          
          ## Step 5;
          redo S2;
        } # S2
        next B;
      }
    }
    next B;
  } continue { # B
    if ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
      ## NOTE: The code below is executed in cases where it does not have
      ## to be, but it is harmless even in those cases.
      ## has an element in scope
      INSCOPE: {
        for (reverse 0..$#{$self->{open_elements}}) {
          my $node = $self->{open_elements}->[$_];
          if ($node->[1] & FOREIGN_EL) {
            last INSCOPE;
          } elsif ($node->[1] & SCOPING_EL) {
            last;
          }
        }
        
        ## NOTE: No foreign element in scope.
        $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
      } # INSCOPE
    }
  } # B

  ## Stop parsing # MUST
  
  ## TODO: script stuffs
} # _tree_construct_main

## Replace the children of |$node| with the result of parsing a
## string as HTML (the |innerHTML| setter).
##
## Invoked as a class method:
##
##   Whatpm::HTML->set_inner_html ($node, $string, $onerror, $get_wrapper);
##
## Arguments:
##
##   $node        - The context node.  Only node types 9 (document) and
##                  1 (element) are supported.
##   $string      - The character string to parse.  It is never copied
##                  into a lexical; it is accessed as |$_[0]| after the
##                  two |shift|s below.
##   $onerror     - Optional code reference invoked with named
##                  arguments (|type|, |line|, |column|, ...) for each
##                  parse error.  If omitted, errors are reported by
##                  |warn| (element case).
##   $get_wrapper - Optional code reference that receives the input
##                  handle and returns the handle to read from.
##                  Defaults to the identity function.
##
## Dies if |$node| is neither a document nor an element.
##
## NOTE: The return value is whatever the last evaluated statement
## yields; callers should not rely on it.
sub set_inner_html ($$$$;$) {
  my $class = shift;
  my $node = shift;
  #my $s = \$_[0];
  my $onerror = $_[1];
  my $get_wrapper = $_[2] || sub ($) { return $_[0] };

  ## ISSUE: Should {confident} be true?

  my $nt = $node->node_type;
  if ($nt == 9) {
    # MUST
    
    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.
    
    ## Step 2 # MUST
    ## NOTE: Iterate over a copy of the child list, since the list is
    ## modified by |remove_child| while we are walking it.
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }

    ## Step 3, 4, 5 # MUST
    $class->parse_char_string ($_[0] => $node, $onerror, $get_wrapper);
  } elsif ($nt == 1) {
    ## TODO: If non-html element

    ## NOTE: Most of this code is copied from |parse_string|

## TODO: Support for $get_wrapper

    ## Step 1 # MUST
    ## NOTE: The fragment is parsed into a scratch document using a
    ## dedicated parser object |$p|; the resulting nodes are moved to
    ## |$node| at the end.
    my $this_doc = $node->owner_document;
    my $doc = $this_doc->implementation->create_document;
    $doc->manakai_is_html (1);
    my $p = $class->new;
    $p->{document} = $doc;

    ## Step 8 # MUST
    ## NOTE(review): |$i| appears to be unused in this sub; it is kept
    ## in case a macro expansion relies on it -- confirm before removing.
    my $i = 0;
    $p->{line_prev} = $p->{line} = 1;
    $p->{column_prev} = $p->{column} = 0;
    require Whatpm::Charset::DecodeHandle;
    my $input = Whatpm::Charset::DecodeHandle::CharString->new (\($_[0]));
    $input = $get_wrapper->($input);

    ## Read the next input character into |$self->{nc}| (-1 at the end
    ## of the input), maintaining line/column information.  CR and
    ## CR LF are normalized to LF; NULL is reported as a parse error
    ## and replaced by U+FFFD.
    $p->{set_nc} = sub {
      my $self = shift;

      my $char = '';
      if (defined $self->{next_nc}) {
        ## A character was read ahead while handling a CR.
        $char = $self->{next_nc};
        delete $self->{next_nc};
        $self->{nc} = ord $char;
      } else {
        $self->{char_buffer} = '';
        $self->{char_buffer_pos} = 0;
        
        ## Fast path: buffer a run of characters that need no special
        ## handling (i.e. anything other than NULL, LF, and CR).
        my $count = $input->manakai_read_until
            ($self->{char_buffer}, qr/[^\x00\x0A\x0D]/,
             $self->{char_buffer_pos});
        if ($count) {
          $self->{line_prev} = $self->{line};
          $self->{column_prev} = $self->{column};
          $self->{column}++;
          $self->{nc}
              = ord substr ($self->{char_buffer},
                            $self->{char_buffer_pos}++, 1);
          return;
        }
        
        if ($input->read ($char, 1)) {
          $self->{nc} = ord $char;
        } else {
          $self->{nc} = -1;
          return;
        }
      }

      ($p->{line_prev}, $p->{column_prev}) = ($p->{line}, $p->{column});
      $p->{column}++;

      if ($self->{nc} == 0x000A) { # LF
        $p->{line}++;
        $p->{column} = 0;
        !!!cp ('i1');
      } elsif ($self->{nc} == 0x000D) { # CR
## TODO: support for abort/streaming
        ## Peek one character: an LF directly after the CR is dropped,
        ## anything else is kept for the next call.
        my $next = '';
        if ($input->read ($next, 1) and $next ne "\x0A") {
          $self->{next_nc} = $next;
        }
        $self->{nc} = 0x000A; # LF # MUST
        $p->{line}++;
        $p->{column} = 0;
        !!!cp ('i2');
      } elsif ($self->{nc} == 0x0000) { # NULL
        !!!cp ('i4');
        !!!parse-error (type => 'NULL');
        $self->{nc} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
      }
    };

    ## Bulk-read a run of characters that are neither in the caller's
    ## "specials" range (|$_[1]|) nor NULL, LF, or CR into |$_[0]| at
    ## offset |$_[2]|.  Returns the number of characters read (0 if
    ## none, or if a read-ahead character is pending).
    $p->{read_until} = sub {
      #my ($scalar, $specials_range, $offset) = @_;
      return 0 if defined $p->{next_nc};

      my $pattern = qr/[^$_[1]\x00\x0A\x0D]/;
      my $offset = $_[2] || 0;
      
      if ($p->{char_buffer_pos} < length $p->{char_buffer}) {
        ## Consume from the characters already buffered by |set_nc|.
        pos ($p->{char_buffer}) = $p->{char_buffer_pos};
        if ($p->{char_buffer} =~ /\G(?>$pattern)+/) {
          substr ($_[0], $offset)
              = substr ($p->{char_buffer}, $-[0], $+[0] - $-[0]);
          my $count = $+[0] - $-[0];
          if ($count) {
            $p->{column} += $count;
            $p->{char_buffer_pos} += $count;
            $p->{line_prev} = $p->{line};
            $p->{column_prev} = $p->{column} - 1;
            $p->{nc} = -1;
          }
          return $count;
        } else {
          return 0;
        }
      } else {
        ## Buffer exhausted: read directly from the input handle.
        ## NOTE(review): |column_prev| is updated differently here
        ## than in the buffered branch above -- confirm that this is
        ## intended.
        my $count = $input->manakai_read_until ($_[0], $pattern, $_[2]);
        if ($count) {
          $p->{column} += $count;
          $p->{column_prev} += $count;
          $p->{nc} = -1;
        }
        return $count;
      }
    }; # $p->{read_until}

    ## Default error handler: report by |warn|, preferring the
    ## position of the token (if any) to the current position.
    my $ponerror = $onerror || sub {
      my (%opt) = @_;
      my $line = $opt{line};
      my $column = $opt{column};
      if (defined $opt{token} and defined $opt{token}->{line}) {
        $line = $opt{token}->{line};
        $column = $opt{token}->{column};
      }
      warn "Parse error ($opt{type}) at line $line column $column\n";
    };
    $p->{parse_error} = sub {
      $ponerror->(line => $p->{line}, column => $p->{column}, @_);
    };
    
    ## Errors raised by the input handle are forwarded to the same
    ## handler, tagged with |layer => 'encode'|.
    my $char_onerror = sub {
      my (undef, $type, %opt) = @_;
      $ponerror->(layer => 'encode',
                  line => $p->{line}, column => $p->{column} + 1,
                  %opt, type => $type);
    }; # $char_onerror
    $input->onerror ($char_onerror);

    $p->_initialize_tokenizer;
    $p->_initialize_tree_constructor;

    ## Step 2
    ## NOTE: The initial content model of the tokenizer depends on the
    ## local name of the context element; PCDATA is the default.
    my $node_ln = $node->manakai_local_name;
    $p->{content_model} = {
      title => RCDATA_CONTENT_MODEL,
      textarea => RCDATA_CONTENT_MODEL,
      style => CDATA_CONTENT_MODEL,
      script => CDATA_CONTENT_MODEL,
      xmp => CDATA_CONTENT_MODEL,
      iframe => CDATA_CONTENT_MODEL,
      noembed => CDATA_CONTENT_MODEL,
      noframes => CDATA_CONTENT_MODEL,
      noscript => CDATA_CONTENT_MODEL,
      plaintext => PLAINTEXT_CONTENT_MODEL,
    }->{$node_ln};
    $p->{content_model} = PCDATA_CONTENT_MODEL
        unless defined $p->{content_model};
        ## ISSUE: What is "the name of the element"? local name?

    $p->{inner_html_node} = [$node, $el_category->{$node_ln}];
      ## TODO: Foreign element OK?

    ## Step 3
    my $root = $doc->create_element_ns
      ('http://www.w3.org/1999/xhtml', [undef, 'html']);

    ## Step 4 # MUST
    $doc->append_child ($root);

    ## Step 5 # MUST
    push @{$p->{open_elements}}, [$root, $el_category->{html}];

    undef $p->{head_element};

    ## Step 6 # MUST
    $p->_reset_insertion_mode;

    ## Step 7 # MUST
    ## NOTE: Set the form element pointer to the nearest |form|
    ## element in the XHTML namespace among |$node| and its ancestors.
    my $anode = $node;
    AN: while (defined $anode) {
      if ($anode->node_type == 1) {
        my $nsuri = $anode->namespace_uri;
        if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') {
          if ($anode->manakai_local_name eq 'form') {
            !!!cp ('i5');
            $p->{form_element} = $anode;
            last AN;
          }
        }
      }
      $anode = $anode->parent_node;
    } # AN
    
    ## Step 9 # MUST
    {
      ## NOTE: The |!!!next-token| macro operates on |$self|.
      my $self = $p;
      !!!next-token;
    }
    $p->_tree_construction_main;

    ## Step 10 # MUST
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }
    ## ISSUE: mutation events? read-only?

    ## Step 11 # MUST
    @cn = @{$root->child_nodes};
    for (@cn) {
      $this_doc->adopt_node ($_);
      $node->append_child ($_);
    }
    ## ISSUE: mutation events?

    $p->_terminate_tree_constructor;

    ## Remove self references.  The |set_nc|, |read_until|, and
    ## |parse_error| closures all capture |$p| while being stored in
    ## |$p|; unless every one of them is removed, the parser (and the
    ## scratch document and input handle it refers to) would never be
    ## freed.
    delete $p->{set_nc};
    delete $p->{read_until};
    delete $p->{parse_error}; # delete loop
  } else {
    die "$0: |set_inner_html| is not defined for node of type $nt";
  }
} # set_inner_html

} # tree construction stage

## Exception class derived from |Error| (the module loaded at the top
## of this file).
## NOTE(review): Presumably thrown to request that parsing be
## restarted -- confirm against the code that throws it.
package Whatpm::HTML::RestartParser;
our @ISA;
push (@ISA, 'Error');

1;
# $Date: 2008/10/04 07:58:58 $