/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Diff of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.134 by wakaba, Sat May 17 05:34:23 2008 UTC revision 1.139 by wakaba, Sat May 24 04:26:27 2008 UTC
# Line 11  use Error qw(:try); Line 11  use Error qw(:try);
11  ## TODO: 1252 parse error (revision 1264)  ## TODO: 1252 parse error (revision 1264)
12  ## TODO: 8859-11 = 874 (revision 1271)  ## TODO: 8859-11 = 874 (revision 1271)
13    
14    require IO::Handle;
15    
16  my $HTML_NS = q<http://www.w3.org/1999/xhtml>;  my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
17  my $MML_NS = q<http://www.w3.org/1998/Math/MathML>;  my $MML_NS = q<http://www.w3.org/1998/Math/MathML>;
18  my $SVG_NS = q<http://www.w3.org/2000/svg>;  my $SVG_NS = q<http://www.w3.org/2000/svg>;
# Line 332  my $c1_entity_char = { Line 334  my $c1_entity_char = {
334  }; # $c1_entity_char  }; # $c1_entity_char
335    
336  sub parse_byte_string ($$$$;$) {  sub parse_byte_string ($$$$;$) {
337      my $self = shift;
338      my $charset_name = shift;
339      open my $input, '<', ref $_[0] ? $_[0] : \($_[0]);
340      return $self->parse_byte_stream ($charset_name, $input, @_[1..$#_]);
341    } # parse_byte_string
342    
343    sub parse_byte_stream ($$$$;$) {
344    my $self = ref $_[0] ? shift : shift->new;    my $self = ref $_[0] ? shift : shift->new;
345    my $charset_name = shift;    my $charset_name = shift;
346    my $bytes_s = ref $_[0] ? $_[0] : \($_[0]);    my $byte_stream = $_[0];
   my $s;  
347    
348    my $onerror = $_[2] || sub {    my $onerror = $_[2] || sub {
349      my (%opt) = @_;      my (%opt) = @_;
# Line 346  sub parse_byte_string ($$$$;$) { Line 354  sub parse_byte_string ($$$$;$) {
354    ## HTML5 encoding sniffing algorithm    ## HTML5 encoding sniffing algorithm
355    require Message::Charset::Info;    require Message::Charset::Info;
356    my $charset;    my $charset;
357    my ($e, $e_status);    my $buffer;
358      my ($char_stream, $e_status);
359    
360    SNIFFING: {    SNIFFING: {
361    
# Line 355  sub parse_byte_string ($$$$;$) { Line 364  sub parse_byte_string ($$$$;$) {
364        $charset = Message::Charset::Info->get_by_iana_name ($charset_name);        $charset = Message::Charset::Info->get_by_iana_name ($charset_name);
365    
366        ## ISSUE: Unsupported encoding is not ignored according to the spec.        ## ISSUE: Unsupported encoding is not ignored according to the spec.
367        ($e, $e_status) = $charset->get_perl_encoding        ($char_stream, $e_status) = $charset->get_decode_handle
368            (allow_error_reporting => 1,            ($byte_stream, allow_error_reporting => 1,
369             allow_fallback => 1);             allow_fallback => 1);
370        if ($e) {        if ($char_stream) {
371          $self->{confident} = 1;          $self->{confident} = 1;
372          last SNIFFING;          last SNIFFING;
373          } else {
374            ## TODO: unsupported error
375        }        }
376      }      }
377    
378      ## Step 2      ## Step 2
379      # wait      my $byte_buffer = '';
380        for (1..1024) {
381          my $char = $byte_stream->getc;
382          last unless defined $char;
383          $byte_buffer .= $char;
384        } ## TODO: timeout
385    
386      ## Step 3      ## Step 3
387      my $head = substr ($$bytes_s, 0, 3);      if ($byte_buffer =~ /^\xFE\xFF/) {
     if ($head =~ /^\xFE\xFF/) {  
388        $charset = Message::Charset::Info->get_by_iana_name ('utf-16be');        $charset = Message::Charset::Info->get_by_iana_name ('utf-16be');
389        ($e, $e_status) = $charset->get_perl_encoding        ($char_stream, $e_status) = $charset->get_decode_handle
390            (allow_error_reporting => 1,            ($byte_stream, allow_error_reporting => 1,
391             allow_fallback => 1);             allow_fallback => 1, byte_buffer => \$byte_buffer);
392        $self->{confident} = 1;        $self->{confident} = 1;
393        last SNIFFING;        last SNIFFING;
394      } elsif ($head =~ /^\xFF\xFE/) {      } elsif ($byte_buffer =~ /^\xFF\xFE/) {
395        $charset = Message::Charset::Info->get_by_iana_name ('utf-16le');        $charset = Message::Charset::Info->get_by_iana_name ('utf-16le');
396        ($e, $e_status) = $charset->get_perl_encoding        ($char_stream, $e_status) = $charset->get_decode_handle
397            (allow_error_reporting => 1,            ($byte_stream, allow_error_reporting => 1,
398             allow_fallback => 1);             allow_fallback => 1, byte_buffer => \$byte_buffer);
399        $self->{confident} = 1;        $self->{confident} = 1;
400        last SNIFFING;        last SNIFFING;
401      } elsif ($head eq "\xEF\xBB\xBF") {      } elsif ($byte_buffer =~ /^\xEF\xBB\xBF/) {
402        $charset = Message::Charset::Info->get_by_iana_name ('utf-8');        $charset = Message::Charset::Info->get_by_iana_name ('utf-8');
403        ($e, $e_status) = $charset->get_perl_encoding        ($char_stream, $e_status) = $charset->get_decode_handle
404            (allow_error_reporting => 1,            ($byte_stream, allow_error_reporting => 1,
405             allow_fallback => 1);             allow_fallback => 1, byte_buffer => \$byte_buffer);
406        $self->{confident} = 1;        $self->{confident} = 1;
407        last SNIFFING;        last SNIFFING;
408      }      }
# Line 401  sub parse_byte_string ($$$$;$) { Line 416  sub parse_byte_string ($$$$;$) {
416      ## Step 6      ## Step 6
417      require Whatpm::Charset::UniversalCharDet;      require Whatpm::Charset::UniversalCharDet;
418      $charset_name = Whatpm::Charset::UniversalCharDet->detect_byte_string      $charset_name = Whatpm::Charset::UniversalCharDet->detect_byte_string
419          (substr ($$bytes_s, 0, 1024));          ($byte_buffer);
420      if (defined $charset_name) {      if (defined $charset_name) {
421        $charset = Message::Charset::Info->get_by_iana_name ($charset_name);        $charset = Message::Charset::Info->get_by_iana_name ($charset_name);
422    
423        ## ISSUE: Unsupported encoding is not ignored according to the spec.        ## ISSUE: Unsupported encoding is not ignored according to the spec.
424        ($e, $e_status) = $charset->get_perl_encoding        require Whatpm::Charset::DecodeHandle;
425            (allow_error_reporting => 1,        $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
426             allow_fallback => 1);            ($byte_stream);
427        if ($e) {        ($char_stream, $e_status) = $charset->get_decode_handle
428              ($buffer, allow_error_reporting => 1,
429               allow_fallback => 1, byte_buffer => \$byte_buffer);
430          if ($char_stream) {
431            $buffer->{buffer} = $byte_buffer;
432          !!!parse-error (type => 'sniffing:chardet', ## TODO: type name          !!!parse-error (type => 'sniffing:chardet', ## TODO: type name
433                          value => $charset_name,                          value => $charset_name,
434                          level => $self->{info_level},                          level => $self->{info_level},
# Line 424  sub parse_byte_string ($$$$;$) { Line 443  sub parse_byte_string ($$$$;$) {
443      $charset = Message::Charset::Info->get_by_iana_name ('windows-1252');      $charset = Message::Charset::Info->get_by_iana_name ('windows-1252');
444          ## NOTE: We choose |windows-1252| here, since |utf-8| should be          ## NOTE: We choose |windows-1252| here, since |utf-8| should be
445          ## detectable in the step 6.          ## detectable in the step 6.
446      ($e, $e_status) = $charset->get_perl_encoding (allow_error_reporting => 1,      require Whatpm::Charset::DecodeHandle;
447                                                     allow_fallback => 1);      $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
448            ($byte_stream);
449        ($char_stream, $e_status)
450            = $charset->get_decode_handle ($buffer,
451                                           allow_error_reporting => 1,
452                                           allow_fallback => 1,
453                                           byte_buffer => \$byte_buffer);
454        $buffer->{buffer} = $byte_buffer;
455      !!!parse-error (type => 'sniffing:default', ## TODO: type name      !!!parse-error (type => 'sniffing:default', ## TODO: type name
456                      value => 'windows-1252',                      value => 'windows-1252',
457                      level => $self->{info_level},                      level => $self->{info_level},
# Line 436  sub parse_byte_string ($$$$;$) { Line 462  sub parse_byte_string ($$$$;$) {
462    $self->{input_encoding} = $charset->get_iana_name;    $self->{input_encoding} = $charset->get_iana_name;
463    if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {    if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
464      !!!parse-error (type => 'chardecode:fallback', ## TODO: type name      !!!parse-error (type => 'chardecode:fallback', ## TODO: type name
465                      value => $e->name,                      value => $self->{input_encoding},
466                      level => $self->{unsupported_level},                      level => $self->{unsupported_level},
467                      line => 1, column => 1);                      line => 1, column => 1);
468    } elsif (not ($e_status &    } elsif (not ($e_status &
# Line 446  sub parse_byte_string ($$$$;$) { Line 472  sub parse_byte_string ($$$$;$) {
472                      level => $self->{unsupported_level},                      level => $self->{unsupported_level},
473                      line => 1, column => 1);                      line => 1, column => 1);
474    }    }
   $s = \ $e->decode ($$bytes_s);  
475    
476    $self->{change_encoding} = sub {    $self->{change_encoding} = sub {
477      my $self = shift;      my $self = shift;
# Line 454  sub parse_byte_string ($$$$;$) { Line 479  sub parse_byte_string ($$$$;$) {
479      my $token = shift;      my $token = shift;
480    
481      $charset = Message::Charset::Info->get_by_iana_name ($charset_name);      $charset = Message::Charset::Info->get_by_iana_name ($charset_name);
482      ($e, $e_status) = $charset->get_perl_encoding      ($char_stream, $e_status) = $charset->get_decode_handle
483          (allow_error_reporting => 1, allow_fallback => 1);          ($byte_stream, allow_error_reporting => 1, allow_fallback => 1,
484             byte_buffer => \ $buffer->{buffer});
485            
486      if ($e) { # if supported      if ($char_stream) { # if supported
487        ## "Change the encoding" algorithm:        ## "Change the encoding" algorithm:
488    
489        ## Step 1            ## Step 1    
490        if ($charset->{iana_names}->{'utf-16'}) { ## ISSUE: UTF-16BE -> UTF-8? UTF-16LE -> UTF-8?        if ($charset->{iana_names}->{'utf-16'}) { ## ISSUE: UTF-16BE -> UTF-8? UTF-16LE -> UTF-8?
491          $charset = Message::Charset::Info->get_by_iana_name ('utf-8');          $charset = Message::Charset::Info->get_by_iana_name ('utf-8');
492          ($e, $e_status) = $charset->get_perl_encoding;          ($char_stream, $e_status) = $charset->get_decode_handle
493                ($byte_stream,
494                 byte_buffer => \ $buffer->{buffer});
495        }        }
496        $charset_name = $charset->get_iana_name;        $charset_name = $charset->get_iana_name;
497                
# Line 492  sub parse_byte_string ($$$$;$) { Line 520  sub parse_byte_string ($$$$;$) {
520      }      }
521    }; # $self->{change_encoding}    }; # $self->{change_encoding}
522    
523      my $char_onerror = sub {
524        my (undef, $type, %opt) = @_;
525        !!!parse-error (%opt, type => $type,
526                        line => $self->{line}, column => $self->{column} + 1);
527        if ($opt{octets}) {
528          ${$opt{octets}} = "\x{FFFD}"; # replacement character
529        }
530      };
531      $char_stream->onerror ($char_onerror);
532    
533    my @args = @_; shift @args; # $s    my @args = @_; shift @args; # $s
534    my $return;    my $return;
535    try {    try {
536      $return = $self->parse_char_string ($s, @args);        $return = $self->parse_char_stream ($char_stream, @args);  
537    } catch Whatpm::HTML::RestartParser with {    } catch Whatpm::HTML::RestartParser with {
538      ## NOTE: Invoked after {change_encoding}.      ## NOTE: Invoked after {change_encoding}.
539    
540      $self->{input_encoding} = $charset->get_iana_name;      $self->{input_encoding} = $charset->get_iana_name;
541      if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {      if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
542        !!!parse-error (type => 'chardecode:fallback', ## TODO: type name        !!!parse-error (type => 'chardecode:fallback', ## TODO: type name
543                        value => $e->name,                        value => $self->{input_encoding},
544                        level => $self->{unsupported_level},                        level => $self->{unsupported_level},
545                        line => 1, column => 1);                        line => 1, column => 1);
546      } elsif (not ($e_status &      } elsif (not ($e_status &
# Line 512  sub parse_byte_string ($$$$;$) { Line 550  sub parse_byte_string ($$$$;$) {
550                        level => $self->{unsupported_level},                        level => $self->{unsupported_level},
551                        line => 1, column => 1);                        line => 1, column => 1);
552      }      }
     $s = \ $e->decode ($$bytes_s);  
553      $self->{confident} = 1;      $self->{confident} = 1;
554      $return = $self->parse_char_string ($s, @args);      $char_stream->onerror ($char_onerror);
555        $return = $self->parse_char_stream ($char_stream, @args);
556    };    };
557    return $return;    return $return;
558  } # parse_byte_string  } # parse_byte_stream
559    
560  ## NOTE: HTML5 spec says that the encoding layer MUST NOT strip BOM  ## NOTE: HTML5 spec says that the encoding layer MUST NOT strip BOM
561  ## and the HTML layer MUST ignore it.  However, we do strip BOM in  ## and the HTML layer MUST ignore it.  However, we do strip BOM in
# Line 528  sub parse_byte_string ($$$$;$) { Line 566  sub parse_byte_string ($$$$;$) {
566  ## such as |parse_byte_string| in this module, must ensure that it does  ## such as |parse_byte_string| in this module, must ensure that it does
567  ## strip the BOM and never strip any ZWNBSP.  ## strip the BOM and never strip any ZWNBSP.
568    
569  *parse_char_string = \&parse_string;  sub parse_char_string ($$$;$) {
570      my $self = shift;
571      require utf8;
572      my $s = ref $_[0] ? $_[0] : \($_[0]);
573      open my $input, '<' . (utf8::is_utf8 ($$s) ? ':utf8' : ''), $s;
574      return $self->parse_char_stream ($input, @_[1..$#_]);
575    } # parse_char_string
576    *parse_string = \&parse_char_string;
577    
578  sub parse_string ($$$;$) {  sub parse_char_stream ($$$;$) {
579    my $self = ref $_[0] ? shift : shift->new;    my $self = ref $_[0] ? shift : shift->new;
580    my $s = ref $_[0] ? $_[0] : \($_[0]);    my $input = $_[0];
581    $self->{document} = $_[1];    $self->{document} = $_[1];
582    @{$self->{document}->child_nodes} = ();    @{$self->{document}->child_nodes} = ();
583    
# Line 551  sub parse_string ($$$;$) { Line 596  sub parse_string ($$$;$) {
596      pop @{$self->{prev_char}};      pop @{$self->{prev_char}};
597      unshift @{$self->{prev_char}}, $self->{next_char};      unshift @{$self->{prev_char}}, $self->{next_char};
598    
599      $self->{next_char} = -1 and return if $i >= length $$s;      my $char;
600      $self->{next_char} = ord substr $$s, $i++, 1;      if (defined $self->{next_next_char}) {
601          $char = $self->{next_next_char};
602          delete $self->{next_next_char};
603        } else {
604          $char = $input->getc;
605        }
606        $self->{next_char} = -1 and return unless defined $char;
607        $self->{next_char} = ord $char;
608    
609      ($self->{line_prev}, $self->{column_prev})      ($self->{line_prev}, $self->{column_prev})
610          = ($self->{line}, $self->{column});          = ($self->{line}, $self->{column});
# Line 564  sub parse_string ($$$;$) { Line 616  sub parse_string ($$$;$) {
616        $self->{column} = 0;        $self->{column} = 0;
617      } elsif ($self->{next_char} == 0x000D) { # CR      } elsif ($self->{next_char} == 0x000D) { # CR
618        !!!cp ('j2');        !!!cp ('j2');
619        $i++ if substr ($$s, $i, 1) eq "\x0A";        my $next = $input->getc;
620          if (defined $next and $next ne "\x0A") {
621            $self->{next_next_char} = $next;
622          }
623        $self->{next_char} = 0x000A; # LF # MUST        $self->{next_char} = 0x000A; # LF # MUST
624        $self->{line}++;        $self->{line}++;
625        $self->{column} = 0;        $self->{column} = 0;
# Line 617  sub parse_string ($$$;$) { Line 672  sub parse_string ($$$;$) {
672    delete $self->{parse_error}; # remove loop    delete $self->{parse_error}; # remove loop
673    
674    return $self->{document};    return $self->{document};
675  } # parse_string  } # parse_char_stream
676    
677  sub new ($) {  sub new ($) {
678    my $class = shift;    my $class = shift;
# Line 993  sub _get_next_token ($) { Line 1048  sub _get_next_token ($) {
1048            redo A;            redo A;
1049          } else {          } else {
1050            !!!cp (23);            !!!cp (23);
1051            !!!parse-error (type => 'bare stago');            !!!parse-error (type => 'bare stago',
1052                              line => $self->{line_prev},
1053                              column => $self->{column_prev});
1054            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
1055            ## reconsume            ## reconsume
1056    
# Line 4097  sub _tree_construction_main ($) { Line 4154  sub _tree_construction_main ($) {
4154              !!!next-token;              !!!next-token;
4155              next B;              next B;
4156            } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {            } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4157              !!!cp ('t94');              !!!cp ('t93.2');
4158              #              !!!parse-error (type => 'after head:head', token => $token); ## TODO: error type
4159                ## Ignore the token
4160                !!!nack ('t93.3');
4161                !!!next-token;
4162                next B;
4163            } else {            } else {
4164              !!!cp ('t95');              !!!cp ('t95');
4165              !!!parse-error (type => 'in head:head', token => $token); # or in head noscript              !!!parse-error (type => 'in head:head', token => $token); # or in head noscript
# Line 4420  sub _tree_construction_main ($) { Line 4481  sub _tree_construction_main ($) {
4481                  $self->{insertion_mode} = AFTER_HEAD_IM;                  $self->{insertion_mode} = AFTER_HEAD_IM;
4482                  !!!next-token;                  !!!next-token;
4483                  next B;                  next B;
4484                  } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4485                    !!!cp ('t134.1');
4486                    !!!parse-error (type => 'unmatched end tag:head', token => $token);
4487                    ## Ignore the token
4488                    !!!next-token;
4489                    next B;
4490                } else {                } else {
4491                  !!!cp ('t135');                  die "$0: $self->{insertion_mode}: Unknown insertion mode";
                 #  
4492                }                }
4493              } elsif ($token->{tag_name} eq 'noscript') {              } elsif ($token->{tag_name} eq 'noscript') {
4494                if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {                if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
# Line 4431  sub _tree_construction_main ($) { Line 4497  sub _tree_construction_main ($) {
4497                  $self->{insertion_mode} = IN_HEAD_IM;                  $self->{insertion_mode} = IN_HEAD_IM;
4498                  !!!next-token;                  !!!next-token;
4499                  next B;                  next B;
4500                } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {                } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM or
4501                           $self->{insertion_mode} == AFTER_HEAD_IM) {
4502                  !!!cp ('t137');                  !!!cp ('t137');
4503                  !!!parse-error (type => 'unmatched end tag:noscript', token => $token);                  !!!parse-error (type => 'unmatched end tag:noscript', token => $token);
4504                  ## Ignore the token ## ISSUE: An issue in the spec.                  ## Ignore the token ## ISSUE: An issue in the spec.
# Line 4444  sub _tree_construction_main ($) { Line 4511  sub _tree_construction_main ($) {
4511              } elsif ({              } elsif ({
4512                        body => 1, html => 1,                        body => 1, html => 1,
4513                       }->{$token->{tag_name}}) {                       }->{$token->{tag_name}}) {
4514                if ($self->{insertion_mode} == BEFORE_HEAD_IM) {                if ($self->{insertion_mode} == BEFORE_HEAD_IM or
4515                  !!!cp ('t139');                    $self->{insertion_mode} == IN_HEAD_IM or
4516                  ## As if <head>                    $self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
                 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);  
                 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});  
                 push @{$self->{open_elements}},  
                     [$self->{head_element}, $el_category->{head}];  
   
                 $self->{insertion_mode} = IN_HEAD_IM;  
                 ## Reprocess in the "in head" insertion mode...  
               } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {  
4517                  !!!cp ('t140');                  !!!cp ('t140');
4518                  !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);                  !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4519                  ## Ignore the token                  ## Ignore the token
4520                  !!!next-token;                  !!!next-token;
4521                  next B;                  next B;
4522                  } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4523                    !!!cp ('t140.1');
4524                    !!!parse-error (type => 'unmatched end tag:' . $token->{tag_name}, token => $token);
4525                    ## Ignore the token
4526                    !!!next-token;
4527                    next B;
4528                } else {                } else {
4529                  !!!cp ('t141');                  die "$0: $self->{insertion_mode}: Unknown insertion mode";
4530                }                }
4531                              } elsif ($token->{tag_name} eq 'p') {
4532                #                !!!cp ('t142');
4533              } elsif ({                !!!parse-error (type => 'unmatched end tag:p', token => $token);
4534                        p => 1, br => 1,                ## Ignore the token
4535                       }->{$token->{tag_name}}) {                !!!next-token;
4536                  next B;
4537                } elsif ($token->{tag_name} eq 'br') {
4538                if ($self->{insertion_mode} == BEFORE_HEAD_IM) {                if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4539                  !!!cp ('t142');                  !!!cp ('t142.2');
4540                  ## As if <head>                  ## (before head) as if <head>, (in head) as if </head>
4541                  !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);                  !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4542                  $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});                  $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4543                  push @{$self->{open_elements}},                  $self->{insertion_mode} = AFTER_HEAD_IM;
4544                      [$self->{head_element}, $el_category->{head}];    
4545                    ## Reprocess in the "after head" insertion mode...
4546                  } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4547                    !!!cp ('t143.2');
4548                    ## As if </head>
4549                    pop @{$self->{open_elements}};
4550                    $self->{insertion_mode} = AFTER_HEAD_IM;
4551      
4552                    ## Reprocess in the "after head" insertion mode...
4553                  } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4554                    !!!cp ('t143.3');
4555                    ## ISSUE: Two parse errors for <head><noscript></br>
4556                    !!!parse-error (type => 'unmatched end tag:br', token => $token);
4557                    ## As if </noscript>
4558                    pop @{$self->{open_elements}};
4559                  $self->{insertion_mode} = IN_HEAD_IM;                  $self->{insertion_mode} = IN_HEAD_IM;
4560    
4561                  ## Reprocess in the "in head" insertion mode...                  ## Reprocess in the "in head" insertion mode...
4562                } else {                  ## As if </head>
4563                  !!!cp ('t143');                  pop @{$self->{open_elements}};
4564                }                  $self->{insertion_mode} = AFTER_HEAD_IM;
4565    
4566                #                  ## Reprocess in the "after head" insertion mode...
4567              } else {                } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4568                if ($self->{insertion_mode} == AFTER_HEAD_IM) {                  !!!cp ('t143.4');
                 !!!cp ('t144');  
4569                  #                  #
4570                } else {                } else {
4571                  !!!cp ('t145');                  die "$0: $self->{insertion_mode}: Unknown insertion mode";
                 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);  
                 ## Ignore the token  
                 !!!next-token;  
                 next B;  
4572                }                }
4573    
4574                  ## ISSUE: does not agree with IE7 - it doesn't ignore </br>.
4575                  !!!parse-error (type => 'unmatched end tag:br', token => $token);
4576                  ## Ignore the token
4577                  !!!next-token;
4578                  next B;
4579                } else {
4580                  !!!cp ('t145');
4581                  !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4582                  ## Ignore the token
4583                  !!!next-token;
4584                  next B;
4585              }              }
4586    
4587              if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {              if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {

Legend:
Removed from v.1.134  
changed lines
  Added in v.1.139

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24