/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src

Diff of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory | Revision Log | View Patch

-revision 1.136 by wakaba,
Sat May 17 12:29:24 2008 UTC
+revision 1.139 by wakaba,
Sat May 24 04:26:27 2008 UTC
 Line 11 
 use Error qw(:try);
  ## TODO: 1252 parse error (revision 1264)
  ## TODO: 8859-11 = 874 (revision 1271)
+ require IO::Handle;
  my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
  my $MML_NS = q<http://www.w3.org/1998/Math/MathML>;
  my $SVG_NS = q<http://www.w3.org/2000/svg>;
-Line 332 
 my $c1_entity_char = {
+Line 334 
 my $c1_entity_char = {
  }; # $c1_entity_char
  sub parse_byte_string ($$$$;$) {
+   my $self = shift;
+   my $charset_name = shift;
+   open my $input, '<', ref $_[0] ? $_[0] : \($_[0]);
+   return $self->parse_byte_stream ($charset_name, $input, @_[1..$#_]);
+ } # parse_byte_string
+ sub parse_byte_stream ($$$$;$) {
    my $self = ref $_[0] ? shift : shift->new;
    my $charset_name = shift;
-   open my $byte_stream, '<', ref $_[0] ? $_[0] : \($_[0]);
+   my $byte_stream = $_[0];
    my $onerror = $_[2] || sub {
      my (%opt) = @_;
-Line 513 
 sub parse_byte_string ($$$$;$) {
+Line 522 
 sub parse_byte_string ($$$$;$) {
    my $char_onerror = sub {
      my (undef, $type, %opt) = @_;
-     !!!parse-error (%opt, type => $type);
+     !!!parse-error (%opt, type => $type,
+                     line => $self->{line}, column => $self->{column} + 1);
      if ($opt{octets}) {
        ${$opt{octets}} = "\x{FFFD}"; # replacement character
      }
-Line 545 
 sub parse_byte_string ($$$$;$) {
+Line 555 
 sub parse_byte_string ($$$$;$) {
      $return = $self->parse_char_stream ($char_stream, @args);
    };
    return $return;
- } # parse_byte_string
+ } # parse_byte_stream
  ## NOTE: HTML5 spec says that the encoding layer MUST NOT strip BOM
  ## and the HTML layer MUST ignore it.  However, we do strip BOM in
-Line 558 
 sub parse_byte_string ($$$$;$) {
+Line 568 
 sub parse_byte_string ($$$$;$) {
  sub parse_char_string ($$$;$) {
    my $self = shift;
-   open my $input, '<:utf8', ref $_[0] ? $_[0] : \($_[0]);
+   require utf8;
+   my $s = ref $_[0] ? $_[0] : \($_[0]);
+   open my $input, '<' . (utf8::is_utf8 ($$s) ? ':utf8' : ''), $s;
    return $self->parse_char_stream ($input, @_[1..$#_]);
  } # parse_char_string
  *parse_string = \&parse_char_string;
-Line 584 
 sub parse_char_stream ($$$;$) {
+Line 596 
 sub parse_char_stream ($$$;$) {
      pop @{$self->{prev_char}};
      unshift @{$self->{prev_char}}, $self->{next_char};
-     my $char = $input->getc;
+     my $char;
+     if (defined $self->{next_next_char}) {
+       $char = $self->{next_next_char};
+       delete $self->{next_next_char};
+     } else {
+       $char = $input->getc;
+     }
      $self->{next_char} = -1 and return unless defined $char;
      $self->{next_char} = ord $char;
-Line 599 
 sub parse_char_stream ($$$;$) {
+Line 617 
 sub parse_char_stream ($$$;$) {
      } elsif ($self->{next_char} == 0x000D) { # CR
        !!!cp ('j2');
        my $next = $input->getc;
-       if ($next ne "\x0A") {
+       if (defined $next and $next ne "\x0A") {
-         $input->ungetc ($next);
+         $self->{next_next_char} = $next;
        }
        $self->{next_char} = 0x000A; # LF # MUST
        $self->{line}++;
-Line 4136 
 sub _tree_construction_main ($) {
+Line 4154 
 sub _tree_construction_main ($) {
              !!!next-token;
              next B;
            } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
-             !!!cp ('t94');
+             !!!cp ('t93.2');
-             #
+             !!!parse-error (type => 'after head:head', token => $token); ## TODO: error type
+             ## Ignore the token
+             !!!nack ('t93.3');
+             !!!next-token;
+             next B;
            } else {
              !!!cp ('t95');
              !!!parse-error (type => 'in head:head', token => $token); # or in head noscript
-Line 4459 
 sub _tree_construction_main ($) {
+Line 4481 
 sub _tree_construction_main ($) {
                  $self->{insertion_mode} = AFTER_HEAD_IM;
                  !!!next-token;
                  next B;
+               } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
+                 !!!cp ('t134.1');
+                 !!!parse-error (type => 'unmatched end tag:head', token => $token);
+                 ## Ignore the token
+                 !!!next-token;
+                 next B;
                } else {
-                 !!!cp ('t135');
+                 die "$0: $self->{insertion_mode}: Unknown insertion mode";
-                 #
                }
              } elsif ($token->{tag_name} eq 'noscript') {
                if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
-Line 4470 
 sub _tree_construction_main ($) {
+Line 4497 
 sub _tree_construction_main ($) {
                  $self->{insertion_mode} = IN_HEAD_IM;
                  !!!next-token;
                  next B;
-               } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
+               } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM or
+                        $self->{insertion_mode} == AFTER_HEAD_IM) {
                  !!!cp ('t137');
                  !!!parse-error (type => 'unmatched end tag:noscript', token => $token);
                  ## Ignore the token ## ISSUE: An issue in the spec.
-Line 4483 
 sub _tree_construction_main ($) {
+Line 4511 
 sub _tree_construction_main ($) {
              } elsif ({
                        body => 1, html => 1,
                       }->{$token->{tag_name}}) {
-               if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
+               if ($self->{insertion_mode} == BEFORE_HEAD_IM or
-                 !!!cp ('t139');
+                   $self->{insertion_mode} == IN_HEAD_IM or
-                 ## As if <head>
+                   $self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
-                 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
-                 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
-                 push @{$self->{open_elements}},
-                     [$self->{head_element}, $el_category->{head}];
-                 $self->{insertion_mode} = IN_HEAD_IM;
-                 ## Reprocess in the "in head" insertion mode...
-               } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
                  !!!cp ('t140');
                  !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
                  ## Ignore the token
                  !!!next-token;
                  next B;
+               } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
+                 !!!cp ('t140.1');
+                 !!!parse-error (type => 'unmatched end tag:' . $token->{tag_name}, token => $token);
+                 ## Ignore the token
+                 !!!next-token;
+                 next B;
                } else {
-                 !!!cp ('t141');
+                 die "$0: $self->{insertion_mode}: Unknown insertion mode";
                }
+             } elsif ($token->{tag_name} eq 'p') {
-               #
+               !!!cp ('t142');
-             } elsif ({
+               !!!parse-error (type => 'unmatched end tag:p', token => $token);
-                       p => 1, br => 1,
+               ## Ignore the token
-                      }->{$token->{tag_name}}) {
+               !!!next-token;
+               next B;
+             } elsif ($token->{tag_name} eq 'br') {
                if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
-                 !!!cp ('t142');
+                 !!!cp ('t142.2');
-                 ## As if <head>
+                 ## (before head) as if <head>, (in head) as if </head>
                  !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
                  $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
-                 push @{$self->{open_elements}},
+                 $self->{insertion_mode} = AFTER_HEAD_IM;
-                     [$self->{head_element}, $el_category->{head}];
+                 ## Reprocess in the "after head" insertion mode...
+               } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
+                 !!!cp ('t143.2');
+                 ## As if </head>
+                 pop @{$self->{open_elements}};
+                 $self->{insertion_mode} = AFTER_HEAD_IM;
+                 ## Reprocess in the "after head" insertion mode...
+               } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
+                 !!!cp ('t143.3');
+                 ## ISSUE: Two parse errors for <head><noscript></br>
+                 !!!parse-error (type => 'unmatched end tag:br', token => $token);
+                 ## As if </noscript>
+                 pop @{$self->{open_elements}};
                  $self->{insertion_mode} = IN_HEAD_IM;
                  ## Reprocess in the "in head" insertion mode...
-               } else {
+                 ## As if </head>
-                 !!!cp ('t143');
+                 pop @{$self->{open_elements}};
-               }
+                 $self->{insertion_mode} = AFTER_HEAD_IM;
-               #
+                 ## Reprocess in the "after head" insertion mode...
-             } else {
+               } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
-               if ($self->{insertion_mode} == AFTER_HEAD_IM) {
+                 !!!cp ('t143.4');
-                 !!!cp ('t144');
                  #
                } else {
-                 !!!cp ('t145');
+                 die "$0: $self->{insertion_mode}: Unknown insertion mode";
-                 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
-                 ## Ignore the token
-                 !!!next-token;
-                 next B;
                }
+               ## ISSUE: does not agree with IE7 - it doesn't ignore </br>.
+               !!!parse-error (type => 'unmatched end tag:br', token => $token);
+               ## Ignore the token
+               !!!next-token;
+               next B;
+             } else {
+               !!!cp ('t145');
+               !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
+               ## Ignore the token
+               !!!next-token;
+               next B;
              }
              if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {

 Legend:

-Removed from v.1.136
 changed lines
+Added in v.1.139

admin@suikawiki.org	ViewVC Help
Powered by ViewVC 1.1.24