/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src

Diff of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory | Revision Log | View Patch

-revision 1.25 by wakaba,
Sun Jun 24 05:12:11 2007 UTC
+revision 1.26 by wakaba,
Sun Jun 24 06:20:37 2007 UTC
 Line 247 
 sub _get_next_token ($) {
      } elsif ($self->{state} eq 'entity data') {
        ## (cannot happen in CDATA state)
-       my $token = $self->_tokenize_attempt_to_consume_an_entity;
+       my $token = $self->_tokenize_attempt_to_consume_an_entity (0);
        $self->{state} = 'data';
        # next-input-character is already done
 Line 899 
 sub _get_next_token ($) {
          redo A;
        }
      } elsif ($self->{state} eq 'entity in attribute value') {
-       my $token = $self->_tokenize_attempt_to_consume_an_entity;
+       my $token = $self->_tokenize_attempt_to_consume_an_entity (1);
        unless (defined $token) {
          $self->{current_attribute}->{value} .= '&';
 Line 1409 
 sub _get_next_token ($) {
          !!!parse-error (type => 'unclosed DOCTYPE');
          $self->{state} = 'data';
-         ## recomsume
+         ## reconsume
          delete $self->{current_token}->{correct};
          !!!emit ($self->{current_token}); # DOCTYPE
 Line 1452 
 sub _get_next_token ($) {
          !!!parse-error (type => 'unclosed DOCTYPE');
          $self->{state} = 'data';
-         ## recomsume
+         ## reconsume
          delete $self->{current_token}->{correct};
          !!!emit ($self->{current_token}); # DOCTYPE
 Line 1527 
 sub _get_next_token ($) {
          !!!parse-error (type => 'unclosed DOCTYPE');
          $self->{state} = 'data';
-         ## recomsume
+         ## reconsume
          delete $self->{current_token}->{correct};
          !!!emit ($self->{current_token}); # DOCTYPE
 Line 1570 
 sub _get_next_token ($) {
    die "$0: _get_next_token: unexpected case";
  } # _get_next_token
- sub _tokenize_attempt_to_consume_an_entity ($) {
+ sub _tokenize_attempt_to_consume_an_entity ($$) {
-   my $self = shift;
+   my ($self, $in_attr) = @_;
    if ({
 0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF,
 Line 1584 
 sub _tokenize_attempt_to_consume_an_enti
      !!!next-input-character;
      if ($self->{next_input_character} == 0x0078 or # x
          $self->{next_input_character} == 0x0058) { # X
-       my $num;
+       my $code;
        X: {
          my $x_char = $self->{next_input_character};
          !!!next-input-character;
          if (0x0030 <= $self->{next_input_character} and
              $self->{next_input_character} <= 0x0039) { # 0..9
-           $num ||= 0;
+           $code ||= 0;
-           $num *= 0x10;
+           $code *= 0x10;
-           $num += $self->{next_input_character} - 0x0030;
+           $code += $self->{next_input_character} - 0x0030;
            redo X;
          } elsif (0x0061 <= $self->{next_input_character} and
                   $self->{next_input_character} <= 0x0066) { # a..f
-           ## ISSUE: the spec says U+0078, which is apparently incorrect
+           $code ||= 0;
-           $num ||= 0;
+           $code *= 0x10;
-           $num *= 0x10;
+           $code += $self->{next_input_character} - 0x0060 + 9;
-           $num += $self->{next_input_character} - 0x0060 + 9;
            redo X;
          } elsif (0x0041 <= $self->{next_input_character} and
                   $self->{next_input_character} <= 0x0046) { # A..F
-           ## ISSUE: the spec says U+0058, which is apparently incorrect
+           $code ||= 0;
-           $num ||= 0;
+           $code *= 0x10;
-           $num *= 0x10;
+           $code += $self->{next_input_character} - 0x0040 + 9;
-           $num += $self->{next_input_character} - 0x0040 + 9;
            redo X;
-         } elsif (not defined $num) { # no hexadecimal digit
+         } elsif (not defined $code) { # no hexadecimal digit
            !!!parse-error (type => 'bare hcro');
            $self->{next_input_character} = 0x0023; # #
            !!!back-next-input-character ($x_char);
-Line 1619 
 sub _tokenize_attempt_to_consume_an_enti
+Line 1617 
 sub _tokenize_attempt_to_consume_an_enti
            !!!parse-error (type => 'no refc');
          }
-         ## TODO: check the definition for |a valid Unicode character|.
+         if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
-         ## <http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2006-December/thread.html#8189>
+           !!!parse-error (type => sprintf 'invalid character reference:U+%04X', $code);
-         if ($num > 1114111 or $num == 0) {
+           $code = 0xFFFD;
-           $num = 0xFFFD; # REPLACEMENT CHARACTER
+         } elsif ($code > 0x10FFFF) {
-           ## ISSUE: Why this is not an error?
+           !!!parse-error (type => sprintf 'invalid character reference:U-%08X', $code);
-         } elsif (0x80 <= $num and $num <= 0x9F) {
+           $code = 0xFFFD;
-           !!!parse-error (type => sprintf 'c1 entity:U+%04X', $num);
+         } elsif ($code == 0x000D) {
-           $num = $c1_entity_char->{$num};
+           !!!parse-error (type => 'CR character reference');
+           $code = 0x000A;
+         } elsif (0x80 <= $code and $code <= 0x9F) {
+           !!!parse-error (type => sprintf 'c1 entity:U+%04X', $code);
+           $code = $c1_entity_char->{$code};
          }
-         return {type => 'character', data => chr $num};
+         return {type => 'character', data => chr $code};
        } # X
      } elsif (0x0030 <= $self->{next_input_character} and
               $self->{next_input_character} <= 0x0039) { # 0..9
-Line 1650 
 sub _tokenize_attempt_to_consume_an_enti
+Line 1652 
 sub _tokenize_attempt_to_consume_an_enti
          !!!parse-error (type => 'no refc');
        }
-       ## TODO: check the definition for |a valid Unicode character|.
+       if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
-       if ($code > 1114111 or $code == 0) {
+         !!!parse-error (type => sprintf 'invalid character reference:U+%04X', $code);
-         $code = 0xFFFD; # REPLACEMENT CHARACTER
+         $code = 0xFFFD;
-         ## ISSUE: Why this is not an error?
+       } elsif ($code > 0x10FFFF) {
+         !!!parse-error (type => sprintf 'invalid character reference:U-%08X', $code);
+         $code = 0xFFFD;
+       } elsif ($code == 0x000D) {
+         !!!parse-error (type => 'CR character reference');
+         $code = 0x000A;
        } elsif (0x80 <= $code and $code <= 0x9F) {
          !!!parse-error (type => sprintf 'c1 entity:U+%04X', $code);
          $code = $c1_entity_char->{$code};
-Line 1689 
 sub _tokenize_attempt_to_consume_an_enti
+Line 1696 
 sub _tokenize_attempt_to_consume_an_enti
              $self->{next_input_character} == 0x003B)) { # ;
        $entity_name .= chr $self->{next_input_character};
        if (defined $EntityChar->{$entity_name}) {
-         $value = $EntityChar->{$entity_name};
          if ($self->{next_input_character} == 0x003B) { # ;
+           $value = $EntityChar->{$entity_name};
            $match = 1;
            !!!next-input-character;
            last;
-         } else {
+         } elsif (not $in_attr) {
+           $value = $EntityChar->{$entity_name};
            $match = -1;
+         } else {
+           $value .= chr $self->{next_input_character};
          }
        } else {
          $value .= chr $self->{next_input_character};
-Line 1711 
 sub _tokenize_attempt_to_consume_an_enti
+Line 1721 
 sub _tokenize_attempt_to_consume_an_enti
      } else {
        !!!parse-error (type => 'bare ero');
        ## NOTE: No characters are consumed in the spec.
-       !!!back-token ({type => 'character', data => $value});
+       return {type => 'character', data => '&'.$value};
-       return undef;
      }
    } else {
      ## no characters are consumed
-Line 1907 
 sub _tree_construction_initial ($) {
+Line 1916 
 sub _tree_construction_initial ($) {
      } elsif ($token->{type} eq 'character') {
        if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
          ## Ignore the token
          unless (length $token->{data}) {
            ## Stay in the phase
            !!!next-token;
-Line 1949 
 sub _tree_construction_root_element ($)
+Line 1959 
 sub _tree_construction_root_element ($)
          !!!next-token;
          redo B;
        } elsif ($token->{type} eq 'character') {
-         if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
+         if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
-           $self->{document}->manakai_append_text ($1);
+           ## Ignore the token.
-           ## ISSUE: DOM3 Core does not allow Document > Text
            unless (length $token->{data}) {
              ## Stay in the phase
              !!!next-token;
-Line 2451 
 sub _tree_construction_main ($) {
+Line 2461 
 sub _tree_construction_main ($) {
          !!!insert-element-t ($token->{tag_name}, $token->{attributes});
          pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
          !!!next-token;
+         ## TODO: Extracting |charset| from |meta|.
          return;
        } elsif ($token->{tag_name} eq 'title') {
          !!!parse-error (type => 'in body:title');
-Line 3350 
 sub _tree_construction_main ($) {
+Line 3361 
 sub _tree_construction_main ($) {
                }
                !!!insert-element ($token->{tag_name}, $token->{attributes});
                pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
+               ## TODO: Extracting |charset| from |meta|.
                pop @{$self->{open_elements}}
                    if $self->{insertion_mode} eq 'after head';
                !!!next-token;
-Line 5310 
 sub get_inner_html ($$$) {
+Line 5322 
 sub get_inner_html ($$$) {
        if (not $in_cdata and {
          style => 1, script => 1, xmp => 1, iframe => 1,
          noembed => 1, noframes => 1, noscript => 1,
+         plaintext => 1,
        }->{$tag_name}) {
          unshift @node, 'cdata-out';
          $in_cdata = 1;

 Legend:
-Removed from v.1.25
 changed lines
+Added in v.1.26

admin@suikawiki.org	ViewVC Help
Powered by ViewVC 1.1.24