/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src

Diff of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory | Revision Log | View Patch Patch

-revision 1.3 by wakaba,
Wed May  2 13:44:34 2007 UTC
+revision 1.4 by wakaba,
Fri May  4 09:16:04 2007 UTC
 Line 278 
 my $entity_char = {
    zeta => "\x{03B6}",
    zwj => "\x{200D}",
    zwnj => "\x{200C}",
- };
+ }; # $entity_char
+ ## <http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2006-December/thread.html#8562>
+ my $c1_entity_char = {
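+    0x80, 8364,
+    0x81, 65533,
+    0x82, 8218,
+    0x83, 402,
+    0x84, 8222,
+    0x85, 8230,
+    0x86, 8224,
+    0x87, 8225,
+    0x88, 710,
+    0x89, 8240,
+    0x8A, 352,
+    0x8B, 8249,
+    0x8C, 338,
+    0x8D, 65533,
+    0x8E, 381,
+    0x8F, 65533,
+    0x90, 65533,
+    0x91, 8216,
+    0x92, 8217,
+    0x93, 8220,
+    0x94, 8221,
+    0x95, 8226,
+    0x96, 8211,
+    0x97, 8212,
+    0x98, 732,
+    0x99, 8482,
+    0x9A, 353,
+    0x9B, 8250,
+    0x9C, 339,
+    0x9D, 65533,
+    0x9E, 382,
+    0x9F, 376,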
+ }; # $c1_entity_char
  my $special_category = {
    address => 1, area => 1, base => 1, basefont => 1, bgsound => 1,
-Line 318 
 sub parse_string ($$$;$) {
+Line 354 
 sub parse_string ($$$;$) {
      $self->{next_input_character} = ord substr $$s, $i++, 1;
      $column++;
-     if ($self->{next_input_character} == 0x000D) { # CR
+     if ($self->{next_input_character} == 0x000A) { # LF
+       $line++;
+       $column = 0;
+     } elsif ($self->{next_input_character} == 0x000D) { # CR
        if ($i >= length $$s) {
          #
        } else {
-Line 331 
 sub parse_string ($$$;$) {
+Line 370 
 sub parse_string ($$$;$) {
        }
        $self->{next_input_character} = 0x000A; # LF # MUST
        $line++;
-       $column = -1;
+       $column = 0;
      } elsif ($self->{next_input_character} > 0x10FFFF) {
        $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
      } elsif ($self->{next_input_character} == 0x0000) { # NULL
-Line 1311 
 sub _get_next_token ($) {
+Line 1350 
 sub _get_next_token ($) {
          redo A;
        } elsif (0x0061 <= $self->{next_input_character} and
                 $self->{next_input_character} <= 0x007A) { # a..z
+ ## ISSUE: "Set the token's name name to the" in the spec
          $self->{current_token} = {type => 'DOCTYPE',
                            name => chr ($self->{next_input_character} - 0x0020),
                            error => 1};
-Line 1337 
 sub _get_next_token ($) {
+Line 1377 
 sub _get_next_token ($) {
          $self->{current_token} = {type => 'DOCTYPE',
                            name => chr ($self->{next_input_character}),
                            error => 1};
+ ## ISSUE: "Set the token's name name to the" in the spec
          $self->{state} = 'DOCTYPE name';
          !!!next-input-character;
          redo A;
-Line 1454 
 sub _tokenize_attempt_to_consume_an_enti
+Line 1495 
 sub _tokenize_attempt_to_consume_an_enti
    if ($self->{next_input_character} == 0x0023) { # #
      !!!next-input-character;
-     my $num;
      if ($self->{next_input_character} == 0x0078 or # x
          $self->{next_input_character} == 0x0058) { # X
+       my $num;
        X: {
          my $x_char = $self->{next_input_character};
          !!!next-input-character;
-Line 1492 
 sub _tokenize_attempt_to_consume_an_enti
+Line 1533 
 sub _tokenize_attempt_to_consume_an_enti
          }
          ## TODO: check the definition for |a valid Unicode character|.
+         ## <http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2006-December/thread.html#8189>
          if ($num > 1114111 or $num == 0) {
            $num = 0xFFFD; # REPLACEMENT CHARACTER
            ## ISSUE: Why this is not an error?
+         } elsif (0x80 <= $num and $num <= 0x9F) {
+           ## NOTE: <http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2006-December/thread.html#8562>
+           ## ISSUE: Not in the spec yet; parse error?
+           $num = $c1_entity_char->{$num};
          }
          return {type => 'character', data => chr $num};
-Line 1522 
 sub _tokenize_attempt_to_consume_an_enti
+Line 1568 
 sub _tokenize_attempt_to_consume_an_enti
        if ($code > 1114111 or $code == 0) {
          $code = 0xFFFD; # REPLACEMENT CHARACTER
          ## ISSUE: Why this is not an error?
+       } elsif (0x80 <= $code and $code <= 0x9F) {
+         ## NOTE: <http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2006-December/thread.html#8562>
+         ## ISSUE: Not in the spec yet; parse error?
+         $code = $c1_entity_char->{$code};
        }
        return {type => 'character', data => chr $code};
-Line 1975 
 sub _tree_construction_main ($) {
+Line 2025 
 sub _tree_construction_main ($) {
              $formatting_element_i_in_open = $_;
              last INSCOPE;
            } else { # in open elements but not in scope
-             !!!parse-error;
+             !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
              ## Ignore the token
              !!!next-token;
              return;
-Line 1988 
 sub _tree_construction_main ($) {
+Line 2038 
 sub _tree_construction_main ($) {
          }
        } # INSCOPE
        unless (defined $formatting_element_i_in_open) {
-         !!!parse-error;
+         !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
          pop @$active_formatting_elements; # $formatting_element
          !!!next-token; ## TODO: ok?
          return;
        }
        if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
-         !!!parse-error;
+         !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
        }
        ## Step 2
-Line 4807 
 sub set_inner_html ($$$) {
+Line 4857 
 sub set_inner_html ($$$) {
        $self->{next_input_character} = -1 and return if $i >= length $$s;
        $self->{next_input_character} = ord substr $$s, $i++, 1;
        $column++;
-       if ($self->{next_input_character} == 0x000D) { # CR
+       if ($self->{next_input_character} == 0x000A) { # LF
+         $line++;
+         $column = 0;
+       } elsif ($self->{next_input_character} == 0x000D) { # CR
          if ($i >= length $$s) {
            #
          } else {
-Line 4821 
 sub set_inner_html ($$$) {
+Line 4874 
 sub set_inner_html ($$$) {
          }
          $self->{next_input_character} = 0x000A; # LF # MUST
          $line++;
-         $column = -1;
+         $column = 0;
        } elsif ($self->{next_input_character} > 0x10FFFF) {
          $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
        } elsif ($self->{next_input_character} == 0x0000) { # NULL

 Legend:



Removed from v.1.3
 


changed lines


 
Added in v.1.4
 Legend:



Removed from v.1.3
 


changed lines


 
Added in v.1.4
-Removed from v.1.3
+Added in v.1.4

admin@suikawiki.org	ViewVC Help
Powered by ViewVC 1.1.24