/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Diff of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory | Revision Log | View Patch

revision 1.3 by wakaba, Wed May 2 13:44:34 2007 UTC | revision 1.4 by wakaba, Fri May 4 09:16:04 2007 UTC
# Line 278  my $entity_char = { Line 278  my $entity_char = {
278    zeta => "\x{03B6}",    zeta => "\x{03B6}",
279    zwj => "\x{200D}",    zwj => "\x{200D}",
280    zwnj => "\x{200C}",    zwnj => "\x{200C}",
281  };  }; # $entity_char
282    
283    ## <http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2006-December/thread.html#8562>
284    my $c1_entity_char = {
285         128, 8364,
286         129, 65533,
287         130, 8218,
288         131, 402,
289         132, 8222,
290         133, 8230,
291         134, 8224,
292         135, 8225,
293         136, 710,
294         137, 8240,
295         138, 352,
296         139, 8249,
297         140, 338,
298         141, 65533,
299         142, 381,
300         143, 65533,
301         144, 65533,
302         145, 8216,
303         146, 8217,
304         147, 8220,
305         148, 8221,
306         149, 8226,
307         150, 8211,
308         151, 8212,
309         152, 732,
310         153, 8482,
311         154, 353,
312         155, 8250,
313         156, 339,
314         157, 65533,
315         158, 382,
316         159, 376,
317    }; # $c1_entity_char
318    
319  my $special_category = {  my $special_category = {
320    address => 1, area => 1, base => 1, basefont => 1, bgsound => 1,    address => 1, area => 1, base => 1, basefont => 1, bgsound => 1,
# Line 318  sub parse_string ($$$;$) { Line 354  sub parse_string ($$$;$) {
354      $self->{next_input_character} = ord substr $$s, $i++, 1;      $self->{next_input_character} = ord substr $$s, $i++, 1;
355      $column++;      $column++;
356            
357      if ($self->{next_input_character} == 0x000D) { # CR      if ($self->{next_input_character} == 0x000A) { # LF
358          $line++;
359          $column = 0;
360        } elsif ($self->{next_input_character} == 0x000D) { # CR
361        if ($i >= length $$s) {        if ($i >= length $$s) {
362          #          #
363        } else {        } else {
# Line 331  sub parse_string ($$$;$) { Line 370  sub parse_string ($$$;$) {
370        }        }
371        $self->{next_input_character} = 0x000A; # LF # MUST        $self->{next_input_character} = 0x000A; # LF # MUST
372        $line++;        $line++;
373        $column = -1;        $column = 0;
374      } elsif ($self->{next_input_character} > 0x10FFFF) {      } elsif ($self->{next_input_character} > 0x10FFFF) {
375        $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST        $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
376      } elsif ($self->{next_input_character} == 0x0000) { # NULL      } elsif ($self->{next_input_character} == 0x0000) { # NULL
# Line 1311  sub _get_next_token ($) { Line 1350  sub _get_next_token ($) {
1350          redo A;          redo A;
1351        } elsif (0x0061 <= $self->{next_input_character} and        } elsif (0x0061 <= $self->{next_input_character} and
1352                 $self->{next_input_character} <= 0x007A) { # a..z                 $self->{next_input_character} <= 0x007A) { # a..z
1353    ## ISSUE: "Set the token's name name to the" in the spec
1354          $self->{current_token} = {type => 'DOCTYPE',          $self->{current_token} = {type => 'DOCTYPE',
1355                            name => chr ($self->{next_input_character} - 0x0020),                            name => chr ($self->{next_input_character} - 0x0020),
1356                            error => 1};                            error => 1};
# Line 1337  sub _get_next_token ($) { Line 1377  sub _get_next_token ($) {
1377          $self->{current_token} = {type => 'DOCTYPE',          $self->{current_token} = {type => 'DOCTYPE',
1378                            name => chr ($self->{next_input_character}),                            name => chr ($self->{next_input_character}),
1379                            error => 1};                            error => 1};
1380    ## ISSUE: "Set the token's name name to the" in the spec
1381          $self->{state} = 'DOCTYPE name';          $self->{state} = 'DOCTYPE name';
1382          !!!next-input-character;          !!!next-input-character;
1383          redo A;          redo A;
# Line 1454  sub _tokenize_attempt_to_consume_an_enti Line 1495  sub _tokenize_attempt_to_consume_an_enti
1495        
1496    if ($self->{next_input_character} == 0x0023) { # #    if ($self->{next_input_character} == 0x0023) { # #
1497      !!!next-input-character;      !!!next-input-character;
     my $num;  
1498      if ($self->{next_input_character} == 0x0078 or # x      if ($self->{next_input_character} == 0x0078 or # x
1499          $self->{next_input_character} == 0x0058) { # X          $self->{next_input_character} == 0x0058) { # X
1500          my $num;
1501        X: {        X: {
1502          my $x_char = $self->{next_input_character};          my $x_char = $self->{next_input_character};
1503          !!!next-input-character;          !!!next-input-character;
# Line 1492  sub _tokenize_attempt_to_consume_an_enti Line 1533  sub _tokenize_attempt_to_consume_an_enti
1533          }          }
1534    
1535          ## TODO: check the definition for |a valid Unicode character|.          ## TODO: check the definition for |a valid Unicode character|.
1536            ## <http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2006-December/thread.html#8189>
1537          if ($num > 1114111 or $num == 0) {          if ($num > 1114111 or $num == 0) {
1538            $num = 0xFFFD; # REPLACEMENT CHARACTER            $num = 0xFFFD; # REPLACEMENT CHARACTER
1539            ## ISSUE: Why this is not an error?            ## ISSUE: Why this is not an error?
1540            } elsif (0x80 <= $num and $num <= 0x9F) {
1541              ## NOTE: <http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2006-December/thread.html#8562>
1542              ## ISSUE: Not in the spec yet; parse error?
1543              $num = $c1_entity_char->{$num};
1544          }          }
1545    
1546          return {type => 'character', data => chr $num};          return {type => 'character', data => chr $num};
# Line 1522  sub _tokenize_attempt_to_consume_an_enti Line 1568  sub _tokenize_attempt_to_consume_an_enti
1568        if ($code > 1114111 or $code == 0) {        if ($code > 1114111 or $code == 0) {
1569          $code = 0xFFFD; # REPLACEMENT CHARACTER          $code = 0xFFFD; # REPLACEMENT CHARACTER
1570          ## ISSUE: Why this is not an error?          ## ISSUE: Why this is not an error?
1571          } elsif (0x80 <= $code and $code <= 0x9F) {
1572            ## NOTE: <http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2006-December/thread.html#8562>
1573            ## ISSUE: Not in the spec yet; parse error?
1574            $code = $c1_entity_char->{$code};
1575        }        }
1576                
1577        return {type => 'character', data => chr $code};        return {type => 'character', data => chr $code};
# Line 1975  sub _tree_construction_main ($) { Line 2025  sub _tree_construction_main ($) {
2025              $formatting_element_i_in_open = $_;              $formatting_element_i_in_open = $_;
2026              last INSCOPE;              last INSCOPE;
2027            } else { # in open elements but not in scope            } else { # in open elements but not in scope
2028              !!!parse-error;              !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2029              ## Ignore the token              ## Ignore the token
2030              !!!next-token;              !!!next-token;
2031              return;              return;
# Line 1988  sub _tree_construction_main ($) { Line 2038  sub _tree_construction_main ($) {
2038          }          }
2039        } # INSCOPE        } # INSCOPE
2040        unless (defined $formatting_element_i_in_open) {        unless (defined $formatting_element_i_in_open) {
2041          !!!parse-error;          !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2042          pop @$active_formatting_elements; # $formatting_element          pop @$active_formatting_elements; # $formatting_element
2043          !!!next-token; ## TODO: ok?          !!!next-token; ## TODO: ok?
2044          return;          return;
2045        }        }
2046        if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {        if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
2047          !!!parse-error;          !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2048        }        }
2049                
2050        ## Step 2        ## Step 2
# Line 4807  sub set_inner_html ($$$) { Line 4857  sub set_inner_html ($$$) {
4857        $self->{next_input_character} = -1 and return if $i >= length $$s;        $self->{next_input_character} = -1 and return if $i >= length $$s;
4858        $self->{next_input_character} = ord substr $$s, $i++, 1;        $self->{next_input_character} = ord substr $$s, $i++, 1;
4859        $column++;        $column++;
4860          
4861        if ($self->{next_input_character} == 0x000D) { # CR        if ($self->{next_input_character} == 0x000A) { # LF
4862            $line++;
4863            $column = 0;
4864          } elsif ($self->{next_input_character} == 0x000D) { # CR
4865          if ($i >= length $$s) {          if ($i >= length $$s) {
4866            #            #
4867          } else {          } else {
# Line 4821  sub set_inner_html ($$$) { Line 4874  sub set_inner_html ($$$) {
4874          }          }
4875          $self->{next_input_character} = 0x000A; # LF # MUST          $self->{next_input_character} = 0x000A; # LF # MUST
4876          $line++;          $line++;
4877          $column = -1;          $column = 0;
4878        } elsif ($self->{next_input_character} > 0x10FFFF) {        } elsif ($self->{next_input_character} > 0x10FFFF) {
4879          $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST          $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
4880        } elsif ($self->{next_input_character} == 0x0000) { # NULL        } elsif ($self->{next_input_character} == 0x0000) { # NULL

Legend:
Removed from v.1.3  
changed lines
  Added in v.1.4

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24