/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Diff of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory | Revision Log | View Patch

revision 1.117 by wakaba, Wed Mar 19 23:43:47 2008 UTC | revision 1.118 by wakaba, Thu Mar 20 01:34:00 2008 UTC
# Line 466  sub _get_next_token ($) { Line 466  sub _get_next_token ($) {
466        # Anything else        # Anything else
467        my $token = {type => CHARACTER_TOKEN,        my $token = {type => CHARACTER_TOKEN,
468                     data => chr $self->{next_char},                     data => chr $self->{next_char},
469                     line => $self->{line}, column => $self->{column}};                     #line => $self->{line}, column => $self->{column},
470                      };
471        ## Stay in the data state        ## Stay in the data state
472        !!!next-input-character;        !!!next-input-character;
473    
# Line 476  sub _get_next_token ($) { Line 477  sub _get_next_token ($) {
477      } elsif ($self->{state} == ENTITY_DATA_STATE) {      } elsif ($self->{state} == ENTITY_DATA_STATE) {
478        ## (cannot happen in CDATA state)        ## (cannot happen in CDATA state)
479    
480        my ($l, $c) = ($self->{line_prev}, $self->{column_prev});        #my ($l, $c) = ($self->{line_prev}, $self->{column_prev});
481                
482        my $token = $self->_tokenize_attempt_to_consume_an_entity (0, -1);        my $token = $self->_tokenize_attempt_to_consume_an_entity (0, -1);
483    
# Line 486  sub _get_next_token ($) { Line 487  sub _get_next_token ($) {
487        unless (defined $token) {        unless (defined $token) {
488          !!!cp (13);          !!!cp (13);
489          !!!emit ({type => CHARACTER_TOKEN, data => '&',          !!!emit ({type => CHARACTER_TOKEN, data => '&',
490                    line => $l, column => $c});                    #line => $l, column => $c,
491                     });
492        } else {        } else {
493          !!!cp (14);          !!!cp (14);
494          !!!emit ($token);          !!!emit ($token);
# Line 506  sub _get_next_token ($) { Line 508  sub _get_next_token ($) {
508            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
509    
510            !!!emit ({type => CHARACTER_TOKEN, data => '<',            !!!emit ({type => CHARACTER_TOKEN, data => '<',
511                      line => $self->{line_prev},                      #line => $self->{line_prev},
512                      column => $self->{column_prev}});                      #column => $self->{column_prev},
513                       });
514    
515            redo A;            redo A;
516          }          }
# Line 552  sub _get_next_token ($) { Line 555  sub _get_next_token ($) {
555            !!!next-input-character;            !!!next-input-character;
556    
557            !!!emit ({type => CHARACTER_TOKEN, data => '<>',            !!!emit ({type => CHARACTER_TOKEN, data => '<>',
558                      line => $self->{line_prev},                      #line => $self->{line_prev},
559                      column => $self->{column_prev}});                      #column => $self->{column_prev},
560                       });
561    
562            redo A;            redo A;
563          } elsif ($self->{next_char} == 0x003F) { # ?          } elsif ($self->{next_char} == 0x003F) { # ?
# Line 563  sub _get_next_token ($) { Line 567  sub _get_next_token ($) {
567                            column => $self->{column_prev});                            column => $self->{column_prev});
568            $self->{state} = BOGUS_COMMENT_STATE;            $self->{state} = BOGUS_COMMENT_STATE;
569            $self->{current_token} = {type => COMMENT_TOKEN, data => '',            $self->{current_token} = {type => COMMENT_TOKEN, data => '',
570                                      line => $self->{line_prev},                                      #line => $self->{line_prev},
571                                      column => $self->{column_prev}};                                      #column => $self->{column_prev},
572                                       };
573            ## $self->{next_char} is intentionally left as is            ## $self->{next_char} is intentionally left as is
574            redo A;            redo A;
575          } else {          } else {
# Line 574  sub _get_next_token ($) { Line 579  sub _get_next_token ($) {
579            ## reconsume            ## reconsume
580    
581            !!!emit ({type => CHARACTER_TOKEN, data => '<',            !!!emit ({type => CHARACTER_TOKEN, data => '<',
582                      line => $self->{line_prev},                      #line => $self->{line_prev},
583                      column => $self->{column_prev}});                      #column => $self->{column_prev},
584                       });
585    
586            redo A;            redo A;
587          }          }
# Line 604  sub _get_next_token ($) { Line 610  sub _get_next_token ($) {
610                $self->{state} = DATA_STATE;                $self->{state} = DATA_STATE;
611    
612                !!!emit ({type => CHARACTER_TOKEN, data => '</',                !!!emit ({type => CHARACTER_TOKEN, data => '</',
613                          line => $l, column => $c});                          #line => $l, column => $c,
614                           });
615        
616                redo A;                redo A;
617              }              }
# Line 624  sub _get_next_token ($) { Line 631  sub _get_next_token ($) {
631              !!!back-next-input-character (@next_char);              !!!back-next-input-character (@next_char);
632              $self->{state} = DATA_STATE;              $self->{state} = DATA_STATE;
633              !!!emit ({type => CHARACTER_TOKEN, data => '</',              !!!emit ({type => CHARACTER_TOKEN, data => '</',
634                        line => $l, column => $c});                        #line => $l, column => $c,
635                         });
636              redo A;              redo A;
637            } else {            } else {
638              !!!cp (27);              !!!cp (27);
# Line 638  sub _get_next_token ($) { Line 646  sub _get_next_token ($) {
646            # next-input-character is already done            # next-input-character is already done
647            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
648            !!!emit ({type => CHARACTER_TOKEN, data => '</',            !!!emit ({type => CHARACTER_TOKEN, data => '</',
649                      line => $l, column => $c});                      #line => $l, column => $c,
650                       });
651            redo A;            redo A;
652          }          }
653        }        }
# Line 677  sub _get_next_token ($) { Line 686  sub _get_next_token ($) {
686          # reconsume          # reconsume
687    
688          !!!emit ({type => CHARACTER_TOKEN, data => '</',          !!!emit ({type => CHARACTER_TOKEN, data => '</',
689                    line => $l, column => $c});                    #line => $l, column => $c,
690                     });
691    
692          redo A;          redo A;
693        } else {        } else {
# Line 685  sub _get_next_token ($) { Line 695  sub _get_next_token ($) {
695          !!!parse-error (type => 'bogus end tag');          !!!parse-error (type => 'bogus end tag');
696          $self->{state} = BOGUS_COMMENT_STATE;          $self->{state} = BOGUS_COMMENT_STATE;
697          $self->{current_token} = {type => COMMENT_TOKEN, data => '',          $self->{current_token} = {type => COMMENT_TOKEN, data => '',
698                                    line => $self->{line_prev}, # "<" of "</"                                    #line => $self->{line_prev}, # "<" of "</"
699                                    column => $self->{column_prev} - 1};                                    #column => $self->{column_prev} - 1,
700                                     };
701          ## $self->{next_char} is intentionally left as is          ## $self->{next_char} is intentionally left as is
702          redo A;          redo A;
703        }        }
# Line 1425  sub _get_next_token ($) { Line 1436  sub _get_next_token ($) {
1436      } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {      } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {
1437        ## (only happen if PCDATA state)        ## (only happen if PCDATA state)
1438    
1439        my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1);        #my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1);
1440    
1441        my @next_char;        my @next_char;
1442        push @next_char, $self->{next_char};        push @next_char, $self->{next_char};
# Line 1436  sub _get_next_token ($) { Line 1447  sub _get_next_token ($) {
1447          if ($self->{next_char} == 0x002D) { # -          if ($self->{next_char} == 0x002D) { # -
1448            !!!cp (127);            !!!cp (127);
1449            $self->{current_token} = {type => COMMENT_TOKEN, data => '',            $self->{current_token} = {type => COMMENT_TOKEN, data => '',
1450                                      line => $l, column => $c};                                      #line => $l, column => $c,
1451                                       };
1452            $self->{state} = COMMENT_START_STATE;            $self->{state} = COMMENT_START_STATE;
1453            !!!next-input-character;            !!!next-input-character;
1454            redo A;            redo A;
# Line 1474  sub _get_next_token ($) { Line 1486  sub _get_next_token ($) {
1486                      $self->{state} = DOCTYPE_STATE;                      $self->{state} = DOCTYPE_STATE;
1487                      $self->{current_token} = {type => DOCTYPE_TOKEN,                      $self->{current_token} = {type => DOCTYPE_TOKEN,
1488                                                quirks => 1,                                                quirks => 1,
1489                                                line => $l, column => $c};                                                #line => $l, column => $c,
1490                                                 };
1491                      !!!next-input-character;                      !!!next-input-character;
1492                      redo A;                      redo A;
1493                    } else {                    } else {
# Line 1504  sub _get_next_token ($) { Line 1517  sub _get_next_token ($) {
1517        !!!back-next-input-character (@next_char);        !!!back-next-input-character (@next_char);
1518        $self->{state} = BOGUS_COMMENT_STATE;        $self->{state} = BOGUS_COMMENT_STATE;
1519        $self->{current_token} = {type => COMMENT_TOKEN, data => '',        $self->{current_token} = {type => COMMENT_TOKEN, data => '',
1520                                  line => $l, column => $c};                                  #line => $l, column => $c,
1521                                   };
1522        redo A;        redo A;
1523                
1524        ## ISSUE: typos in spec: chacacters, is is a parse error        ## ISSUE: typos in spec: chacacters, is is a parse error
# Line 2302  sub _tokenize_attempt_to_consume_an_enti Line 2316  sub _tokenize_attempt_to_consume_an_enti
2316          }          }
2317    
2318          return {type => CHARACTER_TOKEN, data => chr $code,          return {type => CHARACTER_TOKEN, data => chr $code,
2319                  has_reference => 1, line => $l, column => $c};                  has_reference => 1,
2320                    #line => $l, column => $c,
2321                   };
2322        } # X        } # X
2323      } elsif (0x0030 <= $self->{next_char} and      } elsif (0x0030 <= $self->{next_char} and
2324               $self->{next_char} <= 0x0039) { # 0..9               $self->{next_char} <= 0x0039) { # 0..9
# Line 2345  sub _tokenize_attempt_to_consume_an_enti Line 2361  sub _tokenize_attempt_to_consume_an_enti
2361        }        }
2362                
2363        return {type => CHARACTER_TOKEN, data => chr $code, has_reference => 1,        return {type => CHARACTER_TOKEN, data => chr $code, has_reference => 1,
2364                line => $l, column => $c};                #line => $l, column => $c,
2365                 };
2366      } else {      } else {
2367        !!!cp (1019);        !!!cp (1019);
2368        !!!parse-error (type => 'bare nero', line => $l, column => $c);        !!!parse-error (type => 'bare nero', line => $l, column => $c);
# Line 2399  sub _tokenize_attempt_to_consume_an_enti Line 2416  sub _tokenize_attempt_to_consume_an_enti
2416      if ($match > 0) {      if ($match > 0) {
2417        !!!cp (1023);        !!!cp (1023);
2418        return {type => CHARACTER_TOKEN, data => $value, has_reference => 1,        return {type => CHARACTER_TOKEN, data => $value, has_reference => 1,
2419                line => $l, column => $c};                #line => $l, column => $c,
2420                 };
2421      } elsif ($match < 0) {      } elsif ($match < 0) {
2422        !!!parse-error (type => 'no refc', line => $l, column => $c);        !!!parse-error (type => 'no refc', line => $l, column => $c);
2423        if ($in_attr and $match < -1) {        if ($in_attr and $match < -1) {
2424          !!!cp (1024);          !!!cp (1024);
2425          return {type => CHARACTER_TOKEN, data => '&'.$entity_name,          return {type => CHARACTER_TOKEN, data => '&'.$entity_name,
2426                  line => $l, column => $c};                  #line => $l, column => $c,
2427                   };
2428        } else {        } else {
2429          !!!cp (1025);          !!!cp (1025);
2430          return {type => CHARACTER_TOKEN, data => $value, has_reference => 1,          return {type => CHARACTER_TOKEN, data => $value, has_reference => 1,
2431                  line => $l, column => $c};                  #line => $l, column => $c,
2432                   };
2433        }        }
2434      } else {      } else {
2435        !!!cp (1026);        !!!cp (1026);
2436        !!!parse-error (type => 'bare ero', line => $l, column => $c);        !!!parse-error (type => 'bare ero', line => $l, column => $c);
2437        ## NOTE: "No characters are consumed" in the spec.        ## NOTE: "No characters are consumed" in the spec.
2438        return {type => CHARACTER_TOKEN, data => '&'.$value,        return {type => CHARACTER_TOKEN, data => '&'.$value,
2439                line => $l, column => $c};                #line => $l, column => $c,
2440                 };
2441      }      }
2442    } else {    } else {
2443      !!!cp (1027);      !!!cp (1027);
# Line 2727  sub _tree_construction_root_element ($) Line 2748  sub _tree_construction_root_element ($)
2748              !!!cp ('t24');              !!!cp ('t24');
2749              $self->{application_cache_selection}              $self->{application_cache_selection}
2750                  ->($token->{attributes}->{manifest}->{value});                  ->($token->{attributes}->{manifest}->{value});
2751              ## ISSUE: No relative reference resolution?              ## ISSUE: Spec is unclear on relative references.
2752                ## According to Hixie (#whatwg 2008-03-19), it should be
2753                ## resolved against the base URI of the document in HTML
2754                ## or xml:base of the element in XHTML.
2755            } else {            } else {
2756              !!!cp ('t25');              !!!cp ('t25');
2757              $self->{application_cache_selection}->(undef);              $self->{application_cache_selection}->(undef);
# Line 5908  sub _tree_construction_main ($) { Line 5932  sub _tree_construction_main ($) {
5932            if ($prompt_attr) {            if ($prompt_attr) {
5933              !!!cp ('t390');              !!!cp ('t390');
5934              push @tokens, {type => CHARACTER_TOKEN, data => $prompt_attr->{value},              push @tokens, {type => CHARACTER_TOKEN, data => $prompt_attr->{value},
5935                             line => $token->{line}, column => $token->{column}};                             #line => $token->{line}, column => $token->{column},
5936                              };
5937            } else {            } else {
5938              !!!cp ('t391');              !!!cp ('t391');
5939              push @tokens, {type => CHARACTER_TOKEN,              push @tokens, {type => CHARACTER_TOKEN,
5940                             data => 'This is a searchable index. Insert your search keywords here: ',                             data => 'This is a searchable index. Insert your search keywords here: ',
5941                             line => $token->{line}, column => $token->{column}}; # SHOULD                             #line => $token->{line}, column => $token->{column},
5942                              }; # SHOULD
5943              ## TODO: make this configurable              ## TODO: make this configurable
5944            }            }
5945            push @tokens,            push @tokens,

Legend:
Removed from v.1.117  
changed lines
  Added in v.1.118

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24