/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm.src
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm.src

Parent Directory | Revision Log | View Patch

Left: revision 1.21 by wakaba, Sun Oct 19 09:25:21 2008 UTC | Right: revision 1.24 by wakaba, Sun Oct 19 14:05:20 2008 UTC
# Line 3078  sub _get_next_token ($) { Line 3078  sub _get_next_token ($) {
3078              0x003C => 1, 0x0026 => 1, -1 => 1, # <, &              0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
3079              $self->{entity_add} => 1,              $self->{entity_add} => 1,
3080            }->{$self->{nc}}) {            }->{$self->{nc}}) {
3081          !!!cp (1001);          if ($self->{is_xml}) {
3082              !!!cp (1001.1);
3083              !!!parse-error (type => 'bare ero',
3084                              line => $self->{line_prev},
3085                              column => $self->{column_prev}
3086                                  + ($self->{nc} == -1 ? 1 : 0));
3087            } else {
3088              !!!cp (1001);
3089              ## No error
3090            }
3091          ## Don't consume          ## Don't consume
         ## No error  
3092          ## Return nothing.          ## Return nothing.
3093          #          #
3094        } elsif ($self->{nc} == 0x0023) { # #        } elsif ($self->{nc} == 0x0023) { # #
# Line 3089  sub _get_next_token ($) { Line 3097  sub _get_next_token ($) {
3097          $self->{kwd} = '#';          $self->{kwd} = '#';
3098          !!!next-input-character;          !!!next-input-character;
3099          redo A;          redo A;
3100        } elsif ((0x0041 <= $self->{nc} and        } elsif ($self->{is_xml} or
3101                   (0x0041 <= $self->{nc} and
3102                  $self->{nc} <= 0x005A) or # A..Z                  $self->{nc} <= 0x005A) or # A..Z
3103                 (0x0061 <= $self->{nc} and                 (0x0061 <= $self->{nc} and
3104                  $self->{nc} <= 0x007A)) { # a..z                  $self->{nc} <= 0x007A)) { # a..z
# Line 3361  sub _get_next_token ($) { Line 3370  sub _get_next_token ($) {
3370             $self->{nc} <= 0x007A) or # z             $self->{nc} <= 0x007A) or # z
3371            (0x0030 <= $self->{nc} and # 0            (0x0030 <= $self->{nc} and # 0
3372             $self->{nc} <= 0x0039) or # 9             $self->{nc} <= 0x0039) or # 9
3373            $self->{nc} == 0x003B) { # ;            $self->{nc} == 0x003B or # ;
3374              ($self->{is_xml} and
3375               not ($is_space->{$self->{nc}} or
3376                    {
3377                      0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
3378                      $self->{entity_add} => 1,
3379                    }->{$self->{nc}}))) {
3380          our $EntityChar;          our $EntityChar;
3381          $self->{kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
3382          if (defined $EntityChar->{$self->{kwd}} or          if (defined $EntityChar->{$self->{kwd}} or
# Line 3644  sub _get_next_token ($) { Line 3659  sub _get_next_token ($) {
3659          ## XML5: Not defined yet.          ## XML5: Not defined yet.
3660    
3661          ## TODO:          ## TODO:
3662    
3663            if (not $self->{stop_processing} and
3664                not $self->{document}->xml_standalone) {
3665              !!!parse-error (type => 'stop processing', ## TODO: type
3666                              level => $self->{level}->{info});
3667              $self->{stop_processing} = 1;
3668            }
3669    
3670          !!!next-input-character;          !!!next-input-character;
3671          redo A;          redo A;
3672        } elsif ($self->{nc} == 0x005D) { # ]        } elsif ($self->{nc} == 0x005D) { # ]
# Line 3878  sub _get_next_token ($) { Line 3901  sub _get_next_token ($) {
3901          }          }
3902          $self->{ct} = {type => ELEMENT_TOKEN, name => '',          $self->{ct} = {type => ELEMENT_TOKEN, name => '',
3903                         line => $self->{line_prev},                         line => $self->{line_prev},
3904                         column => $self->{column_prev} - 6};                         column => $self->{column_prev} - 7};
3905          $self->{state} = DOCTYPE_MD_STATE;          $self->{state} = DOCTYPE_MD_STATE;
3906          !!!next-input-character;          !!!next-input-character;
3907          redo A;          redo A;
# Line 3926  sub _get_next_token ($) { Line 3949  sub _get_next_token ($) {
3949          $self->{ct} = {type => ATTLIST_TOKEN, name => '',          $self->{ct} = {type => ATTLIST_TOKEN, name => '',
3950                         attrdefs => [],                         attrdefs => [],
3951                         line => $self->{line_prev},                         line => $self->{line_prev},
3952                         column => $self->{column_prev} - 6};                         column => $self->{column_prev} - 7};
3953          $self->{state} = DOCTYPE_MD_STATE;          $self->{state} = DOCTYPE_MD_STATE;
3954          !!!next-input-character;          !!!next-input-character;
3955          redo A;          redo A;
# Line 3975  sub _get_next_token ($) { Line 3998  sub _get_next_token ($) {
3998          }          }
3999          $self->{ct} = {type => NOTATION_TOKEN, name => '',          $self->{ct} = {type => NOTATION_TOKEN, name => '',
4000                         line => $self->{line_prev},                         line => $self->{line_prev},
4001                         column => $self->{column_prev} - 6};                         column => $self->{column_prev} - 8};
4002          $self->{state} = DOCTYPE_MD_STATE;          $self->{state} = DOCTYPE_MD_STATE;
4003          !!!next-input-character;          !!!next-input-character;
4004          redo A;          redo A;
# Line 4777  sub _get_next_token ($) { Line 4800  sub _get_next_token ($) {
4800          redo A;          redo A;
4801        }        }
4802      } elsif ($self->{state} == ENTITY_VALUE_ENTITY_STATE) {      } elsif ($self->{state} == ENTITY_VALUE_ENTITY_STATE) {
       ## TODO: XMLize  
   
4803        if ($is_space->{$self->{nc}} or        if ($is_space->{$self->{nc}} or
4804            {            {
4805              0x003C => 1, 0x0026 => 1, -1 => 1, # <, &              0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
4806              $self->{entity_add} => 1,              $self->{entity_add} => 1,
4807            }->{$self->{nc}}) {            }->{$self->{nc}}) {
4808            !!!parse-error (type => 'bare ero',
4809                            line => $self->{line_prev},
4810                            column => $self->{column_prev}
4811                                + ($self->{nc} == -1 ? 1 : 0));
4812          ## Don't consume          ## Don't consume
         ## No error  
4813          ## Return nothing.          ## Return nothing.
4814          #          #
4815        } elsif ($self->{nc} == 0x0023) { # #        } elsif ($self->{nc} == 0x0023) { # #
# Line 4794  sub _get_next_token ($) { Line 4818  sub _get_next_token ($) {
4818          $self->{kwd} = '#';          $self->{kwd} = '#';
4819          !!!next-input-character;          !!!next-input-character;
4820          redo A;          redo A;
       } elsif ((0x0041 <= $self->{nc} and  
                 $self->{nc} <= 0x005A) or # A..Z  
                (0x0061 <= $self->{nc} and  
                 $self->{nc} <= 0x007A)) { # a..z  
         #  
4821        } else {        } else {
         !!!parse-error (type => 'bare ero');  
         ## Return nothing.  
4822          #          #
4823        }        }
4824    

Legend:
- Removed from v.1.21
- Changed lines
- Added in v.1.24

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24