/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm

Parent Directory | Revision Log | View Patch

revision 1.26 by wakaba, Thu Jul 2 21:42:43 2009 UTC revision 1.30 by wakaba, Sun Aug 16 05:24:47 2009 UTC
# Line 1248  sub _get_next_token ($) { Line 1248  sub _get_next_token ($) {
1248          if ({          if ({
1249               0x0022 => 1, # "               0x0022 => 1, # "
1250               0x0027 => 1, # '               0x0027 => 1, # '
1251                 0x003C => 1, # <
1252               0x003D => 1, # =               0x003D => 1, # =
1253              }->{$self->{nc}}) {              }->{$self->{nc}}) {
1254                        
# Line 1430  sub _get_next_token ($) { Line 1431  sub _get_next_token ($) {
1431    
1432          redo A;          redo A;
1433        } else {        } else {
1434          if ($self->{nc} == 0x0022 or # "          if ({
1435              $self->{nc} == 0x0027) { # '               0x0022 => 1, # "
1436                 0x0027 => 1, # '
1437                 0x003C => 1, # <
1438                }->{$self->{nc}}) {
1439                        
1440            ## XML5: Not a parse error.            ## XML5: Not a parse error.
1441            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');
# Line 1602  sub _get_next_token ($) { Line 1606  sub _get_next_token ($) {
1606                        
1607          }          }
1608    
1609          if ($self->{nc} == 0x0022 or # "          if ({
1610              $self->{nc} == 0x0027) { # '               0x0022 => 1, # "
1611                 0x0027 => 1, # '
1612                 0x003C => 1, # <
1613                }->{$self->{nc}}) {
1614                        
1615            ## XML5: Not a parse error.            ## XML5: Not a parse error.
1616            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');
# Line 2999  sub _get_next_token ($) { Line 3006  sub _get_next_token ($) {
3006          redo A;          redo A;
3007        } else {        } else {
3008                    
         ## XML5: Not a parse error.  
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',  
                         line => $self->{line_prev},  
                         column => $self->{column_prev});  
3009          $self->{ct}->{data} .= '--' . chr ($self->{nc}); # comment          $self->{ct}->{data} .= '--' . chr ($self->{nc}); # comment
3010          $self->{state} = COMMENT_STATE;          $self->{state} = COMMENT_STATE;
3011                    
# Line 3034  sub _get_next_token ($) { Line 3037  sub _get_next_token ($) {
3037      }      }
3038        
3039          redo A;          redo A;
3040          } elsif ($self->{nc} == -1) {
3041            
3042            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3043            $self->{ct}->{quirks} = 1;
3044    
3045            $self->{state} = DATA_STATE;
3046            ## Reconsume.
3047            return  ($self->{ct}); # DOCTYPE (quirks)
3048    
3049            redo A;
3050        } else {        } else {
3051                    
3052          ## XML5: Unless EOF, swith to the bogus comment state.          ## XML5: Swith to the bogus comment state.
3053          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before DOCTYPE name');
3054          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
3055          ## reconsume          ## reconsume
# Line 3081  sub _get_next_token ($) { Line 3094  sub _get_next_token ($) {
3094          return  ($self->{ct}); # DOCTYPE (quirks)          return  ($self->{ct}); # DOCTYPE (quirks)
3095    
3096          redo A;          redo A;
3097          } elsif (0x0041 <= $self->{nc} and $self->{nc} <= 0x005A) { # A..Z
3098            
3099            $self->{ct}->{name} # DOCTYPE
3100                = chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020));
3101            delete $self->{ct}->{quirks};
3102            $self->{state} = DOCTYPE_NAME_STATE;
3103            
3104        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3105          $self->{line_prev} = $self->{line};
3106          $self->{column_prev} = $self->{column};
3107          $self->{column}++;
3108          $self->{nc}
3109              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3110        } else {
3111          $self->{set_nc}->($self);
3112        }
3113      
3114            redo A;
3115        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3116                    
3117          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');
# Line 3167  sub _get_next_token ($) { Line 3198  sub _get_next_token ($) {
3198          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
3199    
3200          redo A;          redo A;
3201          } elsif (0x0041 <= $self->{nc} and $self->{nc} <= 0x005A) { # A..Z
3202            
3203            $self->{ct}->{name} # DOCTYPE
3204                .= chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020));
3205            delete $self->{ct}->{quirks};
3206            ## Stay in the state.
3207            
3208        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3209          $self->{line_prev} = $self->{line};
3210          $self->{column_prev} = $self->{column};
3211          $self->{column}++;
3212          $self->{nc}
3213              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3214        } else {
3215          $self->{set_nc}->($self);
3216        }
3217      
3218            redo A;
3219        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3220                    
3221          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
# Line 3198  sub _get_next_token ($) { Line 3247  sub _get_next_token ($) {
3247          redo A;          redo A;
3248        } else {        } else {
3249                    
3250          $self->{ct}->{name}          $self->{ct}->{name} .= chr ($self->{nc}); # DOCTYPE
3251            .= chr ($self->{nc}); # DOCTYPE          ## Stay in the state.
         ## Stay in the state  
3252                    
3253      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3254        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};

Legend:
Removed from v.1.26  
changed lines
  Added in v.1.30

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24