/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.26 by wakaba, Thu Jul 2 21:42:43 2009 UTC revision 1.31 by wakaba, Sat Sep 5 09:26:55 2009 UTC
# Line 105  sub COMMENT_START_STATE () { 14 } Line 105  sub COMMENT_START_STATE () { 14 }
105  sub COMMENT_START_DASH_STATE () { 15 }  sub COMMENT_START_DASH_STATE () { 15 }
106  sub COMMENT_STATE () { 16 }  sub COMMENT_STATE () { 16 }
107  sub COMMENT_END_STATE () { 17 }  sub COMMENT_END_STATE () { 17 }
108    sub COMMENT_END_BANG_STATE () { 102 } ## LAST
109  sub COMMENT_END_DASH_STATE () { 18 }  sub COMMENT_END_DASH_STATE () { 18 }
110  sub BOGUS_COMMENT_STATE () { 19 }  sub BOGUS_COMMENT_STATE () { 19 }
111  sub DOCTYPE_STATE () { 20 }  sub DOCTYPE_STATE () { 20 }
# Line 1248  sub _get_next_token ($) { Line 1249  sub _get_next_token ($) {
1249          if ({          if ({
1250               0x0022 => 1, # "               0x0022 => 1, # "
1251               0x0027 => 1, # '               0x0027 => 1, # '
1252                 0x003C => 1, # <
1253               0x003D => 1, # =               0x003D => 1, # =
1254              }->{$self->{nc}}) {              }->{$self->{nc}}) {
1255                        
# Line 1430  sub _get_next_token ($) { Line 1432  sub _get_next_token ($) {
1432    
1433          redo A;          redo A;
1434        } else {        } else {
1435          if ($self->{nc} == 0x0022 or # "          if ({
1436              $self->{nc} == 0x0027) { # '               0x0022 => 1, # "
1437                 0x0027 => 1, # '
1438                 0x003C => 1, # <
1439                }->{$self->{nc}}) {
1440                        
1441            ## XML5: Not a parse error.            ## XML5: Not a parse error.
1442            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');
# Line 1602  sub _get_next_token ($) { Line 1607  sub _get_next_token ($) {
1607                        
1608          }          }
1609    
1610          if ($self->{nc} == 0x0022 or # "          if ({
1611              $self->{nc} == 0x0027) { # '               0x0022 => 1, # "
1612                 0x0027 => 1, # '
1613                 0x003C => 1, # <
1614                }->{$self->{nc}}) {
1615                        
1616            ## XML5: Not a parse error.            ## XML5: Not a parse error.
1617            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');
# Line 2935  sub _get_next_token ($) { Line 2943  sub _get_next_token ($) {
2943        
2944          redo A;          redo A;
2945        }        }
2946      } elsif ($self->{state} == COMMENT_END_STATE) {      } elsif ($self->{state} == COMMENT_END_STATE or
2947                 $self->{state} == COMMENT_END_BANG_STATE) {
2948        ## XML5: "Comment end state" and "DOCTYPE comment end state".        ## XML5: "Comment end state" and "DOCTYPE comment end state".
2949          ## (No comment end bang state.)
2950    
2951        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2952          if ($self->{in_subset}) {          if ($self->{in_subset}) {
# Line 2963  sub _get_next_token ($) { Line 2973  sub _get_next_token ($) {
2973    
2974          redo A;          redo A;
2975        } elsif ($self->{nc} == 0x002D) { # -        } elsif ($self->{nc} == 0x002D) { # -
2976            if ($self->{state} == COMMENT_END_BANG_STATE) {
2977              
2978              $self->{ct}->{data} .= '--!'; # comment
2979              $self->{state} = COMMENT_END_DASH_STATE;
2980            } else {
2981              
2982              ## XML5: Not a parse error.
2983              $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',
2984                              line => $self->{line_prev},
2985                              column => $self->{column_prev});
2986              $self->{ct}->{data} .= '-'; # comment
2987              ## Stay in the state
2988            }
2989                    
2990          ## XML5: Not a parse error.      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2991          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',        $self->{line_prev} = $self->{line};
2992                          line => $self->{line_prev},        $self->{column_prev} = $self->{column};
2993                          column => $self->{column_prev});        $self->{column}++;
2994          $self->{ct}->{data} .= '-'; # comment        $self->{nc}
2995          ## Stay in the state            = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
2996        } else {
2997          $self->{set_nc}->($self);
2998        }
2999      
3000            redo A;
3001          } elsif ($self->{nc} == 0x0021 and # !
3002                   $self->{state} != COMMENT_END_BANG_STATE) {
3003            $self->{parse_error}->(level => $self->{level}->{must}, type => 'comment end bang'); # XXX error type
3004            $self->{state} = COMMENT_END_BANG_STATE;
3005                    
3006      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3007        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2992  sub _get_next_token ($) { Line 3024  sub _get_next_token ($) {
3024            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
3025            $self->{s_kwd} = '';            $self->{s_kwd} = '';
3026          }          }
3027          ## reconsume          ## Reconsume.
3028    
3029          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
3030    
3031          redo A;          redo A;
3032        } else {        } else {
3033                    
3034          ## XML5: Not a parse error.          if ($self->{state} == COMMENT_END_BANG_STATE) {
3035          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',            $self->{ct}->{data} .= '--!' . chr ($self->{nc}); # comment
3036                          line => $self->{line_prev},          } else {
3037                          column => $self->{column_prev});            $self->{ct}->{data} .= '--' . chr ($self->{nc}); # comment
3038          $self->{ct}->{data} .= '--' . chr ($self->{nc}); # comment          }
3039          $self->{state} = COMMENT_STATE;          $self->{state} = COMMENT_STATE;
3040                    
3041      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3034  sub _get_next_token ($) { Line 3066  sub _get_next_token ($) {
3066      }      }
3067        
3068          redo A;          redo A;
3069          } elsif ($self->{nc} == -1) {
3070            
3071            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3072            $self->{ct}->{quirks} = 1;
3073    
3074            $self->{state} = DATA_STATE;
3075            ## Reconsume.
3076            return  ($self->{ct}); # DOCTYPE (quirks)
3077    
3078            redo A;
3079        } else {        } else {
3080                    
3081          ## XML5: Unless EOF, swith to the bogus comment state.          ## XML5: Swith to the bogus comment state.
3082          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before DOCTYPE name');
3083          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
3084          ## reconsume          ## reconsume
# Line 3081  sub _get_next_token ($) { Line 3123  sub _get_next_token ($) {
3123          return  ($self->{ct}); # DOCTYPE (quirks)          return  ($self->{ct}); # DOCTYPE (quirks)
3124    
3125          redo A;          redo A;
3126          } elsif (0x0041 <= $self->{nc} and $self->{nc} <= 0x005A) { # A..Z
3127            
3128            $self->{ct}->{name} # DOCTYPE
3129                = chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020));
3130            delete $self->{ct}->{quirks};
3131            $self->{state} = DOCTYPE_NAME_STATE;
3132            
3133        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3134          $self->{line_prev} = $self->{line};
3135          $self->{column_prev} = $self->{column};
3136          $self->{column}++;
3137          $self->{nc}
3138              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3139        } else {
3140          $self->{set_nc}->($self);
3141        }
3142      
3143            redo A;
3144        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3145                    
3146          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');
# Line 3167  sub _get_next_token ($) { Line 3227  sub _get_next_token ($) {
3227          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
3228    
3229          redo A;          redo A;
3230          } elsif (0x0041 <= $self->{nc} and $self->{nc} <= 0x005A) { # A..Z
3231            
3232            $self->{ct}->{name} # DOCTYPE
3233                .= chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020));
3234            delete $self->{ct}->{quirks};
3235            ## Stay in the state.
3236            
3237        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3238          $self->{line_prev} = $self->{line};
3239          $self->{column_prev} = $self->{column};
3240          $self->{column}++;
3241          $self->{nc}
3242              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3243        } else {
3244          $self->{set_nc}->($self);
3245        }
3246      
3247            redo A;
3248        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3249                    
3250          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
# Line 3198  sub _get_next_token ($) { Line 3276  sub _get_next_token ($) {
3276          redo A;          redo A;
3277        } else {        } else {
3278                    
3279          $self->{ct}->{name}          $self->{ct}->{name} .= chr ($self->{nc}); # DOCTYPE
3280            .= chr ($self->{nc}); # DOCTYPE          ## Stay in the state.
         ## Stay in the state  
3281                    
3282      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3283        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};

Legend:
Removed from v.1.26  
changed lines
  Added in v.1.31

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24