/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.30 by wakaba, Sun Aug 16 05:24:47 2009 UTC revision 1.32 by wakaba, Sat Sep 5 09:57:55 2009 UTC
# Line 105  sub COMMENT_START_STATE () { 14 } Line 105  sub COMMENT_START_STATE () { 14 }
105  sub COMMENT_START_DASH_STATE () { 15 }  sub COMMENT_START_DASH_STATE () { 15 }
106  sub COMMENT_STATE () { 16 }  sub COMMENT_STATE () { 16 }
107  sub COMMENT_END_STATE () { 17 }  sub COMMENT_END_STATE () { 17 }
108    sub COMMENT_END_BANG_STATE () { 102 }
109    sub COMMENT_END_SPACE_STATE () { 103 } ## LAST
110  sub COMMENT_END_DASH_STATE () { 18 }  sub COMMENT_END_DASH_STATE () { 18 }
111  sub BOGUS_COMMENT_STATE () { 19 }  sub BOGUS_COMMENT_STATE () { 19 }
112  sub DOCTYPE_STATE () { 20 }  sub DOCTYPE_STATE () { 20 }
# Line 2942  sub _get_next_token ($) { Line 2944  sub _get_next_token ($) {
2944        
2945          redo A;          redo A;
2946        }        }
2947      } elsif ($self->{state} == COMMENT_END_STATE) {      } elsif ($self->{state} == COMMENT_END_STATE or
2948                 $self->{state} == COMMENT_END_BANG_STATE) {
2949        ## XML5: "Comment end state" and "DOCTYPE comment end state".        ## XML5: "Comment end state" and "DOCTYPE comment end state".
2950          ## (No comment end bang state.)
2951    
2952        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2953          if ($self->{in_subset}) {          if ($self->{in_subset}) {
# Line 2970  sub _get_next_token ($) { Line 2974  sub _get_next_token ($) {
2974    
2975          redo A;          redo A;
2976        } elsif ($self->{nc} == 0x002D) { # -        } elsif ($self->{nc} == 0x002D) { # -
2977            if ($self->{state} == COMMENT_END_BANG_STATE) {
2978              
2979              $self->{ct}->{data} .= '--!'; # comment
2980              $self->{state} = COMMENT_END_DASH_STATE;
2981            } else {
2982              
2983              ## XML5: Not a parse error.
2984              $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',
2985                              line => $self->{line_prev},
2986                              column => $self->{column_prev});
2987              $self->{ct}->{data} .= '-'; # comment
2988              ## Stay in the state
2989            }
2990                    
2991          ## XML5: Not a parse error.      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2992          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',        $self->{line_prev} = $self->{line};
2993                          line => $self->{line_prev},        $self->{column_prev} = $self->{column};
2994                          column => $self->{column_prev});        $self->{column}++;
2995          $self->{ct}->{data} .= '-'; # comment        $self->{nc}
2996          ## Stay in the state            = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
2997        } else {
2998          $self->{set_nc}->($self);
2999        }
3000      
3001            redo A;
3002          } elsif ($self->{state} != COMMENT_END_BANG_STATE and
3003                   $is_space->{$self->{nc}}) {
3004            
3005            $self->{parse_error}->(level => $self->{level}->{must}, type => 'comment end space'); # XXX error type
3006            $self->{ct}->{data} .= '--' . chr ($self->{nc}); # comment
3007            $self->{state} = COMMENT_END_SPACE_STATE;
3008            
3009        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3010          $self->{line_prev} = $self->{line};
3011          $self->{column_prev} = $self->{column};
3012          $self->{column}++;
3013          $self->{nc}
3014              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3015        } else {
3016          $self->{set_nc}->($self);
3017        }
3018      
3019            redo A;
3020          } elsif ($self->{state} != COMMENT_END_BANG_STATE and
3021                   $self->{nc} == 0x0021) { # !
3022            
3023            $self->{parse_error}->(level => $self->{level}->{must}, type => 'comment end bang'); # XXX error type
3024            $self->{state} = COMMENT_END_BANG_STATE;
3025                    
3026      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3027        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2999  sub _get_next_token ($) { Line 3044  sub _get_next_token ($) {
3044            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
3045            $self->{s_kwd} = '';            $self->{s_kwd} = '';
3046          }          }
3047          ## reconsume          ## Reconsume.
3048    
3049          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
3050    
3051          redo A;          redo A;
3052        } else {        } else {
3053                    
3054          $self->{ct}->{data} .= '--' . chr ($self->{nc}); # comment          if ($self->{state} == COMMENT_END_BANG_STATE) {
3055              $self->{ct}->{data} .= '--!' . chr ($self->{nc}); # comment
3056            } else {
3057              $self->{ct}->{data} .= '--' . chr ($self->{nc}); # comment
3058            }
3059          $self->{state} = COMMENT_STATE;          $self->{state} = COMMENT_STATE;
3060                    
3061      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3021  sub _get_next_token ($) { Line 3070  sub _get_next_token ($) {
3070        
3071          redo A;          redo A;
3072        }        }
3073        } elsif ($self->{state} == COMMENT_END_SPACE_STATE) {
3074          ## XML5: Not exist.
3075    
3076          if ($self->{nc} == 0x003E) { # >
3077            if ($self->{in_subset}) {
3078              
3079              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3080            } else {
3081              
3082              $self->{state} = DATA_STATE;
3083              $self->{s_kwd} = '';
3084            }
3085            
3086        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3087          $self->{line_prev} = $self->{line};
3088          $self->{column_prev} = $self->{column};
3089          $self->{column}++;
3090          $self->{nc}
3091              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3092        } else {
3093          $self->{set_nc}->($self);
3094        }
3095      
3096    
3097            return  ($self->{ct}); # comment
3098    
3099            redo A;
3100          } elsif ($is_space->{$self->{nc}}) {
3101            
3102            $self->{ct}->{data} .= chr ($self->{nc}); # comment
3103            ## Stay in the state.
3104            
3105        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3106          $self->{line_prev} = $self->{line};
3107          $self->{column_prev} = $self->{column};
3108          $self->{column}++;
3109          $self->{nc}
3110              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3111        } else {
3112          $self->{set_nc}->($self);
3113        }
3114      
3115            redo A;
3116          } elsif ($self->{nc} == -1) {
3117            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
3118            if ($self->{in_subset}) {
3119              
3120              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3121            } else {
3122              
3123              $self->{state} = DATA_STATE;
3124              $self->{s_kwd} = '';
3125            }
3126            ## Reconsume.
3127    
3128            return  ($self->{ct}); # comment
3129    
3130            redo A;
3131          } else {
3132            
3133            $self->{ct}->{data} .= chr ($self->{nc}); # comment
3134            $self->{state} = COMMENT_STATE;
3135            
3136        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3137          $self->{line_prev} = $self->{line};
3138          $self->{column_prev} = $self->{column};
3139          $self->{column}++;
3140          $self->{nc}
3141              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3142        } else {
3143          $self->{set_nc}->($self);
3144        }
3145      
3146            redo A;
3147          }
3148      } elsif ($self->{state} == DOCTYPE_STATE) {      } elsif ($self->{state} == DOCTYPE_STATE) {
3149        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
3150                    

Legend:
Removed from v.1.30  
changed lines
  Added in v.1.32

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24