/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm

Parent Directory | Revision Log | View Patch

Left column: revision 1.26 by wakaba, Thu Jul 2 21:42:43 2009 UTC | Right column: revision 1.34 by wakaba, Sat Sep 5 11:31:58 2009 UTC
# Line 105  sub COMMENT_START_STATE () { 14 } Line 105  sub COMMENT_START_STATE () { 14 }
105  sub COMMENT_START_DASH_STATE () { 15 }  sub COMMENT_START_DASH_STATE () { 15 }
106  sub COMMENT_STATE () { 16 }  sub COMMENT_STATE () { 16 }
107  sub COMMENT_END_STATE () { 17 }  sub COMMENT_END_STATE () { 17 }
108    sub COMMENT_END_BANG_STATE () { 102 }
109    sub COMMENT_END_SPACE_STATE () { 103 } ## LAST
110  sub COMMENT_END_DASH_STATE () { 18 }  sub COMMENT_END_DASH_STATE () { 18 }
111  sub BOGUS_COMMENT_STATE () { 19 }  sub BOGUS_COMMENT_STATE () { 19 }
112  sub DOCTYPE_STATE () { 20 }  sub DOCTYPE_STATE () { 20 }
# Line 204  sub FOREIGN_EL () { 0b1_00000000000 } Line 206  sub FOREIGN_EL () { 0b1_00000000000 }
206  ## Character reference mappings  ## Character reference mappings
207    
208  my $charref_map = {  my $charref_map = {
209      0x00 => 0xFFFD, # REPLACEMENT CHARACTER
210    0x0D => 0x000A,    0x0D => 0x000A,
211    0x80 => 0x20AC,    0x80 => 0x20AC,
212    0x81 => 0xFFFD,    0x81 => 0x0081,
213    0x82 => 0x201A,    0x82 => 0x201A,
214    0x83 => 0x0192,    0x83 => 0x0192,
215    0x84 => 0x201E,    0x84 => 0x201E,
# Line 218  my $charref_map = { Line 221  my $charref_map = {
221    0x8A => 0x0160,    0x8A => 0x0160,
222    0x8B => 0x2039,    0x8B => 0x2039,
223    0x8C => 0x0152,    0x8C => 0x0152,
224    0x8D => 0xFFFD,    0x8D => 0x008D,
225    0x8E => 0x017D,    0x8E => 0x017D,
226    0x8F => 0xFFFD,    0x8F => 0x008F,
227    0x90 => 0xFFFD,    0x90 => 0x0090,
228    0x91 => 0x2018,    0x91 => 0x2018,
229    0x92 => 0x2019,    0x92 => 0x2019,
230    0x93 => 0x201C,    0x93 => 0x201C,
# Line 234  my $charref_map = { Line 237  my $charref_map = {
237    0x9A => 0x0161,    0x9A => 0x0161,
238    0x9B => 0x203A,    0x9B => 0x203A,
239    0x9C => 0x0153,    0x9C => 0x0153,
240    0x9D => 0xFFFD,    0x9D => 0x009D,
241    0x9E => 0x017E,    0x9E => 0x017E,
242    0x9F => 0x0178,    0x9F => 0x0178,
243  }; # $charref_map  }; # $charref_map
244  $charref_map->{$_} = 0xFFFD  $charref_map->{$_} = $_
245      for 0x0000..0x0008, 0x000B, 0x000E..0x001F, 0x007F,      for 0x0001..0x0008, 0x000B, 0x000E..0x001F, 0x007F,
246          0xD800..0xDFFF, 0xFDD0..0xFDDF, ## ISSUE: 0xFDEF          0xD800..0xDFFF, 0xFDD0..0xFDEF,
247          0xFFFE, 0xFFFF, 0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF,          0xFFFE, 0xFFFF, 0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF,
248          0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE,          0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE,
249          0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF,          0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF,
# Line 1100  sub _get_next_token ($) { Line 1103  sub _get_next_token ($) {
1103          $self->{s_kwd} = '';          $self->{s_kwd} = '';
1104          # reconsume          # reconsume
1105    
1106          return  ($self->{ct}); # start tag or end tag          ## Discard the token.
1107            #return  ($self->{ct}); # start tag or end tag
1108    
1109          redo A;          redo A;
1110        } elsif ($self->{nc} == 0x002F) { # /        } elsif ($self->{nc} == 0x002F) { # /
# Line 1241  sub _get_next_token ($) { Line 1245  sub _get_next_token ($) {
1245          $self->{s_kwd} = '';          $self->{s_kwd} = '';
1246          # reconsume          # reconsume
1247    
1248          return  ($self->{ct}); # start tag or end tag          ## Discard the token.
1249            #return  ($self->{ct}); # start tag or end tag
1250    
1251          redo A;          redo A;
1252        } else {        } else {
1253          if ({          if ({
1254               0x0022 => 1, # "               0x0022 => 1, # "
1255               0x0027 => 1, # '               0x0027 => 1, # '
1256                 0x003C => 1, # <
1257               0x003D => 1, # =               0x003D => 1, # =
1258              }->{$self->{nc}}) {              }->{$self->{nc}}) {
1259                        
# Line 1426  sub _get_next_token ($) { Line 1432  sub _get_next_token ($) {
1432          $self->{s_kwd} = '';          $self->{s_kwd} = '';
1433          # reconsume          # reconsume
1434    
1435          return  ($self->{ct}); # start tag or end tag          ## Discard the token.
1436            #return  ($self->{ct}); # start tag or end tag
1437    
1438          redo A;          redo A;
1439        } else {        } else {
1440          if ($self->{nc} == 0x0022 or # "          if ({
1441              $self->{nc} == 0x0027) { # '               0x0022 => 1, # "
1442                 0x0027 => 1, # '
1443                 0x003C => 1, # <
1444                }->{$self->{nc}}) {
1445                        
1446            ## XML5: Not a parse error.            ## XML5: Not a parse error.
1447            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');
# Line 1590  sub _get_next_token ($) { Line 1600  sub _get_next_token ($) {
1600          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1601          # reconsume          # reconsume
1602    
1603          return  ($self->{ct}); # start tag or end tag          ## Discard the token.
1604            #return  ($self->{ct}); # start tag or end tag
1605    
1606          redo A;          redo A;
1607        } else {        } else {
# Line 1602  sub _get_next_token ($) { Line 1613  sub _get_next_token ($) {
1613                        
1614          }          }
1615    
1616          if ($self->{nc} == 0x0022 or # "          if ({
1617              $self->{nc} == 0x0027) { # '               0x0022 => 1, # "
1618                 0x0027 => 1, # '
1619                 0x003C => 1, # <
1620                }->{$self->{nc}}) {
1621                        
1622            ## XML5: Not a parse error.            ## XML5: Not a parse error.
1623            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');
# Line 1736  sub _get_next_token ($) { Line 1750  sub _get_next_token ($) {
1750          $self->{s_kwd} = '';          $self->{s_kwd} = '';
1751          ## reconsume          ## reconsume
1752    
1753          return  ($self->{ct}); # start tag or end tag          ## Discard the token.
1754            #return  ($self->{ct}); # start tag or end tag
1755    
1756          redo A;          redo A;
1757        } else {        } else {
# Line 1857  sub _get_next_token ($) { Line 1872  sub _get_next_token ($) {
1872            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
1873            $self->{s_kwd} = '';            $self->{s_kwd} = '';
1874            ## reconsume            ## reconsume
1875            return  ($self->{ct}); # end tag  
1876              ## Discard the token.
1877              #return  ($self->{ct}); # end tag
1878    
1879            redo A;            redo A;
1880          } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {          } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1881            ## XML5: No parse error above; not defined yet.            ## XML5: No parse error above; not defined yet.
1882            push @{$self->{ct}->{attrdefs}}, $self->{ca};            push @{$self->{ct}->{attrdefs}}, $self->{ca};
1883            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1884            ## Reconsume.            ## Reconsume.
1885            return  ($self->{ct}); # ATTLIST  
1886              ## Discard the token.
1887              #return  ($self->{ct}); # ATTLIST
1888    
1889            redo A;            redo A;
1890          } else {          } else {
1891            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
# Line 1973  sub _get_next_token ($) { Line 1994  sub _get_next_token ($) {
1994            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
1995            $self->{s_kwd} = '';            $self->{s_kwd} = '';
1996            ## reconsume            ## reconsume
1997            return  ($self->{ct}); # start tag  
1998              ## Discard the token.
1999              #return  ($self->{ct}); # start tag
2000    
2001            redo A;            redo A;
2002          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
2003            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
# Line 1988  sub _get_next_token ($) { Line 2012  sub _get_next_token ($) {
2012            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
2013            $self->{s_kwd} = '';            $self->{s_kwd} = '';
2014            ## reconsume            ## reconsume
2015            return  ($self->{ct}); # end tag  
2016              ## Discard the token.
2017              #return  ($self->{ct}); # end tag
2018    
2019            redo A;            redo A;
2020          } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {          } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
2021            ## XML5: No parse error above; not defined yet.            ## XML5: No parse error above; not defined yet.
2022            push @{$self->{ct}->{attrdefs}}, $self->{ca};            push @{$self->{ct}->{attrdefs}}, $self->{ca};
2023            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2024            ## Reconsume.            ## Reconsume.
2025            return  ($self->{ct}); # ATTLIST  
2026              ## Discard the token.
2027              #return  ($self->{ct}); # ATTLIST
2028    
2029            redo A;            redo A;
2030          } else {          } else {
2031            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
# Line 2150  sub _get_next_token ($) { Line 2180  sub _get_next_token ($) {
2180            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
2181            $self->{s_kwd} = '';            $self->{s_kwd} = '';
2182            ## reconsume            ## reconsume
2183            return  ($self->{ct}); # start tag  
2184              ## Discard the token.
2185              #return  ($self->{ct}); # start tag
2186              
2187            redo A;            redo A;
2188          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
2189            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');
# Line 2166  sub _get_next_token ($) { Line 2199  sub _get_next_token ($) {
2199            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
2200            $self->{s_kwd} = '';            $self->{s_kwd} = '';
2201            ## reconsume            ## reconsume
2202            return  ($self->{ct}); # end tag  
2203              ## Discard the token.
2204              #return  ($self->{ct}); # end tag
2205    
2206            redo A;            redo A;
2207          } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {          } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
2208            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
2209            push @{$self->{ct}->{attrdefs}}, $self->{ca};            push @{$self->{ct}->{attrdefs}}, $self->{ca};
2210            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2211            ## Reconsume.            ## Reconsume.
2212            return  ($self->{ct}); # ATTLIST  
2213              ## Discard the token.
2214              #return  ($self->{ct}); # ATTLIST
2215    
2216            redo A;            redo A;
2217          } else {          } else {
2218            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
# Line 2293  sub _get_next_token ($) { Line 2332  sub _get_next_token ($) {
2332          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2333          $self->{s_kwd} = '';          $self->{s_kwd} = '';
2334          ## Reconsume.          ## Reconsume.
2335          return  ($self->{ct}); # start tag or end tag  
2336            ## Discard the token.
2337            #return  ($self->{ct}); # start tag or end tag
2338    
2339          redo A;          redo A;
2340        } else {        } else {
2341                    
# Line 2360  sub _get_next_token ($) { Line 2402  sub _get_next_token ($) {
2402          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2403          $self->{s_kwd} = '';          $self->{s_kwd} = '';
2404          ## Reconsume.          ## Reconsume.
2405          return  ($self->{ct}); # start tag or end tag  
2406            ## Discard the token.
2407            #return  ($self->{ct}); # start tag or end tag
2408    
2409          redo A;          redo A;
2410        } else {        } else {
2411                    
# Line 2935  sub _get_next_token ($) { Line 2980  sub _get_next_token ($) {
2980        
2981          redo A;          redo A;
2982        }        }
2983      } elsif ($self->{state} == COMMENT_END_STATE) {      } elsif ($self->{state} == COMMENT_END_STATE or
2984                 $self->{state} == COMMENT_END_BANG_STATE) {
2985        ## XML5: "Comment end state" and "DOCTYPE comment end state".        ## XML5: "Comment end state" and "DOCTYPE comment end state".
2986          ## (No comment end bang state.)
2987    
2988        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2989          if ($self->{in_subset}) {          if ($self->{in_subset}) {
# Line 2963  sub _get_next_token ($) { Line 3010  sub _get_next_token ($) {
3010    
3011          redo A;          redo A;
3012        } elsif ($self->{nc} == 0x002D) { # -        } elsif ($self->{nc} == 0x002D) { # -
3013            if ($self->{state} == COMMENT_END_BANG_STATE) {
3014              
3015              $self->{ct}->{data} .= '--!'; # comment
3016              $self->{state} = COMMENT_END_DASH_STATE;
3017            } else {
3018              
3019              ## XML5: Not a parse error.
3020              $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',
3021                              line => $self->{line_prev},
3022                              column => $self->{column_prev});
3023              $self->{ct}->{data} .= '-'; # comment
3024              ## Stay in the state
3025            }
3026                    
3027          ## XML5: Not a parse error.      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3028          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',        $self->{line_prev} = $self->{line};
3029                          line => $self->{line_prev},        $self->{column_prev} = $self->{column};
3030                          column => $self->{column_prev});        $self->{column}++;
3031          $self->{ct}->{data} .= '-'; # comment        $self->{nc}
3032          ## Stay in the state            = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3033        } else {
3034          $self->{set_nc}->($self);
3035        }
3036      
3037            redo A;
3038          } elsif ($self->{state} != COMMENT_END_BANG_STATE and
3039                   $is_space->{$self->{nc}}) {
3040            
3041            $self->{parse_error}->(level => $self->{level}->{must}, type => 'comment end space'); # XXX error type
3042            $self->{ct}->{data} .= '--' . chr ($self->{nc}); # comment
3043            $self->{state} = COMMENT_END_SPACE_STATE;
3044            
3045        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3046          $self->{line_prev} = $self->{line};
3047          $self->{column_prev} = $self->{column};
3048          $self->{column}++;
3049          $self->{nc}
3050              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3051        } else {
3052          $self->{set_nc}->($self);
3053        }
3054      
3055            redo A;
3056          } elsif ($self->{state} != COMMENT_END_BANG_STATE and
3057                   $self->{nc} == 0x0021) { # !
3058            
3059            $self->{parse_error}->(level => $self->{level}->{must}, type => 'comment end bang'); # XXX error type
3060            $self->{state} = COMMENT_END_BANG_STATE;
3061                    
3062      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3063        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2992  sub _get_next_token ($) { Line 3080  sub _get_next_token ($) {
3080            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
3081            $self->{s_kwd} = '';            $self->{s_kwd} = '';
3082          }          }
3083          ## reconsume          ## Reconsume.
3084    
3085          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
3086    
3087          redo A;          redo A;
3088        } else {        } else {
3089                    
3090          ## XML5: Not a parse error.          if ($self->{state} == COMMENT_END_BANG_STATE) {
3091          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',            $self->{ct}->{data} .= '--!' . chr ($self->{nc}); # comment
3092                          line => $self->{line_prev},          } else {
3093                          column => $self->{column_prev});            $self->{ct}->{data} .= '--' . chr ($self->{nc}); # comment
3094          $self->{ct}->{data} .= '--' . chr ($self->{nc}); # comment          }
3095          $self->{state} = COMMENT_STATE;          $self->{state} = COMMENT_STATE;
3096                    
3097      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3018  sub _get_next_token ($) { Line 3106  sub _get_next_token ($) {
3106        
3107          redo A;          redo A;
3108        }        }
3109        } elsif ($self->{state} == COMMENT_END_SPACE_STATE) {
3110          ## XML5: Not exist.
3111    
3112          if ($self->{nc} == 0x003E) { # >
3113            if ($self->{in_subset}) {
3114              
3115              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3116            } else {
3117              
3118              $self->{state} = DATA_STATE;
3119              $self->{s_kwd} = '';
3120            }
3121            
3122        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3123          $self->{line_prev} = $self->{line};
3124          $self->{column_prev} = $self->{column};
3125          $self->{column}++;
3126          $self->{nc}
3127              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3128        } else {
3129          $self->{set_nc}->($self);
3130        }
3131      
3132    
3133            return  ($self->{ct}); # comment
3134    
3135            redo A;
3136          } elsif ($is_space->{$self->{nc}}) {
3137            
3138            $self->{ct}->{data} .= chr ($self->{nc}); # comment
3139            ## Stay in the state.
3140            
3141        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3142          $self->{line_prev} = $self->{line};
3143          $self->{column_prev} = $self->{column};
3144          $self->{column}++;
3145          $self->{nc}
3146              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3147        } else {
3148          $self->{set_nc}->($self);
3149        }
3150      
3151            redo A;
3152          } elsif ($self->{nc} == -1) {
3153            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
3154            if ($self->{in_subset}) {
3155              
3156              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3157            } else {
3158              
3159              $self->{state} = DATA_STATE;
3160              $self->{s_kwd} = '';
3161            }
3162            ## Reconsume.
3163    
3164            return  ($self->{ct}); # comment
3165    
3166            redo A;
3167          } else {
3168            
3169            $self->{ct}->{data} .= chr ($self->{nc}); # comment
3170            $self->{state} = COMMENT_STATE;
3171            
3172        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3173          $self->{line_prev} = $self->{line};
3174          $self->{column_prev} = $self->{column};
3175          $self->{column}++;
3176          $self->{nc}
3177              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3178        } else {
3179          $self->{set_nc}->($self);
3180        }
3181      
3182            redo A;
3183          }
3184      } elsif ($self->{state} == DOCTYPE_STATE) {      } elsif ($self->{state} == DOCTYPE_STATE) {
3185        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
3186                    
# Line 3034  sub _get_next_token ($) { Line 3197  sub _get_next_token ($) {
3197      }      }
3198        
3199          redo A;          redo A;
3200          } elsif ($self->{nc} == -1) {
3201            
3202            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3203            $self->{ct}->{quirks} = 1;
3204    
3205            $self->{state} = DATA_STATE;
3206            ## Reconsume.
3207            return  ($self->{ct}); # DOCTYPE (quirks)
3208    
3209            redo A;
3210        } else {        } else {
3211                    
3212          ## XML5: Unless EOF, swith to the bogus comment state.          ## XML5: Swith to the bogus comment state.
3213          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before DOCTYPE name');
3214          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
3215          ## reconsume          ## reconsume
# Line 3081  sub _get_next_token ($) { Line 3254  sub _get_next_token ($) {
3254          return  ($self->{ct}); # DOCTYPE (quirks)          return  ($self->{ct}); # DOCTYPE (quirks)
3255    
3256          redo A;          redo A;
3257          } elsif (0x0041 <= $self->{nc} and $self->{nc} <= 0x005A) { # A..Z
3258            
3259            $self->{ct}->{name} # DOCTYPE
3260                = chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020));
3261            delete $self->{ct}->{quirks};
3262            $self->{state} = DOCTYPE_NAME_STATE;
3263            
3264        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3265          $self->{line_prev} = $self->{line};
3266          $self->{column_prev} = $self->{column};
3267          $self->{column}++;
3268          $self->{nc}
3269              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3270        } else {
3271          $self->{set_nc}->($self);
3272        }
3273      
3274            redo A;
3275        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3276                    
3277          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');
# Line 3167  sub _get_next_token ($) { Line 3358  sub _get_next_token ($) {
3358          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
3359    
3360          redo A;          redo A;
3361          } elsif (0x0041 <= $self->{nc} and $self->{nc} <= 0x005A) { # A..Z
3362            
3363            $self->{ct}->{name} # DOCTYPE
3364                .= chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020));
3365            delete $self->{ct}->{quirks};
3366            ## Stay in the state.
3367            
3368        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3369          $self->{line_prev} = $self->{line};
3370          $self->{column_prev} = $self->{column};
3371          $self->{column}++;
3372          $self->{nc}
3373              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3374        } else {
3375          $self->{set_nc}->($self);
3376        }
3377      
3378            redo A;
3379        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3380                    
3381          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
# Line 3198  sub _get_next_token ($) { Line 3407  sub _get_next_token ($) {
3407          redo A;          redo A;
3408        } else {        } else {
3409                    
3410          $self->{ct}->{name}          $self->{ct}->{name} .= chr ($self->{nc}); # DOCTYPE
3411            .= chr ($self->{nc}); # DOCTYPE          ## Stay in the state.
         ## Stay in the state  
3412                    
3413      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3414        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};

Legend:
- Removed from v.1.26
- Changed lines
- Added in v.1.34

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24