/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm

Parent Directory | Revision Log | View Patch

revision 1.3 by wakaba, Tue Oct 14 05:34:05 2008 UTC | revision 1.6 by wakaba, Tue Oct 14 14:57:52 2008 UTC
# Line 178  sub _initialize_tokenizer ($) { Line 178  sub _initialize_tokenizer ($) {
178    #$self->{is_xml} (if XML)    #$self->{is_xml} (if XML)
179    
180    $self->{state} = DATA_STATE; # MUST    $self->{state} = DATA_STATE; # MUST
181    #$self->{s_kwd}; # state keyword - initialized when used    $self->{s_kwd} = ''; # state keyword
182    #$self->{entity__value}; # initialized when used    #$self->{entity__value}; # initialized when used
183    #$self->{entity__match}; # initialized when used    #$self->{entity__match}; # initialized when used
184    $self->{content_model} = PCDATA_CONTENT_MODEL; # be    $self->{content_model} = PCDATA_CONTENT_MODEL; # be
# Line 362  sub _get_next_token ($) { Line 362  sub _get_next_token ($) {
362          }          }
363        } elsif ($self->{nc} == 0x002D) { # -        } elsif ($self->{nc} == 0x002D) { # -
364          if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA          if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
365            $self->{s_kwd} .= '-';            if ($self->{s_kwd} eq '<!-') {
             
           if ($self->{s_kwd} eq '<!--') {  
366                            
367              $self->{escape} = 1; # unless $self->{escape};              $self->{escape} = 1; # unless $self->{escape};
368              $self->{s_kwd} = '--';              $self->{s_kwd} = '--';
369              #              #
370            } elsif ($self->{s_kwd} eq '---') {            } elsif ($self->{s_kwd} eq '-') {
371                            
372              $self->{s_kwd} = '--';              $self->{s_kwd} = '--';
373              #              #
374              } elsif ($self->{s_kwd} eq '<!' or $self->{s_kwd} eq '-') {
375                
376                $self->{s_kwd} .= '-';
377                #
378            } else {            } else {
379                            
380                $self->{s_kwd} = '-';
381              #              #
382            }            }
383          }          }
# Line 420  sub _get_next_token ($) { Line 423  sub _get_next_token ($) {
423            if ($self->{s_kwd} eq '--') {            if ($self->{s_kwd} eq '--') {
424                            
425              delete $self->{escape};              delete $self->{escape};
426                #
427            } else {            } else {
428                            
429                #
430            }            }
431            } elsif ($self->{is_xml} and $self->{s_kwd} eq ']]') {
432              
433              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unmatched mse', ## TODO: type
434                              line => $self->{line_prev},
435                              column => $self->{column_prev} - 1);
436              #
437          } else {          } else {
438                        
439              #
440          }          }
441                    
442          $self->{s_kwd} = '';          $self->{s_kwd} = '';
443          #          #
444          } elsif ($self->{nc} == 0x005D) { # ]
445            if ($self->{s_kwd} eq ']' or $self->{s_kwd} eq '') {
446              
447              $self->{s_kwd} .= ']';
448            } elsif ($self->{s_kwd} eq ']]') {
449              
450              #
451            } else {
452              
453              $self->{s_kwd} = '';
454            }
455            #
456        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
457                    
458          $self->{s_kwd} = '';          $self->{s_kwd} = '';
# Line 446  sub _get_next_token ($) { Line 470  sub _get_next_token ($) {
470                     data => chr $self->{nc},                     data => chr $self->{nc},
471                     line => $self->{line}, column => $self->{column},                     line => $self->{line}, column => $self->{column},
472                    };                    };
473        if ($self->{read_until}->($token->{data}, q[-!<>&],        if ($self->{read_until}->($token->{data}, q{-!<>&\]},
474                                  length $token->{data})) {                                  length $token->{data})) {
475          $self->{s_kwd} = '';          $self->{s_kwd} = '';
476        }        }
477    
478        ## Stay in the data state.        ## Stay in the data state.
479        if ($self->{content_model} == PCDATA_CONTENT_MODEL) {        if (not $self->{is_xml} and
480              $self->{content_model} == PCDATA_CONTENT_MODEL) {
481                    
482          $self->{state} = PCDATA_STATE;          $self->{state} = PCDATA_STATE;
483        } else {        } else {
# Line 500  sub _get_next_token ($) { Line 525  sub _get_next_token ($) {
525    
526          ## reconsume          ## reconsume
527          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
528            $self->{s_kwd} = '';
529          return  ({type => CHARACTER_TOKEN, data => '<',          return  ({type => CHARACTER_TOKEN, data => '<',
530                    line => $self->{line_prev},                    line => $self->{line_prev},
531                    column => $self->{column_prev},                    column => $self->{column_prev},
# Line 541  sub _get_next_token ($) { Line 567  sub _get_next_token ($) {
567                        
568            $self->{ct}            $self->{ct}
569              = {type => START_TAG_TOKEN,              = {type => START_TAG_TOKEN,
570                 tag_name => chr ($self->{nc} + 0x0020),                 tag_name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)),
571                 line => $self->{line_prev},                 line => $self->{line_prev},
572                 column => $self->{column_prev}};                 column => $self->{column_prev}};
573            $self->{state} = TAG_NAME_STATE;            $self->{state} = TAG_NAME_STATE;
# Line 583  sub _get_next_token ($) { Line 609  sub _get_next_token ($) {
609                            line => $self->{line_prev},                            line => $self->{line_prev},
610                            column => $self->{column_prev});                            column => $self->{column_prev});
611            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
612              $self->{s_kwd} = '';
613                        
614      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
615        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 619  sub _get_next_token ($) { Line 646  sub _get_next_token ($) {
646                            line => $self->{line_prev},                            line => $self->{line_prev},
647                            column => $self->{column_prev});                            column => $self->{column_prev});
648            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
649              $self->{s_kwd} = '';
650            ## reconsume            ## reconsume
651    
652            return  ({type => CHARACTER_TOKEN, data => '<',            return  ({type => CHARACTER_TOKEN, data => '<',
# Line 647  sub _get_next_token ($) { Line 675  sub _get_next_token ($) {
675            ## NOTE: See <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>.            ## NOTE: See <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>.
676                        
677            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
678              $self->{s_kwd} = '';
679            ## Reconsume.            ## Reconsume.
680            return  ({type => CHARACTER_TOKEN, data => '</',            return  ({type => CHARACTER_TOKEN, data => '</',
681                      line => $l, column => $c,                      line => $l, column => $c,
# Line 660  sub _get_next_token ($) { Line 689  sub _get_next_token ($) {
689                    
690          $self->{ct}          $self->{ct}
691              = {type => END_TAG_TOKEN,              = {type => END_TAG_TOKEN,
692                 tag_name => chr ($self->{nc} + 0x0020),                 tag_name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)),
693                 line => $l, column => $c};                 line => $l, column => $c};
694          $self->{state} = TAG_NAME_STATE;          $self->{state} = TAG_NAME_STATE;
695                    
# Line 700  sub _get_next_token ($) { Line 729  sub _get_next_token ($) {
729                          line => $self->{line_prev}, ## "<" in "</>"                          line => $self->{line_prev}, ## "<" in "</>"
730                          column => $self->{column_prev} - 1);                          column => $self->{column_prev} - 1);
731          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
732            $self->{s_kwd} = '';
733                    
734      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
735        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 715  sub _get_next_token ($) { Line 745  sub _get_next_token ($) {
745        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
746                    
747          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare etago');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare etago');
748            $self->{s_kwd} = '';
749          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
750          # reconsume          # reconsume
751    
# Line 764  sub _get_next_token ($) { Line 795  sub _get_next_token ($) {
795          } else {          } else {
796                        
797            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
798              $self->{s_kwd} = '';
799            ## Reconsume.            ## Reconsume.
800            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
801                      data => '</' . $self->{s_kwd},                      data => '</' . $self->{s_kwd},
# Line 782  sub _get_next_token ($) { Line 814  sub _get_next_token ($) {
814                        
815            ## Reconsume.            ## Reconsume.
816            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
817              $self->{s_kwd} = '';
818            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
819                      data => '</' . $self->{s_kwd},                      data => '</' . $self->{s_kwd},
820                      line => $self->{line_prev},                      line => $self->{line_prev},
# Line 833  sub _get_next_token ($) { Line 866  sub _get_next_token ($) {
866            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
867          }          }
868          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
869            $self->{s_kwd} = '';
870                    
871      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
872        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 851  sub _get_next_token ($) { Line 885  sub _get_next_token ($) {
885        } elsif (0x0041 <= $self->{nc} and        } elsif (0x0041 <= $self->{nc} and
886                 $self->{nc} <= 0x005A) { # A..Z                 $self->{nc} <= 0x005A) { # A..Z
887                    
888          $self->{ct}->{tag_name} .= chr ($self->{nc} + 0x0020);          $self->{ct}->{tag_name}
889                .= chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020));
890            # start tag or end tag            # start tag or end tag
891          ## Stay in this state          ## Stay in this state
892                    
# Line 884  sub _get_next_token ($) { Line 919  sub _get_next_token ($) {
919            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
920          }          }
921          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
922            $self->{s_kwd} = '';
923          # reconsume          # reconsume
924    
925          return  ($self->{ct}); # start tag or end tag          return  ($self->{ct}); # start tag or end tag
# Line 954  sub _get_next_token ($) { Line 990  sub _get_next_token ($) {
990            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
991          }          }
992          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
993            $self->{s_kwd} = '';
994                    
995      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
996        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 973  sub _get_next_token ($) { Line 1010  sub _get_next_token ($) {
1010                 $self->{nc} <= 0x005A) { # A..Z                 $self->{nc} <= 0x005A) { # A..Z
1011                    
1012          $self->{ca}          $self->{ca}
1013              = {name => chr ($self->{nc} + 0x0020),              = {name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)),
1014                 value => '',                 value => '',
1015                 line => $self->{line}, column => $self->{column}};                 line => $self->{line}, column => $self->{column}};
1016          $self->{state} = ATTRIBUTE_NAME_STATE;          $self->{state} = ATTRIBUTE_NAME_STATE;
# Line 1021  sub _get_next_token ($) { Line 1058  sub _get_next_token ($) {
1058            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1059          }          }
1060          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1061            $self->{s_kwd} = '';
1062          # reconsume          # reconsume
1063    
1064          return  ($self->{ct}); # start tag or end tag          return  ($self->{ct}); # start tag or end tag
# Line 1116  sub _get_next_token ($) { Line 1154  sub _get_next_token ($) {
1154            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1155          }          }
1156          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1157            $self->{s_kwd} = '';
1158                    
1159      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1160        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1134  sub _get_next_token ($) { Line 1173  sub _get_next_token ($) {
1173        } elsif (0x0041 <= $self->{nc} and        } elsif (0x0041 <= $self->{nc} and
1174                 $self->{nc} <= 0x005A) { # A..Z                 $self->{nc} <= 0x005A) { # A..Z
1175                    
1176          $self->{ca}->{name} .= chr ($self->{nc} + 0x0020);          $self->{ca}->{name}
1177                .= chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020));
1178          ## Stay in the state          ## Stay in the state
1179                    
1180      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 1183  sub _get_next_token ($) { Line 1223  sub _get_next_token ($) {
1223            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1224          }          }
1225          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1226            $self->{s_kwd} = '';
1227          # reconsume          # reconsume
1228    
1229          return  ($self->{ct}); # start tag or end tag          return  ($self->{ct}); # start tag or end tag
# Line 1259  sub _get_next_token ($) { Line 1300  sub _get_next_token ($) {
1300            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1301          }          }
1302          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1303            $self->{s_kwd} = '';
1304                    
1305      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1306        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1278  sub _get_next_token ($) { Line 1320  sub _get_next_token ($) {
1320                 $self->{nc} <= 0x005A) { # A..Z                 $self->{nc} <= 0x005A) { # A..Z
1321                    
1322          $self->{ca}          $self->{ca}
1323              = {name => chr ($self->{nc} + 0x0020),              = {name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)),
1324                 value => '',                 value => '',
1325                 line => $self->{line}, column => $self->{column}};                 line => $self->{line}, column => $self->{column}};
1326          $self->{state} = ATTRIBUTE_NAME_STATE;          $self->{state} = ATTRIBUTE_NAME_STATE;
# Line 1326  sub _get_next_token ($) { Line 1368  sub _get_next_token ($) {
1368          } else {          } else {
1369            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1370          }          }
1371            $self->{s_kwd} = '';
1372          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1373          # reconsume          # reconsume
1374    
# Line 1427  sub _get_next_token ($) { Line 1470  sub _get_next_token ($) {
1470            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1471          }          }
1472          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1473            $self->{s_kwd} = '';
1474                    
1475      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1476        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1460  sub _get_next_token ($) { Line 1504  sub _get_next_token ($) {
1504            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1505          }          }
1506          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1507            $self->{s_kwd} = '';
1508          ## reconsume          ## reconsume
1509    
1510          return  ($self->{ct}); # start tag or end tag          return  ($self->{ct}); # start tag or end tag
# Line 1542  sub _get_next_token ($) { Line 1587  sub _get_next_token ($) {
1587            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1588          }          }
1589          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1590            $self->{s_kwd} = '';
1591          ## reconsume          ## reconsume
1592    
1593          return  ($self->{ct}); # start tag or end tag          return  ($self->{ct}); # start tag or end tag
# Line 1623  sub _get_next_token ($) { Line 1669  sub _get_next_token ($) {
1669            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1670          }          }
1671          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1672            $self->{s_kwd} = '';
1673          ## reconsume          ## reconsume
1674    
1675          return  ($self->{ct}); # start tag or end tag          return  ($self->{ct}); # start tag or end tag
# Line 1703  sub _get_next_token ($) { Line 1750  sub _get_next_token ($) {
1750            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1751          }          }
1752          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1753            $self->{s_kwd} = '';
1754                    
1755      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1756        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1736  sub _get_next_token ($) { Line 1784  sub _get_next_token ($) {
1784            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1785          }          }
1786          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1787            $self->{s_kwd} = '';
1788          ## reconsume          ## reconsume
1789    
1790          return  ($self->{ct}); # start tag or end tag          return  ($self->{ct}); # start tag or end tag
# Line 1804  sub _get_next_token ($) { Line 1853  sub _get_next_token ($) {
1853            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1854          }          }
1855          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1856            $self->{s_kwd} = '';
1857                    
1858      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1859        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1851  sub _get_next_token ($) { Line 1901  sub _get_next_token ($) {
1901            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1902          }          }
1903          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1904            $self->{s_kwd} = '';
1905          ## Reconsume.          ## Reconsume.
1906          return  ($self->{ct}); # start tag or end tag          return  ($self->{ct}); # start tag or end tag
1907          redo A;          redo A;
# Line 1881  sub _get_next_token ($) { Line 1932  sub _get_next_token ($) {
1932          }          }
1933    
1934          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1935            $self->{s_kwd} = '';
1936                    
1937      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1938        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1913  sub _get_next_token ($) { Line 1965  sub _get_next_token ($) {
1965            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1966          }          }
1967          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1968            $self->{s_kwd} = '';
1969          ## Reconsume.          ## Reconsume.
1970          return  ($self->{ct}); # start tag or end tag          return  ($self->{ct}); # start tag or end tag
1971          redo A;          redo A;
# Line 1933  sub _get_next_token ($) { Line 1986  sub _get_next_token ($) {
1986        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
1987                    
1988          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1989            $self->{s_kwd} = '';
1990                    
1991      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1992        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1950  sub _get_next_token ($) { Line 2004  sub _get_next_token ($) {
2004        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
2005                    
2006          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2007            $self->{s_kwd} = '';
2008          ## reconsume          ## reconsume
2009    
2010          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2173  sub _get_next_token ($) { Line 2228  sub _get_next_token ($) {
2228        } elsif ($self->{s_kwd} eq '[CDATA' and        } elsif ($self->{s_kwd} eq '[CDATA' and
2229                 $self->{nc} == 0x005B) { # [                 $self->{nc} == 0x005B) { # [
2230                    
2231    
2232            if ($self->{is_xml} and
2233                not $self->{tainted} and
2234                @{$self->{open_elements} or []} == 0) {
2235              $self->{parse_error}->(level => $self->{level}->{must}, type => 'cdata outside of root element',
2236                              line => $self->{line_prev},
2237                              column => $self->{column_prev} - 7);
2238              $self->{tainted} = 1;
2239            }
2240    
2241          $self->{ct} = {type => CHARACTER_TOKEN,          $self->{ct} = {type => CHARACTER_TOKEN,
2242                                    data => '',                                    data => '',
2243                                    line => $self->{line_prev},                                    line => $self->{line_prev},
# Line 2224  sub _get_next_token ($) { Line 2289  sub _get_next_token ($) {
2289                    
2290          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');
2291          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2292            $self->{s_kwd} = '';
2293                    
2294      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2295        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2243  sub _get_next_token ($) { Line 2309  sub _get_next_token ($) {
2309                    
2310          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2311          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2312            $self->{s_kwd} = '';
2313          ## reconsume          ## reconsume
2314    
2315          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2286  sub _get_next_token ($) { Line 2353  sub _get_next_token ($) {
2353                    
2354          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');
2355          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2356            $self->{s_kwd} = '';
2357                    
2358      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2359        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2305  sub _get_next_token ($) { Line 2373  sub _get_next_token ($) {
2373                    
2374          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2375          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2376            $self->{s_kwd} = '';
2377          ## reconsume          ## reconsume
2378    
2379          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2348  sub _get_next_token ($) { Line 2417  sub _get_next_token ($) {
2417                    
2418          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2419          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2420            $self->{s_kwd} = '';
2421          ## reconsume          ## reconsume
2422    
2423          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2393  sub _get_next_token ($) { Line 2463  sub _get_next_token ($) {
2463        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
2464                    
2465          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2466            $self->{s_kwd} = '';
2467          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2468            $self->{s_kwd} = '';
2469          ## reconsume          ## reconsume
2470    
2471          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2420  sub _get_next_token ($) { Line 2492  sub _get_next_token ($) {
2492        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2493                    
2494          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2495            $self->{s_kwd} = '';
2496                    
2497      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2498        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2458  sub _get_next_token ($) { Line 2531  sub _get_next_token ($) {
2531                    
2532          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2533          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2534            $self->{s_kwd} = '';
2535          ## reconsume          ## reconsume
2536    
2537          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2526  sub _get_next_token ($) { Line 2600  sub _get_next_token ($) {
2600                    
2601          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');
2602          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2603            $self->{s_kwd} = '';
2604                    
2605      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2606        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2545  sub _get_next_token ($) { Line 2620  sub _get_next_token ($) {
2620                    
2621          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');
2622          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2623            $self->{s_kwd} = '';
2624          ## reconsume          ## reconsume
2625    
2626          return  ($self->{ct}); # DOCTYPE (quirks)          return  ($self->{ct}); # DOCTYPE (quirks)
# Line 2588  sub _get_next_token ($) { Line 2664  sub _get_next_token ($) {
2664        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
2665                    
2666          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2667            $self->{s_kwd} = '';
2668                    
2669      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2670        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2607  sub _get_next_token ($) { Line 2684  sub _get_next_token ($) {
2684                    
2685          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
2686          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2687            $self->{s_kwd} = '';
2688          ## reconsume          ## reconsume
2689    
2690          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 2650  sub _get_next_token ($) { Line 2728  sub _get_next_token ($) {
2728        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
2729                    
2730          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2731            $self->{s_kwd} = '';
2732                    
2733      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2734        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2669  sub _get_next_token ($) { Line 2748  sub _get_next_token ($) {
2748                    
2749          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
2750          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2751            $self->{s_kwd} = '';
2752          ## reconsume          ## reconsume
2753    
2754          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 2897  sub _get_next_token ($) { Line 2977  sub _get_next_token ($) {
2977          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');
2978    
2979          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2980            $self->{s_kwd} = '';
2981                    
2982      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2983        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2918  sub _get_next_token ($) { Line 2999  sub _get_next_token ($) {
2999          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3000    
3001          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3002            $self->{s_kwd} = '';
3003          ## reconsume          ## reconsume
3004    
3005          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 2964  sub _get_next_token ($) { Line 3046  sub _get_next_token ($) {
3046          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3047    
3048          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3049            $self->{s_kwd} = '';
3050                    
3051      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3052        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2985  sub _get_next_token ($) { Line 3068  sub _get_next_token ($) {
3068          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3069    
3070          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3071            $self->{s_kwd} = '';
3072          ## reconsume          ## reconsume
3073    
3074          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 3033  sub _get_next_token ($) { Line 3117  sub _get_next_token ($) {
3117          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3118    
3119          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3120            $self->{s_kwd} = '';
3121                    
3122      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3123        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3054  sub _get_next_token ($) { Line 3139  sub _get_next_token ($) {
3139          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3140    
3141          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3142            $self->{s_kwd} = '';
3143          ## reconsume          ## reconsume
3144    
3145          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 3132  sub _get_next_token ($) { Line 3218  sub _get_next_token ($) {
3218        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3219                    
3220          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3221            $self->{s_kwd} = '';
3222                    
3223      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3224        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3152  sub _get_next_token ($) { Line 3239  sub _get_next_token ($) {
3239          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3240    
3241          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3242            $self->{s_kwd} = '';
3243          ## reconsume          ## reconsume
3244    
3245          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 3229  sub _get_next_token ($) { Line 3317  sub _get_next_token ($) {
3317                    
3318          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
3319          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3320            $self->{s_kwd} = '';
3321                    
3322      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3323        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3250  sub _get_next_token ($) { Line 3339  sub _get_next_token ($) {
3339          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3340    
3341          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3342            $self->{s_kwd} = '';
3343          ## reconsume          ## reconsume
3344    
3345          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 3296  sub _get_next_token ($) { Line 3386  sub _get_next_token ($) {
3386          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
3387    
3388          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3389            $self->{s_kwd} = '';
3390                    
3391      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3392        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3317  sub _get_next_token ($) { Line 3408  sub _get_next_token ($) {
3408          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
3409    
3410          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3411            $self->{s_kwd} = '';
3412          ## reconsume          ## reconsume
3413    
3414          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 3365  sub _get_next_token ($) { Line 3457  sub _get_next_token ($) {
3457          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
3458    
3459          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3460            $self->{s_kwd} = '';
3461                    
3462      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3463        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3386  sub _get_next_token ($) { Line 3479  sub _get_next_token ($) {
3479          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
3480    
3481          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3482            $self->{s_kwd} = '';
3483          ## reconsume          ## reconsume
3484    
3485          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 3432  sub _get_next_token ($) { Line 3526  sub _get_next_token ($) {
3526        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3527                    
3528          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3529            $self->{s_kwd} = '';
3530                    
3531      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3532        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3451  sub _get_next_token ($) { Line 3546  sub _get_next_token ($) {
3546                    
3547          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3548          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3549            $self->{s_kwd} = '';
3550          ## reconsume          ## reconsume
3551    
3552          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 3480  sub _get_next_token ($) { Line 3576  sub _get_next_token ($) {
3576        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
3577                    
3578          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3579            $self->{s_kwd} = '';
3580                    
3581      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3582        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3498  sub _get_next_token ($) { Line 3595  sub _get_next_token ($) {
3595        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3596                    
3597          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3598            $self->{s_kwd} = '';
3599          ## reconsume          ## reconsume
3600    
3601          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
# Line 3543  sub _get_next_token ($) { Line 3641  sub _get_next_token ($) {
3641        
3642          redo A;          redo A;
3643        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3644            if ($self->{is_xml}) {
3645              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no mse'); ## TODO: type
3646            }
3647    
3648          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3649            $self->{s_kwd} = '';
3650                    
3651      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3652        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3612  sub _get_next_token ($) { Line 3715  sub _get_next_token ($) {
3715      } elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) {      } elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) {
3716        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
3717          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3718            $self->{s_kwd} = '';
3719                    
3720      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3721        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3719  sub _get_next_token ($) { Line 3823  sub _get_next_token ($) {
3823        if ($self->{prev_state} == DATA_STATE) {        if ($self->{prev_state} == DATA_STATE) {
3824                    
3825          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
3826            $self->{s_kwd} = '';
3827          ## Reconsume.          ## Reconsume.
3828          return  ({type => CHARACTER_TOKEN, data => '&',          return  ({type => CHARACTER_TOKEN, data => '&',
3829                    line => $self->{line_prev},                    line => $self->{line_prev},
# Line 3729  sub _get_next_token ($) { Line 3834  sub _get_next_token ($) {
3834                    
3835          $self->{ca}->{value} .= '&';          $self->{ca}->{value} .= '&';
3836          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
3837            $self->{s_kwd} = '';
3838          ## Reconsume.          ## Reconsume.
3839          redo A;          redo A;
3840        }        }
# Line 3779  sub _get_next_token ($) { Line 3885  sub _get_next_token ($) {
3885          if ($self->{prev_state} == DATA_STATE) {          if ($self->{prev_state} == DATA_STATE) {
3886                        
3887            $self->{state} = $self->{prev_state};            $self->{state} = $self->{prev_state};
3888              $self->{s_kwd} = '';
3889            ## Reconsume.            ## Reconsume.
3890            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
3891                      data => '&#',                      data => '&#',
# Line 3790  sub _get_next_token ($) { Line 3897  sub _get_next_token ($) {
3897                        
3898            $self->{ca}->{value} .= '&#';            $self->{ca}->{value} .= '&#';
3899            $self->{state} = $self->{prev_state};            $self->{state} = $self->{prev_state};
3900              $self->{s_kwd} = '';
3901            ## Reconsume.            ## Reconsume.
3902            redo A;            redo A;
3903          }          }
# Line 3855  sub _get_next_token ($) { Line 3963  sub _get_next_token ($) {
3963        if ($self->{prev_state} == DATA_STATE) {        if ($self->{prev_state} == DATA_STATE) {
3964                    
3965          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
3966            $self->{s_kwd} = '';
3967          ## Reconsume.          ## Reconsume.
3968          return  ({type => CHARACTER_TOKEN, data => chr $code,          return  ({type => CHARACTER_TOKEN, data => chr $code,
3969                    line => $l, column => $c,                    line => $l, column => $c,
# Line 3865  sub _get_next_token ($) { Line 3974  sub _get_next_token ($) {
3974          $self->{ca}->{value} .= chr $code;          $self->{ca}->{value} .= chr $code;
3975          $self->{ca}->{has_reference} = 1;          $self->{ca}->{has_reference} = 1;
3976          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
3977            $self->{s_kwd} = '';
3978          ## Reconsume.          ## Reconsume.
3979          redo A;          redo A;
3980        }        }
# Line 3890  sub _get_next_token ($) { Line 4000  sub _get_next_token ($) {
4000          if ($self->{prev_state} == DATA_STATE) {          if ($self->{prev_state} == DATA_STATE) {
4001                        
4002            $self->{state} = $self->{prev_state};            $self->{state} = $self->{prev_state};
4003              $self->{s_kwd} = '';
4004            ## Reconsume.            ## Reconsume.
4005            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
4006                      data => '&' . $self->{s_kwd},                      data => '&' . $self->{s_kwd},
# Line 3901  sub _get_next_token ($) { Line 4012  sub _get_next_token ($) {
4012                        
4013            $self->{ca}->{value} .= '&' . $self->{s_kwd};            $self->{ca}->{value} .= '&' . $self->{s_kwd};
4014            $self->{state} = $self->{prev_state};            $self->{state} = $self->{prev_state};
4015              $self->{s_kwd} = '';
4016            ## Reconsume.            ## Reconsume.
4017            redo A;            redo A;
4018          }          }
# Line 4003  sub _get_next_token ($) { Line 4115  sub _get_next_token ($) {
4115        if ($self->{prev_state} == DATA_STATE) {        if ($self->{prev_state} == DATA_STATE) {
4116                    
4117          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
4118            $self->{s_kwd} = '';
4119          ## Reconsume.          ## Reconsume.
4120          return  ({type => CHARACTER_TOKEN, data => chr $code,          return  ({type => CHARACTER_TOKEN, data => chr $code,
4121                    line => $l, column => $c,                    line => $l, column => $c,
# Line 4013  sub _get_next_token ($) { Line 4126  sub _get_next_token ($) {
4126          $self->{ca}->{value} .= chr $code;          $self->{ca}->{value} .= chr $code;
4127          $self->{ca}->{has_reference} = 1;          $self->{ca}->{has_reference} = 1;
4128          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
4129            $self->{s_kwd} = '';
4130          ## Reconsume.          ## Reconsume.
4131          redo A;          redo A;
4132        }        }
# Line 4125  sub _get_next_token ($) { Line 4239  sub _get_next_token ($) {
4239        if ($self->{prev_state} == DATA_STATE) {        if ($self->{prev_state} == DATA_STATE) {
4240                    
4241          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
4242            $self->{s_kwd} = '';
4243          ## Reconsume.          ## Reconsume.
4244          return  ({type => CHARACTER_TOKEN,          return  ({type => CHARACTER_TOKEN,
4245                    data => $data,                    data => $data,
# Line 4137  sub _get_next_token ($) { Line 4252  sub _get_next_token ($) {
4252          $self->{ca}->{value} .= $data;          $self->{ca}->{value} .= $data;
4253          $self->{ca}->{has_reference} = 1 if $has_ref;          $self->{ca}->{has_reference} = 1 if $has_ref;
4254          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
4255            $self->{s_kwd} = '';
4256          ## Reconsume.          ## Reconsume.
4257          redo A;          redo A;
4258        }        }

Legend:
Removed from v.1.3  
changed lines
  Added in v.1.6

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24