/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm

Parent Directory | Revision Log | View Patch

revision 1.4 by wakaba, Tue Oct 14 11:46:57 2008 UTC | revision 1.10 by wakaba, Wed Oct 15 08:51:02 2008 UTC
# Line 114  sub HEXREF_HEX_STATE () { 48 } Line 114  sub HEXREF_HEX_STATE () { 48 }
114  sub ENTITY_NAME_STATE () { 49 }  sub ENTITY_NAME_STATE () { 49 }
115  sub PCDATA_STATE () { 50 } # "data state" in the spec  sub PCDATA_STATE () { 50 } # "data state" in the spec
116    
117    ## XML states
118    sub PI_STATE () { 51 }
119    sub PI_TARGET_STATE () { 52 }
120    sub PI_TARGET_AFTER_STATE () { 53 }
121    sub PI_DATA_STATE () { 54 }
122    sub PI_AFTER_STATE () { 55 }
123    sub PI_DATA_AFTER_STATE () { 56 }
124    
125  ## Tree constructor state constants (see Whatpm::HTML for the full  ## Tree constructor state constants (see Whatpm::HTML for the full
126  ## list and descriptions)  ## list and descriptions)
127    
# Line 178  sub _initialize_tokenizer ($) { Line 186  sub _initialize_tokenizer ($) {
186    #$self->{is_xml} (if XML)    #$self->{is_xml} (if XML)
187    
188    $self->{state} = DATA_STATE; # MUST    $self->{state} = DATA_STATE; # MUST
189    #$self->{s_kwd}; # state keyword - initialized when used    $self->{s_kwd} = ''; # state keyword
190    #$self->{entity__value}; # initialized when used    #$self->{entity__value}; # initialized when used
191    #$self->{entity__match}; # initialized when used    #$self->{entity__match}; # initialized when used
192    $self->{content_model} = PCDATA_CONTENT_MODEL; # be    $self->{content_model} = PCDATA_CONTENT_MODEL; # be
# Line 219  sub _initialize_tokenizer ($) { Line 227  sub _initialize_tokenizer ($) {
227  ##        ->{value}  ##        ->{value}
228  ##        ->{has_reference} == 1 or 0  ##        ->{has_reference} == 1 or 0
229  ##   ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN)  ##   ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN)
230    ##   ->{has_reference} == 1 or 0 (CHARACTER_TOKEN)
231  ## NOTE: The "self-closing flag" is held as |$self->{self_closing}|.  ## NOTE: The "self-closing flag" is held as |$self->{self_closing}|.
232  ##     |->{self_closing}| is used to save the value of |$self->{self_closing}|  ##     |->{self_closing}| is used to save the value of |$self->{self_closing}|
233  ##     while the token is pushed back to the stack.  ##     while the token is pushed back to the stack.
# Line 362  sub _get_next_token ($) { Line 371  sub _get_next_token ($) {
371          }          }
372        } elsif ($self->{nc} == 0x002D) { # -        } elsif ($self->{nc} == 0x002D) { # -
373          if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA          if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
374            $self->{s_kwd} .= '-';            if ($self->{s_kwd} eq '<!-') {
             
           if ($self->{s_kwd} eq '<!--') {  
375                            
376              $self->{escape} = 1; # unless $self->{escape};              $self->{escape} = 1; # unless $self->{escape};
377              $self->{s_kwd} = '--';              $self->{s_kwd} = '--';
378              #              #
379            } elsif ($self->{s_kwd} eq '---') {            } elsif ($self->{s_kwd} eq '-') {
380                            
381              $self->{s_kwd} = '--';              $self->{s_kwd} = '--';
382              #              #
383              } elsif ($self->{s_kwd} eq '<!' or $self->{s_kwd} eq '-') {
384                
385                $self->{s_kwd} .= '-';
386                #
387            } else {            } else {
388                            
389                $self->{s_kwd} = '-';
390              #              #
391            }            }
392          }          }
# Line 420  sub _get_next_token ($) { Line 432  sub _get_next_token ($) {
432            if ($self->{s_kwd} eq '--') {            if ($self->{s_kwd} eq '--') {
433                            
434              delete $self->{escape};              delete $self->{escape};
435                #
436            } else {            } else {
437                            
438                #
439            }            }
440            } elsif ($self->{is_xml} and $self->{s_kwd} eq ']]') {
441              
442              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unmatched mse', ## TODO: type
443                              line => $self->{line_prev},
444                              column => $self->{column_prev} - 1);
445              #
446          } else {          } else {
447                        
448              #
449          }          }
450                    
451          $self->{s_kwd} = '';          $self->{s_kwd} = '';
452          #          #
453          } elsif ($self->{nc} == 0x005D) { # ]
454            if ($self->{s_kwd} eq ']' or $self->{s_kwd} eq '') {
455              
456              $self->{s_kwd} .= ']';
457            } elsif ($self->{s_kwd} eq ']]') {
458              
459              #
460            } else {
461              
462              $self->{s_kwd} = '';
463            }
464            #
465        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
466                    
467          $self->{s_kwd} = '';          $self->{s_kwd} = '';
# Line 446  sub _get_next_token ($) { Line 479  sub _get_next_token ($) {
479                     data => chr $self->{nc},                     data => chr $self->{nc},
480                     line => $self->{line}, column => $self->{column},                     line => $self->{line}, column => $self->{column},
481                    };                    };
482        if ($self->{read_until}->($token->{data}, q[-!<>&],        if ($self->{read_until}->($token->{data}, q{-!<>&\]},
483                                  length $token->{data})) {                                  length $token->{data})) {
484          $self->{s_kwd} = '';          $self->{s_kwd} = '';
485        }        }
486    
487        ## Stay in the data state.        ## Stay in the data state.
488        if ($self->{content_model} == PCDATA_CONTENT_MODEL) {        if (not $self->{is_xml} and
489              $self->{content_model} == PCDATA_CONTENT_MODEL) {
490                    
491          $self->{state} = PCDATA_STATE;          $self->{state} = PCDATA_STATE;
492        } else {        } else {
# Line 473  sub _get_next_token ($) { Line 507  sub _get_next_token ($) {
507        return  ($token);        return  ($token);
508        redo A;        redo A;
509      } elsif ($self->{state} == TAG_OPEN_STATE) {      } elsif ($self->{state} == TAG_OPEN_STATE) {
510          ## XML5: "tag state".
511    
512        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
513          if ($self->{nc} == 0x002F) { # /          if ($self->{nc} == 0x002F) { # /
514                        
# Line 500  sub _get_next_token ($) { Line 536  sub _get_next_token ($) {
536    
537          ## reconsume          ## reconsume
538          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
539            $self->{s_kwd} = '';
540          return  ({type => CHARACTER_TOKEN, data => '<',          return  ({type => CHARACTER_TOKEN, data => '<',
541                    line => $self->{line_prev},                    line => $self->{line_prev},
542                    column => $self->{column_prev},                    column => $self->{column_prev},
# Line 583  sub _get_next_token ($) { Line 620  sub _get_next_token ($) {
620                            line => $self->{line_prev},                            line => $self->{line_prev},
621                            column => $self->{column_prev});                            column => $self->{column_prev});
622            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
623              $self->{s_kwd} = '';
624                        
625      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
626        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 602  sub _get_next_token ($) { Line 640  sub _get_next_token ($) {
640    
641            redo A;            redo A;
642          } elsif ($self->{nc} == 0x003F) { # ?          } elsif ($self->{nc} == 0x003F) { # ?
643                        if ($self->{is_xml}) {
644            $self->{parse_error}->(level => $self->{level}->{must}, type => 'pio',              
645                            line => $self->{line_prev},              $self->{state} = PI_STATE;
646                            column => $self->{column_prev});              
647            $self->{state} = BOGUS_COMMENT_STATE;      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
648            $self->{ct} = {type => COMMENT_TOKEN, data => '',        $self->{line_prev} = $self->{line};
649                                      line => $self->{line_prev},        $self->{column_prev} = $self->{column};
650                                      column => $self->{column_prev},        $self->{column}++;
651                                     };        $self->{nc}
652            ## $self->{nc} is intentionally left as is            = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
653            redo A;      } else {
654          } else {        $self->{set_nc}->($self);
655        }
656      
657                redo A;
658              } else {
659                
660                $self->{parse_error}->(level => $self->{level}->{must}, type => 'pio',
661                                line => $self->{line_prev},
662                                column => $self->{column_prev});
663                $self->{state} = BOGUS_COMMENT_STATE;
664                $self->{ct} = {type => COMMENT_TOKEN, data => '',
665                               line => $self->{line_prev},
666                               column => $self->{column_prev},
667                              };
668                ## $self->{nc} is intentionally left as is
669                redo A;
670              }
671            } elsif (not $self->{is_xml} or $is_space->{$self->{nc}}) {
672                        
673            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago',            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago',
674                            line => $self->{line_prev},                            line => $self->{line_prev},
675                            column => $self->{column_prev});                            column => $self->{column_prev});
676            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
677              $self->{s_kwd} = '';
678            ## reconsume            ## reconsume
679    
680            return  ({type => CHARACTER_TOKEN, data => '<',            return  ({type => CHARACTER_TOKEN, data => '<',
# Line 627  sub _get_next_token ($) { Line 683  sub _get_next_token ($) {
683                     });                     });
684    
685            redo A;            redo A;
686            } else {
687              ## XML5: "<:" is a parse error.
688              
689              $self->{ct} = {type => START_TAG_TOKEN,
690                                        tag_name => chr ($self->{nc}),
691                                        line => $self->{line_prev},
692                                        column => $self->{column_prev}};
693              $self->{state} = TAG_NAME_STATE;
694              
695        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
696          $self->{line_prev} = $self->{line};
697          $self->{column_prev} = $self->{column};
698          $self->{column}++;
699          $self->{nc}
700              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
701        } else {
702          $self->{set_nc}->($self);
703        }
704      
705              redo A;
706          }          }
707        } else {        } else {
708          die "$0: $self->{content_model} in tag open";          die "$0: $self->{content_model} in tag open";
# Line 635  sub _get_next_token ($) { Line 711  sub _get_next_token ($) {
711        ## NOTE: The "close tag open state" in the spec is implemented as        ## NOTE: The "close tag open state" in the spec is implemented as
712        ## |CLOSE_TAG_OPEN_STATE| and |CDATA_RCDATA_CLOSE_TAG_STATE|.        ## |CLOSE_TAG_OPEN_STATE| and |CDATA_RCDATA_CLOSE_TAG_STATE|.
713    
714          ## XML5: "end tag state".
715    
716        my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"        my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"
717        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
718          if (defined $self->{last_stag_name}) {          if (defined $self->{last_stag_name}) {
# Line 647  sub _get_next_token ($) { Line 725  sub _get_next_token ($) {
725            ## NOTE: See <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>.            ## NOTE: See <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>.
726                        
727            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
728              $self->{s_kwd} = '';
729            ## Reconsume.            ## Reconsume.
730            return  ({type => CHARACTER_TOKEN, data => '</',            return  ({type => CHARACTER_TOKEN, data => '</',
731                      line => $l, column => $c,                      line => $l, column => $c,
# Line 695  sub _get_next_token ($) { Line 774  sub _get_next_token ($) {
774        
775          redo A;          redo A;
776        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
777          $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty end tag',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty end tag',
778                          line => $self->{line_prev}, ## "<" in "</>"                          line => $self->{line_prev}, ## "<" in "</>"
779                          column => $self->{column_prev} - 1);                          column => $self->{column_prev} - 1);
780          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
781                    $self->{s_kwd} = '';
782            if ($self->{is_xml}) {
783              
784              ## XML5: No parse error.
785              
786              ## NOTE: This parser raises a parse error, since it supports
787              ## XML1, not XML5.
788    
789              ## NOTE: A short end tag token.
790              my $ct = {type => END_TAG_TOKEN,
791                        tag_name => '',
792                        line => $self->{line_prev},
793                        column => $self->{column_prev} - 1,
794                       };
795              
796        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
797          $self->{line_prev} = $self->{line};
798          $self->{column_prev} = $self->{column};
799          $self->{column}++;
800          $self->{nc}
801              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
802        } else {
803          $self->{set_nc}->($self);
804        }
805      
806              return  ($ct);
807            } else {
808              
809              
810      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
811        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
812        $self->{column_prev} = $self->{column};        $self->{column_prev} = $self->{column};
# Line 711  sub _get_next_token ($) { Line 817  sub _get_next_token ($) {
817        $self->{set_nc}->($self);        $self->{set_nc}->($self);
818      }      }
819        
820            }
821          redo A;          redo A;
822        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
823                    
824          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare etago');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare etago');
825            $self->{s_kwd} = '';
826          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
827          # reconsume          # reconsume
828    
# Line 723  sub _get_next_token ($) { Line 831  sub _get_next_token ($) {
831                   });                   });
832    
833          redo A;          redo A;
834        } else {        } elsif (not $self->{is_xml} or
835                   $is_space->{$self->{nc}}) {
836                    
837          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus end tag');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus end tag',
838                            line => $self->{line_prev}, # "<" of "</"
839                            column => $self->{column_prev} - 1);
840          $self->{state} = BOGUS_COMMENT_STATE;          $self->{state} = BOGUS_COMMENT_STATE;
841          $self->{ct} = {type => COMMENT_TOKEN, data => '',          $self->{ct} = {type => COMMENT_TOKEN, data => '',
842                                    line => $self->{line_prev}, # "<" of "</"                                    line => $self->{line_prev}, # "<" of "</"
# Line 738  sub _get_next_token ($) { Line 849  sub _get_next_token ($) {
849          ## generated from the bogus end tag, as defined in the          ## generated from the bogus end tag, as defined in the
850          ## "bogus comment state" entry.          ## "bogus comment state" entry.
851          redo A;          redo A;
852          } else {
853            ## XML5: "</:" is a parse error.
854            
855            $self->{ct} = {type => END_TAG_TOKEN,
856                           tag_name => chr ($self->{nc}),
857                           line => $l, column => $c};
858            $self->{state} = TAG_NAME_STATE; ## XML5: "end tag name state".
859            
860        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
861          $self->{line_prev} = $self->{line};
862          $self->{column_prev} = $self->{column};
863          $self->{column}++;
864          $self->{nc}
865              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
866        } else {
867          $self->{set_nc}->($self);
868        }
869      
870            redo A;
871        }        }
872      } elsif ($self->{state} == CDATA_RCDATA_CLOSE_TAG_STATE) {      } elsif ($self->{state} == CDATA_RCDATA_CLOSE_TAG_STATE) {
873        my $ch = substr $self->{last_stag_name}, length $self->{s_kwd}, 1;        my $ch = substr $self->{last_stag_name}, length $self->{s_kwd}, 1;
# Line 764  sub _get_next_token ($) { Line 894  sub _get_next_token ($) {
894          } else {          } else {
895                        
896            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
897              $self->{s_kwd} = '';
898            ## Reconsume.            ## Reconsume.
899            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
900                      data => '</' . $self->{s_kwd},                      data => '</' . $self->{s_kwd},
# Line 782  sub _get_next_token ($) { Line 913  sub _get_next_token ($) {
913                        
914            ## Reconsume.            ## Reconsume.
915            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
916              $self->{s_kwd} = '';
917            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
918                      data => '</' . $self->{s_kwd},                      data => '</' . $self->{s_kwd},
919                      line => $self->{line_prev},                      line => $self->{line_prev},
# Line 833  sub _get_next_token ($) { Line 965  sub _get_next_token ($) {
965            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
966          }          }
967          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
968            $self->{s_kwd} = '';
969                    
970      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
971        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 885  sub _get_next_token ($) { Line 1018  sub _get_next_token ($) {
1018            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1019          }          }
1020          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1021            $self->{s_kwd} = '';
1022          # reconsume          # reconsume
1023    
1024          return  ($self->{ct}); # start tag or end tag          return  ($self->{ct}); # start tag or end tag
# Line 955  sub _get_next_token ($) { Line 1089  sub _get_next_token ($) {
1089            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1090          }          }
1091          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1092            $self->{s_kwd} = '';
1093                    
1094      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1095        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1022  sub _get_next_token ($) { Line 1157  sub _get_next_token ($) {
1157            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1158          }          }
1159          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1160            $self->{s_kwd} = '';
1161          # reconsume          # reconsume
1162    
1163          return  ($self->{ct}); # start tag or end tag          return  ($self->{ct}); # start tag or end tag
# Line 1117  sub _get_next_token ($) { Line 1253  sub _get_next_token ($) {
1253            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1254          }          }
1255          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1256            $self->{s_kwd} = '';
1257                    
1258      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1259        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1185  sub _get_next_token ($) { Line 1322  sub _get_next_token ($) {
1322            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1323          }          }
1324          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1325            $self->{s_kwd} = '';
1326          # reconsume          # reconsume
1327    
1328          return  ($self->{ct}); # start tag or end tag          return  ($self->{ct}); # start tag or end tag
# Line 1261  sub _get_next_token ($) { Line 1399  sub _get_next_token ($) {
1399            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1400          }          }
1401          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1402            $self->{s_kwd} = '';
1403                    
1404      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1405        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1328  sub _get_next_token ($) { Line 1467  sub _get_next_token ($) {
1467          } else {          } else {
1468            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1469          }          }
1470            $self->{s_kwd} = '';
1471          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1472          # reconsume          # reconsume
1473    
# Line 1429  sub _get_next_token ($) { Line 1569  sub _get_next_token ($) {
1569            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1570          }          }
1571          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1572            $self->{s_kwd} = '';
1573                    
1574      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1575        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1462  sub _get_next_token ($) { Line 1603  sub _get_next_token ($) {
1603            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1604          }          }
1605          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1606            $self->{s_kwd} = '';
1607          ## reconsume          ## reconsume
1608    
1609          return  ($self->{ct}); # start tag or end tag          return  ($self->{ct}); # start tag or end tag
# Line 1544  sub _get_next_token ($) { Line 1686  sub _get_next_token ($) {
1686            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1687          }          }
1688          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1689            $self->{s_kwd} = '';
1690          ## reconsume          ## reconsume
1691    
1692          return  ($self->{ct}); # start tag or end tag          return  ($self->{ct}); # start tag or end tag
# Line 1625  sub _get_next_token ($) { Line 1768  sub _get_next_token ($) {
1768            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1769          }          }
1770          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1771            $self->{s_kwd} = '';
1772          ## reconsume          ## reconsume
1773    
1774          return  ($self->{ct}); # start tag or end tag          return  ($self->{ct}); # start tag or end tag
# Line 1705  sub _get_next_token ($) { Line 1849  sub _get_next_token ($) {
1849            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1850          }          }
1851          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1852            $self->{s_kwd} = '';
1853                    
1854      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1855        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1738  sub _get_next_token ($) { Line 1883  sub _get_next_token ($) {
1883            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1884          }          }
1885          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1886            $self->{s_kwd} = '';
1887          ## reconsume          ## reconsume
1888    
1889          return  ($self->{ct}); # start tag or end tag          return  ($self->{ct}); # start tag or end tag
# Line 1806  sub _get_next_token ($) { Line 1952  sub _get_next_token ($) {
1952            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1953          }          }
1954          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1955            $self->{s_kwd} = '';
1956                    
1957      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1958        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1853  sub _get_next_token ($) { Line 2000  sub _get_next_token ($) {
2000            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
2001          }          }
2002          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2003            $self->{s_kwd} = '';
2004          ## Reconsume.          ## Reconsume.
2005          return  ($self->{ct}); # start tag or end tag          return  ($self->{ct}); # start tag or end tag
2006          redo A;          redo A;
# Line 1883  sub _get_next_token ($) { Line 2031  sub _get_next_token ($) {
2031          }          }
2032    
2033          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2034            $self->{s_kwd} = '';
2035                    
2036      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2037        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1915  sub _get_next_token ($) { Line 2064  sub _get_next_token ($) {
2064            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
2065          }          }
2066          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2067            $self->{s_kwd} = '';
2068          ## Reconsume.          ## Reconsume.
2069          return  ($self->{ct}); # start tag or end tag          return  ($self->{ct}); # start tag or end tag
2070          redo A;          redo A;
# Line 1935  sub _get_next_token ($) { Line 2085  sub _get_next_token ($) {
2085        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2086                    
2087          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2088            $self->{s_kwd} = '';
2089                    
2090      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2091        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1952  sub _get_next_token ($) { Line 2103  sub _get_next_token ($) {
2103        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
2104                    
2105          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2106            $self->{s_kwd} = '';
2107          ## reconsume          ## reconsume
2108    
2109          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2053  sub _get_next_token ($) { Line 2205  sub _get_next_token ($) {
2205                                    line => $self->{line_prev},                                    line => $self->{line_prev},
2206                                    column => $self->{column_prev} - 2,                                    column => $self->{column_prev} - 2,
2207                                   };                                   };
2208          $self->{state} = COMMENT_START_STATE;          $self->{state} = COMMENT_START_STATE; ## XML5: "comment state".
2209                    
2210      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2211        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2116  sub _get_next_token ($) { Line 2268  sub _get_next_token ($) {
2268        } elsif ((length $self->{s_kwd}) == 6 and        } elsif ((length $self->{s_kwd}) == 6 and
2269                 ($self->{nc} == 0x0045 or # E                 ($self->{nc} == 0x0045 or # E
2270                  $self->{nc} == 0x0065)) { # e                  $self->{nc} == 0x0065)) { # e
2271                    if ($self->{s_kwd} ne 'DOCTYP') {
2272              
2273              ## XML5: case-sensitive.
2274              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO
2275                              text => 'DOCTYPE',
2276                              line => $self->{line_prev},
2277                              column => $self->{column_prev} - 5);
2278            } else {
2279              
2280            }
2281          $self->{state} = DOCTYPE_STATE;          $self->{state} = DOCTYPE_STATE;
2282          $self->{ct} = {type => DOCTYPE_TOKEN,          $self->{ct} = {type => DOCTYPE_TOKEN,
2283                                    quirks => 1,                                    quirks => 1,
# Line 2174  sub _get_next_token ($) { Line 2335  sub _get_next_token ($) {
2335          redo A;          redo A;
2336        } elsif ($self->{s_kwd} eq '[CDATA' and        } elsif ($self->{s_kwd} eq '[CDATA' and
2337                 $self->{nc} == 0x005B) { # [                 $self->{nc} == 0x005B) { # [
2338                    if ($self->{is_xml} and
2339                not $self->{tainted} and
2340                @{$self->{open_elements} or []} == 0) {
2341              
2342              $self->{parse_error}->(level => $self->{level}->{must}, type => 'cdata outside of root element',
2343                              line => $self->{line_prev},
2344                              column => $self->{column_prev} - 7);
2345              $self->{tainted} = 1;
2346            } else {
2347              
2348            }
2349    
2350          $self->{ct} = {type => CHARACTER_TOKEN,          $self->{ct} = {type => CHARACTER_TOKEN,
2351                                    data => '',                                    data => '',
2352                                    line => $self->{line_prev},                                    line => $self->{line_prev},
# Line 2226  sub _get_next_token ($) { Line 2398  sub _get_next_token ($) {
2398                    
2399          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');
2400          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2401            $self->{s_kwd} = '';
2402                    
2403      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2404        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2245  sub _get_next_token ($) { Line 2418  sub _get_next_token ($) {
2418                    
2419          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2420          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2421            $self->{s_kwd} = '';
2422          ## reconsume          ## reconsume
2423    
2424          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2288  sub _get_next_token ($) { Line 2462  sub _get_next_token ($) {
2462                    
2463          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');
2464          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2465            $self->{s_kwd} = '';
2466                    
2467      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2468        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2307  sub _get_next_token ($) { Line 2482  sub _get_next_token ($) {
2482                    
2483          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2484          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2485            $self->{s_kwd} = '';
2486          ## reconsume          ## reconsume
2487    
2488          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2350  sub _get_next_token ($) { Line 2526  sub _get_next_token ($) {
2526                    
2527          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2528          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2529            $self->{s_kwd} = '';
2530          ## reconsume          ## reconsume
2531    
2532          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2377  sub _get_next_token ($) { Line 2554  sub _get_next_token ($) {
2554          redo A;          redo A;
2555        }        }
2556      } elsif ($self->{state} == COMMENT_END_DASH_STATE) {      } elsif ($self->{state} == COMMENT_END_DASH_STATE) {
2557          ## XML5: "comment dash state".
2558    
2559        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2560                    
2561          $self->{state} = COMMENT_END_STATE;          $self->{state} = COMMENT_END_STATE;
# Line 2395  sub _get_next_token ($) { Line 2574  sub _get_next_token ($) {
2574        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
2575                    
2576          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2577            $self->{s_kwd} = '';
2578          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2579            $self->{s_kwd} = '';
2580          ## reconsume          ## reconsume
2581    
2582          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2422  sub _get_next_token ($) { Line 2603  sub _get_next_token ($) {
2603        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2604                    
2605          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2606            $self->{s_kwd} = '';
2607                    
2608      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2609        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2439  sub _get_next_token ($) { Line 2621  sub _get_next_token ($) {
2621          redo A;          redo A;
2622        } elsif ($self->{nc} == 0x002D) { # -        } elsif ($self->{nc} == 0x002D) { # -
2623                    
2624            ## XML5: Not a parse error.
2625          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',
2626                          line => $self->{line_prev},                          line => $self->{line_prev},
2627                          column => $self->{column_prev});                          column => $self->{column_prev});
# Line 2460  sub _get_next_token ($) { Line 2643  sub _get_next_token ($) {
2643                    
2644          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2645          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2646            $self->{s_kwd} = '';
2647          ## reconsume          ## reconsume
2648    
2649          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2467  sub _get_next_token ($) { Line 2651  sub _get_next_token ($) {
2651          redo A;          redo A;
2652        } else {        } else {
2653                    
2654            ## XML5: Not a parse error.
2655          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',
2656                          line => $self->{line_prev},                          line => $self->{line_prev},
2657                          column => $self->{column_prev});                          column => $self->{column_prev});
# Line 2528  sub _get_next_token ($) { Line 2713  sub _get_next_token ($) {
2713                    
2714          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');
2715          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2716            $self->{s_kwd} = '';
2717                    
2718      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2719        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2547  sub _get_next_token ($) { Line 2733  sub _get_next_token ($) {
2733                    
2734          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');
2735          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2736            $self->{s_kwd} = '';
2737          ## reconsume          ## reconsume
2738    
2739          return  ($self->{ct}); # DOCTYPE (quirks)          return  ($self->{ct}); # DOCTYPE (quirks)
# Line 2590  sub _get_next_token ($) { Line 2777  sub _get_next_token ($) {
2777        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
2778                    
2779          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2780            $self->{s_kwd} = '';
2781                    
2782      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2783        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2609  sub _get_next_token ($) { Line 2797  sub _get_next_token ($) {
2797                    
2798          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
2799          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2800            $self->{s_kwd} = '';
2801          ## reconsume          ## reconsume
2802    
2803          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 2652  sub _get_next_token ($) { Line 2841  sub _get_next_token ($) {
2841        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
2842                    
2843          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2844            $self->{s_kwd} = '';
2845                    
2846      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2847        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2671  sub _get_next_token ($) { Line 2861  sub _get_next_token ($) {
2861                    
2862          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
2863          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2864            $self->{s_kwd} = '';
2865          ## reconsume          ## reconsume
2866    
2867          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 2899  sub _get_next_token ($) { Line 3090  sub _get_next_token ($) {
3090          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');
3091    
3092          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3093            $self->{s_kwd} = '';
3094                    
3095      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3096        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2920  sub _get_next_token ($) { Line 3112  sub _get_next_token ($) {
3112          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3113    
3114          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3115            $self->{s_kwd} = '';
3116          ## reconsume          ## reconsume
3117    
3118          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 2966  sub _get_next_token ($) { Line 3159  sub _get_next_token ($) {
3159          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3160    
3161          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3162            $self->{s_kwd} = '';
3163                    
3164      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3165        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2987  sub _get_next_token ($) { Line 3181  sub _get_next_token ($) {
3181          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3182    
3183          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3184            $self->{s_kwd} = '';
3185          ## reconsume          ## reconsume
3186    
3187          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 3035  sub _get_next_token ($) { Line 3230  sub _get_next_token ($) {
3230          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3231    
3232          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3233            $self->{s_kwd} = '';
3234                    
3235      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3236        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3056  sub _get_next_token ($) { Line 3252  sub _get_next_token ($) {
3252          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3253    
3254          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3255            $self->{s_kwd} = '';
3256          ## reconsume          ## reconsume
3257    
3258          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 3134  sub _get_next_token ($) { Line 3331  sub _get_next_token ($) {
3331        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3332                    
3333          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3334            $self->{s_kwd} = '';
3335                    
3336      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3337        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3154  sub _get_next_token ($) { Line 3352  sub _get_next_token ($) {
3352          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3353    
3354          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3355            $self->{s_kwd} = '';
3356          ## reconsume          ## reconsume
3357    
3358          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 3231  sub _get_next_token ($) { Line 3430  sub _get_next_token ($) {
3430                    
3431          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
3432          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3433            $self->{s_kwd} = '';
3434                    
3435      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3436        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3252  sub _get_next_token ($) { Line 3452  sub _get_next_token ($) {
3452          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3453    
3454          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3455            $self->{s_kwd} = '';
3456          ## reconsume          ## reconsume
3457    
3458          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 3298  sub _get_next_token ($) { Line 3499  sub _get_next_token ($) {
3499          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
3500    
3501          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3502            $self->{s_kwd} = '';
3503                    
3504      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3505        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3319  sub _get_next_token ($) { Line 3521  sub _get_next_token ($) {
3521          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
3522    
3523          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3524            $self->{s_kwd} = '';
3525          ## reconsume          ## reconsume
3526    
3527          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 3367  sub _get_next_token ($) { Line 3570  sub _get_next_token ($) {
3570          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
3571    
3572          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3573            $self->{s_kwd} = '';
3574                    
3575      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3576        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3388  sub _get_next_token ($) { Line 3592  sub _get_next_token ($) {
3592          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
3593    
3594          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3595            $self->{s_kwd} = '';
3596          ## reconsume          ## reconsume
3597    
3598          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 3434  sub _get_next_token ($) { Line 3639  sub _get_next_token ($) {
3639        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3640                    
3641          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3642            $self->{s_kwd} = '';
3643                    
3644      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3645        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3453  sub _get_next_token ($) { Line 3659  sub _get_next_token ($) {
3659                    
3660          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3661          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3662            $self->{s_kwd} = '';
3663          ## reconsume          ## reconsume
3664    
3665          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 3482  sub _get_next_token ($) { Line 3689  sub _get_next_token ($) {
3689        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
3690                    
3691          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3692            $self->{s_kwd} = '';
3693                    
3694      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3695        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3500  sub _get_next_token ($) { Line 3708  sub _get_next_token ($) {
3708        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3709                    
3710          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3711            $self->{s_kwd} = '';
3712          ## reconsume          ## reconsume
3713    
3714          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
# Line 3528  sub _get_next_token ($) { Line 3737  sub _get_next_token ($) {
3737        ## NOTE: "CDATA section state" in the state is jointly implemented        ## NOTE: "CDATA section state" in the state is jointly implemented
3738        ## by three states, |CDATA_SECTION_STATE|, |CDATA_SECTION_MSE1_STATE|,        ## by three states, |CDATA_SECTION_STATE|, |CDATA_SECTION_MSE1_STATE|,
3739        ## and |CDATA_SECTION_MSE2_STATE|.        ## and |CDATA_SECTION_MSE2_STATE|.
3740    
3741          ## XML5: "CDATA state".
3742                
3743        if ($self->{nc} == 0x005D) { # ]        if ($self->{nc} == 0x005D) { # ]
3744                    
# Line 3545  sub _get_next_token ($) { Line 3756  sub _get_next_token ($) {
3756        
3757          redo A;          redo A;
3758        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3759            if ($self->{is_xml}) {
3760              
3761              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no mse'); ## TODO: type
3762            } else {
3763              
3764            }
3765    
3766          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3767                    $self->{s_kwd} = '';
3768      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {          ## Reconsume.
       $self->{line_prev} = $self->{line};  
       $self->{column_prev} = $self->{column};  
       $self->{column}++;  
       $self->{nc}  
           = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);  
     } else {  
       $self->{set_nc}->($self);  
     }  
     
3769          if (length $self->{ct}->{data}) { # character          if (length $self->{ct}->{data}) { # character
3770                        
3771            return  ($self->{ct}); # character            return  ($self->{ct}); # character
# Line 3589  sub _get_next_token ($) { Line 3798  sub _get_next_token ($) {
3798    
3799        ## ISSUE: "text tokens" in spec.        ## ISSUE: "text tokens" in spec.
3800      } elsif ($self->{state} == CDATA_SECTION_MSE1_STATE) {      } elsif ($self->{state} == CDATA_SECTION_MSE1_STATE) {
3801          ## XML5: "CDATA bracket state".
3802    
3803        if ($self->{nc} == 0x005D) { # ]        if ($self->{nc} == 0x005D) { # ]
3804                    
3805          $self->{state} = CDATA_SECTION_MSE2_STATE;          $self->{state} = CDATA_SECTION_MSE2_STATE;
# Line 3606  sub _get_next_token ($) { Line 3817  sub _get_next_token ($) {
3817          redo A;          redo A;
3818        } else {        } else {
3819                    
3820            ## XML5: If EOF, "]" is not appended and changed to the data state.
3821          $self->{ct}->{data} .= ']';          $self->{ct}->{data} .= ']';
3822          $self->{state} = CDATA_SECTION_STATE;          $self->{state} = CDATA_SECTION_STATE; ## XML5: Stay in the state.
3823          ## Reconsume.          ## Reconsume.
3824          redo A;          redo A;
3825        }        }
3826      } elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) {      } elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) {
3827          ## XML5: "CDATA end state".
3828    
3829        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
3830          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3831            $self->{s_kwd} = '';
3832                    
3833      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3834        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3653  sub _get_next_token ($) { Line 3868  sub _get_next_token ($) {
3868                    
3869          $self->{ct}->{data} .= ']]'; # character          $self->{ct}->{data} .= ']]'; # character
3870          $self->{state} = CDATA_SECTION_STATE;          $self->{state} = CDATA_SECTION_STATE;
3871          ## Reconsume.          ## Reconsume. ## XML5: Emit.
3872          redo A;          redo A;
3873        }        }
3874      } elsif ($self->{state} == ENTITY_STATE) {      } elsif ($self->{state} == ENTITY_STATE) {
# Line 3721  sub _get_next_token ($) { Line 3936  sub _get_next_token ($) {
3936        if ($self->{prev_state} == DATA_STATE) {        if ($self->{prev_state} == DATA_STATE) {
3937                    
3938          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
3939            $self->{s_kwd} = '';
3940          ## Reconsume.          ## Reconsume.
3941          return  ({type => CHARACTER_TOKEN, data => '&',          return  ({type => CHARACTER_TOKEN, data => '&',
3942                    line => $self->{line_prev},                    line => $self->{line_prev},
# Line 3731  sub _get_next_token ($) { Line 3947  sub _get_next_token ($) {
3947                    
3948          $self->{ca}->{value} .= '&';          $self->{ca}->{value} .= '&';
3949          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
3950            $self->{s_kwd} = '';
3951          ## Reconsume.          ## Reconsume.
3952          redo A;          redo A;
3953        }        }
# Line 3781  sub _get_next_token ($) { Line 3998  sub _get_next_token ($) {
3998          if ($self->{prev_state} == DATA_STATE) {          if ($self->{prev_state} == DATA_STATE) {
3999                        
4000            $self->{state} = $self->{prev_state};            $self->{state} = $self->{prev_state};
4001              $self->{s_kwd} = '';
4002            ## Reconsume.            ## Reconsume.
4003            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
4004                      data => '&#',                      data => '&#',
# Line 3792  sub _get_next_token ($) { Line 4010  sub _get_next_token ($) {
4010                        
4011            $self->{ca}->{value} .= '&#';            $self->{ca}->{value} .= '&#';
4012            $self->{state} = $self->{prev_state};            $self->{state} = $self->{prev_state};
4013              $self->{s_kwd} = '';
4014            ## Reconsume.            ## Reconsume.
4015            redo A;            redo A;
4016          }          }
# Line 3857  sub _get_next_token ($) { Line 4076  sub _get_next_token ($) {
4076        if ($self->{prev_state} == DATA_STATE) {        if ($self->{prev_state} == DATA_STATE) {
4077                    
4078          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
4079            $self->{s_kwd} = '';
4080          ## Reconsume.          ## Reconsume.
4081          return  ({type => CHARACTER_TOKEN, data => chr $code,          return  ({type => CHARACTER_TOKEN, data => chr $code,
4082                      has_reference => 1,
4083                    line => $l, column => $c,                    line => $l, column => $c,
4084                   });                   });
4085          redo A;          redo A;
# Line 3867  sub _get_next_token ($) { Line 4088  sub _get_next_token ($) {
4088          $self->{ca}->{value} .= chr $code;          $self->{ca}->{value} .= chr $code;
4089          $self->{ca}->{has_reference} = 1;          $self->{ca}->{has_reference} = 1;
4090          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
4091            $self->{s_kwd} = '';
4092          ## Reconsume.          ## Reconsume.
4093          redo A;          redo A;
4094        }        }
# Line 3892  sub _get_next_token ($) { Line 4114  sub _get_next_token ($) {
4114          if ($self->{prev_state} == DATA_STATE) {          if ($self->{prev_state} == DATA_STATE) {
4115                        
4116            $self->{state} = $self->{prev_state};            $self->{state} = $self->{prev_state};
4117              $self->{s_kwd} = '';
4118            ## Reconsume.            ## Reconsume.
4119            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
4120                      data => '&' . $self->{s_kwd},                      data => '&' . $self->{s_kwd},
# Line 3903  sub _get_next_token ($) { Line 4126  sub _get_next_token ($) {
4126                        
4127            $self->{ca}->{value} .= '&' . $self->{s_kwd};            $self->{ca}->{value} .= '&' . $self->{s_kwd};
4128            $self->{state} = $self->{prev_state};            $self->{state} = $self->{prev_state};
4129              $self->{s_kwd} = '';
4130            ## Reconsume.            ## Reconsume.
4131            redo A;            redo A;
4132          }          }
# Line 4005  sub _get_next_token ($) { Line 4229  sub _get_next_token ($) {
4229        if ($self->{prev_state} == DATA_STATE) {        if ($self->{prev_state} == DATA_STATE) {
4230                    
4231          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
4232            $self->{s_kwd} = '';
4233          ## Reconsume.          ## Reconsume.
4234          return  ({type => CHARACTER_TOKEN, data => chr $code,          return  ({type => CHARACTER_TOKEN, data => chr $code,
4235                      has_reference => 1,
4236                    line => $l, column => $c,                    line => $l, column => $c,
4237                   });                   });
4238          redo A;          redo A;
# Line 4015  sub _get_next_token ($) { Line 4241  sub _get_next_token ($) {
4241          $self->{ca}->{value} .= chr $code;          $self->{ca}->{value} .= chr $code;
4242          $self->{ca}->{has_reference} = 1;          $self->{ca}->{has_reference} = 1;
4243          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
4244            $self->{s_kwd} = '';
4245          ## Reconsume.          ## Reconsume.
4246          redo A;          redo A;
4247        }        }
# Line 4127  sub _get_next_token ($) { Line 4354  sub _get_next_token ($) {
4354        if ($self->{prev_state} == DATA_STATE) {        if ($self->{prev_state} == DATA_STATE) {
4355                    
4356          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
4357            $self->{s_kwd} = '';
4358          ## Reconsume.          ## Reconsume.
4359          return  ({type => CHARACTER_TOKEN,          return  ({type => CHARACTER_TOKEN,
4360                    data => $data,                    data => $data,
4361                      has_reference => $has_ref,
4362                    line => $self->{line_prev},                    line => $self->{line_prev},
4363                    column => $self->{column_prev} + 1 - length $self->{s_kwd},                    column => $self->{column_prev} + 1 - length $self->{s_kwd},
4364                   });                   });
# Line 4139  sub _get_next_token ($) { Line 4368  sub _get_next_token ($) {
4368          $self->{ca}->{value} .= $data;          $self->{ca}->{value} .= $data;
4369          $self->{ca}->{has_reference} = 1 if $has_ref;          $self->{ca}->{has_reference} = 1 if $has_ref;
4370          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
4371            $self->{s_kwd} = '';
4372            ## Reconsume.
4373            redo A;
4374          }
4375    
4376        ## XML-only states
4377    
4378        } elsif ($self->{state} == PI_STATE) {
4379          if ($is_space->{$self->{nc}} or
4380              $self->{nc} == 0x003F or # ? ## XML5: Same as "Anything else"
4381              $self->{nc} == -1) {
4382            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare pio', ## TODO: type
4383                            line => $self->{line_prev},
4384                            column => $self->{column_prev}
4385                                - 1 * ($self->{nc} != -1));
4386            $self->{state} = BOGUS_COMMENT_STATE;
4387            ## Reconsume.
4388            $self->{ct} = {type => COMMENT_TOKEN,
4389                           data => '?',
4390                           line => $self->{line_prev},
4391                           column => $self->{column_prev}
4392                               - 1 * ($self->{nc} != -1),
4393                          };
4394            redo A;
4395          } else {
4396            $self->{ct} = {type => PI_TOKEN,
4397                           target => chr $self->{nc},
4398                           data => '',
4399                           line => $self->{line_prev},
4400                           column => $self->{column_prev} - 1,
4401                          };
4402            $self->{state} = PI_TARGET_STATE;
4403            
4404        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4405          $self->{line_prev} = $self->{line};
4406          $self->{column_prev} = $self->{column};
4407          $self->{column}++;
4408          $self->{nc}
4409              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4410        } else {
4411          $self->{set_nc}->($self);
4412        }
4413      
4414            redo A;
4415          }
4416        } elsif ($self->{state} == PI_TARGET_STATE) {
4417          if ($is_space->{$self->{nc}}) {
4418            $self->{state} = PI_TARGET_AFTER_STATE;
4419            
4420        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4421          $self->{line_prev} = $self->{line};
4422          $self->{column_prev} = $self->{column};
4423          $self->{column}++;
4424          $self->{nc}
4425              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4426        } else {
4427          $self->{set_nc}->($self);
4428        }
4429      
4430            redo A;
4431          } elsif ($self->{nc} == -1) {
4432            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type
4433            $self->{state} = DATA_STATE;
4434            $self->{s_kwd} = '';
4435          ## Reconsume.          ## Reconsume.
4436            return  ($self->{ct}); # pi
4437            redo A;
4438          } elsif ($self->{nc} == 0x003F) { # ?
4439            $self->{state} = PI_AFTER_STATE;
4440            
4441        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4442          $self->{line_prev} = $self->{line};
4443          $self->{column_prev} = $self->{column};
4444          $self->{column}++;
4445          $self->{nc}
4446              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4447        } else {
4448          $self->{set_nc}->($self);
4449        }
4450      
4451            redo A;
4452          } else {
4453            ## XML5: typo ("tag name" -> "target")
4454            $self->{ct}->{target} .= chr $self->{nc}; # pi
4455            
4456        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4457          $self->{line_prev} = $self->{line};
4458          $self->{column_prev} = $self->{column};
4459          $self->{column}++;
4460          $self->{nc}
4461              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4462        } else {
4463          $self->{set_nc}->($self);
4464        }
4465      
4466            redo A;
4467          }
4468        } elsif ($self->{state} == PI_TARGET_AFTER_STATE) {
4469          if ($is_space->{$self->{nc}}) {
4470            ## Stay in the state.
4471            
4472        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4473          $self->{line_prev} = $self->{line};
4474          $self->{column_prev} = $self->{column};
4475          $self->{column}++;
4476          $self->{nc}
4477              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4478        } else {
4479          $self->{set_nc}->($self);
4480        }
4481      
4482            redo A;
4483          } else {
4484            $self->{state} = PI_DATA_STATE;
4485            ## Reprocess.
4486            redo A;
4487          }
4488        } elsif ($self->{state} == PI_DATA_STATE) {
4489          if ($self->{nc} == 0x003F) { # ?
4490            $self->{state} = PI_DATA_AFTER_STATE;
4491            
4492        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4493          $self->{line_prev} = $self->{line};
4494          $self->{column_prev} = $self->{column};
4495          $self->{column}++;
4496          $self->{nc}
4497              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4498        } else {
4499          $self->{set_nc}->($self);
4500        }
4501      
4502            redo A;
4503          } elsif ($self->{nc} == -1) {
4504            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type
4505            $self->{state} = DATA_STATE;
4506            $self->{s_kwd} = '';
4507            ## Reprocess.
4508            return  ($self->{ct}); # pi
4509            redo A;
4510          } else {
4511            $self->{ct}->{data} .= chr $self->{nc}; # pi
4512            $self->{read_until}->($self->{ct}->{data}, q[?],
4513                                  length $self->{ct}->{data});
4514            ## Stay in the state.
4515            
4516        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4517          $self->{line_prev} = $self->{line};
4518          $self->{column_prev} = $self->{column};
4519          $self->{column}++;
4520          $self->{nc}
4521              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4522        } else {
4523          $self->{set_nc}->($self);
4524        }
4525      
4526            ## Reprocess.
4527            redo A;
4528          }
4529        } elsif ($self->{state} == PI_AFTER_STATE) {
4530          if ($self->{nc} == 0x003E) { # >
4531            $self->{state} = DATA_STATE;
4532            $self->{s_kwd} = '';
4533            
4534        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4535          $self->{line_prev} = $self->{line};
4536          $self->{column_prev} = $self->{column};
4537          $self->{column}++;
4538          $self->{nc}
4539              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4540        } else {
4541          $self->{set_nc}->($self);
4542        }
4543      
4544            return  ($self->{ct}); # pi
4545            redo A;
4546          } elsif ($self->{nc} == 0x003F) { # ?
4547            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no s after target', ## TODO: type
4548                            line => $self->{line_prev},
4549                            column => $self->{column_prev}); ## XML5: no error
4550            $self->{ct}->{data} .= '?';
4551            $self->{state} = PI_DATA_AFTER_STATE;
4552            
4553        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4554          $self->{line_prev} = $self->{line};
4555          $self->{column_prev} = $self->{column};
4556          $self->{column}++;
4557          $self->{nc}
4558              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4559        } else {
4560          $self->{set_nc}->($self);
4561        }
4562      
4563            redo A;
4564          } else {
4565            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no s after target', ## TODO: type
4566                            line => $self->{line_prev},
4567                            column => $self->{column_prev}
4568                                + 1 * ($self->{nc} == -1)); ## XML5: no error
4569            $self->{ct}->{data} .= '?'; ## XML5: not appended
4570            $self->{state} = PI_DATA_STATE;
4571            ## Reprocess.
4572          redo A;          redo A;
4573        }        }
4574        } elsif ($self->{state} == PI_DATA_AFTER_STATE) {
4575          ## XML5: Same as "pi after state" in XML5
4576          if ($self->{nc} == 0x003E) { # >
4577            $self->{state} = DATA_STATE;
4578            $self->{s_kwd} = '';
4579            
4580        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4581          $self->{line_prev} = $self->{line};
4582          $self->{column_prev} = $self->{column};
4583          $self->{column}++;
4584          $self->{nc}
4585              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4586        } else {
4587          $self->{set_nc}->($self);
4588        }
4589      
4590            return  ($self->{ct}); # pi
4591            redo A;
4592          } elsif ($self->{nc} == 0x003F) { # ?
4593            $self->{ct}->{data} .= '?';
4594            ## Stay in the state.
4595            
4596        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4597          $self->{line_prev} = $self->{line};
4598          $self->{column_prev} = $self->{column};
4599          $self->{column}++;
4600          $self->{nc}
4601              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4602        } else {
4603          $self->{set_nc}->($self);
4604        }
4605      
4606            redo A;
4607          } else {
4608            $self->{ct}->{data} .= '?'; ## XML5: not appended
4609            $self->{state} = PI_DATA_STATE;
4610            ## Reprocess.
4611            redo A;
4612          }
4613            
4614      } else {      } else {
4615        die "$0: $self->{state}: Unknown state";        die "$0: $self->{state}: Unknown state";
4616      }      }

Legend:
Removed from v.1.4  
changed lines
  Added in v.1.10

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24