/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm

Parent Directory | Revision Log | View Patch

revision 1.2 by wakaba, Tue Oct 14 04:32:49 2008 UTC revision 1.15 by wakaba, Sat Oct 18 08:05:29 2008 UTC
# Line 15  BEGIN { Line 15  BEGIN {
15      CHARACTER_TOKEN      CHARACTER_TOKEN
16      PI_TOKEN      PI_TOKEN
17      ABORT_TOKEN      ABORT_TOKEN
18        END_OF_DOCTYPE_TOKEN
19        ATTLIST_TOKEN
20        ELEMENT_TOKEN
21        GENERAL_ENTITY_TOKEN
22        PARAMETER_ENTITY_TOKEN
23        NOTATION_TOKEN
24    );    );
25        
26    our %EXPORT_TAGS = (    our %EXPORT_TAGS = (
# Line 27  BEGIN { Line 33  BEGIN {
33        CHARACTER_TOKEN        CHARACTER_TOKEN
34        PI_TOKEN        PI_TOKEN
35        ABORT_TOKEN        ABORT_TOKEN
36          END_OF_DOCTYPE_TOKEN
37          ATTLIST_TOKEN
38          ELEMENT_TOKEN
39          GENERAL_ENTITY_TOKEN
40          PARAMETER_ENTITY_TOKEN
41          NOTATION_TOKEN
42      )],      )],
43    );    );
44  }  }
45    
46    ## NOTE: Differences from the XML5 draft are marked as "XML5:".
47    
48  ## Token types  ## Token types
49    
50  sub DOCTYPE_TOKEN () { 1 }  sub DOCTYPE_TOKEN () { 1 } ## XML5: No DOCTYPE token.
51  sub COMMENT_TOKEN () { 2 }  sub COMMENT_TOKEN () { 2 }
52  sub START_TAG_TOKEN () { 3 }  sub START_TAG_TOKEN () { 3 }
53  sub END_TAG_TOKEN () { 4 }  sub END_TAG_TOKEN () { 4 }
54  sub END_OF_FILE_TOKEN () { 5 }  sub END_OF_FILE_TOKEN () { 5 }
55  sub CHARACTER_TOKEN () { 6 }  sub CHARACTER_TOKEN () { 6 }
56  sub PI_TOKEN () { 7 } # XML5  sub PI_TOKEN () { 7 } ## NOTE: XML only.
57  sub ABORT_TOKEN () { 8 } # Not a token actually  sub ABORT_TOKEN () { 8 } ## NOTE: For internal processing.
58    sub END_OF_DOCTYPE_TOKEN () { 9 } ## NOTE: XML only.
59    sub ATTLIST_TOKEN () { 10 } ## NOTE: XML only.
60    sub ELEMENT_TOKEN () { 11 } ## NOTE: XML only.
61    sub GENERAL_ENTITY_TOKEN () { 12 } ## NOTE: XML only.
62    sub PARAMETER_ENTITY_TOKEN () { 13 } ## NOTE: XML only.
63    sub NOTATION_TOKEN () { 14 } ## NOTE: XML only.
64    
65    ## XML5: XML5 has "empty tag token".  In this implementation, it is
66    ## represented as a start tag token with $self->{self_closing} flag
67    ## set to true.
68    
69    ## XML5: XML5 has "short end tag token".  In this implementation, it
70    ## is represented as an end tag token with $token->{tag_name} set to
71    ## an empty string.
72    
73  package Whatpm::HTML;  package Whatpm::HTML;
74    
# Line 114  sub HEXREF_HEX_STATE () { 48 } Line 142  sub HEXREF_HEX_STATE () { 48 }
142  sub ENTITY_NAME_STATE () { 49 }  sub ENTITY_NAME_STATE () { 49 }
143  sub PCDATA_STATE () { 50 } # "data state" in the spec  sub PCDATA_STATE () { 50 } # "data state" in the spec
144    
145    ## XML-only states
146    sub PI_STATE () { 51 }
147    sub PI_TARGET_STATE () { 52 }
148    sub PI_TARGET_AFTER_STATE () { 53 }
149    sub PI_DATA_STATE () { 54 }
150    sub PI_AFTER_STATE () { 55 }
151    sub PI_DATA_AFTER_STATE () { 56 }
152    sub DOCTYPE_INTERNAL_SUBSET_STATE () { 57 }
153    sub DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 58 }
154    sub BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 59 }
155    sub DOCTYPE_TAG_STATE () { 60 }
156    sub DOCTYPE_MARKUP_DECLARATION_OPEN_STATE () { 61 }
157    sub MD_ATTLIST_STATE () { 62 }
158    sub MD_E_STATE () { 63 }
159    sub MD_ELEMENT_STATE () { 64 }
160    sub MD_ENTITY_STATE () { 65 }
161    sub MD_NOTATION_STATE () { 66 }
162    sub DOCTYPE_MD_STATE () { 67 }
163    sub BEFORE_MD_NAME_STATE () { 68 }
164    sub MD_NAME_STATE () { 69 }
165    sub DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE () { 70 }
166    sub DOCTYPE_ATTLIST_NAME_AFTER_STATE () { 71 }
167    sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE () { 72 }
168    sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE () { 73 }
169    sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE () { 74 }
170    sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE () { 75 }
171    sub BEFORE_ALLOWED_TOKEN_STATE () { 76 }
172    sub ALLOWED_TOKEN_STATE () { 77 }
173    sub AFTER_ALLOWED_TOKEN_STATE () { 78 }
174    sub AFTER_ALLOWED_TOKENS_STATE () { 79 }
175    sub BEFORE_ATTR_DEFAULT_STATE () { 80 }
176    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE () { 81 }
177    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE () { 82 }
178    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE () { 83 }
179    sub AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE () { 84 }
180    
181  ## Tree constructor state constants (see Whatpm::HTML for the full  ## Tree constructor state constants (see Whatpm::HTML for the full
182  ## list and descriptions)  ## list and descriptions)
183    
# Line 175  sub _initialize_tokenizer ($) { Line 239  sub _initialize_tokenizer ($) {
239    #$self->{level}    #$self->{level}
240    #$self->{set_nc}    #$self->{set_nc}
241    #$self->{parse_error}    #$self->{parse_error}
242      #$self->{is_xml} (if XML)
243    
244    $self->{state} = DATA_STATE; # MUST    $self->{state} = DATA_STATE; # MUST
245    #$self->{s_kwd}; # state keyword - initialized when used    $self->{s_kwd} = ''; # Data state keyword
246      #$self->{kwd} = ''; # State-dependent keyword; initialized when used
247    #$self->{entity__value}; # initialized when used    #$self->{entity__value}; # initialized when used
248    #$self->{entity__match}; # initialized when used    #$self->{entity__match}; # initialized when used
249    $self->{content_model} = PCDATA_CONTENT_MODEL; # be    $self->{content_model} = PCDATA_CONTENT_MODEL; # be
# Line 207  sub _initialize_tokenizer ($) { Line 273  sub _initialize_tokenizer ($) {
273    
274  ## A token has:  ## A token has:
275  ##   ->{type} == DOCTYPE_TOKEN, START_TAG_TOKEN, END_TAG_TOKEN, COMMENT_TOKEN,  ##   ->{type} == DOCTYPE_TOKEN, START_TAG_TOKEN, END_TAG_TOKEN, COMMENT_TOKEN,
276  ##       CHARACTER_TOKEN, or END_OF_FILE_TOKEN  ##       CHARACTER_TOKEN, END_OF_FILE_TOKEN, PI_TOKEN, or ABORT_TOKEN
277  ##   ->{name} (DOCTYPE_TOKEN)  ##   ->{name} (DOCTYPE_TOKEN)
278  ##   ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN)  ##   ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN)
279    ##   ->{target} (PI_TOKEN)
280  ##   ->{pubid} (DOCTYPE_TOKEN)  ##   ->{pubid} (DOCTYPE_TOKEN)
281  ##   ->{sysid} (DOCTYPE_TOKEN)  ##   ->{sysid} (DOCTYPE_TOKEN)
282  ##   ->{quirks} == 1 or 0 (DOCTYPE_TOKEN): "force-quirks" flag  ##   ->{quirks} == 1 or 0 (DOCTYPE_TOKEN): "force-quirks" flag
# Line 217  sub _initialize_tokenizer ($) { Line 284  sub _initialize_tokenizer ($) {
284  ##        ->{name}  ##        ->{name}
285  ##        ->{value}  ##        ->{value}
286  ##        ->{has_reference} == 1 or 0  ##        ->{has_reference} == 1 or 0
287  ##   ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN)  ##        ->{index}: Index of the attribute in a tag.
288    ##   ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN, PI_TOKEN)
289    ##   ->{has_reference} == 1 or 0 (CHARACTER_TOKEN)
290    ##   ->{last_index} (ELEMENT_TOKEN): Next attribute's index - 1.
291    ##   ->{has_internal_subset} == 1 or 0 (DOCTYPE_TOKEN)
292    
293  ## NOTE: The "self-closing flag" is held as |$self->{self_closing}|.  ## NOTE: The "self-closing flag" is held as |$self->{self_closing}|.
294  ##     |->{self_closing}| is used to save the value of |$self->{self_closing}|  ##     |->{self_closing}| is used to save the value of |$self->{self_closing}|
295  ##     while the token is pushed back to the stack.  ##     while the token is pushed back to the stack.
# Line 237  my $is_space = { Line 309  my $is_space = {
309    0x0009 => 1, # CHARACTER TABULATION (HT)    0x0009 => 1, # CHARACTER TABULATION (HT)
310    0x000A => 1, # LINE FEED (LF)    0x000A => 1, # LINE FEED (LF)
311    #0x000B => 0, # LINE TABULATION (VT)    #0x000B => 0, # LINE TABULATION (VT)
312    0x000C => 1, # FORM FEED (FF)    0x000C => 1, # FORM FEED (FF) ## XML5: Not a space character.
313    #0x000D => 1, # CARRIAGE RETURN (CR)    #0x000D => 1, # CARRIAGE RETURN (CR)
314    0x0020 => 1, # SPACE (SP)    0x0020 => 1, # SPACE (SP)
315  };  };
# Line 361  sub _get_next_token ($) { Line 433  sub _get_next_token ($) {
433          }          }
434        } elsif ($self->{nc} == 0x002D) { # -        } elsif ($self->{nc} == 0x002D) { # -
435          if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA          if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
436            $self->{s_kwd} .= '-';            if ($self->{s_kwd} eq '<!-') {
             
           if ($self->{s_kwd} eq '<!--') {  
437                            
438              $self->{escape} = 1; # unless $self->{escape};              $self->{escape} = 1; # unless $self->{escape};
439              $self->{s_kwd} = '--';              $self->{s_kwd} = '--';
440              #              #
441            } elsif ($self->{s_kwd} eq '---') {            } elsif ($self->{s_kwd} eq '-') {
442                            
443              $self->{s_kwd} = '--';              $self->{s_kwd} = '--';
444              #              #
445              } elsif ($self->{s_kwd} eq '<!' or $self->{s_kwd} eq '-') {
446                
447                $self->{s_kwd} .= '-';
448                #
449            } else {            } else {
450                            
451                $self->{s_kwd} = '-';
452              #              #
453            }            }
454          }          }
# Line 419  sub _get_next_token ($) { Line 494  sub _get_next_token ($) {
494            if ($self->{s_kwd} eq '--') {            if ($self->{s_kwd} eq '--') {
495                            
496              delete $self->{escape};              delete $self->{escape};
497                #
498            } else {            } else {
499                            
500                #
501            }            }
502            } elsif ($self->{is_xml} and $self->{s_kwd} eq ']]') {
503              
504              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unmatched mse', ## TODO: type
505                              line => $self->{line_prev},
506                              column => $self->{column_prev} - 1);
507              #
508          } else {          } else {
509                        
510              #
511          }          }
512                    
513          $self->{s_kwd} = '';          $self->{s_kwd} = '';
514          #          #
515          } elsif ($self->{nc} == 0x005D) { # ]
516            if ($self->{s_kwd} eq ']' or $self->{s_kwd} eq '') {
517              
518              $self->{s_kwd} .= ']';
519            } elsif ($self->{s_kwd} eq ']]') {
520              
521              #
522            } else {
523              
524              $self->{s_kwd} = '';
525            }
526            #
527        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
528                    
529          $self->{s_kwd} = '';          $self->{s_kwd} = '';
# Line 445  sub _get_next_token ($) { Line 541  sub _get_next_token ($) {
541                     data => chr $self->{nc},                     data => chr $self->{nc},
542                     line => $self->{line}, column => $self->{column},                     line => $self->{line}, column => $self->{column},
543                    };                    };
544        if ($self->{read_until}->($token->{data}, q[-!<>&],        if ($self->{read_until}->($token->{data}, q{-!<>&\]},
545                                  length $token->{data})) {                                  length $token->{data})) {
546          $self->{s_kwd} = '';          $self->{s_kwd} = '';
547        }        }
548    
549        ## Stay in the data state.        ## Stay in the data state.
550        if ($self->{content_model} == PCDATA_CONTENT_MODEL) {        if (not $self->{is_xml} and
551              $self->{content_model} == PCDATA_CONTENT_MODEL) {
552                    
553          $self->{state} = PCDATA_STATE;          $self->{state} = PCDATA_STATE;
554        } else {        } else {
# Line 472  sub _get_next_token ($) { Line 569  sub _get_next_token ($) {
569        return  ($token);        return  ($token);
570        redo A;        redo A;
571      } elsif ($self->{state} == TAG_OPEN_STATE) {      } elsif ($self->{state} == TAG_OPEN_STATE) {
572          ## XML5: "tag state".
573    
574        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
575          if ($self->{nc} == 0x002F) { # /          if ($self->{nc} == 0x002F) { # /
576                        
# Line 490  sub _get_next_token ($) { Line 589  sub _get_next_token ($) {
589            redo A;            redo A;
590          } elsif ($self->{nc} == 0x0021) { # !          } elsif ($self->{nc} == 0x0021) { # !
591                        
592            $self->{s_kwd} = '<' unless $self->{escape};            $self->{s_kwd} = $self->{escaped} ? '' : '<';
593            #            #
594          } else {          } else {
595                        
596              $self->{s_kwd} = '';
597            #            #
598          }          }
599    
# Line 540  sub _get_next_token ($) { Line 640  sub _get_next_token ($) {
640                        
641            $self->{ct}            $self->{ct}
642              = {type => START_TAG_TOKEN,              = {type => START_TAG_TOKEN,
643                 tag_name => chr ($self->{nc} + 0x0020),                 tag_name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)),
644                 line => $self->{line_prev},                 line => $self->{line_prev},
645                 column => $self->{column_prev}};                 column => $self->{column_prev}};
646            $self->{state} = TAG_NAME_STATE;            $self->{state} = TAG_NAME_STATE;
# Line 582  sub _get_next_token ($) { Line 682  sub _get_next_token ($) {
682                            line => $self->{line_prev},                            line => $self->{line_prev},
683                            column => $self->{column_prev});                            column => $self->{column_prev});
684            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
685              $self->{s_kwd} = '';
686                        
687      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
688        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 601  sub _get_next_token ($) { Line 702  sub _get_next_token ($) {
702    
703            redo A;            redo A;
704          } elsif ($self->{nc} == 0x003F) { # ?          } elsif ($self->{nc} == 0x003F) { # ?
705                        if ($self->{is_xml}) {
706            $self->{parse_error}->(level => $self->{level}->{must}, type => 'pio',              
707                            line => $self->{line_prev},              $self->{state} = PI_STATE;
708                            column => $self->{column_prev});              
709            $self->{state} = BOGUS_COMMENT_STATE;      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
710            $self->{ct} = {type => COMMENT_TOKEN, data => '',        $self->{line_prev} = $self->{line};
711                                      line => $self->{line_prev},        $self->{column_prev} = $self->{column};
712                                      column => $self->{column_prev},        $self->{column}++;
713                                     };        $self->{nc}
714            ## $self->{nc} is intentionally left as is            = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
715            redo A;      } else {
716          } else {        $self->{set_nc}->($self);
717        }
718      
719                redo A;
720              } else {
721                
722                $self->{parse_error}->(level => $self->{level}->{must}, type => 'pio',
723                                line => $self->{line_prev},
724                                column => $self->{column_prev});
725                $self->{state} = BOGUS_COMMENT_STATE;
726                $self->{ct} = {type => COMMENT_TOKEN, data => '',
727                               line => $self->{line_prev},
728                               column => $self->{column_prev},
729                              };
730                ## $self->{nc} is intentionally left as is
731                redo A;
732              }
733            } elsif (not $self->{is_xml} or $is_space->{$self->{nc}}) {
734                        
735            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago',            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago',
736                            line => $self->{line_prev},                            line => $self->{line_prev},
737                            column => $self->{column_prev});                            column => $self->{column_prev});
738            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
739              $self->{s_kwd} = '';
740            ## reconsume            ## reconsume
741    
742            return  ({type => CHARACTER_TOKEN, data => '<',            return  ({type => CHARACTER_TOKEN, data => '<',
# Line 626  sub _get_next_token ($) { Line 745  sub _get_next_token ($) {
745                     });                     });
746    
747            redo A;            redo A;
748            } else {
749              ## XML5: "<:" is a parse error.
750              
751              $self->{ct} = {type => START_TAG_TOKEN,
752                                        tag_name => chr ($self->{nc}),
753                                        line => $self->{line_prev},
754                                        column => $self->{column_prev}};
755              $self->{state} = TAG_NAME_STATE;
756              
757        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
758          $self->{line_prev} = $self->{line};
759          $self->{column_prev} = $self->{column};
760          $self->{column}++;
761          $self->{nc}
762              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
763        } else {
764          $self->{set_nc}->($self);
765        }
766      
767              redo A;
768          }          }
769        } else {        } else {
770          die "$0: $self->{content_model} in tag open";          die "$0: $self->{content_model} in tag open";
# Line 634  sub _get_next_token ($) { Line 773  sub _get_next_token ($) {
773        ## NOTE: The "close tag open state" in the spec is implemented as        ## NOTE: The "close tag open state" in the spec is implemented as
774        ## |CLOSE_TAG_OPEN_STATE| and |CDATA_RCDATA_CLOSE_TAG_STATE|.        ## |CLOSE_TAG_OPEN_STATE| and |CDATA_RCDATA_CLOSE_TAG_STATE|.
775    
776          ## XML5: "end tag state".
777    
778        my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"        my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"
779        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
780          if (defined $self->{last_stag_name}) {          if (defined $self->{last_stag_name}) {
781            $self->{state} = CDATA_RCDATA_CLOSE_TAG_STATE;            $self->{state} = CDATA_RCDATA_CLOSE_TAG_STATE;
782            $self->{s_kwd} = '';            $self->{kwd} = '';
783            ## Reconsume.            ## Reconsume.
784            redo A;            redo A;
785          } else {          } else {
# Line 646  sub _get_next_token ($) { Line 787  sub _get_next_token ($) {
787            ## NOTE: See <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>.            ## NOTE: See <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>.
788                        
789            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
790              $self->{s_kwd} = '';
791            ## Reconsume.            ## Reconsume.
792            return  ({type => CHARACTER_TOKEN, data => '</',            return  ({type => CHARACTER_TOKEN, data => '</',
793                      line => $l, column => $c,                      line => $l, column => $c,
# Line 659  sub _get_next_token ($) { Line 801  sub _get_next_token ($) {
801                    
802          $self->{ct}          $self->{ct}
803              = {type => END_TAG_TOKEN,              = {type => END_TAG_TOKEN,
804                 tag_name => chr ($self->{nc} + 0x0020),                 tag_name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)),
805                 line => $l, column => $c};                 line => $l, column => $c};
806          $self->{state} = TAG_NAME_STATE;          $self->{state} = TAG_NAME_STATE;
807                    
# Line 694  sub _get_next_token ($) { Line 836  sub _get_next_token ($) {
836        
837          redo A;          redo A;
838        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
839          $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty end tag',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty end tag',
840                          line => $self->{line_prev}, ## "<" in "</>"                          line => $self->{line_prev}, ## "<" in "</>"
841                          column => $self->{column_prev} - 1);                          column => $self->{column_prev} - 1);
842          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
843                    $self->{s_kwd} = '';
844            if ($self->{is_xml}) {
845              
846              ## XML5: No parse error.
847              
848              ## NOTE: This parser raises a parse error, since it supports
849              ## XML1, not XML5.
850    
851              ## NOTE: A short end tag token.
852              my $ct = {type => END_TAG_TOKEN,
853                        tag_name => '',
854                        line => $self->{line_prev},
855                        column => $self->{column_prev} - 1,
856                       };
857              
858      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
859        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
860        $self->{column_prev} = $self->{column};        $self->{column_prev} = $self->{column};
# Line 710  sub _get_next_token ($) { Line 865  sub _get_next_token ($) {
865        $self->{set_nc}->($self);        $self->{set_nc}->($self);
866      }      }
867        
868              return  ($ct);
869            } else {
870              
871              
872        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
873          $self->{line_prev} = $self->{line};
874          $self->{column_prev} = $self->{column};
875          $self->{column}++;
876          $self->{nc}
877              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
878        } else {
879          $self->{set_nc}->($self);
880        }
881      
882            }
883          redo A;          redo A;
884        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
885                    
886          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare etago');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare etago');
887            $self->{s_kwd} = '';
888          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
889          # reconsume          # reconsume
890    
# Line 722  sub _get_next_token ($) { Line 893  sub _get_next_token ($) {
893                   });                   });
894    
895          redo A;          redo A;
896        } else {        } elsif (not $self->{is_xml} or
897                   $is_space->{$self->{nc}}) {
898                    
899          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus end tag');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus end tag',
900                            line => $self->{line_prev}, # "<" of "</"
901                            column => $self->{column_prev} - 1);
902          $self->{state} = BOGUS_COMMENT_STATE;          $self->{state} = BOGUS_COMMENT_STATE;
903          $self->{ct} = {type => COMMENT_TOKEN, data => '',          $self->{ct} = {type => COMMENT_TOKEN, data => '',
904                                    line => $self->{line_prev}, # "<" of "</"                                    line => $self->{line_prev}, # "<" of "</"
# Line 737  sub _get_next_token ($) { Line 911  sub _get_next_token ($) {
911          ## generated from the bogus end tag, as defined in the          ## generated from the bogus end tag, as defined in the
912          ## "bogus comment state" entry.          ## "bogus comment state" entry.
913          redo A;          redo A;
914          } else {
915            ## XML5: "</:" is a parse error.
916            
917            $self->{ct} = {type => END_TAG_TOKEN,
918                           tag_name => chr ($self->{nc}),
919                           line => $l, column => $c};
920            $self->{state} = TAG_NAME_STATE; ## XML5: "end tag name state".
921            
922        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
923          $self->{line_prev} = $self->{line};
924          $self->{column_prev} = $self->{column};
925          $self->{column}++;
926          $self->{nc}
927              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
928        } else {
929          $self->{set_nc}->($self);
930        }
931      
932            redo A;
933        }        }
934      } elsif ($self->{state} == CDATA_RCDATA_CLOSE_TAG_STATE) {      } elsif ($self->{state} == CDATA_RCDATA_CLOSE_TAG_STATE) {
935        my $ch = substr $self->{last_stag_name}, length $self->{s_kwd}, 1;        my $ch = substr $self->{last_stag_name}, length $self->{kwd}, 1;
936        if (length $ch) {        if (length $ch) {
937          my $CH = $ch;          my $CH = $ch;
938          $ch =~ tr/a-z/A-Z/;          $ch =~ tr/a-z/A-Z/;
# Line 747  sub _get_next_token ($) { Line 940  sub _get_next_token ($) {
940          if ($nch eq $ch or $nch eq $CH) {          if ($nch eq $ch or $nch eq $CH) {
941                        
942            ## Stay in the state.            ## Stay in the state.
943            $self->{s_kwd} .= $nch;            $self->{kwd} .= $nch;
944                        
945      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
946        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 763  sub _get_next_token ($) { Line 956  sub _get_next_token ($) {
956          } else {          } else {
957                        
958            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
959              $self->{s_kwd} = '';
960            ## Reconsume.            ## Reconsume.
961            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
962                      data => '</' . $self->{s_kwd},                      data => '</' . $self->{kwd},
963                      line => $self->{line_prev},                      line => $self->{line_prev},
964                      column => $self->{column_prev} - 1 - length $self->{s_kwd},                      column => $self->{column_prev} - 1 - length $self->{kwd},
965                     });                     });
966            redo A;            redo A;
967          }          }
# Line 781  sub _get_next_token ($) { Line 975  sub _get_next_token ($) {
975                        
976            ## Reconsume.            ## Reconsume.
977            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
978              $self->{s_kwd} = '';
979            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
980                      data => '</' . $self->{s_kwd},                      data => '</' . $self->{kwd},
981                      line => $self->{line_prev},                      line => $self->{line_prev},
982                      column => $self->{column_prev} - 1 - length $self->{s_kwd},                      column => $self->{column_prev} - 1 - length $self->{kwd},
983                     });                     });
984            redo A;            redo A;
985          } else {          } else {
# Line 793  sub _get_next_token ($) { Line 988  sub _get_next_token ($) {
988                = {type => END_TAG_TOKEN,                = {type => END_TAG_TOKEN,
989                   tag_name => $self->{last_stag_name},                   tag_name => $self->{last_stag_name},
990                   line => $self->{line_prev},                   line => $self->{line_prev},
991                   column => $self->{column_prev} - 1 - length $self->{s_kwd}};                   column => $self->{column_prev} - 1 - length $self->{kwd}};
992            $self->{state} = TAG_NAME_STATE;            $self->{state} = TAG_NAME_STATE;
993            ## Reconsume.            ## Reconsume.
994            redo A;            redo A;
# Line 832  sub _get_next_token ($) { Line 1027  sub _get_next_token ($) {
1027            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1028          }          }
1029          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1030            $self->{s_kwd} = '';
1031                    
1032      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1033        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 850  sub _get_next_token ($) { Line 1046  sub _get_next_token ($) {
1046        } elsif (0x0041 <= $self->{nc} and        } elsif (0x0041 <= $self->{nc} and
1047                 $self->{nc} <= 0x005A) { # A..Z                 $self->{nc} <= 0x005A) { # A..Z
1048                    
1049          $self->{ct}->{tag_name} .= chr ($self->{nc} + 0x0020);          $self->{ct}->{tag_name}
1050                .= chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020));
1051            # start tag or end tag            # start tag or end tag
1052          ## Stay in this state          ## Stay in this state
1053                    
# Line 883  sub _get_next_token ($) { Line 1080  sub _get_next_token ($) {
1080            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1081          }          }
1082          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1083            $self->{s_kwd} = '';
1084          # reconsume          # reconsume
1085    
1086          return  ($self->{ct}); # start tag or end tag          return  ($self->{ct}); # start tag or end tag
# Line 922  sub _get_next_token ($) { Line 1120  sub _get_next_token ($) {
1120          redo A;          redo A;
1121        }        }
1122      } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {      } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {
1123          ## XML5: "Tag attribute name before state".
1124    
1125        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
1126                    
1127          ## Stay in the state          ## Stay in the state
# Line 953  sub _get_next_token ($) { Line 1153  sub _get_next_token ($) {
1153            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1154          }          }
1155          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1156            $self->{s_kwd} = '';
1157                    
1158      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1159        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 972  sub _get_next_token ($) { Line 1173  sub _get_next_token ($) {
1173                 $self->{nc} <= 0x005A) { # A..Z                 $self->{nc} <= 0x005A) { # A..Z
1174                    
1175          $self->{ca}          $self->{ca}
1176              = {name => chr ($self->{nc} + 0x0020),              = {name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)),
1177                 value => '',                 value => '',
1178                 line => $self->{line}, column => $self->{column}};                 line => $self->{line}, column => $self->{column}};
1179          $self->{state} = ATTRIBUTE_NAME_STATE;          $self->{state} = ATTRIBUTE_NAME_STATE;
# Line 1020  sub _get_next_token ($) { Line 1221  sub _get_next_token ($) {
1221            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1222          }          }
1223          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1224            $self->{s_kwd} = '';
1225          # reconsume          # reconsume
1226    
1227          return  ($self->{ct}); # start tag or end tag          return  ($self->{ct}); # start tag or end tag
# Line 1032  sub _get_next_token ($) { Line 1234  sub _get_next_token ($) {
1234               0x003D => 1, # =               0x003D => 1, # =
1235              }->{$self->{nc}}) {              }->{$self->{nc}}) {
1236                        
1237              ## XML5: Not a parse error.
1238            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');
1239          } else {          } else {
1240                        
1241              ## XML5: ":" raises a parse error and is ignored.
1242          }          }
1243          $self->{ca}          $self->{ca}
1244              = {name => chr ($self->{nc}),              = {name => chr ($self->{nc}),
# Line 1055  sub _get_next_token ($) { Line 1259  sub _get_next_token ($) {
1259          redo A;          redo A;
1260        }        }
1261      } elsif ($self->{state} == ATTRIBUTE_NAME_STATE) {      } elsif ($self->{state} == ATTRIBUTE_NAME_STATE) {
1262          ## XML5: "Tag attribute name state".
1263    
1264        my $before_leave = sub {        my $before_leave = sub {
1265          if (exists $self->{ct}->{attributes} # start tag or end tag          if (exists $self->{ct}->{attributes} # start tag or end tag
1266              ->{$self->{ca}->{name}}) { # MUST              ->{$self->{ca}->{name}}) { # MUST
# Line 1065  sub _get_next_token ($) { Line 1271  sub _get_next_token ($) {
1271                        
1272            $self->{ct}->{attributes}->{$self->{ca}->{name}}            $self->{ct}->{attributes}->{$self->{ca}->{name}}
1273              = $self->{ca};              = $self->{ca};
1274              $self->{ca}->{index} = ++$self->{ct}->{last_index};
1275          }          }
1276        }; # $before_leave        }; # $before_leave
1277    
# Line 1101  sub _get_next_token ($) { Line 1308  sub _get_next_token ($) {
1308        
1309          redo A;          redo A;
1310        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
1311            if ($self->{is_xml}) {
1312              
1313              ## XML5: Not a parse error.
1314              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1315            } else {
1316              
1317            }
1318    
1319          $before_leave->();          $before_leave->();
1320          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1321                        
# Line 1115  sub _get_next_token ($) { Line 1330  sub _get_next_token ($) {
1330            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1331          }          }
1332          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1333            $self->{s_kwd} = '';
1334                    
1335      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1336        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1133  sub _get_next_token ($) { Line 1349  sub _get_next_token ($) {
1349        } elsif (0x0041 <= $self->{nc} and        } elsif (0x0041 <= $self->{nc} and
1350                 $self->{nc} <= 0x005A) { # A..Z                 $self->{nc} <= 0x005A) { # A..Z
1351                    
1352          $self->{ca}->{name} .= chr ($self->{nc} + 0x0020);          $self->{ca}->{name}
1353                .= chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020));
1354          ## Stay in the state          ## Stay in the state
1355                    
1356      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 1148  sub _get_next_token ($) { Line 1365  sub _get_next_token ($) {
1365        
1366          redo A;          redo A;
1367        } elsif ($self->{nc} == 0x002F) { # /        } elsif ($self->{nc} == 0x002F) { # /
1368            if ($self->{is_xml}) {
1369              
1370              ## XML5: Not a parse error.
1371              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1372            } else {
1373              
1374            }
1375                    
1376          $before_leave->();          $before_leave->();
1377          $self->{state} = SELF_CLOSING_START_TAG_STATE;          $self->{state} = SELF_CLOSING_START_TAG_STATE;
# Line 1182  sub _get_next_token ($) { Line 1406  sub _get_next_token ($) {
1406            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1407          }          }
1408          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1409            $self->{s_kwd} = '';
1410          # reconsume          # reconsume
1411    
1412          return  ($self->{ct}); # start tag or end tag          return  ($self->{ct}); # start tag or end tag
# Line 1191  sub _get_next_token ($) { Line 1416  sub _get_next_token ($) {
1416          if ($self->{nc} == 0x0022 or # "          if ($self->{nc} == 0x0022 or # "
1417              $self->{nc} == 0x0027) { # '              $self->{nc} == 0x0027) { # '
1418                        
1419              ## XML5: Not a parse error.
1420            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');
1421          } else {          } else {
1422                        
# Line 1211  sub _get_next_token ($) { Line 1437  sub _get_next_token ($) {
1437          redo A;          redo A;
1438        }        }
1439      } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {      } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {
1440          ## XML5: "Tag attribute name after state".
1441          
1442        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
1443                    
1444          ## Stay in the state          ## Stay in the state
# Line 1242  sub _get_next_token ($) { Line 1470  sub _get_next_token ($) {
1470        
1471          redo A;          redo A;
1472        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
1473            if ($self->{is_xml}) {
1474              
1475              ## XML5: Not a parse error.
1476              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1477            } else {
1478              
1479            }
1480    
1481          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1482                        
1483            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
# Line 1258  sub _get_next_token ($) { Line 1494  sub _get_next_token ($) {
1494            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1495          }          }
1496          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1497            $self->{s_kwd} = '';
1498                    
1499      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1500        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1277  sub _get_next_token ($) { Line 1514  sub _get_next_token ($) {
1514                 $self->{nc} <= 0x005A) { # A..Z                 $self->{nc} <= 0x005A) { # A..Z
1515                    
1516          $self->{ca}          $self->{ca}
1517              = {name => chr ($self->{nc} + 0x0020),              = {name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)),
1518                 value => '',                 value => '',
1519                 line => $self->{line}, column => $self->{column}};                 line => $self->{line}, column => $self->{column}};
1520          $self->{state} = ATTRIBUTE_NAME_STATE;          $self->{state} = ATTRIBUTE_NAME_STATE;
# Line 1294  sub _get_next_token ($) { Line 1531  sub _get_next_token ($) {
1531        
1532          redo A;          redo A;
1533        } elsif ($self->{nc} == 0x002F) { # /        } elsif ($self->{nc} == 0x002F) { # /
1534            if ($self->{is_xml}) {
1535              
1536              ## XML5: Not a parse error.
1537              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1538            } else {
1539              
1540            }
1541                    
1542          $self->{state} = SELF_CLOSING_START_TAG_STATE;          $self->{state} = SELF_CLOSING_START_TAG_STATE;
1543                    
# Line 1325  sub _get_next_token ($) { Line 1569  sub _get_next_token ($) {
1569          } else {          } else {
1570            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1571          }          }
1572            $self->{s_kwd} = '';
1573          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1574          # reconsume          # reconsume
1575    
# Line 1332  sub _get_next_token ($) { Line 1577  sub _get_next_token ($) {
1577    
1578          redo A;          redo A;
1579        } else {        } else {
1580            if ($self->{is_xml}) {
1581              
1582              ## XML5: Not a parse error.
1583              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1584            } else {
1585              
1586            }
1587    
1588          if ($self->{nc} == 0x0022 or # "          if ($self->{nc} == 0x0022 or # "
1589              $self->{nc} == 0x0027) { # '              $self->{nc} == 0x0027) { # '
1590                        
1591              ## XML5: Not a parse error.
1592            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');
1593          } else {          } else {
1594                        
# Line 1358  sub _get_next_token ($) { Line 1612  sub _get_next_token ($) {
1612          redo A;                  redo A;        
1613        }        }
1614      } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {      } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {
1615          ## XML5: "Tag attribute value before state".
1616    
1617        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
1618                    
1619          ## Stay in the state          ## Stay in the state
# Line 1426  sub _get_next_token ($) { Line 1682  sub _get_next_token ($) {
1682            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1683          }          }
1684          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1685            $self->{s_kwd} = '';
1686                    
1687      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1688        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1459  sub _get_next_token ($) { Line 1716  sub _get_next_token ($) {
1716            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1717          }          }
1718          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1719            $self->{s_kwd} = '';
1720          ## reconsume          ## reconsume
1721    
1722          return  ($self->{ct}); # start tag or end tag          return  ($self->{ct}); # start tag or end tag
# Line 1467  sub _get_next_token ($) { Line 1725  sub _get_next_token ($) {
1725        } else {        } else {
1726          if ($self->{nc} == 0x003D) { # =          if ($self->{nc} == 0x003D) { # =
1727                        
1728              ## XML5: Not a parse error.
1729            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');
1730            } elsif ($self->{is_xml}) {
1731              
1732              ## XML5: No parse error.
1733              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO
1734          } else {          } else {
1735                        
1736          }          }
# Line 1487  sub _get_next_token ($) { Line 1750  sub _get_next_token ($) {
1750          redo A;          redo A;
1751        }        }
1752      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1753          ## XML5: "Tag attribute value double quoted state" and "DOCTYPE
1754          ## ATTLIST attribute value double quoted state".
1755          
1756        if ($self->{nc} == 0x0022) { # "        if ($self->{nc} == 0x0022) { # "
1757                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1758          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;            
1759              ## XML5: "DOCTYPE ATTLIST name after state".
1760              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1761              $self->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE;
1762            } else {
1763              
1764              ## XML5: "Tag attribute name before state".
1765              $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1766            }
1767                    
1768      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1769        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1504  sub _get_next_token ($) { Line 1778  sub _get_next_token ($) {
1778          redo A;          redo A;
1779        } elsif ($self->{nc} == 0x0026) { # &        } elsif ($self->{nc} == 0x0026) { # &
1780                    
1781            ## XML5: Not defined yet.
1782    
1783          ## NOTE: In the spec, the tokenizer is switched to the          ## NOTE: In the spec, the tokenizer is switched to the
1784          ## "entity in attribute value state".  In this implementation, the          ## "entity in attribute value state".  In this implementation, the
1785          ## tokenizer is switched to the |ENTITY_STATE|, which is an          ## tokenizer is switched to the |ENTITY_STATE|, which is an
# Line 1528  sub _get_next_token ($) { Line 1804  sub _get_next_token ($) {
1804          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1805                        
1806            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
1807    
1808              $self->{state} = DATA_STATE;
1809              $self->{s_kwd} = '';
1810              ## reconsume
1811              return  ($self->{ct}); # start tag
1812              redo A;
1813          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1814            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1815            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1537  sub _get_next_token ($) { Line 1819  sub _get_next_token ($) {
1819              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
1820                            
1821            }            }
1822    
1823              $self->{state} = DATA_STATE;
1824              $self->{s_kwd} = '';
1825              ## reconsume
1826              return  ($self->{ct}); # end tag
1827              redo A;
1828            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1829              ## XML5: No parse error above; not defined yet.
1830              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1831              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1832              ## Reconsume.
1833              return  ($self->{ct}); # ATTLIST
1834              redo A;
1835          } else {          } else {
1836            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1837          }          }
         $self->{state} = DATA_STATE;  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
1838        } else {        } else {
1839                    ## XML5 [ATTLIST]: Not defined yet.
1840            if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
1841              
1842              ## XML5: Not a parse error.
1843              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lt in attr value'); ## TODO: type
1844            } else {
1845              
1846            }
1847          $self->{ca}->{value} .= chr ($self->{nc});          $self->{ca}->{value} .= chr ($self->{nc});
1848          $self->{read_until}->($self->{ca}->{value},          $self->{read_until}->($self->{ca}->{value},
1849                                q["&],                                q["&<],
1850                                length $self->{ca}->{value});                                length $self->{ca}->{value});
1851    
1852          ## Stay in the state          ## Stay in the state
# Line 1568  sub _get_next_token ($) { Line 1864  sub _get_next_token ($) {
1864          redo A;          redo A;
1865        }        }
1866      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1867          ## XML5: "Tag attribute value single quoted state" and "DOCTYPE
1868          ## ATTLIST attribute value single quoted state".
1869    
1870        if ($self->{nc} == 0x0027) { # '        if ($self->{nc} == 0x0027) { # '
1871                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1872          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;            
1873              ## XML5: "DOCTYPE ATTLIST name after state".
1874              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1875              $self->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE;
1876            } else {
1877              
1878              ## XML5: "Before attribute name state" (sic).
1879              $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1880            }
1881                    
1882      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1883        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1585  sub _get_next_token ($) { Line 1892  sub _get_next_token ($) {
1892          redo A;          redo A;
1893        } elsif ($self->{nc} == 0x0026) { # &        } elsif ($self->{nc} == 0x0026) { # &
1894                    
1895            ## XML5: Not defined yet.
1896    
1897          ## NOTE: In the spec, the tokenizer is switched to the          ## NOTE: In the spec, the tokenizer is switched to the
1898          ## "entity in attribute value state".  In this implementation, the          ## "entity in attribute value state".  In this implementation, the
1899          ## tokenizer is switched to the |ENTITY_STATE|, which is an          ## tokenizer is switched to the |ENTITY_STATE|, which is an
# Line 1609  sub _get_next_token ($) { Line 1918  sub _get_next_token ($) {
1918          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1919                        
1920            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
1921    
1922              $self->{state} = DATA_STATE;
1923              $self->{s_kwd} = '';
1924              ## reconsume
1925              return  ($self->{ct}); # start tag
1926              redo A;
1927          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1928            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1929            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1618  sub _get_next_token ($) { Line 1933  sub _get_next_token ($) {
1933              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
1934                            
1935            }            }
1936    
1937              $self->{state} = DATA_STATE;
1938              $self->{s_kwd} = '';
1939              ## reconsume
1940              return  ($self->{ct}); # end tag
1941              redo A;
1942            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1943              ## XML5: No parse error above; not defined yet.
1944              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1945              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1946              ## Reconsume.
1947              return  ($self->{ct}); # ATTLIST
1948              redo A;
1949          } else {          } else {
1950            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1951          }          }
         $self->{state} = DATA_STATE;  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
1952        } else {        } else {
1953                    ## XML5 [ATTLIST]: Not defined yet.
1954            if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
1955              
1956              ## XML5: Not a parse error.
1957              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lt in attr value'); ## TODO: type
1958            } else {
1959              
1960            }
1961          $self->{ca}->{value} .= chr ($self->{nc});          $self->{ca}->{value} .= chr ($self->{nc});
1962          $self->{read_until}->($self->{ca}->{value},          $self->{read_until}->($self->{ca}->{value},
1963                                q['&],                                q['&<],
1964                                length $self->{ca}->{value});                                length $self->{ca}->{value});
1965    
1966          ## Stay in the state          ## Stay in the state
# Line 1649  sub _get_next_token ($) { Line 1978  sub _get_next_token ($) {
1978          redo A;          redo A;
1979        }        }
1980      } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {
1981          ## XML5: "Tag attribute value unquoted state".
1982    
1983        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
1984                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1985          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;            
1986              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1987              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
1988            } else {
1989              
1990              ## XML5: "Tag attribute name before state".
1991              $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1992            }
1993                    
1994      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1995        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1666  sub _get_next_token ($) { Line 2004  sub _get_next_token ($) {
2004          redo A;          redo A;
2005        } elsif ($self->{nc} == 0x0026) { # &        } elsif ($self->{nc} == 0x0026) { # &
2006                    
2007    
2008            ## XML5: Not defined yet.
2009    
2010          ## NOTE: In the spec, the tokenizer is switched to the          ## NOTE: In the spec, the tokenizer is switched to the
2011          ## "entity in attribute value state".  In this implementation, the          ## "entity in attribute value state".  In this implementation, the
2012          ## tokenizer is switched to the |ENTITY_STATE|, which is an          ## tokenizer is switched to the |ENTITY_STATE|, which is an
# Line 1689  sub _get_next_token ($) { Line 2030  sub _get_next_token ($) {
2030          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
2031                        
2032            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
2033    
2034              $self->{state} = DATA_STATE;
2035              $self->{s_kwd} = '';
2036              
2037        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2038          $self->{line_prev} = $self->{line};
2039          $self->{column_prev} = $self->{column};
2040          $self->{column}++;
2041          $self->{nc}
2042              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
2043        } else {
2044          $self->{set_nc}->($self);
2045        }
2046      
2047              return  ($self->{ct}); # start tag
2048              redo A;
2049          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
2050            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
2051            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1698  sub _get_next_token ($) { Line 2055  sub _get_next_token ($) {
2055              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
2056                            
2057            }            }
2058          } else {  
2059            die "$0: $self->{ct}->{type}: Unknown token type";            $self->{state} = DATA_STATE;
2060          }            $self->{s_kwd} = '';
2061          $self->{state} = DATA_STATE;            
           
2062      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2063        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
2064        $self->{column_prev} = $self->{column};        $self->{column_prev} = $self->{column};
# Line 1713  sub _get_next_token ($) { Line 2069  sub _get_next_token ($) {
2069        $self->{set_nc}->($self);        $self->{set_nc}->($self);
2070      }      }
2071        
2072              return  ($self->{ct}); # end tag
2073          return  ($self->{ct}); # start tag or end tag            redo A;
2074            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
2075          redo A;            push @{$self->{ct}->{attrdefs}}, $self->{ca};
2076              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2077              
2078        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2079          $self->{line_prev} = $self->{line};
2080          $self->{column_prev} = $self->{column};
2081          $self->{column}++;
2082          $self->{nc}
2083              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
2084        } else {
2085          $self->{set_nc}->($self);
2086        }
2087      
2088              return  ($self->{ct}); # ATTLIST
2089              redo A;
2090            } else {
2091              die "$0: $self->{ct}->{type}: Unknown token type";
2092            }
2093        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');  
2094          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
2095                        
2096              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');
2097            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
2098    
2099              $self->{state} = DATA_STATE;
2100              $self->{s_kwd} = '';
2101              ## reconsume
2102              return  ($self->{ct}); # start tag
2103              redo A;
2104          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
2105              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');
2106            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
2107            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
2108                            
# Line 1731  sub _get_next_token ($) { Line 2111  sub _get_next_token ($) {
2111              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
2112                            
2113            }            }
2114    
2115              $self->{state} = DATA_STATE;
2116              $self->{s_kwd} = '';
2117              ## reconsume
2118              return  ($self->{ct}); # end tag
2119              redo A;
2120            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
2121              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
2122              push @{$self->{ct}->{attrdefs}}, $self->{ca};
2123              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2124              ## Reconsume.
2125              return  ($self->{ct}); # ATTLIST
2126              redo A;
2127          } else {          } else {
2128            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
2129          }          }
         $self->{state} = DATA_STATE;  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
2130        } else {        } else {
2131          if ({          if ({
2132               0x0022 => 1, # "               0x0022 => 1, # "
# Line 1747  sub _get_next_token ($) { Line 2134  sub _get_next_token ($) {
2134               0x003D => 1, # =               0x003D => 1, # =
2135              }->{$self->{nc}}) {              }->{$self->{nc}}) {
2136                        
2137              ## XML5: Not a parse error.
2138            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');
2139          } else {          } else {
2140                        
# Line 1803  sub _get_next_token ($) { Line 2191  sub _get_next_token ($) {
2191            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
2192          }          }
2193          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2194            $self->{s_kwd} = '';
2195                    
2196      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2197        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1850  sub _get_next_token ($) { Line 2239  sub _get_next_token ($) {
2239            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
2240          }          }
2241          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2242            $self->{s_kwd} = '';
2243          ## Reconsume.          ## Reconsume.
2244          return  ($self->{ct}); # start tag or end tag          return  ($self->{ct}); # start tag or end tag
2245          redo A;          redo A;
# Line 1861  sub _get_next_token ($) { Line 2251  sub _get_next_token ($) {
2251          redo A;          redo A;
2252        }        }
2253      } elsif ($self->{state} == SELF_CLOSING_START_TAG_STATE) {      } elsif ($self->{state} == SELF_CLOSING_START_TAG_STATE) {
2254          ## XML5: "Empty tag state".
2255    
2256        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2257          if ($self->{ct}->{type} == END_TAG_TOKEN) {          if ($self->{ct}->{type} == END_TAG_TOKEN) {
2258                        
# Line 1880  sub _get_next_token ($) { Line 2272  sub _get_next_token ($) {
2272          }          }
2273    
2274          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2275            $self->{s_kwd} = '';
2276                    
2277      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2278        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1911  sub _get_next_token ($) { Line 2304  sub _get_next_token ($) {
2304          } else {          } else {
2305            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
2306          }          }
2307            ## XML5: "Tag attribute name before state".
2308          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2309            $self->{s_kwd} = '';
2310          ## Reconsume.          ## Reconsume.
2311          return  ($self->{ct}); # start tag or end tag          return  ($self->{ct}); # start tag or end tag
2312          redo A;          redo A;
# Line 1924  sub _get_next_token ($) { Line 2319  sub _get_next_token ($) {
2319          redo A;          redo A;
2320        }        }
2321      } elsif ($self->{state} == BOGUS_COMMENT_STATE) {      } elsif ($self->{state} == BOGUS_COMMENT_STATE) {
2322        ## (only happen if PCDATA state)        ## XML5: "Bogus comment state" and "DOCTYPE bogus comment state".
2323    
2324        ## NOTE: Unlike spec's "bogus comment state", this implementation        ## NOTE: Unlike spec's "bogus comment state", this implementation
2325        ## consumes characters one-by-one basis.        ## consumes characters one-by-one basis.
2326                
2327        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2328                    if ($self->{in_subset}) {
2329          $self->{state} = DATA_STATE;            
2330              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2331            } else {
2332              
2333              $self->{state} = DATA_STATE;
2334              $self->{s_kwd} = '';
2335            }
2336                    
2337      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2338        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1947  sub _get_next_token ($) { Line 2348  sub _get_next_token ($) {
2348          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
2349          redo A;          redo A;
2350        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
2351                    if ($self->{in_subset}) {
2352          $self->{state} = DATA_STATE;            
2353              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2354            } else {
2355              
2356              $self->{state} = DATA_STATE;
2357              $self->{s_kwd} = '';
2358            }
2359          ## reconsume          ## reconsume
2360    
2361          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 1975  sub _get_next_token ($) { Line 2382  sub _get_next_token ($) {
2382          redo A;          redo A;
2383        }        }
2384      } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {      } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {
2385        ## (only happen if PCDATA state)        ## XML5: "Markup declaration state".
2386                
2387        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2388                    
# Line 1997  sub _get_next_token ($) { Line 2404  sub _get_next_token ($) {
2404          ## ASCII case-insensitive.          ## ASCII case-insensitive.
2405                    
2406          $self->{state} = MD_DOCTYPE_STATE;          $self->{state} = MD_DOCTYPE_STATE;
2407          $self->{s_kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
2408                    
2409      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2410        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2010  sub _get_next_token ($) { Line 2417  sub _get_next_token ($) {
2417      }      }
2418        
2419          redo A;          redo A;
2420        } elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and        } elsif ((($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and
2421                 $self->{open_elements}->[-1]->[1] & FOREIGN_EL and                   $self->{open_elements}->[-1]->[1] & FOREIGN_EL) or
2422                    $self->{is_xml}) and
2423                 $self->{nc} == 0x005B) { # [                 $self->{nc} == 0x005B) { # [
2424                                                    
2425          $self->{state} = MD_CDATA_STATE;          $self->{state} = MD_CDATA_STATE;
2426          $self->{s_kwd} = '[';          $self->{kwd} = '[';
2427                    
2428      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2429        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2049  sub _get_next_token ($) { Line 2457  sub _get_next_token ($) {
2457                                    line => $self->{line_prev},                                    line => $self->{line_prev},
2458                                    column => $self->{column_prev} - 2,                                    column => $self->{column_prev} - 2,
2459                                   };                                   };
2460          $self->{state} = COMMENT_START_STATE;          $self->{state} = COMMENT_START_STATE; ## XML5: "comment state".
2461                    
2462      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2463        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2085  sub _get_next_token ($) { Line 2493  sub _get_next_token ($) {
2493              0x0054, # T              0x0054, # T
2494              0x0059, # Y              0x0059, # Y
2495              0x0050, # P              0x0050, # P
2496            ]->[length $self->{s_kwd}] or            ]->[length $self->{kwd}] or
2497            $self->{nc} == [            $self->{nc} == [
2498              undef,              undef,
2499              0x006F, # o              0x006F, # o
# Line 2093  sub _get_next_token ($) { Line 2501  sub _get_next_token ($) {
2501              0x0074, # t              0x0074, # t
2502              0x0079, # y              0x0079, # y
2503              0x0070, # p              0x0070, # p
2504            ]->[length $self->{s_kwd}]) {            ]->[length $self->{kwd}]) {
2505                    
2506          ## Stay in the state.          ## Stay in the state.
2507          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
2508                    
2509      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2510        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2109  sub _get_next_token ($) { Line 2517  sub _get_next_token ($) {
2517      }      }
2518        
2519          redo A;          redo A;
2520        } elsif ((length $self->{s_kwd}) == 6 and        } elsif ((length $self->{kwd}) == 6 and
2521                 ($self->{nc} == 0x0045 or # E                 ($self->{nc} == 0x0045 or # E
2522                  $self->{nc} == 0x0065)) { # e                  $self->{nc} == 0x0065)) { # e
2523                    if ($self->{is_xml} and
2524                ($self->{kwd} ne 'DOCTYP' or $self->{nc} == 0x0065)) {
2525              
2526              ## XML5: case-sensitive.
2527              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO
2528                              text => 'DOCTYPE',
2529                              line => $self->{line_prev},
2530                              column => $self->{column_prev} - 5);
2531            } else {
2532              
2533            }
2534          $self->{state} = DOCTYPE_STATE;          $self->{state} = DOCTYPE_STATE;
2535          $self->{ct} = {type => DOCTYPE_TOKEN,          $self->{ct} = {type => DOCTYPE_TOKEN,
2536                                    quirks => 1,                                    quirks => 1,
# Line 2135  sub _get_next_token ($) { Line 2553  sub _get_next_token ($) {
2553                                    
2554          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
2555                          line => $self->{line_prev},                          line => $self->{line_prev},
2556                          column => $self->{column_prev} - 1 - length $self->{s_kwd});                          column => $self->{column_prev} - 1 - length $self->{kwd});
2557          $self->{state} = BOGUS_COMMENT_STATE;          $self->{state} = BOGUS_COMMENT_STATE;
2558          ## Reconsume.          ## Reconsume.
2559          $self->{ct} = {type => COMMENT_TOKEN,          $self->{ct} = {type => COMMENT_TOKEN,
2560                                    data => $self->{s_kwd},                                    data => $self->{kwd},
2561                                    line => $self->{line_prev},                                    line => $self->{line_prev},
2562                                    column => $self->{column_prev} - 1 - length $self->{s_kwd},                                    column => $self->{column_prev} - 1 - length $self->{kwd},
2563                                   };                                   };
2564          redo A;          redo A;
2565        }        }
# Line 2152  sub _get_next_token ($) { Line 2570  sub _get_next_token ($) {
2570              '[CD' => 0x0041, # A              '[CD' => 0x0041, # A
2571              '[CDA' => 0x0054, # T              '[CDA' => 0x0054, # T
2572              '[CDAT' => 0x0041, # A              '[CDAT' => 0x0041, # A
2573            }->{$self->{s_kwd}}) {            }->{$self->{kwd}}) {
2574                    
2575          ## Stay in the state.          ## Stay in the state.
2576          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
2577                    
2578      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2579        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2168  sub _get_next_token ($) { Line 2586  sub _get_next_token ($) {
2586      }      }
2587        
2588          redo A;          redo A;
2589        } elsif ($self->{s_kwd} eq '[CDATA' and        } elsif ($self->{kwd} eq '[CDATA' and
2590                 $self->{nc} == 0x005B) { # [                 $self->{nc} == 0x005B) { # [
2591                    if ($self->{is_xml} and
2592                not $self->{tainted} and
2593                @{$self->{open_elements} or []} == 0) {
2594              
2595              $self->{parse_error}->(level => $self->{level}->{must}, type => 'cdata outside of root element',
2596                              line => $self->{line_prev},
2597                              column => $self->{column_prev} - 7);
2598              $self->{tainted} = 1;
2599            } else {
2600              
2601            }
2602    
2603          $self->{ct} = {type => CHARACTER_TOKEN,          $self->{ct} = {type => CHARACTER_TOKEN,
2604                                    data => '',                                    data => '',
2605                                    line => $self->{line_prev},                                    line => $self->{line_prev},
# Line 2192  sub _get_next_token ($) { Line 2621  sub _get_next_token ($) {
2621                    
2622          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
2623                          line => $self->{line_prev},                          line => $self->{line_prev},
2624                          column => $self->{column_prev} - 1 - length $self->{s_kwd});                          column => $self->{column_prev} - 1 - length $self->{kwd});
2625          $self->{state} = BOGUS_COMMENT_STATE;          $self->{state} = BOGUS_COMMENT_STATE;
2626          ## Reconsume.          ## Reconsume.
2627          $self->{ct} = {type => COMMENT_TOKEN,          $self->{ct} = {type => COMMENT_TOKEN,
2628                                    data => $self->{s_kwd},                                    data => $self->{kwd},
2629                                    line => $self->{line_prev},                                    line => $self->{line_prev},
2630                                    column => $self->{column_prev} - 1 - length $self->{s_kwd},                                    column => $self->{column_prev} - 1 - length $self->{kwd},
2631                                   };                                   };
2632          redo A;          redo A;
2633        }        }
# Line 2219  sub _get_next_token ($) { Line 2648  sub _get_next_token ($) {
2648        
2649          redo A;          redo A;
2650        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
2651          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');
2652          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2653              
2654              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2655            } else {
2656              
2657              $self->{state} = DATA_STATE;
2658              $self->{s_kwd} = '';
2659            }
2660                    
2661      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2662        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2238  sub _get_next_token ($) { Line 2673  sub _get_next_token ($) {
2673    
2674          redo A;          redo A;
2675        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2676          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2677          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2678              
2679              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2680            } else {
2681              
2682              $self->{state} = DATA_STATE;
2683              $self->{s_kwd} = '';
2684            }
2685          ## reconsume          ## reconsume
2686    
2687          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2281  sub _get_next_token ($) { Line 2722  sub _get_next_token ($) {
2722        
2723          redo A;          redo A;
2724        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
2725          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');
2726          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2727              
2728              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2729            } else {
2730              
2731              $self->{state} = DATA_STATE;
2732              $self->{s_kwd} = '';
2733            }
2734                    
2735      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2736        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2300  sub _get_next_token ($) { Line 2747  sub _get_next_token ($) {
2747    
2748          redo A;          redo A;
2749        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2750          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2751          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2752              
2753              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2754            } else {
2755              
2756              $self->{state} = DATA_STATE;
2757              $self->{s_kwd} = '';
2758            }
2759          ## reconsume          ## reconsume
2760    
2761          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2327  sub _get_next_token ($) { Line 2780  sub _get_next_token ($) {
2780          redo A;          redo A;
2781        }        }
2782      } elsif ($self->{state} == COMMENT_STATE) {      } elsif ($self->{state} == COMMENT_STATE) {
2783          ## XML5: "Comment state" and "DOCTYPE comment state".
2784    
2785        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2786                    
2787          $self->{state} = COMMENT_END_DASH_STATE;          $self->{state} = COMMENT_END_DASH_STATE;
# Line 2343  sub _get_next_token ($) { Line 2798  sub _get_next_token ($) {
2798        
2799          redo A;          redo A;
2800        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2801          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2802          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2803              
2804              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2805            } else {
2806              
2807              $self->{state} = DATA_STATE;
2808              $self->{s_kwd} = '';
2809            }
2810          ## reconsume          ## reconsume
2811    
2812          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2373  sub _get_next_token ($) { Line 2834  sub _get_next_token ($) {
2834          redo A;          redo A;
2835        }        }
2836      } elsif ($self->{state} == COMMENT_END_DASH_STATE) {      } elsif ($self->{state} == COMMENT_END_DASH_STATE) {
2837          ## XML5: "Comment dash state" and "DOCTYPE comment dash state".
2838    
2839        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2840                    
2841          $self->{state} = COMMENT_END_STATE;          $self->{state} = COMMENT_END_STATE;
# Line 2389  sub _get_next_token ($) { Line 2852  sub _get_next_token ($) {
2852        
2853          redo A;          redo A;
2854        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2855          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2856          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2857              
2858              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2859            } else {
2860              
2861              $self->{state} = DATA_STATE;
2862              $self->{s_kwd} = '';
2863            }
2864          ## reconsume          ## reconsume
2865    
2866          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2415  sub _get_next_token ($) { Line 2884  sub _get_next_token ($) {
2884          redo A;          redo A;
2885        }        }
2886      } elsif ($self->{state} == COMMENT_END_STATE) {      } elsif ($self->{state} == COMMENT_END_STATE) {
2887          ## XML5: "Comment end state" and "DOCTYPE comment end state".
2888    
2889        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2890                    if ($self->{in_subset}) {
2891          $self->{state} = DATA_STATE;            
2892              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2893            } else {
2894              
2895              $self->{state} = DATA_STATE;
2896              $self->{s_kwd} = '';
2897            }
2898                    
2899      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2900        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2435  sub _get_next_token ($) { Line 2912  sub _get_next_token ($) {
2912          redo A;          redo A;
2913        } elsif ($self->{nc} == 0x002D) { # -        } elsif ($self->{nc} == 0x002D) { # -
2914                    
2915            ## XML5: Not a parse error.
2916          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',
2917                          line => $self->{line_prev},                          line => $self->{line_prev},
2918                          column => $self->{column_prev});                          column => $self->{column_prev});
# Line 2453  sub _get_next_token ($) { Line 2931  sub _get_next_token ($) {
2931        
2932          redo A;          redo A;
2933        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2934          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2935          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2936              
2937              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2938            } else {
2939              
2940              $self->{state} = DATA_STATE;
2941              $self->{s_kwd} = '';
2942            }
2943          ## reconsume          ## reconsume
2944    
2945          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2463  sub _get_next_token ($) { Line 2947  sub _get_next_token ($) {
2947          redo A;          redo A;
2948        } else {        } else {
2949                    
2950            ## XML5: Not a parse error.
2951          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',
2952                          line => $self->{line_prev},                          line => $self->{line_prev},
2953                          column => $self->{column_prev});                          column => $self->{column_prev});
# Line 2499  sub _get_next_token ($) { Line 2984  sub _get_next_token ($) {
2984          redo A;          redo A;
2985        } else {        } else {
2986                    
2987            ## XML5: Unless EOF, switch to the bogus comment state.
2988          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before DOCTYPE name');
2989          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
2990          ## reconsume          ## reconsume
2991          redo A;          redo A;
2992        }        }
2993      } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {
2994          ## XML5: "DOCTYPE root name before state".
2995    
2996        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
2997                    
2998          ## Stay in the state          ## Stay in the state
# Line 2522  sub _get_next_token ($) { Line 3010  sub _get_next_token ($) {
3010          redo A;          redo A;
3011        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3012                    
3013            ## XML5: No parse error.
3014          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');
3015          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3016            $self->{s_kwd} = '';
3017                    
3018      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3019        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2543  sub _get_next_token ($) { Line 3033  sub _get_next_token ($) {
3033                    
3034          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');
3035          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3036            $self->{s_kwd} = '';
3037          ## reconsume          ## reconsume
3038    
3039          return  ($self->{ct}); # DOCTYPE (quirks)          return  ($self->{ct}); # DOCTYPE (quirks)
3040    
3041          redo A;          redo A;
3042          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
3043            
3044            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');
3045            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3046            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3047            $self->{in_subset} = 1;
3048            
3049        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3050          $self->{line_prev} = $self->{line};
3051          $self->{column_prev} = $self->{column};
3052          $self->{column}++;
3053          $self->{nc}
3054              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3055        } else {
3056          $self->{set_nc}->($self);
3057        }
3058      
3059            return  ($self->{ct}); # DOCTYPE
3060            redo A;
3061        } else {        } else {
3062                    
3063          $self->{ct}->{name} = chr $self->{nc};          $self->{ct}->{name} = chr $self->{nc};
# Line 2567  sub _get_next_token ($) { Line 3077  sub _get_next_token ($) {
3077          redo A;          redo A;
3078        }        }
3079      } elsif ($self->{state} == DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == DOCTYPE_NAME_STATE) {
3080  ## ISSUE: Redundant "First," in the spec.        ## XML5: "DOCTYPE root name state".
3081    
3082          ## ISSUE: Redundant "First," in the spec.
3083    
3084        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
3085                    
3086          $self->{state} = AFTER_DOCTYPE_NAME_STATE;          $self->{state} = AFTER_DOCTYPE_NAME_STATE;
# Line 2586  sub _get_next_token ($) { Line 3099  sub _get_next_token ($) {
3099        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3100                    
3101          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3102            $self->{s_kwd} = '';
3103                    
3104      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3105        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2605  sub _get_next_token ($) { Line 3119  sub _get_next_token ($) {
3119                    
3120          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3121          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3122            $self->{s_kwd} = '';
3123          ## reconsume          ## reconsume
3124    
3125          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
3126          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
3127    
3128          redo A;          redo A;
3129          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
3130            
3131            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3132            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3133            $self->{in_subset} = 1;
3134            
3135        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3136          $self->{line_prev} = $self->{line};
3137          $self->{column_prev} = $self->{column};
3138          $self->{column}++;
3139          $self->{nc}
3140              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3141        } else {
3142          $self->{set_nc}->($self);
3143        }
3144      
3145            return  ($self->{ct}); # DOCTYPE
3146            redo A;
3147        } else {        } else {
3148                    
3149          $self->{ct}->{name}          $self->{ct}->{name}
# Line 2630  sub _get_next_token ($) { Line 3163  sub _get_next_token ($) {
3163          redo A;          redo A;
3164        }        }
3165      } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {
3166          ## XML5: Corresponding to XML5's "DOCTYPE root name after
3167          ## state", but implemented differently.
3168    
3169        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
3170                    
3171          ## Stay in the state          ## Stay in the state
# Line 2648  sub _get_next_token ($) { Line 3184  sub _get_next_token ($) {
3184        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3185                    
3186          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3187            $self->{s_kwd} = '';
3188                    
3189      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3190        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2667  sub _get_next_token ($) { Line 3204  sub _get_next_token ($) {
3204                    
3205          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3206          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3207            $self->{s_kwd} = '';
3208          ## reconsume          ## reconsume
3209    
3210          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 2675  sub _get_next_token ($) { Line 3213  sub _get_next_token ($) {
3213          redo A;          redo A;
3214        } elsif ($self->{nc} == 0x0050 or # P        } elsif ($self->{nc} == 0x0050 or # P
3215                 $self->{nc} == 0x0070) { # p                 $self->{nc} == 0x0070) { # p
3216            
3217          $self->{state} = PUBLIC_STATE;          $self->{state} = PUBLIC_STATE;
3218          $self->{s_kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
3219                    
3220      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3221        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2691  sub _get_next_token ($) { Line 3230  sub _get_next_token ($) {
3230          redo A;          redo A;
3231        } elsif ($self->{nc} == 0x0053 or # S        } elsif ($self->{nc} == 0x0053 or # S
3232                 $self->{nc} == 0x0073) { # s                 $self->{nc} == 0x0073) { # s
3233            
3234          $self->{state} = SYSTEM_STATE;          $self->{state} = SYSTEM_STATE;
3235          $self->{s_kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
3236                    
3237      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3238        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2705  sub _get_next_token ($) { Line 3245  sub _get_next_token ($) {
3245      }      }
3246        
3247          redo A;          redo A;
3248          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
3249            
3250            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3251            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3252            $self->{in_subset} = 1;
3253            
3254        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3255          $self->{line_prev} = $self->{line};
3256          $self->{column_prev} = $self->{column};
3257          $self->{column}++;
3258          $self->{nc}
3259              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3260        } else {
3261          $self->{set_nc}->($self);
3262        }
3263      
3264            return  ($self->{ct}); # DOCTYPE
3265            redo A;
3266        } else {        } else {
3267                    
3268          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name');
# Line 2732  sub _get_next_token ($) { Line 3290  sub _get_next_token ($) {
3290              0x0042, # B              0x0042, # B
3291              0x004C, # L              0x004C, # L
3292              0x0049, # I              0x0049, # I
3293            ]->[length $self->{s_kwd}] or            ]->[length $self->{kwd}] or
3294            $self->{nc} == [            $self->{nc} == [
3295              undef,              undef,
3296              0x0075, # u              0x0075, # u
3297              0x0062, # b              0x0062, # b
3298              0x006C, # l              0x006C, # l
3299              0x0069, # i              0x0069, # i
3300            ]->[length $self->{s_kwd}]) {            ]->[length $self->{kwd}]) {
3301                    
3302          ## Stay in the state.          ## Stay in the state.
3303          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
3304                    
3305      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3306        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2755  sub _get_next_token ($) { Line 3313  sub _get_next_token ($) {
3313      }      }
3314        
3315          redo A;          redo A;
3316        } elsif ((length $self->{s_kwd}) == 5 and        } elsif ((length $self->{kwd}) == 5 and
3317                 ($self->{nc} == 0x0043 or # C                 ($self->{nc} == 0x0043 or # C
3318                  $self->{nc} == 0x0063)) { # c                  $self->{nc} == 0x0063)) { # c
3319                    if ($self->{is_xml} and
3320                ($self->{kwd} ne 'PUBLI' or $self->{nc} == 0x0063)) { # c
3321              
3322              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
3323                              text => 'PUBLIC',
3324                              line => $self->{line_prev},
3325                              column => $self->{column_prev} - 4);
3326            } else {
3327              
3328            }
3329          $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;          $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
3330                    
3331      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 2776  sub _get_next_token ($) { Line 3343  sub _get_next_token ($) {
3343                    
3344          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name',
3345                          line => $self->{line_prev},                          line => $self->{line_prev},
3346                          column => $self->{column_prev} + 1 - length $self->{s_kwd});                          column => $self->{column_prev} + 1 - length $self->{kwd});
3347          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
3348    
3349          $self->{state} = BOGUS_DOCTYPE_STATE;          $self->{state} = BOGUS_DOCTYPE_STATE;
# Line 2791  sub _get_next_token ($) { Line 3358  sub _get_next_token ($) {
3358              0x0053, # S              0x0053, # S
3359              0x0054, # T              0x0054, # T
3360              0x0045, # E              0x0045, # E
3361            ]->[length $self->{s_kwd}] or            ]->[length $self->{kwd}] or
3362            $self->{nc} == [            $self->{nc} == [
3363              undef,              undef,
3364              0x0079, # y              0x0079, # y
3365              0x0073, # s              0x0073, # s
3366              0x0074, # t              0x0074, # t
3367              0x0065, # e              0x0065, # e
3368            ]->[length $self->{s_kwd}]) {            ]->[length $self->{kwd}]) {
3369                    
3370          ## Stay in the state.          ## Stay in the state.
3371          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
3372                    
3373      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3374        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2814  sub _get_next_token ($) { Line 3381  sub _get_next_token ($) {
3381      }      }
3382        
3383          redo A;          redo A;
3384        } elsif ((length $self->{s_kwd}) == 5 and        } elsif ((length $self->{kwd}) == 5 and
3385                 ($self->{nc} == 0x004D or # M                 ($self->{nc} == 0x004D or # M
3386                  $self->{nc} == 0x006D)) { # m                  $self->{nc} == 0x006D)) { # m
3387                    if ($self->{is_xml} and
3388                ($self->{kwd} ne 'SYSTE' or $self->{nc} == 0x006D)) { # m
3389              
3390              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
3391                              text => 'SYSTEM',
3392                              line => $self->{line_prev},
3393                              column => $self->{column_prev} - 4);
3394            } else {
3395              
3396            }
3397          $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;          $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
3398                    
3399      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 2835  sub _get_next_token ($) { Line 3411  sub _get_next_token ($) {
3411                    
3412          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name',
3413                          line => $self->{line_prev},                          line => $self->{line_prev},
3414                          column => $self->{column_prev} + 1 - length $self->{s_kwd});                          column => $self->{column_prev} + 1 - length $self->{kwd});
3415          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
3416    
3417          $self->{state} = BOGUS_DOCTYPE_STATE;          $self->{state} = BOGUS_DOCTYPE_STATE;
# Line 2895  sub _get_next_token ($) { Line 3471  sub _get_next_token ($) {
3471          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');
3472    
3473          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3474            $self->{s_kwd} = '';
3475                    
3476      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3477        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2916  sub _get_next_token ($) { Line 3493  sub _get_next_token ($) {
3493          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3494    
3495          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3496            $self->{s_kwd} = '';
3497          ## reconsume          ## reconsume
3498    
3499          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
3500          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
3501    
3502          redo A;          redo A;
3503          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
3504            
3505            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');
3506            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3507            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3508            $self->{in_subset} = 1;
3509            
3510        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3511          $self->{line_prev} = $self->{line};
3512          $self->{column_prev} = $self->{column};
3513          $self->{column}++;
3514          $self->{nc}
3515              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3516        } else {
3517          $self->{set_nc}->($self);
3518        }
3519      
3520            return  ($self->{ct}); # DOCTYPE
3521            redo A;
3522        } else {        } else {
3523                    
3524          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC');
# Line 2962  sub _get_next_token ($) { Line 3559  sub _get_next_token ($) {
3559          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3560    
3561          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3562            $self->{s_kwd} = '';
3563                    
3564      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3565        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2983  sub _get_next_token ($) { Line 3581  sub _get_next_token ($) {
3581          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3582    
3583          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3584            $self->{s_kwd} = '';
3585          ## reconsume          ## reconsume
3586    
3587          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 3031  sub _get_next_token ($) { Line 3630  sub _get_next_token ($) {
3630          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3631    
3632          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3633            $self->{s_kwd} = '';
3634                    
3635      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3636        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3052  sub _get_next_token ($) { Line 3652  sub _get_next_token ($) {
3652          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3653    
3654          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3655            $self->{s_kwd} = '';
3656          ## reconsume          ## reconsume
3657    
3658          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 3128  sub _get_next_token ($) { Line 3729  sub _get_next_token ($) {
3729        
3730          redo A;          redo A;
3731        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3732                    if ($self->{is_xml}) {
3733              
3734              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
3735            } else {
3736              
3737            }
3738          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3739            $self->{s_kwd} = '';
3740                    
3741      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3742        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3150  sub _get_next_token ($) { Line 3757  sub _get_next_token ($) {
3757          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3758    
3759          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3760            $self->{s_kwd} = '';
3761          ## reconsume          ## reconsume
3762    
3763          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
3764          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
3765    
3766          redo A;          redo A;
3767          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
3768            
3769            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
3770            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3771            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3772            $self->{in_subset} = 1;
3773            
3774        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3775          $self->{line_prev} = $self->{line};
3776          $self->{column_prev} = $self->{column};
3777          $self->{column}++;
3778          $self->{nc}
3779              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3780        } else {
3781          $self->{set_nc}->($self);
3782        }
3783      
3784            return  ($self->{ct}); # DOCTYPE
3785            redo A;
3786        } else {        } else {
3787                    
3788          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC literal');
# Line 3227  sub _get_next_token ($) { Line 3854  sub _get_next_token ($) {
3854                    
3855          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
3856          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3857            $self->{s_kwd} = '';
3858                    
3859      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3860        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3248  sub _get_next_token ($) { Line 3876  sub _get_next_token ($) {
3876          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3877    
3878          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3879            $self->{s_kwd} = '';
3880          ## reconsume          ## reconsume
3881    
3882          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
3883          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
3884    
3885          redo A;          redo A;
3886          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
3887            
3888            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
3889    
3890            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3891            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3892            $self->{in_subset} = 1;
3893            
3894        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3895          $self->{line_prev} = $self->{line};
3896          $self->{column_prev} = $self->{column};
3897          $self->{column}++;
3898          $self->{nc}
3899              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3900        } else {
3901          $self->{set_nc}->($self);
3902        }
3903      
3904            return  ($self->{ct}); # DOCTYPE
3905            redo A;
3906        } else {        } else {
3907                    
3908          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM');
# Line 3289  sub _get_next_token ($) { Line 3938  sub _get_next_token ($) {
3938      }      }
3939        
3940          redo A;          redo A;
3941        } elsif ($self->{nc} == 0x003E) { # >        } elsif (not $self->{is_xml} and $self->{nc} == 0x003E) { # >
3942                    
3943          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
3944    
3945          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3946            $self->{s_kwd} = '';
3947                    
3948      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3949        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3315  sub _get_next_token ($) { Line 3965  sub _get_next_token ($) {
3965          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
3966    
3967          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3968            $self->{s_kwd} = '';
3969          ## reconsume          ## reconsume
3970    
3971          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 3358  sub _get_next_token ($) { Line 4009  sub _get_next_token ($) {
4009      }      }
4010        
4011          redo A;          redo A;
4012        } elsif ($self->{nc} == 0x003E) { # >        } elsif (not $self->{is_xml} and $self->{nc} == 0x003E) { # >
4013                    
4014          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4015    
4016          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
4017            $self->{s_kwd} = '';
4018                    
4019      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4020        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3384  sub _get_next_token ($) { Line 4036  sub _get_next_token ($) {
4036          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4037    
4038          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
4039            $self->{s_kwd} = '';
4040          ## reconsume          ## reconsume
4041    
4042          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 3430  sub _get_next_token ($) { Line 4083  sub _get_next_token ($) {
4083        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
4084                    
4085          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
4086            $self->{s_kwd} = '';
4087                    
4088      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4089        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3449  sub _get_next_token ($) { Line 4103  sub _get_next_token ($) {
4103                    
4104          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
4105          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
4106            $self->{s_kwd} = '';
4107          ## reconsume          ## reconsume
4108    
4109          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
4110          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
4111    
4112          redo A;          redo A;
4113          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
4114            
4115            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4116            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
4117            $self->{in_subset} = 1;
4118            
4119        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4120          $self->{line_prev} = $self->{line};
4121          $self->{column_prev} = $self->{column};
4122          $self->{column}++;
4123          $self->{nc}
4124              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4125        } else {
4126          $self->{set_nc}->($self);
4127        }
4128      
4129            return  ($self->{ct}); # DOCTYPE
4130            redo A;
4131        } else {        } else {
4132                    
4133          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');
# Line 3478  sub _get_next_token ($) { Line 4151  sub _get_next_token ($) {
4151        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
4152                    
4153          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
4154            $self->{s_kwd} = '';
4155                    
4156      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4157        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3493  sub _get_next_token ($) { Line 4167  sub _get_next_token ($) {
4167          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
4168    
4169          redo A;          redo A;
4170          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
4171            
4172            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4173            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
4174            $self->{in_subset} = 1;
4175            
4176        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4177          $self->{line_prev} = $self->{line};
4178          $self->{column_prev} = $self->{column};
4179          $self->{column}++;
4180          $self->{nc}
4181              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4182        } else {
4183          $self->{set_nc}->($self);
4184        }
4185      
4186            return  ($self->{ct}); # DOCTYPE
4187            redo A;
4188        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4189                    
4190          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
4191            $self->{s_kwd} = '';
4192          ## reconsume          ## reconsume
4193    
4194          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
# Line 3504  sub _get_next_token ($) { Line 4197  sub _get_next_token ($) {
4197        } else {        } else {
4198                    
4199          my $s = '';          my $s = '';
4200          $self->{read_until}->($s, q[>], 0);          $self->{read_until}->($s, q{>[}, 0);
4201    
4202          ## Stay in the state          ## Stay in the state
4203                    
# Line 3524  sub _get_next_token ($) { Line 4217  sub _get_next_token ($) {
4217        ## NOTE: "CDATA section state" in the state is jointly implemented        ## NOTE: "CDATA section state" in the state is jointly implemented
4218        ## by three states, |CDATA_SECTION_STATE|, |CDATA_SECTION_MSE1_STATE|,        ## by three states, |CDATA_SECTION_STATE|, |CDATA_SECTION_MSE1_STATE|,
4219        ## and |CDATA_SECTION_MSE2_STATE|.        ## and |CDATA_SECTION_MSE2_STATE|.
4220    
4221          ## XML5: "CDATA state".
4222                
4223        if ($self->{nc} == 0x005D) { # ]        if ($self->{nc} == 0x005D) { # ]
4224                    
# Line 3541  sub _get_next_token ($) { Line 4236  sub _get_next_token ($) {
4236        
4237          redo A;          redo A;
4238        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4239            if ($self->{is_xml}) {
4240              
4241              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no mse'); ## TODO: type
4242            } else {
4243              
4244            }
4245    
4246          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
4247                    $self->{s_kwd} = '';
4248      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {          ## Reconsume.
       $self->{line_prev} = $self->{line};  
       $self->{column_prev} = $self->{column};  
       $self->{column}++;  
       $self->{nc}  
           = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);  
     } else {  
       $self->{set_nc}->($self);  
     }  
     
4249          if (length $self->{ct}->{data}) { # character          if (length $self->{ct}->{data}) { # character
4250                        
4251            return  ($self->{ct}); # character            return  ($self->{ct}); # character
# Line 3585  sub _get_next_token ($) { Line 4278  sub _get_next_token ($) {
4278    
4279        ## ISSUE: "text tokens" in spec.        ## ISSUE: "text tokens" in spec.
4280      } elsif ($self->{state} == CDATA_SECTION_MSE1_STATE) {      } elsif ($self->{state} == CDATA_SECTION_MSE1_STATE) {
4281          ## XML5: "CDATA bracket state".
4282    
4283        if ($self->{nc} == 0x005D) { # ]        if ($self->{nc} == 0x005D) { # ]
4284                    
4285          $self->{state} = CDATA_SECTION_MSE2_STATE;          $self->{state} = CDATA_SECTION_MSE2_STATE;
# Line 3602  sub _get_next_token ($) { Line 4297  sub _get_next_token ($) {
4297          redo A;          redo A;
4298        } else {        } else {
4299                    
4300            ## XML5: If EOF, "]" is not appended and changed to the data state.
4301          $self->{ct}->{data} .= ']';          $self->{ct}->{data} .= ']';
4302          $self->{state} = CDATA_SECTION_STATE;          $self->{state} = CDATA_SECTION_STATE; ## XML5: Stay in the state.
4303          ## Reconsume.          ## Reconsume.
4304          redo A;          redo A;
4305        }        }
4306      } elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) {      } elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) {
4307          ## XML5: "CDATA end state".
4308    
4309        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
4310          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
4311            $self->{s_kwd} = '';
4312                    
4313      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4314        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3649  sub _get_next_token ($) { Line 4348  sub _get_next_token ($) {
4348                    
4349          $self->{ct}->{data} .= ']]'; # character          $self->{ct}->{data} .= ']]'; # character
4350          $self->{state} = CDATA_SECTION_STATE;          $self->{state} = CDATA_SECTION_STATE;
4351          ## Reconsume.          ## Reconsume. ## XML5: Emit.
4352          redo A;          redo A;
4353        }        }
4354      } elsif ($self->{state} == ENTITY_STATE) {      } elsif ($self->{state} == ENTITY_STATE) {
# Line 3666  sub _get_next_token ($) { Line 4365  sub _get_next_token ($) {
4365        } elsif ($self->{nc} == 0x0023) { # #        } elsif ($self->{nc} == 0x0023) { # #
4366                    
4367          $self->{state} = ENTITY_HASH_STATE;          $self->{state} = ENTITY_HASH_STATE;
4368          $self->{s_kwd} = '#';          $self->{kwd} = '#';
4369                    
4370      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4371        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3686  sub _get_next_token ($) { Line 4385  sub _get_next_token ($) {
4385                    
4386          require Whatpm::_NamedEntityList;          require Whatpm::_NamedEntityList;
4387          $self->{state} = ENTITY_NAME_STATE;          $self->{state} = ENTITY_NAME_STATE;
4388          $self->{s_kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
4389          $self->{entity__value} = $self->{s_kwd};          $self->{entity__value} = $self->{kwd};
4390          $self->{entity__match} = 0;          $self->{entity__match} = 0;
4391                    
4392      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3717  sub _get_next_token ($) { Line 4416  sub _get_next_token ($) {
4416        if ($self->{prev_state} == DATA_STATE) {        if ($self->{prev_state} == DATA_STATE) {
4417                    
4418          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
4419            $self->{s_kwd} = '';
4420          ## Reconsume.          ## Reconsume.
4421          return  ({type => CHARACTER_TOKEN, data => '&',          return  ({type => CHARACTER_TOKEN, data => '&',
4422                    line => $self->{line_prev},                    line => $self->{line_prev},
# Line 3727  sub _get_next_token ($) { Line 4427  sub _get_next_token ($) {
4427                    
4428          $self->{ca}->{value} .= '&';          $self->{ca}->{value} .= '&';
4429          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
4430            $self->{s_kwd} = '';
4431          ## Reconsume.          ## Reconsume.
4432          redo A;          redo A;
4433        }        }
# Line 3735  sub _get_next_token ($) { Line 4436  sub _get_next_token ($) {
4436            $self->{nc} == 0x0058) { # X            $self->{nc} == 0x0058) { # X
4437                    
4438          $self->{state} = HEXREF_X_STATE;          $self->{state} = HEXREF_X_STATE;
4439          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
4440                    
4441      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4442        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3752  sub _get_next_token ($) { Line 4453  sub _get_next_token ($) {
4453                 $self->{nc} <= 0x0039) { # 0..9                 $self->{nc} <= 0x0039) { # 0..9
4454                    
4455          $self->{state} = NCR_NUM_STATE;          $self->{state} = NCR_NUM_STATE;
4456          $self->{s_kwd} = $self->{nc} - 0x0030;          $self->{kwd} = $self->{nc} - 0x0030;
4457                    
4458      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4459        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3777  sub _get_next_token ($) { Line 4478  sub _get_next_token ($) {
4478          if ($self->{prev_state} == DATA_STATE) {          if ($self->{prev_state} == DATA_STATE) {
4479                        
4480            $self->{state} = $self->{prev_state};            $self->{state} = $self->{prev_state};
4481              $self->{s_kwd} = '';
4482            ## Reconsume.            ## Reconsume.
4483            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
4484                      data => '&#',                      data => '&#',
# Line 3788  sub _get_next_token ($) { Line 4490  sub _get_next_token ($) {
4490                        
4491            $self->{ca}->{value} .= '&#';            $self->{ca}->{value} .= '&#';
4492            $self->{state} = $self->{prev_state};            $self->{state} = $self->{prev_state};
4493              $self->{s_kwd} = '';
4494            ## Reconsume.            ## Reconsume.
4495            redo A;            redo A;
4496          }          }
# Line 3796  sub _get_next_token ($) { Line 4499  sub _get_next_token ($) {
4499        if (0x0030 <= $self->{nc} and        if (0x0030 <= $self->{nc} and
4500            $self->{nc} <= 0x0039) { # 0..9            $self->{nc} <= 0x0039) { # 0..9
4501                    
4502          $self->{s_kwd} *= 10;          $self->{kwd} *= 10;
4503          $self->{s_kwd} += $self->{nc} - 0x0030;          $self->{kwd} += $self->{nc} - 0x0030;
4504                    
4505          ## Stay in the state.          ## Stay in the state.
4506                    
# Line 3833  sub _get_next_token ($) { Line 4536  sub _get_next_token ($) {
4536          #          #
4537        }        }
4538    
4539        my $code = $self->{s_kwd};        my $code = $self->{kwd};
4540        my $l = $self->{line_prev};        my $l = $self->{line_prev};
4541        my $c = $self->{column_prev};        my $c = $self->{column_prev};
4542        if ($charref_map->{$code}) {        if ($charref_map->{$code}) {
# Line 3853  sub _get_next_token ($) { Line 4556  sub _get_next_token ($) {
4556        if ($self->{prev_state} == DATA_STATE) {        if ($self->{prev_state} == DATA_STATE) {
4557                    
4558          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
4559            $self->{s_kwd} = '';
4560          ## Reconsume.          ## Reconsume.
4561          return  ({type => CHARACTER_TOKEN, data => chr $code,          return  ({type => CHARACTER_TOKEN, data => chr $code,
4562                      has_reference => 1,
4563                    line => $l, column => $c,                    line => $l, column => $c,
4564                   });                   });
4565          redo A;          redo A;
# Line 3863  sub _get_next_token ($) { Line 4568  sub _get_next_token ($) {
4568          $self->{ca}->{value} .= chr $code;          $self->{ca}->{value} .= chr $code;
4569          $self->{ca}->{has_reference} = 1;          $self->{ca}->{has_reference} = 1;
4570          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
4571            $self->{s_kwd} = '';
4572          ## Reconsume.          ## Reconsume.
4573          redo A;          redo A;
4574        }        }
# Line 3873  sub _get_next_token ($) { Line 4579  sub _get_next_token ($) {
4579          # 0..9, A..F, a..f          # 0..9, A..F, a..f
4580                    
4581          $self->{state} = HEXREF_HEX_STATE;          $self->{state} = HEXREF_HEX_STATE;
4582          $self->{s_kwd} = 0;          $self->{kwd} = 0;
4583          ## Reconsume.          ## Reconsume.
4584          redo A;          redo A;
4585        } else {        } else {
# Line 3888  sub _get_next_token ($) { Line 4594  sub _get_next_token ($) {
4594          if ($self->{prev_state} == DATA_STATE) {          if ($self->{prev_state} == DATA_STATE) {
4595                        
4596            $self->{state} = $self->{prev_state};            $self->{state} = $self->{prev_state};
4597              $self->{s_kwd} = '';
4598            ## Reconsume.            ## Reconsume.
4599            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
4600                      data => '&' . $self->{s_kwd},                      data => '&' . $self->{kwd},
4601                      line => $self->{line_prev},                      line => $self->{line_prev},
4602                      column => $self->{column_prev} - length $self->{s_kwd},                      column => $self->{column_prev} - length $self->{kwd},
4603                     });                     });
4604            redo A;            redo A;
4605          } else {          } else {
4606                        
4607            $self->{ca}->{value} .= '&' . $self->{s_kwd};            $self->{ca}->{value} .= '&' . $self->{kwd};
4608            $self->{state} = $self->{prev_state};            $self->{state} = $self->{prev_state};
4609              $self->{s_kwd} = '';
4610            ## Reconsume.            ## Reconsume.
4611            redo A;            redo A;
4612          }          }
# Line 3907  sub _get_next_token ($) { Line 4615  sub _get_next_token ($) {
4615        if (0x0030 <= $self->{nc} and $self->{nc} <= 0x0039) {        if (0x0030 <= $self->{nc} and $self->{nc} <= 0x0039) {
4616          # 0..9          # 0..9
4617                    
4618          $self->{s_kwd} *= 0x10;          $self->{kwd} *= 0x10;
4619          $self->{s_kwd} += $self->{nc} - 0x0030;          $self->{kwd} += $self->{nc} - 0x0030;
4620          ## Stay in the state.          ## Stay in the state.
4621                    
4622      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3925  sub _get_next_token ($) { Line 4633  sub _get_next_token ($) {
4633        } elsif (0x0061 <= $self->{nc} and        } elsif (0x0061 <= $self->{nc} and
4634                 $self->{nc} <= 0x0066) { # a..f                 $self->{nc} <= 0x0066) { # a..f
4635                    
4636          $self->{s_kwd} *= 0x10;          $self->{kwd} *= 0x10;
4637          $self->{s_kwd} += $self->{nc} - 0x0060 + 9;          $self->{kwd} += $self->{nc} - 0x0060 + 9;
4638          ## Stay in the state.          ## Stay in the state.
4639                    
4640      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3943  sub _get_next_token ($) { Line 4651  sub _get_next_token ($) {
4651        } elsif (0x0041 <= $self->{nc} and        } elsif (0x0041 <= $self->{nc} and
4652                 $self->{nc} <= 0x0046) { # A..F                 $self->{nc} <= 0x0046) { # A..F
4653                    
4654          $self->{s_kwd} *= 0x10;          $self->{kwd} *= 0x10;
4655          $self->{s_kwd} += $self->{nc} - 0x0040 + 9;          $self->{kwd} += $self->{nc} - 0x0040 + 9;
4656          ## Stay in the state.          ## Stay in the state.
4657                    
4658      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3981  sub _get_next_token ($) { Line 4689  sub _get_next_token ($) {
4689          #          #
4690        }        }
4691    
4692        my $code = $self->{s_kwd};        my $code = $self->{kwd};
4693        my $l = $self->{line_prev};        my $l = $self->{line_prev};
4694        my $c = $self->{column_prev};        my $c = $self->{column_prev};
4695        if ($charref_map->{$code}) {        if ($charref_map->{$code}) {
# Line 4001  sub _get_next_token ($) { Line 4709  sub _get_next_token ($) {
4709        if ($self->{prev_state} == DATA_STATE) {        if ($self->{prev_state} == DATA_STATE) {
4710                    
4711          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
4712            $self->{s_kwd} = '';
4713          ## Reconsume.          ## Reconsume.
4714          return  ({type => CHARACTER_TOKEN, data => chr $code,          return  ({type => CHARACTER_TOKEN, data => chr $code,
4715                      has_reference => 1,
4716                    line => $l, column => $c,                    line => $l, column => $c,
4717                   });                   });
4718          redo A;          redo A;
# Line 4011  sub _get_next_token ($) { Line 4721  sub _get_next_token ($) {
4721          $self->{ca}->{value} .= chr $code;          $self->{ca}->{value} .= chr $code;
4722          $self->{ca}->{has_reference} = 1;          $self->{ca}->{has_reference} = 1;
4723          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
4724            $self->{s_kwd} = '';
4725          ## Reconsume.          ## Reconsume.
4726          redo A;          redo A;
4727        }        }
4728      } elsif ($self->{state} == ENTITY_NAME_STATE) {      } elsif ($self->{state} == ENTITY_NAME_STATE) {
4729        if (length $self->{s_kwd} < 30 and        if (length $self->{kwd} < 30 and
4730            ## NOTE: Some number greater than the maximum length of entity name            ## NOTE: Some number greater than the maximum length of entity name
4731            ((0x0041 <= $self->{nc} and # a            ((0x0041 <= $self->{nc} and # a
4732              $self->{nc} <= 0x005A) or # x              $self->{nc} <= 0x005A) or # x
# Line 4025  sub _get_next_token ($) { Line 4736  sub _get_next_token ($) {
4736              $self->{nc} <= 0x0039) or # 9              $self->{nc} <= 0x0039) or # 9
4737             $self->{nc} == 0x003B)) { # ;             $self->{nc} == 0x003B)) { # ;
4738          our $EntityChar;          our $EntityChar;
4739          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
4740          if (defined $EntityChar->{$self->{s_kwd}}) {          if (defined $EntityChar->{$self->{kwd}}) {
4741            if ($self->{nc} == 0x003B) { # ;            if ($self->{nc} == 0x003B) { # ;
4742                            
4743              $self->{entity__value} = $EntityChar->{$self->{s_kwd}};              $self->{entity__value} = $EntityChar->{$self->{kwd}};
4744              $self->{entity__match} = 1;              $self->{entity__match} = 1;
4745                            
4746      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 4045  sub _get_next_token ($) { Line 4756  sub _get_next_token ($) {
4756              #              #
4757            } else {            } else {
4758                            
4759              $self->{entity__value} = $EntityChar->{$self->{s_kwd}};              $self->{entity__value} = $EntityChar->{$self->{kwd}};
4760              $self->{entity__match} = -1;              $self->{entity__match} = -1;
4761              ## Stay in the state.              ## Stay in the state.
4762                            
# Line 4093  sub _get_next_token ($) { Line 4804  sub _get_next_token ($) {
4804          if ($self->{prev_state} != DATA_STATE and # in attribute          if ($self->{prev_state} != DATA_STATE and # in attribute
4805              $self->{entity__match} < -1) {              $self->{entity__match} < -1) {
4806                        
4807            $data = '&' . $self->{s_kwd};            $data = '&' . $self->{kwd};
4808            #            #
4809          } else {          } else {
4810                        
# Line 4105  sub _get_next_token ($) { Line 4816  sub _get_next_token ($) {
4816                    
4817          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero',
4818                          line => $self->{line_prev},                          line => $self->{line_prev},
4819                          column => $self->{column_prev} - length $self->{s_kwd});                          column => $self->{column_prev} - length $self->{kwd});
4820          $data = '&' . $self->{s_kwd};          $data = '&' . $self->{kwd};
4821          #          #
4822        }        }
4823        
# Line 4123  sub _get_next_token ($) { Line 4834  sub _get_next_token ($) {
4834        if ($self->{prev_state} == DATA_STATE) {        if ($self->{prev_state} == DATA_STATE) {
4835                    
4836          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
4837            $self->{s_kwd} = '';
4838          ## Reconsume.          ## Reconsume.
4839          return  ({type => CHARACTER_TOKEN,          return  ({type => CHARACTER_TOKEN,
4840                    data => $data,                    data => $data,
4841                      has_reference => $has_ref,
4842                    line => $self->{line_prev},                    line => $self->{line_prev},
4843                    column => $self->{column_prev} + 1 - length $self->{s_kwd},                    column => $self->{column_prev} + 1 - length $self->{kwd},
4844                   });                   });
4845          redo A;          redo A;
4846        } else {        } else {
# Line 4135  sub _get_next_token ($) { Line 4848  sub _get_next_token ($) {
4848          $self->{ca}->{value} .= $data;          $self->{ca}->{value} .= $data;
4849          $self->{ca}->{has_reference} = 1 if $has_ref;          $self->{ca}->{has_reference} = 1 if $has_ref;
4850          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
4851            $self->{s_kwd} = '';
4852            ## Reconsume.
4853            redo A;
4854          }
4855    
4856        ## XML-only states
4857    
4858        } elsif ($self->{state} == PI_STATE) {
4859          ## XML5: "Pi state" and "DOCTYPE pi state".
4860    
4861          if ($is_space->{$self->{nc}} or
4862              $self->{nc} == 0x003F or # ?
4863              $self->{nc} == -1) {
4864            ## XML5: U+003F: "pi state": Same as "Anything else"; "DOCTYPE
4865            ## pi state": Switch to the "DOCTYPE pi after state".  EOF:
4866            ## "DOCTYPE pi state": Parse error, switch to the "data
4867            ## state".
4868            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare pio', ## TODO: type
4869                            line => $self->{line_prev},
4870                            column => $self->{column_prev}
4871                                - 1 * ($self->{nc} != -1));
4872            $self->{state} = BOGUS_COMMENT_STATE;
4873            ## Reconsume.
4874            $self->{ct} = {type => COMMENT_TOKEN,
4875                           data => '?',
4876                           line => $self->{line_prev},
4877                           column => $self->{column_prev}
4878                               - 1 * ($self->{nc} != -1),
4879                          };
4880            redo A;
4881          } else {
4882            ## XML5: "DOCTYPE pi state": Stay in the state.
4883            $self->{ct} = {type => PI_TOKEN,
4884                           target => chr $self->{nc},
4885                           data => '',
4886                           line => $self->{line_prev},
4887                           column => $self->{column_prev} - 1,
4888                          };
4889            $self->{state} = PI_TARGET_STATE;
4890            
4891        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4892          $self->{line_prev} = $self->{line};
4893          $self->{column_prev} = $self->{column};
4894          $self->{column}++;
4895          $self->{nc}
4896              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4897        } else {
4898          $self->{set_nc}->($self);
4899        }
4900      
4901            redo A;
4902          }
4903        } elsif ($self->{state} == PI_TARGET_STATE) {
4904          if ($is_space->{$self->{nc}}) {
4905            $self->{state} = PI_TARGET_AFTER_STATE;
4906            
4907        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4908          $self->{line_prev} = $self->{line};
4909          $self->{column_prev} = $self->{column};
4910          $self->{column}++;
4911          $self->{nc}
4912              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4913        } else {
4914          $self->{set_nc}->($self);
4915        }
4916      
4917            redo A;
4918          } elsif ($self->{nc} == -1) {
4919            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type
4920            if ($self->{in_subset}) {
4921              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4922            } else {
4923              $self->{state} = DATA_STATE;
4924              $self->{s_kwd} = '';
4925            }
4926            ## Reconsume.
4927            return  ($self->{ct}); # pi
4928            redo A;
4929          } elsif ($self->{nc} == 0x003F) { # ?
4930            $self->{state} = PI_AFTER_STATE;
4931            
4932        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4933          $self->{line_prev} = $self->{line};
4934          $self->{column_prev} = $self->{column};
4935          $self->{column}++;
4936          $self->{nc}
4937              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4938        } else {
4939          $self->{set_nc}->($self);
4940        }
4941      
4942            redo A;
4943          } else {
4944            ## XML5: typo ("tag name" -> "target")
4945            $self->{ct}->{target} .= chr $self->{nc}; # pi
4946            
4947        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4948          $self->{line_prev} = $self->{line};
4949          $self->{column_prev} = $self->{column};
4950          $self->{column}++;
4951          $self->{nc}
4952              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4953        } else {
4954          $self->{set_nc}->($self);
4955        }
4956      
4957            redo A;
4958          }
4959        } elsif ($self->{state} == PI_TARGET_AFTER_STATE) {
4960          if ($is_space->{$self->{nc}}) {
4961            ## Stay in the state.
4962            
4963        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4964          $self->{line_prev} = $self->{line};
4965          $self->{column_prev} = $self->{column};
4966          $self->{column}++;
4967          $self->{nc}
4968              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4969        } else {
4970          $self->{set_nc}->($self);
4971        }
4972      
4973            redo A;
4974          } else {
4975            $self->{state} = PI_DATA_STATE;
4976            ## Reprocess.
4977            redo A;
4978          }
4979        } elsif ($self->{state} == PI_DATA_STATE) {
4980          if ($self->{nc} == 0x003F) { # ?
4981            $self->{state} = PI_DATA_AFTER_STATE;
4982            
4983        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4984          $self->{line_prev} = $self->{line};
4985          $self->{column_prev} = $self->{column};
4986          $self->{column}++;
4987          $self->{nc}
4988              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4989        } else {
4990          $self->{set_nc}->($self);
4991        }
4992      
4993            redo A;
4994          } elsif ($self->{nc} == -1) {
4995            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type
4996            if ($self->{in_subset}) {
4997              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state"
4998            } else {
4999              $self->{state} = DATA_STATE;
5000              $self->{s_kwd} = '';
5001            }
5002            ## Reprocess.
5003            return  ($self->{ct}); # pi
5004            redo A;
5005          } else {
5006            $self->{ct}->{data} .= chr $self->{nc}; # pi
5007            $self->{read_until}->($self->{ct}->{data}, q[?],
5008                                  length $self->{ct}->{data});
5009            ## Stay in the state.
5010            
5011        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5012          $self->{line_prev} = $self->{line};
5013          $self->{column_prev} = $self->{column};
5014          $self->{column}++;
5015          $self->{nc}
5016              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5017        } else {
5018          $self->{set_nc}->($self);
5019        }
5020      
5021            ## Reprocess.
5022            redo A;
5023          }
5024        } elsif ($self->{state} == PI_AFTER_STATE) {
5025          ## XML5: Part of "Pi after state".
5026    
5027          if ($self->{nc} == 0x003E) { # >
5028            if ($self->{in_subset}) {
5029              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5030            } else {
5031              $self->{state} = DATA_STATE;
5032              $self->{s_kwd} = '';
5033            }
5034            
5035        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5036          $self->{line_prev} = $self->{line};
5037          $self->{column_prev} = $self->{column};
5038          $self->{column}++;
5039          $self->{nc}
5040              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5041        } else {
5042          $self->{set_nc}->($self);
5043        }
5044      
5045            return  ($self->{ct}); # pi
5046            redo A;
5047          } elsif ($self->{nc} == 0x003F) { # ?
5048            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no s after target', ## TODO: type
5049                            line => $self->{line_prev},
5050                            column => $self->{column_prev}); ## XML5: no error
5051            $self->{ct}->{data} .= '?';
5052            $self->{state} = PI_DATA_AFTER_STATE;
5053            
5054        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5055          $self->{line_prev} = $self->{line};
5056          $self->{column_prev} = $self->{column};
5057          $self->{column}++;
5058          $self->{nc}
5059              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5060        } else {
5061          $self->{set_nc}->($self);
5062        }
5063      
5064            redo A;
5065          } else {
5066            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no s after target', ## TODO: type
5067                            line => $self->{line_prev},
5068                            column => $self->{column_prev}
5069                                + 1 * ($self->{nc} == -1)); ## XML5: no error
5070            $self->{ct}->{data} .= '?'; ## XML5: not appended
5071            $self->{state} = PI_DATA_STATE;
5072            ## Reprocess.
5073            redo A;
5074          }
5075        } elsif ($self->{state} == PI_DATA_AFTER_STATE) {
5076          ## XML5: Same as "pi after state" and "DOCTYPE pi after state".
5077    
5078          if ($self->{nc} == 0x003E) { # >
5079            if ($self->{in_subset}) {
5080              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5081            } else {
5082              $self->{state} = DATA_STATE;
5083              $self->{s_kwd} = '';
5084            }
5085            
5086        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5087          $self->{line_prev} = $self->{line};
5088          $self->{column_prev} = $self->{column};
5089          $self->{column}++;
5090          $self->{nc}
5091              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5092        } else {
5093          $self->{set_nc}->($self);
5094        }
5095      
5096            return  ($self->{ct}); # pi
5097            redo A;
5098          } elsif ($self->{nc} == 0x003F) { # ?
5099            $self->{ct}->{data} .= '?';
5100            ## Stay in the state.
5101            
5102        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5103          $self->{line_prev} = $self->{line};
5104          $self->{column_prev} = $self->{column};
5105          $self->{column}++;
5106          $self->{nc}
5107              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5108        } else {
5109          $self->{set_nc}->($self);
5110        }
5111      
5112            redo A;
5113          } else {
5114            $self->{ct}->{data} .= '?'; ## XML5: not appended
5115            $self->{state} = PI_DATA_STATE;
5116            ## Reprocess.
5117            redo A;
5118          }
5119    
5120        } elsif ($self->{state} == DOCTYPE_INTERNAL_SUBSET_STATE) {
5121          if ($self->{nc} == 0x003C) { # <
5122            $self->{state} = DOCTYPE_TAG_STATE;
5123            
5124        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5125          $self->{line_prev} = $self->{line};
5126          $self->{column_prev} = $self->{column};
5127          $self->{column}++;
5128          $self->{nc}
5129              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5130        } else {
5131          $self->{set_nc}->($self);
5132        }
5133      
5134            redo A;
5135          } elsif ($self->{nc} == 0x0025) { # %
5136            ## XML5: Not defined yet.
5137    
5138            ## TODO:
5139            
5140        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5141          $self->{line_prev} = $self->{line};
5142          $self->{column_prev} = $self->{column};
5143          $self->{column}++;
5144          $self->{nc}
5145              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5146        } else {
5147          $self->{set_nc}->($self);
5148        }
5149      
5150            redo A;
5151          } elsif ($self->{nc} == 0x005D) { # ]
5152            delete $self->{in_subset};
5153            $self->{state} = DOCTYPE_INTERNAL_SUBSET_AFTER_STATE;
5154            
5155        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5156          $self->{line_prev} = $self->{line};
5157          $self->{column_prev} = $self->{column};
5158          $self->{column}++;
5159          $self->{nc}
5160              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5161        } else {
5162          $self->{set_nc}->($self);
5163        }
5164      
5165            redo A;
5166          } elsif ($is_space->{$self->{nc}}) {
5167            ## Stay in the state.
5168            
5169        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5170          $self->{line_prev} = $self->{line};
5171          $self->{column_prev} = $self->{column};
5172          $self->{column}++;
5173          $self->{nc}
5174              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5175        } else {
5176          $self->{set_nc}->($self);
5177        }
5178      
5179            redo A;
5180          } elsif ($self->{nc} == -1) {
5181            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed internal subset'); ## TODO: type
5182            delete $self->{in_subset};
5183            $self->{state} = DATA_STATE;
5184            $self->{s_kwd} = '';
5185            ## Reconsume.
5186            return  ({type => END_OF_DOCTYPE_TOKEN});
5187            redo A;
5188          } else {
5189            unless ($self->{internal_subset_tainted}) {
5190              ## XML5: No parse error.
5191              $self->{parse_error}->(level => $self->{level}->{must}, type => 'string in internal subset');
5192              $self->{internal_subset_tainted} = 1;
5193            }
5194            ## Stay in the state.
5195            
5196        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5197          $self->{line_prev} = $self->{line};
5198          $self->{column_prev} = $self->{column};
5199          $self->{column}++;
5200          $self->{nc}
5201              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5202        } else {
5203          $self->{set_nc}->($self);
5204        }
5205      
5206            redo A;
5207          }
5208        } elsif ($self->{state} == DOCTYPE_INTERNAL_SUBSET_AFTER_STATE) {
5209          if ($self->{nc} == 0x003E) { # >
5210            $self->{state} = DATA_STATE;
5211            $self->{s_kwd} = '';
5212            
5213        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5214          $self->{line_prev} = $self->{line};
5215          $self->{column_prev} = $self->{column};
5216          $self->{column}++;
5217          $self->{nc}
5218              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5219        } else {
5220          $self->{set_nc}->($self);
5221        }
5222      
5223            return  ({type => END_OF_DOCTYPE_TOKEN});
5224            redo A;
5225          } elsif ($self->{nc} == -1) {
5226            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
5227            $self->{state} = DATA_STATE;
5228            $self->{s_kwd} = '';
5229            ## Reconsume.
5230            return  ({type => END_OF_DOCTYPE_TOKEN});
5231            redo A;
5232          } else {
5233            ## XML5: No parse error and stay in the state.
5234            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after internal subset'); ## TODO: type
5235    
5236            $self->{state} = BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE;
5237            
5238        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5239          $self->{line_prev} = $self->{line};
5240          $self->{column_prev} = $self->{column};
5241          $self->{column}++;
5242          $self->{nc}
5243              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5244        } else {
5245          $self->{set_nc}->($self);
5246        }
5247      
5248            redo A;
5249          }
5250        } elsif ($self->{state} == BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE) {
5251          if ($self->{nc} == 0x003E) { # >
5252            $self->{state} = DATA_STATE;
5253            $self->{s_kwd} = '';
5254            
5255        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5256          $self->{line_prev} = $self->{line};
5257          $self->{column_prev} = $self->{column};
5258          $self->{column}++;
5259          $self->{nc}
5260              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5261        } else {
5262          $self->{set_nc}->($self);
5263        }
5264      
5265            return  ({type => END_OF_DOCTYPE_TOKEN});
5266            redo A;
5267          } elsif ($self->{nc} == -1) {
5268            $self->{state} = DATA_STATE;
5269            $self->{s_kwd} = '';
5270            ## Reconsume.
5271            return  ({type => END_OF_DOCTYPE_TOKEN});
5272            redo A;
5273          } else {
5274            ## Stay in the state.
5275            
5276        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5277          $self->{line_prev} = $self->{line};
5278          $self->{column_prev} = $self->{column};
5279          $self->{column}++;
5280          $self->{nc}
5281              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5282        } else {
5283          $self->{set_nc}->($self);
5284        }
5285      
5286            redo A;
5287          }
5288        } elsif ($self->{state} == DOCTYPE_TAG_STATE) {
5289          if ($self->{nc} == 0x0021) { # !
5290            $self->{state} = DOCTYPE_MARKUP_DECLARATION_OPEN_STATE;
5291            
5292        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5293          $self->{line_prev} = $self->{line};
5294          $self->{column_prev} = $self->{column};
5295          $self->{column}++;
5296          $self->{nc}
5297              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5298        } else {
5299          $self->{set_nc}->($self);
5300        }
5301      
5302            redo A;
5303          } elsif ($self->{nc} == 0x003F) { # ?
5304            $self->{state} = PI_STATE;
5305            
5306        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5307          $self->{line_prev} = $self->{line};
5308          $self->{column_prev} = $self->{column};
5309          $self->{column}++;
5310          $self->{nc}
5311              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5312        } else {
5313          $self->{set_nc}->($self);
5314        }
5315      
5316            redo A;
5317          } elsif ($self->{nc} == -1) {
5318            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago');
5319            $self->{state} = DATA_STATE;
5320            $self->{s_kwd} = '';
5321            ## Reconsume.
5322            redo A;
5323          } else {
5324            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago', ## XML5: Not a parse error.
5325                            line => $self->{line_prev},
5326                            column => $self->{column_prev});
5327            $self->{state} = BOGUS_COMMENT_STATE;
5328            $self->{ct} = {type => COMMENT_TOKEN,
5329                           data => '',
5330                          }; ## NOTE: Will be discarded.
5331            
5332        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5333          $self->{line_prev} = $self->{line};
5334          $self->{column_prev} = $self->{column};
5335          $self->{column}++;
5336          $self->{nc}
5337              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5338        } else {
5339          $self->{set_nc}->($self);
5340        }
5341      
5342            redo A;
5343          }
5344        } elsif ($self->{state} == DOCTYPE_MARKUP_DECLARATION_OPEN_STATE) {
5345          ## XML5: "DOCTYPE markup declaration state".
5346          
5347          if ($self->{nc} == 0x002D) { # -
5348            $self->{state} = MD_HYPHEN_STATE;
5349            
5350        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5351          $self->{line_prev} = $self->{line};
5352          $self->{column_prev} = $self->{column};
5353          $self->{column}++;
5354          $self->{nc}
5355              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5356        } else {
5357          $self->{set_nc}->($self);
5358        }
5359      
5360            redo A;
5361          } elsif ($self->{nc} == 0x0045) { # E
5362            $self->{state} = MD_E_STATE;
5363            $self->{kwd} = chr $self->{nc};
5364            
5365        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5366          $self->{line_prev} = $self->{line};
5367          $self->{column_prev} = $self->{column};
5368          $self->{column}++;
5369          $self->{nc}
5370              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5371        } else {
5372          $self->{set_nc}->($self);
5373        }
5374      
5375            redo A;
5376          } elsif ($self->{nc} == 0x0041) { # A
5377            $self->{state} = MD_ATTLIST_STATE;
5378            $self->{kwd} = chr $self->{nc};
5379            
5380        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5381          $self->{line_prev} = $self->{line};
5382          $self->{column_prev} = $self->{column};
5383          $self->{column}++;
5384          $self->{nc}
5385              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5386        } else {
5387          $self->{set_nc}->($self);
5388        }
5389      
5390            redo A;
5391          } elsif ($self->{nc} == 0x004E) { # N
5392            $self->{state} = MD_NOTATION_STATE;
5393            $self->{kwd} = chr $self->{nc};
5394            
5395        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5396          $self->{line_prev} = $self->{line};
5397          $self->{column_prev} = $self->{column};
5398          $self->{column}++;
5399          $self->{nc}
5400              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5401        } else {
5402          $self->{set_nc}->($self);
5403        }
5404      
5405            redo A;
5406          } else {
5407            #
5408          }
5409          
5410          ## XML5: No parse error.
5411          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5412                          line => $self->{line_prev},
5413                          column => $self->{column_prev} - 1);
5414          ## Reconsume.
5415          $self->{state} = BOGUS_COMMENT_STATE;
5416          $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded.
5417          redo A;
5418        } elsif ($self->{state} == MD_E_STATE) {
5419          if ($self->{nc} == 0x004E) { # N
5420            $self->{state} = MD_ENTITY_STATE;
5421            $self->{kwd} .= chr $self->{nc};
5422            
5423        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5424          $self->{line_prev} = $self->{line};
5425          $self->{column_prev} = $self->{column};
5426          $self->{column}++;
5427          $self->{nc}
5428              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5429        } else {
5430          $self->{set_nc}->($self);
5431        }
5432      
5433            redo A;
5434          } elsif ($self->{nc} == 0x004C) { # L
5435            ## XML5: <!ELEMENT> not supported.
5436            $self->{state} = MD_ELEMENT_STATE;
5437            $self->{kwd} .= chr $self->{nc};
5438            
5439        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5440          $self->{line_prev} = $self->{line};
5441          $self->{column_prev} = $self->{column};
5442          $self->{column}++;
5443          $self->{nc}
5444              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5445        } else {
5446          $self->{set_nc}->($self);
5447        }
5448      
5449            redo A;
5450          } else {
5451            ## XML5: No parse error.
5452            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5453                            line => $self->{line_prev},
5454                            column => $self->{column_prev} - 2
5455                                + 1 * ($self->{nc} == -1));
5456            ## Reconsume.
5457            $self->{state} = BOGUS_COMMENT_STATE;
5458            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5459            redo A;
5460          }
5461        } elsif ($self->{state} == MD_ENTITY_STATE) {
5462          if ($self->{nc} == {
5463                'EN' => 0x0054, # T
5464                'ENT' => 0x0049, # I
5465                'ENTI' => 0x0054, # T
5466              }->{$self->{kwd}}) {
5467            ## Stay in the state.
5468            $self->{kwd} .= chr $self->{nc};
5469            
5470        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5471          $self->{line_prev} = $self->{line};
5472          $self->{column_prev} = $self->{column};
5473          $self->{column}++;
5474          $self->{nc}
5475              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5476        } else {
5477          $self->{set_nc}->($self);
5478        }
5479      
5480            redo A;
5481          } elsif ($self->{kwd} eq 'ENTIT' and
5482                   $self->{nc} == 0x0059) { # Y
5483            $self->{ct} = {type => GENERAL_ENTITY_TOKEN, name => '', text => '',
5484                           line => $self->{line_prev},
5485                           column => $self->{column_prev} - 6};
5486            $self->{state} = DOCTYPE_MD_STATE;
5487            
5488        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5489          $self->{line_prev} = $self->{line};
5490          $self->{column_prev} = $self->{column};
5491          $self->{column}++;
5492          $self->{nc}
5493              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5494        } else {
5495          $self->{set_nc}->($self);
5496        }
5497      
5498            redo A;
5499          } else {
5500            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5501                            line => $self->{line_prev},
5502                            column => $self->{column_prev} - 1
5503                                - (length $self->{kwd})
5504                                + 1 * ($self->{nc} == -1));
5505            $self->{state} = BOGUS_COMMENT_STATE;
5506            ## Reconsume.
5507            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5508            redo A;
5509          }
5510        } elsif ($self->{state} == MD_ELEMENT_STATE) {
5511          if ($self->{nc} == {
5512                'EL' => 0x0045, # E
5513                'ELE' => 0x004D, # M
5514                'ELEM' => 0x0045, # E
5515                'ELEME' => 0x004E, # N
5516              }->{$self->{kwd}}) {
5517            ## Stay in the state.
5518            $self->{kwd} .= chr $self->{nc};
5519            
5520        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5521          $self->{line_prev} = $self->{line};
5522          $self->{column_prev} = $self->{column};
5523          $self->{column}++;
5524          $self->{nc}
5525              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5526        } else {
5527          $self->{set_nc}->($self);
5528        }
5529      
5530            redo A;
5531          } elsif ($self->{kwd} eq 'ELEMEN' and
5532                   $self->{nc} == 0x0054) { # T
5533            $self->{ct} = {type => ELEMENT_TOKEN, name => '',
5534                           line => $self->{line_prev},
5535                           column => $self->{column_prev} - 6};
5536            $self->{state} = DOCTYPE_MD_STATE;
5537            
5538        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5539          $self->{line_prev} = $self->{line};
5540          $self->{column_prev} = $self->{column};
5541          $self->{column}++;
5542          $self->{nc}
5543              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5544        } else {
5545          $self->{set_nc}->($self);
5546        }
5547      
5548            redo A;
5549          } else {
5550            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5551                            line => $self->{line_prev},
5552                            column => $self->{column_prev} - 1
5553                                - (length $self->{kwd})
5554                                + 1 * ($self->{nc} == -1));
5555            $self->{state} = BOGUS_COMMENT_STATE;
5556            ## Reconsume.
5557            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5558            redo A;
5559          }
5560        } elsif ($self->{state} == MD_ATTLIST_STATE) {
5561          if ($self->{nc} == {
5562                'A' => 0x0054, # T
5563                'AT' => 0x0054, # T
5564                'ATT' => 0x004C, # L
5565                'ATTL' => 0x0049, # I
5566                'ATTLI' => 0x0053, # S
5567              }->{$self->{kwd}}) {
5568            ## Stay in the state.
5569            $self->{kwd} .= chr $self->{nc};
5570            
5571        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5572          $self->{line_prev} = $self->{line};
5573          $self->{column_prev} = $self->{column};
5574          $self->{column}++;
5575          $self->{nc}
5576              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5577        } else {
5578          $self->{set_nc}->($self);
5579        }
5580      
5581            redo A;
5582          } elsif ($self->{kwd} eq 'ATTLIS' and
5583                   $self->{nc} == 0x0054) { # T
5584            $self->{ct} = {type => ATTLIST_TOKEN, name => '',
5585                           attrdefs => [],
5586                           line => $self->{line_prev},
5587                           column => $self->{column_prev} - 6};
5588            $self->{state} = DOCTYPE_MD_STATE;
5589            
5590        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5591          $self->{line_prev} = $self->{line};
5592          $self->{column_prev} = $self->{column};
5593          $self->{column}++;
5594          $self->{nc}
5595              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5596        } else {
5597          $self->{set_nc}->($self);
5598        }
5599      
5600            redo A;
5601          } else {
5602            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5603                            line => $self->{line_prev},
5604                            column => $self->{column_prev} - 1
5605                                 - (length $self->{kwd})
5606                                 + 1 * ($self->{nc} == -1));
5607            $self->{state} = BOGUS_COMMENT_STATE;
5608            ## Reconsume.
5609            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5610            redo A;
5611          }
5612        } elsif ($self->{state} == MD_NOTATION_STATE) {
5613          if ($self->{nc} == {
5614                'N' => 0x004F, # O
5615                'NO' => 0x0054, # T
5616                'NOT' => 0x0041, # A
5617                'NOTA' => 0x0054, # T
5618                'NOTAT' => 0x0049, # I
5619                'NOTATI' => 0x004F, # O
5620              }->{$self->{kwd}}) {
5621            ## Stay in the state.
5622            $self->{kwd} .= chr $self->{nc};
5623            
5624        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5625          $self->{line_prev} = $self->{line};
5626          $self->{column_prev} = $self->{column};
5627          $self->{column}++;
5628          $self->{nc}
5629              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5630        } else {
5631          $self->{set_nc}->($self);
5632        }
5633      
5634            redo A;
5635          } elsif ($self->{kwd} eq 'NOTATIO' and
5636                   $self->{nc} == 0x004E) { # N
5637            $self->{ct} = {type => NOTATION_TOKEN, name => '',
5638                           line => $self->{line_prev},
5639                           column => $self->{column_prev} - 6};
5640            $self->{state} = DOCTYPE_MD_STATE;
5641            
5642        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5643          $self->{line_prev} = $self->{line};
5644          $self->{column_prev} = $self->{column};
5645          $self->{column}++;
5646          $self->{nc}
5647              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5648        } else {
5649          $self->{set_nc}->($self);
5650        }
5651      
5652            redo A;
5653          } else {
5654            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5655                            line => $self->{line_prev},
5656                            column => $self->{column_prev} - 1
5657                                - (length $self->{kwd})
5658                                + 1 * ($self->{nc} == -1));
5659            $self->{state} = BOGUS_COMMENT_STATE;
5660            ## Reconsume.
5661            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5662            redo A;
5663          }
5664        } elsif ($self->{state} == DOCTYPE_MD_STATE) {
5665          ## XML5: "DOCTYPE ENTITY state", "DOCTYPE ATTLIST state", and
5666          ## "DOCTYPE NOTATION state".
5667    
5668          if ($is_space->{$self->{nc}}) {
5669            ## XML5: [NOTATION] Switch to the "DOCTYPE NOTATION identifier state".
5670            $self->{state} = BEFORE_MD_NAME_STATE;
5671            
5672        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5673          $self->{line_prev} = $self->{line};
5674          $self->{column_prev} = $self->{column};
5675          $self->{column}++;
5676          $self->{nc}
5677              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5678        } else {
5679          $self->{set_nc}->($self);
5680        }
5681      
5682            redo A;
5683          } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
5684                   $self->{nc} == 0x0025) { # %
5685            ## XML5: Switch to the "DOCTYPE bogus comment state".
5686            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before md name'); ## TODO: type
5687            $self->{state} = DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE;
5688            
5689        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5690          $self->{line_prev} = $self->{line};
5691          $self->{column_prev} = $self->{column};
5692          $self->{column}++;
5693          $self->{nc}
5694              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5695        } else {
5696          $self->{set_nc}->($self);
5697        }
5698      
5699            redo A;
5700          } elsif ($self->{nc} == -1) {
5701            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
5702            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
5703            ## Reconsume.
5704            redo A;
5705          } elsif ($self->{nc} == 0x003E) { # >
5706            ## XML5: Switch to the "DOCTYPE bogus comment state".
5707            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md name'); ## TODO: type
5708            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5709            
5710        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5711          $self->{line_prev} = $self->{line};
5712          $self->{column_prev} = $self->{column};
5713          $self->{column}++;
5714          $self->{nc}
5715              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5716        } else {
5717          $self->{set_nc}->($self);
5718        }
5719      
5720            redo A;
5721          } else {
5722            ## XML5: Switch to the "DOCTYPE bogus comment state".
5723            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before md name'); ## TODO: type
5724            $self->{state} = BEFORE_MD_NAME_STATE;
5725            redo A;
5726          }
5727        } elsif ($self->{state} == BEFORE_MD_NAME_STATE) {
5728          ## XML5: "DOCTYPE ENTITY parameter state", "DOCTYPE ENTITY type
5729          ## before state", "DOCTYPE ATTLIST name before state".
5730    
5731          if ($is_space->{$self->{nc}}) {
5732            ## Stay in the state.
5733            
5734        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5735          $self->{line_prev} = $self->{line};
5736          $self->{column_prev} = $self->{column};
5737          $self->{column}++;
5738          $self->{nc}
5739              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5740        } else {
5741          $self->{set_nc}->($self);
5742        }
5743      
5744            redo A;
5745          } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
5746                   $self->{nc} == 0x0025) { # %
5747            $self->{state} = DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE;
5748            
5749        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5750          $self->{line_prev} = $self->{line};
5751          $self->{column_prev} = $self->{column};
5752          $self->{column}++;
5753          $self->{nc}
5754              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5755        } else {
5756          $self->{set_nc}->($self);
5757        }
5758      
5759            redo A;
5760          } elsif ($self->{nc} == 0x003E) { # >
5761            ## XML5: Same as "Anything else".
5762            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md name'); ## TODO: type
5763            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5764            
5765        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5766          $self->{line_prev} = $self->{line};
5767          $self->{column_prev} = $self->{column};
5768          $self->{column}++;
5769          $self->{nc}
5770              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5771        } else {
5772          $self->{set_nc}->($self);
5773        }
5774      
5775            redo A;
5776          } elsif ($self->{nc} == -1) {
5777            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
5778            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
5779            ## Reconsume.
5780            redo A;
5781          } else {
5782            ## XML5: [ATTLIST] Not defined yet.
5783            $self->{ct}->{name} .= chr $self->{nc};
5784            $self->{state} = MD_NAME_STATE;
5785            
5786        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5787          $self->{line_prev} = $self->{line};
5788          $self->{column_prev} = $self->{column};
5789          $self->{column}++;
5790          $self->{nc}
5791              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5792        } else {
5793          $self->{set_nc}->($self);
5794        }
5795      
5796            redo A;
5797          }
5798        } elsif ($self->{state} == DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE) {
5799          if ($is_space->{$self->{nc}}) {
5800            ## XML5: Switch to the "DOCTYPE ENTITY parameter state".
5801            $self->{ct}->{type} = PARAMETER_ENTITY_TOKEN;
5802            $self->{state} = BEFORE_MD_NAME_STATE;
5803            
5804        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5805          $self->{line_prev} = $self->{line};
5806          $self->{column_prev} = $self->{column};
5807          $self->{column}++;
5808          $self->{nc}
5809              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5810        } else {
5811          $self->{set_nc}->($self);
5812        }
5813      
5814            redo A;
5815          } elsif ($self->{nc} == 0x003E) { # >
5816            ## XML5: Same as "Anything else".
5817            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md name'); ## TODO: type
5818            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5819            
5820        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5821          $self->{line_prev} = $self->{line};
5822          $self->{column_prev} = $self->{column};
5823          $self->{column}++;
5824          $self->{nc}
5825              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5826        } else {
5827          $self->{set_nc}->($self);
5828        }
5829      
5830            redo A;
5831          } elsif ($self->{nc} == -1) {
5832            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md');
5833            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
5834            ## Reconsume.
5835            redo A;
5836          } else {
5837            ## XML5: No parse error.
5838            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space after ENTITY percent'); ## TODO: type
5839            $self->{state} = BOGUS_COMMENT_STATE;
5840            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5841            ## Reconsume.
5842            redo A;
5843          }
5844        } elsif ($self->{state} == MD_NAME_STATE) {
5845          ## XML5: "DOCTYPE ENTITY name state" and "DOCTYPE ATTLIST name state".
5846          
5847          if ($is_space->{$self->{nc}}) {
5848            ## TODO:
5849            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
5850            
5851        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5852          $self->{line_prev} = $self->{line};
5853          $self->{column_prev} = $self->{column};
5854          $self->{column}++;
5855          $self->{nc}
5856              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5857        } else {
5858          $self->{set_nc}->($self);
5859        }
5860      
5861            redo A;
5862          } elsif ($self->{nc} == 0x003E) { # >
5863            if ($self->{ct}->{type} == ATTLIST_TOKEN) {
5864              #
5865            } else {
5866              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md body'); ## TODO: type
5867            }
5868            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5869            
5870        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5871          $self->{line_prev} = $self->{line};
5872          $self->{column_prev} = $self->{column};
5873          $self->{column}++;
5874          $self->{nc}
5875              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5876        } else {
5877          $self->{set_nc}->($self);
5878        }
5879      
5880            return  ($self->{ct}); # ELEMENT/ENTITY/ATTLIST/NOTATION
5881            redo A;
5882          } elsif ($self->{nc} == -1) {
5883            ## XML5: [ATTLIST] No parse error.
5884            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md');
5885            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
5886            ## Reconsume.
5887            return  ($self->{ct}); # ELEMENT/ENTITY/ATTLIST/NOTATION
5888            redo A;
5889          } else {
5890            ## XML5: [ATTLIST] Not defined yet.
5891            $self->{ct}->{name} .= chr $self->{nc};
5892            ## Stay in the state.
5893            
5894        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5895          $self->{line_prev} = $self->{line};
5896          $self->{column_prev} = $self->{column};
5897          $self->{column}++;
5898          $self->{nc}
5899              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5900        } else {
5901          $self->{set_nc}->($self);
5902        }
5903      
5904            redo A;
5905          }
5906        } elsif ($self->{state} == DOCTYPE_ATTLIST_NAME_AFTER_STATE) {
5907          if ($is_space->{$self->{nc}}) {
5908            ## Stay in the state.
5909            
5910        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5911          $self->{line_prev} = $self->{line};
5912          $self->{column_prev} = $self->{column};
5913          $self->{column}++;
5914          $self->{nc}
5915              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5916        } else {
5917          $self->{set_nc}->($self);
5918        }
5919      
5920            redo A;
5921          } elsif ($self->{nc} == 0x003E) { # >
5922            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5923            
5924        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5925          $self->{line_prev} = $self->{line};
5926          $self->{column_prev} = $self->{column};
5927          $self->{column}++;
5928          $self->{nc}
5929              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5930        } else {
5931          $self->{set_nc}->($self);
5932        }
5933      
5934            return  ($self->{ct}); # ATTLIST
5935            redo A;
5936          } elsif ($self->{nc} == -1) {
5937            ## XML5: No parse error.
5938            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
5939            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
5940            return  ($self->{ct});
5941            redo A;
5942          } else {
5943            ## XML5: Not defined yet.
5944            $self->{ca} = {name => chr ($self->{nc}), # attrdef
5945                           tokens => [],
5946                           line => $self->{line}, column => $self->{column}};
5947            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE;
5948            
5949        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5950          $self->{line_prev} = $self->{line};
5951          $self->{column_prev} = $self->{column};
5952          $self->{column}++;
5953          $self->{nc}
5954              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5955        } else {
5956          $self->{set_nc}->($self);
5957        }
5958      
5959            redo A;
5960          }
5961        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE) {
5962          if ($is_space->{$self->{nc}}) {
5963            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE;
5964            
5965        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5966          $self->{line_prev} = $self->{line};
5967          $self->{column_prev} = $self->{column};
5968          $self->{column}++;
5969          $self->{nc}
5970              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5971        } else {
5972          $self->{set_nc}->($self);
5973        }
5974      
5975            redo A;
5976          } elsif ($self->{nc} == 0x003E) { # >
5977            ## XML5: Same as "anything else".
5978            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr type'); ## TODO: type
5979            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5980            
5981        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5982          $self->{line_prev} = $self->{line};
5983          $self->{column_prev} = $self->{column};
5984          $self->{column}++;
5985          $self->{nc}
5986              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5987        } else {
5988          $self->{set_nc}->($self);
5989        }
5990      
5991            return  ($self->{ct}); # ATTLIST
5992            redo A;
5993          } elsif ($self->{nc} == 0x0028) { # (
5994            ## XML5: Same as "anything else".
5995            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before paren'); ## TODO: type
5996            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
5997            
5998        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5999          $self->{line_prev} = $self->{line};
6000          $self->{column_prev} = $self->{column};
6001          $self->{column}++;
6002          $self->{nc}
6003              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6004        } else {
6005          $self->{set_nc}->($self);
6006        }
6007      
6008            redo A;
6009          } elsif ($self->{nc} == -1) {
6010            ## XML5: No parse error.
6011            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6012            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6013            
6014        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6015          $self->{line_prev} = $self->{line};
6016          $self->{column_prev} = $self->{column};
6017          $self->{column}++;
6018          $self->{nc}
6019              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6020        } else {
6021          $self->{set_nc}->($self);
6022        }
6023      
6024            return  ($self->{ct}); # ATTLIST
6025            redo A;
6026          } else {
6027            ## XML5: Not defined yet.
6028            $self->{ca}->{name} .= chr $self->{nc};
6029            ## Stay in the state.
6030            
6031        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6032          $self->{line_prev} = $self->{line};
6033          $self->{column_prev} = $self->{column};
6034          $self->{column}++;
6035          $self->{nc}
6036              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6037        } else {
6038          $self->{set_nc}->($self);
6039        }
6040      
6041            redo A;
6042          }
6043        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE) {
6044          if ($is_space->{$self->{nc}}) {
6045            ## Stay in the state.
6046            
6047        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6048          $self->{line_prev} = $self->{line};
6049          $self->{column_prev} = $self->{column};
6050          $self->{column}++;
6051          $self->{nc}
6052              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6053        } else {
6054          $self->{set_nc}->($self);
6055        }
6056      
6057            redo A;
6058          } elsif ($self->{nc} == 0x003E) { # >
6059            ## XML5: Same as "anything else".
6060            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr type'); ## TODO: type
6061            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6062            
6063        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6064          $self->{line_prev} = $self->{line};
6065          $self->{column_prev} = $self->{column};
6066          $self->{column}++;
6067          $self->{nc}
6068              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6069        } else {
6070          $self->{set_nc}->($self);
6071        }
6072      
6073            return  ($self->{ct}); # ATTLIST
6074            redo A;
6075          } elsif ($self->{nc} == 0x0028) { # (
6076            ## XML5: Same as "anything else".
6077            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6078            
6079        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6080          $self->{line_prev} = $self->{line};
6081          $self->{column_prev} = $self->{column};
6082          $self->{column}++;
6083          $self->{nc}
6084              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6085        } else {
6086          $self->{set_nc}->($self);
6087        }
6088      
6089            redo A;
6090          } elsif ($self->{nc} == -1) {
6091            ## XML5: No parse error.
6092            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6093            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6094            
6095        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6096          $self->{line_prev} = $self->{line};
6097          $self->{column_prev} = $self->{column};
6098          $self->{column}++;
6099          $self->{nc}
6100              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6101        } else {
6102          $self->{set_nc}->($self);
6103        }
6104      
6105            return  ($self->{ct});
6106            redo A;
6107          } else {
6108            ## XML5: Not defined yet.
6109            $self->{ca}->{type} = chr $self->{nc};
6110            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE;
6111            
6112        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6113          $self->{line_prev} = $self->{line};
6114          $self->{column_prev} = $self->{column};
6115          $self->{column}++;
6116          $self->{nc}
6117              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6118        } else {
6119          $self->{set_nc}->($self);
6120        }
6121      
6122            redo A;
6123          }
6124        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE) {
6125          if ($is_space->{$self->{nc}}) {
6126            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE;
6127            
6128        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6129          $self->{line_prev} = $self->{line};
6130          $self->{column_prev} = $self->{column};
6131          $self->{column}++;
6132          $self->{nc}
6133              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6134        } else {
6135          $self->{set_nc}->($self);
6136        }
6137      
6138            redo A;
6139          } elsif ($self->{nc} == 0x0023) { # #
6140            ## XML5: Same as "anything else".
6141            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6142            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6143            
6144        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6145          $self->{line_prev} = $self->{line};
6146          $self->{column_prev} = $self->{column};
6147          $self->{column}++;
6148          $self->{nc}
6149              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6150        } else {
6151          $self->{set_nc}->($self);
6152        }
6153      
6154            redo A;
6155          } elsif ($self->{nc} == 0x0022) { # "
6156            ## XML5: Same as "anything else".
6157            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6158            $self->{ca}->{value} = '';
6159            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6160            
6161        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6162          $self->{line_prev} = $self->{line};
6163          $self->{column_prev} = $self->{column};
6164          $self->{column}++;
6165          $self->{nc}
6166              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6167        } else {
6168          $self->{set_nc}->($self);
6169        }
6170      
6171            redo A;
6172          } elsif ($self->{nc} == 0x0027) { # '
6173            ## XML5: Same as "anything else".
6174            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6175            $self->{ca}->{value} = '';
6176            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6177            
6178        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6179          $self->{line_prev} = $self->{line};
6180          $self->{column_prev} = $self->{column};
6181          $self->{column}++;
6182          $self->{nc}
6183              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6184        } else {
6185          $self->{set_nc}->($self);
6186        }
6187      
6188            redo A;
6189          } elsif ($self->{nc} == 0x003E) { # >
6190            ## XML5: Same as "anything else".
6191            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6192            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6193            
6194        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6195          $self->{line_prev} = $self->{line};
6196          $self->{column_prev} = $self->{column};
6197          $self->{column}++;
6198          $self->{nc}
6199              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6200        } else {
6201          $self->{set_nc}->($self);
6202        }
6203      
6204            return  ($self->{ct}); # ATTLIST
6205            redo A;
6206          } elsif ($self->{nc} == 0x0028) { # (
6207            ## XML5: Same as "anything else".
6208            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before paren'); ## TODO: type
6209            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6210            
6211        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6212          $self->{line_prev} = $self->{line};
6213          $self->{column_prev} = $self->{column};
6214          $self->{column}++;
6215          $self->{nc}
6216              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6217        } else {
6218          $self->{set_nc}->($self);
6219        }
6220      
6221            redo A;
6222          } elsif ($self->{nc} == -1) {
6223            ## XML5: No parse error.
6224            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6225            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6226            
6227        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6228          $self->{line_prev} = $self->{line};
6229          $self->{column_prev} = $self->{column};
6230          $self->{column}++;
6231          $self->{nc}
6232              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6233        } else {
6234          $self->{set_nc}->($self);
6235        }
6236      
6237            return  ($self->{ct});
6238            redo A;
6239          } else {
6240            ## XML5: Not defined yet.
6241            $self->{ca}->{type} .= chr $self->{nc};
6242            ## Stay in the state.
6243            
6244        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6245          $self->{line_prev} = $self->{line};
6246          $self->{column_prev} = $self->{column};
6247          $self->{column}++;
6248          $self->{nc}
6249              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6250        } else {
6251          $self->{set_nc}->($self);
6252        }
6253      
6254            redo A;
6255          }
6256        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE) {
6257          if ($is_space->{$self->{nc}}) {
6258            ## Stay in the state.
6259            
6260        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6261          $self->{line_prev} = $self->{line};
6262          $self->{column_prev} = $self->{column};
6263          $self->{column}++;
6264          $self->{nc}
6265              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6266        } else {
6267          $self->{set_nc}->($self);
6268        }
6269      
6270            redo A;
6271          } elsif ($self->{nc} == 0x0028) { # (
6272            ## XML5: Same as "anything else".
6273            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6274            
6275        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6276          $self->{line_prev} = $self->{line};
6277          $self->{column_prev} = $self->{column};
6278          $self->{column}++;
6279          $self->{nc}
6280              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6281        } else {
6282          $self->{set_nc}->($self);
6283        }
6284      
6285            redo A;
6286          } elsif ($self->{nc} == 0x0023) { # #
6287            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6288            
6289        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6290          $self->{line_prev} = $self->{line};
6291          $self->{column_prev} = $self->{column};
6292          $self->{column}++;
6293          $self->{nc}
6294              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6295        } else {
6296          $self->{set_nc}->($self);
6297        }
6298      
6299            redo A;
6300          } elsif ($self->{nc} == 0x0022) { # "
6301            ## XML5: Same as "anything else".
6302            $self->{ca}->{value} = '';
6303            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6304            
6305        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6306          $self->{line_prev} = $self->{line};
6307          $self->{column_prev} = $self->{column};
6308          $self->{column}++;
6309          $self->{nc}
6310              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6311        } else {
6312          $self->{set_nc}->($self);
6313        }
6314      
6315            redo A;
6316          } elsif ($self->{nc} == 0x0027) { # '
6317            ## XML5: Same as "anything else".
6318            $self->{ca}->{value} = '';
6319            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6320            
6321        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6322          $self->{line_prev} = $self->{line};
6323          $self->{column_prev} = $self->{column};
6324          $self->{column}++;
6325          $self->{nc}
6326              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6327        } else {
6328          $self->{set_nc}->($self);
6329        }
6330      
6331            redo A;
6332          } elsif ($self->{nc} == 0x003E) { # >
6333            ## XML5: Same as "anything else".
6334            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6335            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6336            
6337        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6338          $self->{line_prev} = $self->{line};
6339          $self->{column_prev} = $self->{column};
6340          $self->{column}++;
6341          $self->{nc}
6342              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6343        } else {
6344          $self->{set_nc}->($self);
6345        }
6346      
6347            return  ($self->{ct}); # ATTLIST
6348            redo A;
6349          } elsif ($self->{nc} == -1) {
6350            ## XML5: No parse error.
6351            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6352            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6353            
6354        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6355          $self->{line_prev} = $self->{line};
6356          $self->{column_prev} = $self->{column};
6357          $self->{column}++;
6358          $self->{nc}
6359              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6360        } else {
6361          $self->{set_nc}->($self);
6362        }
6363      
6364            return  ($self->{ct});
6365            redo A;
6366          } else {
6367            ## XML5: Switch to the "DOCTYPE bogus comment state".
6368            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
6369            $self->{ca}->{value} = '';
6370            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
6371            ## Reconsume.
6372            redo A;
6373          }
6374        } elsif ($self->{state} == BEFORE_ALLOWED_TOKEN_STATE) {
6375          if ($is_space->{$self->{nc}}) {
6376            ## Stay in the state.
6377            
6378        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6379          $self->{line_prev} = $self->{line};
6380          $self->{column_prev} = $self->{column};
6381          $self->{column}++;
6382          $self->{nc}
6383              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6384        } else {
6385          $self->{set_nc}->($self);
6386        }
6387      
6388            redo A;
6389          } elsif ($self->{nc} == 0x007C) { # |
6390            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty allowed token'); ## TODO: type
6391            ## Stay in the state.
6392            
6393        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6394          $self->{line_prev} = $self->{line};
6395          $self->{column_prev} = $self->{column};
6396          $self->{column}++;
6397          $self->{nc}
6398              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6399        } else {
6400          $self->{set_nc}->($self);
6401        }
6402      
6403            redo A;
6404          } elsif ($self->{nc} == 0x0029) { # )
6405            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty allowed token'); ## TODO: type
6406            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
6407            
6408        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6409          $self->{line_prev} = $self->{line};
6410          $self->{column_prev} = $self->{column};
6411          $self->{column}++;
6412          $self->{nc}
6413              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6414        } else {
6415          $self->{set_nc}->($self);
6416        }
6417      
6418            redo A;
6419          } elsif ($self->{nc} == 0x003E) { # >
6420            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
6421            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6422            
6423        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6424          $self->{line_prev} = $self->{line};
6425          $self->{column_prev} = $self->{column};
6426          $self->{column}++;
6427          $self->{nc}
6428              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6429        } else {
6430          $self->{set_nc}->($self);
6431        }
6432      
6433            return  ($self->{ct}); # ATTLIST
6434            redo A;
6435          } elsif ($self->{nc} == -1) {
6436            ## XML5: No parse error.
6437            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6438            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6439            
6440        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6441          $self->{line_prev} = $self->{line};
6442          $self->{column_prev} = $self->{column};
6443          $self->{column}++;
6444          $self->{nc}
6445              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6446        } else {
6447          $self->{set_nc}->($self);
6448        }
6449      
6450            return  ($self->{ct});
6451            redo A;
6452          } else {
6453            push @{$self->{ca}->{tokens}}, chr $self->{nc};
6454            $self->{state} = ALLOWED_TOKEN_STATE;
6455            
6456        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6457          $self->{line_prev} = $self->{line};
6458          $self->{column_prev} = $self->{column};
6459          $self->{column}++;
6460          $self->{nc}
6461              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6462        } else {
6463          $self->{set_nc}->($self);
6464        }
6465      
6466            redo A;
6467          }
6468        } elsif ($self->{state} == ALLOWED_TOKEN_STATE) {
6469          if ($is_space->{$self->{nc}}) {
6470            $self->{state} = AFTER_ALLOWED_TOKEN_STATE;
6471            
6472        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6473          $self->{line_prev} = $self->{line};
6474          $self->{column_prev} = $self->{column};
6475          $self->{column}++;
6476          $self->{nc}
6477              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6478        } else {
6479          $self->{set_nc}->($self);
6480        }
6481      
6482            redo A;
6483          } elsif ($self->{nc} == 0x007C) { # |
6484            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6485            
6486        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6487          $self->{line_prev} = $self->{line};
6488          $self->{column_prev} = $self->{column};
6489          $self->{column}++;
6490          $self->{nc}
6491              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6492        } else {
6493          $self->{set_nc}->($self);
6494        }
6495      
6496            redo A;
6497          } elsif ($self->{nc} == 0x0029) { # )
6498            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
6499            
6500        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6501          $self->{line_prev} = $self->{line};
6502          $self->{column_prev} = $self->{column};
6503          $self->{column}++;
6504          $self->{nc}
6505              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6506        } else {
6507          $self->{set_nc}->($self);
6508        }
6509      
6510            redo A;
6511          } elsif ($self->{nc} == 0x003E) { # >
6512            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
6513            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6514            
6515        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6516          $self->{line_prev} = $self->{line};
6517          $self->{column_prev} = $self->{column};
6518          $self->{column}++;
6519          $self->{nc}
6520              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6521        } else {
6522          $self->{set_nc}->($self);
6523        }
6524      
6525            return  ($self->{ct}); # ATTLIST
6526            redo A;
6527          } elsif ($self->{nc} == -1) {
6528            ## XML5: No parse error.
6529            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6530            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6531            
6532        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6533          $self->{line_prev} = $self->{line};
6534          $self->{column_prev} = $self->{column};
6535          $self->{column}++;
6536          $self->{nc}
6537              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6538        } else {
6539          $self->{set_nc}->($self);
6540        }
6541      
6542            return  ($self->{ct});
6543            redo A;
6544          } else {
6545            $self->{ca}->{tokens}->[-1] .= chr $self->{nc};
6546            ## Stay in the state.
6547            
6548        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6549          $self->{line_prev} = $self->{line};
6550          $self->{column_prev} = $self->{column};
6551          $self->{column}++;
6552          $self->{nc}
6553              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6554        } else {
6555          $self->{set_nc}->($self);
6556        }
6557      
6558            redo A;
6559          }
6560        } elsif ($self->{state} == AFTER_ALLOWED_TOKEN_STATE) {
6561          if ($is_space->{$self->{nc}}) {
6562            ## Stay in the state.
6563            
6564        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6565          $self->{line_prev} = $self->{line};
6566          $self->{column_prev} = $self->{column};
6567          $self->{column}++;
6568          $self->{nc}
6569              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6570        } else {
6571          $self->{set_nc}->($self);
6572        }
6573      
6574            redo A;
6575          } elsif ($self->{nc} == 0x007C) { # |
6576            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6577            
6578        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6579          $self->{line_prev} = $self->{line};
6580          $self->{column_prev} = $self->{column};
6581          $self->{column}++;
6582          $self->{nc}
6583              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6584        } else {
6585          $self->{set_nc}->($self);
6586        }
6587      
6588            redo A;
6589          } elsif ($self->{nc} == 0x0029) { # )
6590            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
6591            
6592        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6593          $self->{line_prev} = $self->{line};
6594          $self->{column_prev} = $self->{column};
6595          $self->{column}++;
6596          $self->{nc}
6597              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6598        } else {
6599          $self->{set_nc}->($self);
6600        }
6601      
6602            redo A;
6603          } elsif ($self->{nc} == 0x003E) { # >
6604            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
6605            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6606            
6607        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6608          $self->{line_prev} = $self->{line};
6609          $self->{column_prev} = $self->{column};
6610          $self->{column}++;
6611          $self->{nc}
6612              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6613        } else {
6614          $self->{set_nc}->($self);
6615        }
6616      
6617            return  ($self->{ct}); # ATTLIST
6618            redo A;
6619          } elsif ($self->{nc} == -1) {
6620            ## XML5: No parse error.
6621            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6622            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6623            
6624        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6625          $self->{line_prev} = $self->{line};
6626          $self->{column_prev} = $self->{column};
6627          $self->{column}++;
6628          $self->{nc}
6629              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6630        } else {
6631          $self->{set_nc}->($self);
6632        }
6633      
6634            return  ($self->{ct});
6635            redo A;
6636          } else {
6637            $self->{parse_error}->(level => $self->{level}->{must}, type => 'space in allowed token', ## TODO: type
6638                            line => $self->{line_prev},
6639                            column => $self->{column_prev});
6640            $self->{ca}->{tokens}->[-1] .= ' ' . chr $self->{nc};
6641            $self->{state} = ALLOWED_TOKEN_STATE;
6642            
6643        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6644          $self->{line_prev} = $self->{line};
6645          $self->{column_prev} = $self->{column};
6646          $self->{column}++;
6647          $self->{nc}
6648              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6649        } else {
6650          $self->{set_nc}->($self);
6651        }
6652      
6653            redo A;
6654          }
6655        } elsif ($self->{state} == AFTER_ALLOWED_TOKENS_STATE) {
6656          if ($is_space->{$self->{nc}}) {
6657            $self->{state} = BEFORE_ATTR_DEFAULT_STATE;
6658            
6659        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6660          $self->{line_prev} = $self->{line};
6661          $self->{column_prev} = $self->{column};
6662          $self->{column}++;
6663          $self->{nc}
6664              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6665        } else {
6666          $self->{set_nc}->($self);
6667        }
6668      
6669            redo A;
6670          } elsif ($self->{nc} == 0x0023) { # #
6671            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6672            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6673            
6674        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6675          $self->{line_prev} = $self->{line};
6676          $self->{column_prev} = $self->{column};
6677          $self->{column}++;
6678          $self->{nc}
6679              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6680        } else {
6681          $self->{set_nc}->($self);
6682        }
6683      
6684            redo A;
6685          } elsif ($self->{nc} == 0x0022) { # "
6686            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6687            $self->{ca}->{value} = '';
6688            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6689            
6690        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6691          $self->{line_prev} = $self->{line};
6692          $self->{column_prev} = $self->{column};
6693          $self->{column}++;
6694          $self->{nc}
6695              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6696        } else {
6697          $self->{set_nc}->($self);
6698        }
6699      
6700            redo A;
6701          } elsif ($self->{nc} == 0x0027) { # '
6702            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6703            $self->{ca}->{value} = '';
6704            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6705            
6706        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6707          $self->{line_prev} = $self->{line};
6708          $self->{column_prev} = $self->{column};
6709          $self->{column}++;
6710          $self->{nc}
6711              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6712        } else {
6713          $self->{set_nc}->($self);
6714        }
6715      
6716            redo A;
6717          } elsif ($self->{nc} == 0x003E) { # >
6718            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6719            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6720            
6721        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6722          $self->{line_prev} = $self->{line};
6723          $self->{column_prev} = $self->{column};
6724          $self->{column}++;
6725          $self->{nc}
6726              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6727        } else {
6728          $self->{set_nc}->($self);
6729        }
6730      
6731            return  ($self->{ct}); # ATTLIST
6732            redo A;
6733          } elsif ($self->{nc} == -1) {
6734            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6735            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6736            
6737        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6738          $self->{line_prev} = $self->{line};
6739          $self->{column_prev} = $self->{column};
6740          $self->{column}++;
6741          $self->{nc}
6742              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6743        } else {
6744          $self->{set_nc}->($self);
6745        }
6746      
6747            return  ($self->{ct});
6748            redo A;
6749          } else {
6750            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
6751            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
6752          ## Reconsume.          ## Reconsume.
6753          redo A;          redo A;
6754        }        }
6755        } elsif ($self->{state} == BEFORE_ATTR_DEFAULT_STATE) {
6756          if ($is_space->{$self->{nc}}) {
6757            ## Stay in the state.
6758            
6759        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6760          $self->{line_prev} = $self->{line};
6761          $self->{column_prev} = $self->{column};
6762          $self->{column}++;
6763          $self->{nc}
6764              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6765        } else {
6766          $self->{set_nc}->($self);
6767        }
6768      
6769            redo A;
6770          } elsif ($self->{nc} == 0x0023) { # #
6771            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6772            
6773        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6774          $self->{line_prev} = $self->{line};
6775          $self->{column_prev} = $self->{column};
6776          $self->{column}++;
6777          $self->{nc}
6778              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6779        } else {
6780          $self->{set_nc}->($self);
6781        }
6782      
6783            redo A;
6784          } elsif ($self->{nc} == 0x0022) { # "
6785            $self->{ca}->{value} = '';
6786            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6787            
6788        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6789          $self->{line_prev} = $self->{line};
6790          $self->{column_prev} = $self->{column};
6791          $self->{column}++;
6792          $self->{nc}
6793              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6794        } else {
6795          $self->{set_nc}->($self);
6796        }
6797      
6798            redo A;
6799          } elsif ($self->{nc} == 0x0027) { # '
6800            $self->{ca}->{value} = '';
6801            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6802            
6803        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6804          $self->{line_prev} = $self->{line};
6805          $self->{column_prev} = $self->{column};
6806          $self->{column}++;
6807          $self->{nc}
6808              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6809        } else {
6810          $self->{set_nc}->($self);
6811        }
6812      
6813            redo A;
6814          } elsif ($self->{nc} == 0x003E) { # >
6815            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6816            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6817            
6818        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6819          $self->{line_prev} = $self->{line};
6820          $self->{column_prev} = $self->{column};
6821          $self->{column}++;
6822          $self->{nc}
6823              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6824        } else {
6825          $self->{set_nc}->($self);
6826        }
6827      
6828            return  ($self->{ct}); # ATTLIST
6829            redo A;
6830          } elsif ($self->{nc} == -1) {
6831            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6832            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6833            
6834        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6835          $self->{line_prev} = $self->{line};
6836          $self->{column_prev} = $self->{column};
6837          $self->{column}++;
6838          $self->{nc}
6839              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6840        } else {
6841          $self->{set_nc}->($self);
6842        }
6843      
6844            return  ($self->{ct});
6845            redo A;
6846          } else {
6847            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
6848            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
6849            ## Reconsume.
6850            redo A;
6851          }
6852        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE) {
6853          if ($is_space->{$self->{nc}}) {
6854            ## XML5: No parse error.
6855            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no default type'); ## TODO: type
6856            $self->{state} = BOGUS_COMMENT_STATE;
6857            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
6858            ## Reconsume.
6859            redo A;
6860          } elsif ($self->{nc} == 0x0022) { # "
6861            ## XML5: Same as "anything else".
6862            $self->{ca}->{value} = '';
6863            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6864            
6865        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6866          $self->{line_prev} = $self->{line};
6867          $self->{column_prev} = $self->{column};
6868          $self->{column}++;
6869          $self->{nc}
6870              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6871        } else {
6872          $self->{set_nc}->($self);
6873        }
6874      
6875            redo A;
6876          } elsif ($self->{nc} == 0x0027) { # '
6877            ## XML5: Same as "anything else".
6878            $self->{ca}->{value} = '';
6879            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6880            
6881        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6882          $self->{line_prev} = $self->{line};
6883          $self->{column_prev} = $self->{column};
6884          $self->{column}++;
6885          $self->{nc}
6886              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6887        } else {
6888          $self->{set_nc}->($self);
6889        }
6890      
6891            redo A;
6892          } elsif ($self->{nc} == 0x003E) { # >
6893            ## XML5: Same as "anything else".
6894            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6895            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6896            
6897        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6898          $self->{line_prev} = $self->{line};
6899          $self->{column_prev} = $self->{column};
6900          $self->{column}++;
6901          $self->{nc}
6902              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6903        } else {
6904          $self->{set_nc}->($self);
6905        }
6906      
6907            return  ($self->{ct}); # ATTLIST
6908            redo A;
6909          } elsif ($self->{nc} == -1) {
6910            ## XML5: No parse error.
6911            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6912            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6913            
6914        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6915          $self->{line_prev} = $self->{line};
6916          $self->{column_prev} = $self->{column};
6917          $self->{column}++;
6918          $self->{nc}
6919              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6920        } else {
6921          $self->{set_nc}->($self);
6922        }
6923      
6924            return  ($self->{ct});
6925            redo A;
6926          } else {
6927            $self->{ca}->{default} = chr $self->{nc};
6928            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE;
6929            
6930        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6931          $self->{line_prev} = $self->{line};
6932          $self->{column_prev} = $self->{column};
6933          $self->{column}++;
6934          $self->{nc}
6935              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6936        } else {
6937          $self->{set_nc}->($self);
6938        }
6939      
6940            redo A;
6941          }
6942        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE) {
6943          if ($is_space->{$self->{nc}}) {
6944            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE;
6945            
6946        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6947          $self->{line_prev} = $self->{line};
6948          $self->{column_prev} = $self->{column};
6949          $self->{column}++;
6950          $self->{nc}
6951              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6952        } else {
6953          $self->{set_nc}->($self);
6954        }
6955      
6956            redo A;
6957          } elsif ($self->{nc} == 0x0022) { # "
6958            ## XML5: Same as "anything else".
6959            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6960            $self->{ca}->{value} = '';
6961            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6962            
6963        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6964          $self->{line_prev} = $self->{line};
6965          $self->{column_prev} = $self->{column};
6966          $self->{column}++;
6967          $self->{nc}
6968              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6969        } else {
6970          $self->{set_nc}->($self);
6971        }
6972      
6973            redo A;
6974          } elsif ($self->{nc} == 0x0027) { # '
6975            ## XML5: Same as "anything else".
6976            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6977            $self->{ca}->{value} = '';
6978            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6979            
6980        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6981          $self->{line_prev} = $self->{line};
6982          $self->{column_prev} = $self->{column};
6983          $self->{column}++;
6984          $self->{nc}
6985              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6986        } else {
6987          $self->{set_nc}->($self);
6988        }
6989      
6990            redo A;
6991          } elsif ($self->{nc} == 0x003E) { # >
6992            ## XML5: Same as "anything else".
6993            push @{$self->{ct}->{attrdefs}}, $self->{ca};
6994            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6995            
6996        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6997          $self->{line_prev} = $self->{line};
6998          $self->{column_prev} = $self->{column};
6999          $self->{column}++;
7000          $self->{nc}
7001              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7002        } else {
7003          $self->{set_nc}->($self);
7004        }
7005      
7006            return  ($self->{ct}); # ATTLIST
7007            redo A;
7008          } elsif ($self->{nc} == -1) {
7009            ## XML5: No parse error.
7010            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7011            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7012            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7013            
7014        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7015          $self->{line_prev} = $self->{line};
7016          $self->{column_prev} = $self->{column};
7017          $self->{column}++;
7018          $self->{nc}
7019              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7020        } else {
7021          $self->{set_nc}->($self);
7022        }
7023      
7024            return  ($self->{ct});
7025            redo A;
7026          } else {
7027            $self->{ca}->{default} .= chr $self->{nc};
7028            ## Stay in the state.
7029            
7030        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7031          $self->{line_prev} = $self->{line};
7032          $self->{column_prev} = $self->{column};
7033          $self->{column}++;
7034          $self->{nc}
7035              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7036        } else {
7037          $self->{set_nc}->($self);
7038        }
7039      
7040            redo A;
7041          }
7042        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE) {
7043          if ($is_space->{$self->{nc}}) {
7044            ## Stay in the state.
7045            
7046        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7047          $self->{line_prev} = $self->{line};
7048          $self->{column_prev} = $self->{column};
7049          $self->{column}++;
7050          $self->{nc}
7051              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7052        } else {
7053          $self->{set_nc}->($self);
7054        }
7055      
7056            redo A;
7057          } elsif ($self->{nc} == 0x0022) { # "
7058            $self->{ca}->{value} = '';
7059            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7060            
7061        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7062          $self->{line_prev} = $self->{line};
7063          $self->{column_prev} = $self->{column};
7064          $self->{column}++;
7065          $self->{nc}
7066              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7067        } else {
7068          $self->{set_nc}->($self);
7069        }
7070      
7071            redo A;
7072          } elsif ($self->{nc} == 0x0027) { # '
7073            $self->{ca}->{value} = '';
7074            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7075            
7076        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7077          $self->{line_prev} = $self->{line};
7078          $self->{column_prev} = $self->{column};
7079          $self->{column}++;
7080          $self->{nc}
7081              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7082        } else {
7083          $self->{set_nc}->($self);
7084        }
7085      
7086            redo A;
7087          } elsif ($self->{nc} == 0x003E) { # >
7088            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7089            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7090            
7091        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7092          $self->{line_prev} = $self->{line};
7093          $self->{column_prev} = $self->{column};
7094          $self->{column}++;
7095          $self->{nc}
7096              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7097        } else {
7098          $self->{set_nc}->($self);
7099        }
7100      
7101            return  ($self->{ct}); # ATTLIST
7102            redo A;
7103          } elsif ($self->{nc} == -1) {
7104            ## XML5: No parse error.
7105            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7106            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7107            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7108            
7109        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7110          $self->{line_prev} = $self->{line};
7111          $self->{column_prev} = $self->{column};
7112          $self->{column}++;
7113          $self->{nc}
7114              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7115        } else {
7116          $self->{set_nc}->($self);
7117        }
7118      
7119            return  ($self->{ct});
7120            redo A;
7121          } else {
7122            ## XML5: Not defined yet.
7123            if ($self->{ca}->{default} eq 'FIXED') {
7124              $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
7125            } else {
7126              push @{$self->{ct}->{attrdefs}}, $self->{ca};
7127              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7128            }
7129            ## Reconsume.
7130            redo A;
7131          }
7132        } elsif ($self->{state} == AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE) {
7133          if ($is_space->{$self->{nc}} or
7134              $self->{nc} == -1 or
7135              $self->{nc} == 0x003E) { # >
7136            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7137            ## Reconsume.
7138            redo A;
7139          } else {
7140            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before attr name'); ## TODO: type
7141            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7142            ## Reconsume.
7143            redo A;
7144          }      
7145      } else {      } else {
7146        die "$0: $self->{state}: Unknown state";        die "$0: $self->{state}: Unknown state";
7147      }      }
# Line 4148  sub _get_next_token ($) { Line 7152  sub _get_next_token ($) {
7152    
7153  1;  1;
7154  ## $Date$  ## $Date$
7155                                    

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.15

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24