/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm

Parent Directory | Revision Log | View Patch

revision 1.4 by wakaba, Tue Oct 14 11:46:57 2008 UTC | revision 1.31 by wakaba, Sat Sep 5 09:26:55 2009 UTC
# Line 15  BEGIN { Line 15  BEGIN {
15      CHARACTER_TOKEN      CHARACTER_TOKEN
16      PI_TOKEN      PI_TOKEN
17      ABORT_TOKEN      ABORT_TOKEN
18        END_OF_DOCTYPE_TOKEN
19        ATTLIST_TOKEN
20        ELEMENT_TOKEN
21        GENERAL_ENTITY_TOKEN
22        PARAMETER_ENTITY_TOKEN
23        NOTATION_TOKEN
24    );    );
25        
26    our %EXPORT_TAGS = (    our %EXPORT_TAGS = (
# Line 27  BEGIN { Line 33  BEGIN {
33        CHARACTER_TOKEN        CHARACTER_TOKEN
34        PI_TOKEN        PI_TOKEN
35        ABORT_TOKEN        ABORT_TOKEN
36          END_OF_DOCTYPE_TOKEN
37          ATTLIST_TOKEN
38          ELEMENT_TOKEN
39          GENERAL_ENTITY_TOKEN
40          PARAMETER_ENTITY_TOKEN
41          NOTATION_TOKEN
42      )],      )],
43    );    );
44  }  }
45    
46    ## NOTE: Differences from the XML5 draft are marked as "XML5:".
47    
48  ## Token types  ## Token types
49    
50  sub DOCTYPE_TOKEN () { 1 }  sub DOCTYPE_TOKEN () { 1 } ## XML5: No DOCTYPE token.
51  sub COMMENT_TOKEN () { 2 }  sub COMMENT_TOKEN () { 2 }
52  sub START_TAG_TOKEN () { 3 }  sub START_TAG_TOKEN () { 3 }
53  sub END_TAG_TOKEN () { 4 }  sub END_TAG_TOKEN () { 4 }
54  sub END_OF_FILE_TOKEN () { 5 }  sub END_OF_FILE_TOKEN () { 5 }
55  sub CHARACTER_TOKEN () { 6 }  sub CHARACTER_TOKEN () { 6 }
56  sub PI_TOKEN () { 7 } # XML5  sub PI_TOKEN () { 7 } ## NOTE: XML only.
57  sub ABORT_TOKEN () { 8 } # Not a token actually  sub ABORT_TOKEN () { 8 } ## NOTE: For internal processing.
58    sub END_OF_DOCTYPE_TOKEN () { 9 } ## NOTE: XML only.
59    sub ATTLIST_TOKEN () { 10 } ## NOTE: XML only.
60    sub ELEMENT_TOKEN () { 11 } ## NOTE: XML only.
61    sub GENERAL_ENTITY_TOKEN () { 12 } ## NOTE: XML only.
62    sub PARAMETER_ENTITY_TOKEN () { 13 } ## NOTE: XML only.
63    sub NOTATION_TOKEN () { 14 } ## NOTE: XML only.
64    
65    ## XML5: XML5 has "empty tag token".  In this implementation, it is
66    ## represented as a start tag token with $self->{self_closing} flag
67    ## set to true.
68    
69    ## XML5: XML5 has "short end tag token".  In this implementation, it
70    ## is represented as an end tag token with $token->{tag_name} flag set
71    ## to an empty string.
72    
73  package Whatpm::HTML;  package Whatpm::HTML;
74    
# Line 77  sub COMMENT_START_STATE () { 14 } Line 105  sub COMMENT_START_STATE () { 14 }
105  sub COMMENT_START_DASH_STATE () { 15 }  sub COMMENT_START_DASH_STATE () { 15 }
106  sub COMMENT_STATE () { 16 }  sub COMMENT_STATE () { 16 }
107  sub COMMENT_END_STATE () { 17 }  sub COMMENT_END_STATE () { 17 }
108    sub COMMENT_END_BANG_STATE () { 102 } ## LAST
109  sub COMMENT_END_DASH_STATE () { 18 }  sub COMMENT_END_DASH_STATE () { 18 }
110  sub BOGUS_COMMENT_STATE () { 19 }  sub BOGUS_COMMENT_STATE () { 19 }
111  sub DOCTYPE_STATE () { 20 }  sub DOCTYPE_STATE () { 20 }
# Line 114  sub HEXREF_HEX_STATE () { 48 } Line 143  sub HEXREF_HEX_STATE () { 48 }
143  sub ENTITY_NAME_STATE () { 49 }  sub ENTITY_NAME_STATE () { 49 }
144  sub PCDATA_STATE () { 50 } # "data state" in the spec  sub PCDATA_STATE () { 50 } # "data state" in the spec
145    
146    ## XML-only states
147    sub PI_STATE () { 51 }
148    sub PI_TARGET_STATE () { 52 }
149    sub PI_TARGET_AFTER_STATE () { 53 }
150    sub PI_DATA_STATE () { 54 }
151    sub PI_AFTER_STATE () { 55 }
152    sub PI_DATA_AFTER_STATE () { 56 }
153    sub DOCTYPE_INTERNAL_SUBSET_STATE () { 57 }
154    sub DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 58 }
155    sub BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 59 }
156    sub DOCTYPE_TAG_STATE () { 60 }
157    sub DOCTYPE_MARKUP_DECLARATION_OPEN_STATE () { 61 }
158    sub MD_ATTLIST_STATE () { 62 }
159    sub MD_E_STATE () { 63 }
160    sub MD_ELEMENT_STATE () { 64 }
161    sub MD_ENTITY_STATE () { 65 }
162    sub MD_NOTATION_STATE () { 66 }
163    sub DOCTYPE_MD_STATE () { 67 }
164    sub BEFORE_MD_NAME_STATE () { 68 }
165    sub MD_NAME_STATE () { 69 }
166    sub DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE () { 70 }
167    sub DOCTYPE_ATTLIST_NAME_AFTER_STATE () { 71 }
168    sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE () { 72 }
169    sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE () { 73 }
170    sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE () { 74 }
171    sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE () { 75 }
172    sub BEFORE_ALLOWED_TOKEN_STATE () { 76 }
173    sub ALLOWED_TOKEN_STATE () { 77 }
174    sub AFTER_ALLOWED_TOKEN_STATE () { 78 }
175    sub AFTER_ALLOWED_TOKENS_STATE () { 79 }
176    sub BEFORE_ATTR_DEFAULT_STATE () { 80 }
177    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE () { 81 }
178    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE () { 82 }
179    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE () { 83 }
180    sub AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE () { 84 }
181    sub BEFORE_NDATA_STATE () { 85 }
182    sub NDATA_STATE () { 86 }
183    sub AFTER_NDATA_STATE () { 87 }
184    sub BEFORE_NOTATION_NAME_STATE () { 88 }
185    sub NOTATION_NAME_STATE () { 89 }
186    sub DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE () { 90 }
187    sub DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE () { 91 }
188    sub ENTITY_VALUE_ENTITY_STATE () { 92 }
189    sub AFTER_ELEMENT_NAME_STATE () { 93 }
190    sub BEFORE_ELEMENT_CONTENT_STATE () { 94 }
191    sub CONTENT_KEYWORD_STATE () { 95 }
192    sub AFTER_CM_GROUP_OPEN_STATE () { 96 }
193    sub CM_ELEMENT_NAME_STATE () { 97 }
194    sub AFTER_CM_ELEMENT_NAME_STATE () { 98 }
195    sub AFTER_CM_GROUP_CLOSE_STATE () { 99 }
196    sub AFTER_MD_DEF_STATE () { 100 }
197    sub BOGUS_MD_STATE () { 101 }
198    
199  ## Tree constructor state constants (see Whatpm::HTML for the full  ## Tree constructor state constants (see Whatpm::HTML for the full
200  ## list and descriptions)  ## list and descriptions)
201    
# Line 178  sub _initialize_tokenizer ($) { Line 260  sub _initialize_tokenizer ($) {
260    #$self->{is_xml} (if XML)    #$self->{is_xml} (if XML)
261    
262    $self->{state} = DATA_STATE; # MUST    $self->{state} = DATA_STATE; # MUST
263    #$self->{s_kwd}; # state keyword - initialized when used    $self->{s_kwd} = ''; # Data state keyword
264      #$self->{kwd} = ''; # State-dependent keyword; initialized when used
265    #$self->{entity__value}; # initialized when used    #$self->{entity__value}; # initialized when used
266    #$self->{entity__match}; # initialized when used    #$self->{entity__match}; # initialized when used
267    $self->{content_model} = PCDATA_CONTENT_MODEL; # be    $self->{content_model} = PCDATA_CONTENT_MODEL; # be
# Line 208  sub _initialize_tokenizer ($) { Line 291  sub _initialize_tokenizer ($) {
291    
292  ## A token has:  ## A token has:
293  ##   ->{type} == DOCTYPE_TOKEN, START_TAG_TOKEN, END_TAG_TOKEN, COMMENT_TOKEN,  ##   ->{type} == DOCTYPE_TOKEN, START_TAG_TOKEN, END_TAG_TOKEN, COMMENT_TOKEN,
294  ##       CHARACTER_TOKEN, or END_OF_FILE_TOKEN  ##       CHARACTER_TOKEN, END_OF_FILE_TOKEN, PI_TOKEN, or ABORT_TOKEN
295  ##   ->{name} (DOCTYPE_TOKEN)  ##   ->{name} (DOCTYPE_TOKEN)
296  ##   ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN)  ##   ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN)
297    ##   ->{target} (PI_TOKEN)
298  ##   ->{pubid} (DOCTYPE_TOKEN)  ##   ->{pubid} (DOCTYPE_TOKEN)
299  ##   ->{sysid} (DOCTYPE_TOKEN)  ##   ->{sysid} (DOCTYPE_TOKEN)
300  ##   ->{quirks} == 1 or 0 (DOCTYPE_TOKEN): "force-quirks" flag  ##   ->{quirks} == 1 or 0 (DOCTYPE_TOKEN): "force-quirks" flag
# Line 218  sub _initialize_tokenizer ($) { Line 302  sub _initialize_tokenizer ($) {
302  ##        ->{name}  ##        ->{name}
303  ##        ->{value}  ##        ->{value}
304  ##        ->{has_reference} == 1 or 0  ##        ->{has_reference} == 1 or 0
305  ##   ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN)  ##        ->{index}: Index of the attribute in a tag.
306    ##   ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN, PI_TOKEN)
307    ##   ->{has_reference} == 1 or 0 (CHARACTER_TOKEN)
308    ##   ->{last_index} (ELEMENT_TOKEN): Next attribute's index - 1.
309    ##   ->{has_internal_subset} = 1 or 0 (DOCTYPE_TOKEN)
310    
311  ## NOTE: The "self-closing flag" is hold as |$self->{self_closing}|.  ## NOTE: The "self-closing flag" is hold as |$self->{self_closing}|.
312  ##     |->{self_closing}| is used to save the value of |$self->{self_closing}|  ##     |->{self_closing}| is used to save the value of |$self->{self_closing}|
313  ##     while the token is pushed back to the stack.  ##     while the token is pushed back to the stack.
# Line 238  my $is_space = { Line 327  my $is_space = {
327    0x0009 => 1, # CHARACTER TABULATION (HT)    0x0009 => 1, # CHARACTER TABULATION (HT)
328    0x000A => 1, # LINE FEED (LF)    0x000A => 1, # LINE FEED (LF)
329    #0x000B => 0, # LINE TABULATION (VT)    #0x000B => 0, # LINE TABULATION (VT)
330    0x000C => 1, # FORM FEED (FF)    0x000C => 1, # FORM FEED (FF) ## XML5: Not a space character.
331    #0x000D => 1, # CARRIAGE RETURN (CR)    #0x000D => 1, # CARRIAGE RETURN (CR)
332    0x0020 => 1, # SPACE (SP)    0x0020 => 1, # SPACE (SP)
333  };  };
# Line 362  sub _get_next_token ($) { Line 451  sub _get_next_token ($) {
451          }          }
452        } elsif ($self->{nc} == 0x002D) { # -        } elsif ($self->{nc} == 0x002D) { # -
453          if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA          if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
454            $self->{s_kwd} .= '-';            if ($self->{s_kwd} eq '<!-') {
             
           if ($self->{s_kwd} eq '<!--') {  
455                            
456              $self->{escape} = 1; # unless $self->{escape};              $self->{escape} = 1; # unless $self->{escape};
457              $self->{s_kwd} = '--';              $self->{s_kwd} = '--';
458              #              #
459            } elsif ($self->{s_kwd} eq '---') {            } elsif ($self->{s_kwd} eq '-') {
460                            
461              $self->{s_kwd} = '--';              $self->{s_kwd} = '--';
462              #              #
463              } elsif ($self->{s_kwd} eq '<!' or $self->{s_kwd} eq '-') {
464                
465                $self->{s_kwd} .= '-';
466                #
467            } else {            } else {
468                            
469                $self->{s_kwd} = '-';
470              #              #
471            }            }
472          }          }
# Line 420  sub _get_next_token ($) { Line 512  sub _get_next_token ($) {
512            if ($self->{s_kwd} eq '--') {            if ($self->{s_kwd} eq '--') {
513                            
514              delete $self->{escape};              delete $self->{escape};
515                #
516            } else {            } else {
517                            
518                #
519            }            }
520            } elsif ($self->{is_xml} and $self->{s_kwd} eq ']]') {
521              
522              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unmatched mse', ## TODO: type
523                              line => $self->{line_prev},
524                              column => $self->{column_prev} - 1);
525              #
526          } else {          } else {
527                        
528              #
529          }          }
530                    
531          $self->{s_kwd} = '';          $self->{s_kwd} = '';
532          #          #
533          } elsif ($self->{nc} == 0x005D) { # ]
534            if ($self->{s_kwd} eq ']' or $self->{s_kwd} eq '') {
535              
536              $self->{s_kwd} .= ']';
537            } elsif ($self->{s_kwd} eq ']]') {
538              
539              #
540            } else {
541              
542              $self->{s_kwd} = '';
543            }
544            #
545        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
546                    
547          $self->{s_kwd} = '';          $self->{s_kwd} = '';
# Line 446  sub _get_next_token ($) { Line 559  sub _get_next_token ($) {
559                     data => chr $self->{nc},                     data => chr $self->{nc},
560                     line => $self->{line}, column => $self->{column},                     line => $self->{line}, column => $self->{column},
561                    };                    };
562        if ($self->{read_until}->($token->{data}, q[-!<>&],        if ($self->{read_until}->($token->{data}, q{-!<>&\]},
563                                  length $token->{data})) {                                  length $token->{data})) {
564          $self->{s_kwd} = '';          $self->{s_kwd} = '';
565        }        }
566    
567        ## Stay in the data state.        ## Stay in the data state.
568        if ($self->{content_model} == PCDATA_CONTENT_MODEL) {        if (not $self->{is_xml} and
569              $self->{content_model} == PCDATA_CONTENT_MODEL) {
570                    
571          $self->{state} = PCDATA_STATE;          $self->{state} = PCDATA_STATE;
572        } else {        } else {
# Line 473  sub _get_next_token ($) { Line 587  sub _get_next_token ($) {
587        return  ($token);        return  ($token);
588        redo A;        redo A;
589      } elsif ($self->{state} == TAG_OPEN_STATE) {      } elsif ($self->{state} == TAG_OPEN_STATE) {
590          ## XML5: "tag state".
591    
592        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
593          if ($self->{nc} == 0x002F) { # /          if ($self->{nc} == 0x002F) { # /
594                        
# Line 491  sub _get_next_token ($) { Line 607  sub _get_next_token ($) {
607            redo A;            redo A;
608          } elsif ($self->{nc} == 0x0021) { # !          } elsif ($self->{nc} == 0x0021) { # !
609                        
610            $self->{s_kwd} = '<' unless $self->{escape};            $self->{s_kwd} = $self->{escaped} ? '' : '<';
611            #            #
612          } else {          } else {
613                        
614              $self->{s_kwd} = '';
615            #            #
616          }          }
617    
# Line 583  sub _get_next_token ($) { Line 700  sub _get_next_token ($) {
700                            line => $self->{line_prev},                            line => $self->{line_prev},
701                            column => $self->{column_prev});                            column => $self->{column_prev});
702            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
703              $self->{s_kwd} = '';
704                        
705      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
706        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 602  sub _get_next_token ($) { Line 720  sub _get_next_token ($) {
720    
721            redo A;            redo A;
722          } elsif ($self->{nc} == 0x003F) { # ?          } elsif ($self->{nc} == 0x003F) { # ?
723                        if ($self->{is_xml}) {
724            $self->{parse_error}->(level => $self->{level}->{must}, type => 'pio',              
725                            line => $self->{line_prev},              $self->{state} = PI_STATE;
726                            column => $self->{column_prev});              
727            $self->{state} = BOGUS_COMMENT_STATE;      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
728            $self->{ct} = {type => COMMENT_TOKEN, data => '',        $self->{line_prev} = $self->{line};
729                                      line => $self->{line_prev},        $self->{column_prev} = $self->{column};
730                                      column => $self->{column_prev},        $self->{column}++;
731                                     };        $self->{nc}
732            ## $self->{nc} is intentionally left as is            = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
733            redo A;      } else {
734          } else {        $self->{set_nc}->($self);
735        }
736      
737                redo A;
738              } else {
739                
740                $self->{parse_error}->(level => $self->{level}->{must}, type => 'pio',
741                                line => $self->{line_prev},
742                                column => $self->{column_prev});
743                $self->{state} = BOGUS_COMMENT_STATE;
744                $self->{ct} = {type => COMMENT_TOKEN, data => '',
745                               line => $self->{line_prev},
746                               column => $self->{column_prev},
747                              };
748                ## $self->{nc} is intentionally left as is
749                redo A;
750              }
751            } elsif (not $self->{is_xml} or $is_space->{$self->{nc}}) {
752                        
753            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago',            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago',
754                            line => $self->{line_prev},                            line => $self->{line_prev},
755                            column => $self->{column_prev});                            column => $self->{column_prev});
756            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
757              $self->{s_kwd} = '';
758            ## reconsume            ## reconsume
759    
760            return  ({type => CHARACTER_TOKEN, data => '<',            return  ({type => CHARACTER_TOKEN, data => '<',
# Line 627  sub _get_next_token ($) { Line 763  sub _get_next_token ($) {
763                     });                     });
764    
765            redo A;            redo A;
766            } else {
767              ## XML5: "<:" is a parse error.
768              
769              $self->{ct} = {type => START_TAG_TOKEN,
770                                        tag_name => chr ($self->{nc}),
771                                        line => $self->{line_prev},
772                                        column => $self->{column_prev}};
773              $self->{state} = TAG_NAME_STATE;
774              
775        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
776          $self->{line_prev} = $self->{line};
777          $self->{column_prev} = $self->{column};
778          $self->{column}++;
779          $self->{nc}
780              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
781        } else {
782          $self->{set_nc}->($self);
783        }
784      
785              redo A;
786          }          }
787        } else {        } else {
788          die "$0: $self->{content_model} in tag open";          die "$0: $self->{content_model} in tag open";
# Line 635  sub _get_next_token ($) { Line 791  sub _get_next_token ($) {
791        ## NOTE: The "close tag open state" in the spec is implemented as        ## NOTE: The "close tag open state" in the spec is implemented as
792        ## |CLOSE_TAG_OPEN_STATE| and |CDATA_RCDATA_CLOSE_TAG_STATE|.        ## |CLOSE_TAG_OPEN_STATE| and |CDATA_RCDATA_CLOSE_TAG_STATE|.
793    
794          ## XML5: "end tag state".
795    
796        my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"        my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"
797        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
798          if (defined $self->{last_stag_name}) {          if (defined $self->{last_stag_name}) {
799            $self->{state} = CDATA_RCDATA_CLOSE_TAG_STATE;            $self->{state} = CDATA_RCDATA_CLOSE_TAG_STATE;
800            $self->{s_kwd} = '';            $self->{kwd} = '';
801            ## Reconsume.            ## Reconsume.
802            redo A;            redo A;
803          } else {          } else {
# Line 647  sub _get_next_token ($) { Line 805  sub _get_next_token ($) {
805            ## NOTE: See <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>.            ## NOTE: See <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>.
806                        
807            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
808              $self->{s_kwd} = '';
809            ## Reconsume.            ## Reconsume.
810            return  ({type => CHARACTER_TOKEN, data => '</',            return  ({type => CHARACTER_TOKEN, data => '</',
811                      line => $l, column => $c,                      line => $l, column => $c,
# Line 695  sub _get_next_token ($) { Line 854  sub _get_next_token ($) {
854        
855          redo A;          redo A;
856        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
857          $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty end tag',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty end tag',
858                          line => $self->{line_prev}, ## "<" in "</>"                          line => $self->{line_prev}, ## "<" in "</>"
859                          column => $self->{column_prev} - 1);                          column => $self->{column_prev} - 1);
860          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
861                    $self->{s_kwd} = '';
862            if ($self->{is_xml}) {
863              
864              ## XML5: No parse error.
865              
866              ## NOTE: This parser raises a parse error, since it supports
867              ## XML1, not XML5.
868    
869              ## NOTE: A short end tag token.
870              my $ct = {type => END_TAG_TOKEN,
871                        tag_name => '',
872                        line => $self->{line_prev},
873                        column => $self->{column_prev} - 1,
874                       };
875              
876        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
877          $self->{line_prev} = $self->{line};
878          $self->{column_prev} = $self->{column};
879          $self->{column}++;
880          $self->{nc}
881              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
882        } else {
883          $self->{set_nc}->($self);
884        }
885      
886              return  ($ct);
887            } else {
888              
889              
890      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
891        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
892        $self->{column_prev} = $self->{column};        $self->{column_prev} = $self->{column};
# Line 711  sub _get_next_token ($) { Line 897  sub _get_next_token ($) {
897        $self->{set_nc}->($self);        $self->{set_nc}->($self);
898      }      }
899        
900            }
901          redo A;          redo A;
902        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
903                    
904          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare etago');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare etago');
905            $self->{s_kwd} = '';
906          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
907          # reconsume          # reconsume
908    
# Line 723  sub _get_next_token ($) { Line 911  sub _get_next_token ($) {
911                   });                   });
912    
913          redo A;          redo A;
914        } else {        } elsif (not $self->{is_xml} or
915                   $is_space->{$self->{nc}}) {
916                    
917          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus end tag');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus end tag',
918                            line => $self->{line_prev}, # "<" of "</"
919                            column => $self->{column_prev} - 1);
920          $self->{state} = BOGUS_COMMENT_STATE;          $self->{state} = BOGUS_COMMENT_STATE;
921          $self->{ct} = {type => COMMENT_TOKEN, data => '',          $self->{ct} = {type => COMMENT_TOKEN, data => '',
922                                    line => $self->{line_prev}, # "<" of "</"                                    line => $self->{line_prev}, # "<" of "</"
# Line 738  sub _get_next_token ($) { Line 929  sub _get_next_token ($) {
929          ## generated from the bogus end tag, as defined in the          ## generated from the bogus end tag, as defined in the
930          ## "bogus comment state" entry.          ## "bogus comment state" entry.
931          redo A;          redo A;
932          } else {
933            ## XML5: "</:" is a parse error.
934            
935            $self->{ct} = {type => END_TAG_TOKEN,
936                           tag_name => chr ($self->{nc}),
937                           line => $l, column => $c};
938            $self->{state} = TAG_NAME_STATE; ## XML5: "end tag name state".
939            
940        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
941          $self->{line_prev} = $self->{line};
942          $self->{column_prev} = $self->{column};
943          $self->{column}++;
944          $self->{nc}
945              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
946        } else {
947          $self->{set_nc}->($self);
948        }
949      
950            redo A;
951        }        }
952      } elsif ($self->{state} == CDATA_RCDATA_CLOSE_TAG_STATE) {      } elsif ($self->{state} == CDATA_RCDATA_CLOSE_TAG_STATE) {
953        my $ch = substr $self->{last_stag_name}, length $self->{s_kwd}, 1;        my $ch = substr $self->{last_stag_name}, length $self->{kwd}, 1;
954        if (length $ch) {        if (length $ch) {
955          my $CH = $ch;          my $CH = $ch;
956          $ch =~ tr/a-z/A-Z/;          $ch =~ tr/a-z/A-Z/;
# Line 748  sub _get_next_token ($) { Line 958  sub _get_next_token ($) {
958          if ($nch eq $ch or $nch eq $CH) {          if ($nch eq $ch or $nch eq $CH) {
959                        
960            ## Stay in the state.            ## Stay in the state.
961            $self->{s_kwd} .= $nch;            $self->{kwd} .= $nch;
962                        
963      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
964        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 764  sub _get_next_token ($) { Line 974  sub _get_next_token ($) {
974          } else {          } else {
975                        
976            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
977              $self->{s_kwd} = '';
978            ## Reconsume.            ## Reconsume.
979            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
980                      data => '</' . $self->{s_kwd},                      data => '</' . $self->{kwd},
981                      line => $self->{line_prev},                      line => $self->{line_prev},
982                      column => $self->{column_prev} - 1 - length $self->{s_kwd},                      column => $self->{column_prev} - 1 - length $self->{kwd},
983                     });                     });
984            redo A;            redo A;
985          }          }
# Line 782  sub _get_next_token ($) { Line 993  sub _get_next_token ($) {
993                        
994            ## Reconsume.            ## Reconsume.
995            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
996              $self->{s_kwd} = '';
997            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
998                      data => '</' . $self->{s_kwd},                      data => '</' . $self->{kwd},
999                      line => $self->{line_prev},                      line => $self->{line_prev},
1000                      column => $self->{column_prev} - 1 - length $self->{s_kwd},                      column => $self->{column_prev} - 1 - length $self->{kwd},
1001                     });                     });
1002            redo A;            redo A;
1003          } else {          } else {
# Line 794  sub _get_next_token ($) { Line 1006  sub _get_next_token ($) {
1006                = {type => END_TAG_TOKEN,                = {type => END_TAG_TOKEN,
1007                   tag_name => $self->{last_stag_name},                   tag_name => $self->{last_stag_name},
1008                   line => $self->{line_prev},                   line => $self->{line_prev},
1009                   column => $self->{column_prev} - 1 - length $self->{s_kwd}};                   column => $self->{column_prev} - 1 - length $self->{kwd}};
1010            $self->{state} = TAG_NAME_STATE;            $self->{state} = TAG_NAME_STATE;
1011            ## Reconsume.            ## Reconsume.
1012            redo A;            redo A;
# Line 833  sub _get_next_token ($) { Line 1045  sub _get_next_token ($) {
1045            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1046          }          }
1047          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1048            $self->{s_kwd} = '';
1049                    
1050      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1051        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 885  sub _get_next_token ($) { Line 1098  sub _get_next_token ($) {
1098            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1099          }          }
1100          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1101            $self->{s_kwd} = '';
1102          # reconsume          # reconsume
1103    
1104          return  ($self->{ct}); # start tag or end tag          return  ($self->{ct}); # start tag or end tag
# Line 924  sub _get_next_token ($) { Line 1138  sub _get_next_token ($) {
1138          redo A;          redo A;
1139        }        }
1140      } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {      } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {
1141          ## XML5: "Tag attribute name before state".
1142    
1143        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
1144                    
1145          ## Stay in the state          ## Stay in the state
# Line 955  sub _get_next_token ($) { Line 1171  sub _get_next_token ($) {
1171            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1172          }          }
1173          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1174            $self->{s_kwd} = '';
1175                    
1176      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1177        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1022  sub _get_next_token ($) { Line 1239  sub _get_next_token ($) {
1239            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1240          }          }
1241          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1242            $self->{s_kwd} = '';
1243          # reconsume          # reconsume
1244    
1245          return  ($self->{ct}); # start tag or end tag          return  ($self->{ct}); # start tag or end tag
# Line 1031  sub _get_next_token ($) { Line 1249  sub _get_next_token ($) {
1249          if ({          if ({
1250               0x0022 => 1, # "               0x0022 => 1, # "
1251               0x0027 => 1, # '               0x0027 => 1, # '
1252                 0x003C => 1, # <
1253               0x003D => 1, # =               0x003D => 1, # =
1254              }->{$self->{nc}}) {              }->{$self->{nc}}) {
1255                        
1256              ## XML5: Not a parse error.
1257            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');
1258          } else {          } else {
1259                        
1260              ## XML5: ":" raises a parse error and is ignored.
1261          }          }
1262          $self->{ca}          $self->{ca}
1263              = {name => chr ($self->{nc}),              = {name => chr ($self->{nc}),
# Line 1057  sub _get_next_token ($) { Line 1278  sub _get_next_token ($) {
1278          redo A;          redo A;
1279        }        }
1280      } elsif ($self->{state} == ATTRIBUTE_NAME_STATE) {      } elsif ($self->{state} == ATTRIBUTE_NAME_STATE) {
1281          ## XML5: "Tag attribute name state".
1282    
1283        my $before_leave = sub {        my $before_leave = sub {
1284          if (exists $self->{ct}->{attributes} # start tag or end tag          if (exists $self->{ct}->{attributes} # start tag or end tag
1285              ->{$self->{ca}->{name}}) { # MUST              ->{$self->{ca}->{name}}) { # MUST
# Line 1067  sub _get_next_token ($) { Line 1290  sub _get_next_token ($) {
1290                        
1291            $self->{ct}->{attributes}->{$self->{ca}->{name}}            $self->{ct}->{attributes}->{$self->{ca}->{name}}
1292              = $self->{ca};              = $self->{ca};
1293              $self->{ca}->{index} = ++$self->{ct}->{last_index};
1294          }          }
1295        }; # $before_leave        }; # $before_leave
1296    
# Line 1103  sub _get_next_token ($) { Line 1327  sub _get_next_token ($) {
1327        
1328          redo A;          redo A;
1329        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
1330            if ($self->{is_xml}) {
1331              
1332              ## XML5: Not a parse error.
1333              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1334            } else {
1335              
1336            }
1337    
1338          $before_leave->();          $before_leave->();
1339          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1340                        
# Line 1117  sub _get_next_token ($) { Line 1349  sub _get_next_token ($) {
1349            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1350          }          }
1351          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1352            $self->{s_kwd} = '';
1353                    
1354      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1355        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1151  sub _get_next_token ($) { Line 1384  sub _get_next_token ($) {
1384        
1385          redo A;          redo A;
1386        } elsif ($self->{nc} == 0x002F) { # /        } elsif ($self->{nc} == 0x002F) { # /
1387            if ($self->{is_xml}) {
1388              
1389              ## XML5: Not a parse error.
1390              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1391            } else {
1392              
1393            }
1394                    
1395          $before_leave->();          $before_leave->();
1396          $self->{state} = SELF_CLOSING_START_TAG_STATE;          $self->{state} = SELF_CLOSING_START_TAG_STATE;
# Line 1185  sub _get_next_token ($) { Line 1425  sub _get_next_token ($) {
1425            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1426          }          }
1427          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1428            $self->{s_kwd} = '';
1429          # reconsume          # reconsume
1430    
1431          return  ($self->{ct}); # start tag or end tag          return  ($self->{ct}); # start tag or end tag
1432    
1433          redo A;          redo A;
1434        } else {        } else {
1435          if ($self->{nc} == 0x0022 or # "          if ({
1436              $self->{nc} == 0x0027) { # '               0x0022 => 1, # "
1437                 0x0027 => 1, # '
1438                 0x003C => 1, # <
1439                }->{$self->{nc}}) {
1440                        
1441              ## XML5: Not a parse error.
1442            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');
1443          } else {          } else {
1444                        
# Line 1214  sub _get_next_token ($) { Line 1459  sub _get_next_token ($) {
1459          redo A;          redo A;
1460        }        }
1461      } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {      } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {
1462          ## XML5: "Tag attribute name after state".
1463          
1464        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
1465                    
1466          ## Stay in the state          ## Stay in the state
# Line 1245  sub _get_next_token ($) { Line 1492  sub _get_next_token ($) {
1492        
1493          redo A;          redo A;
1494        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
1495            if ($self->{is_xml}) {
1496              
1497              ## XML5: Not a parse error.
1498              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1499            } else {
1500              
1501            }
1502    
1503          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1504                        
1505            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
# Line 1261  sub _get_next_token ($) { Line 1516  sub _get_next_token ($) {
1516            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1517          }          }
1518          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1519            $self->{s_kwd} = '';
1520                    
1521      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1522        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1297  sub _get_next_token ($) { Line 1553  sub _get_next_token ($) {
1553        
1554          redo A;          redo A;
1555        } elsif ($self->{nc} == 0x002F) { # /        } elsif ($self->{nc} == 0x002F) { # /
1556            if ($self->{is_xml}) {
1557              
1558              ## XML5: Not a parse error.
1559              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1560            } else {
1561              
1562            }
1563                    
1564          $self->{state} = SELF_CLOSING_START_TAG_STATE;          $self->{state} = SELF_CLOSING_START_TAG_STATE;
1565                    
# Line 1328  sub _get_next_token ($) { Line 1591  sub _get_next_token ($) {
1591          } else {          } else {
1592            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1593          }          }
1594            $self->{s_kwd} = '';
1595          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1596          # reconsume          # reconsume
1597    
# Line 1335  sub _get_next_token ($) { Line 1599  sub _get_next_token ($) {
1599    
1600          redo A;          redo A;
1601        } else {        } else {
1602          if ($self->{nc} == 0x0022 or # "          if ($self->{is_xml}) {
             $self->{nc} == 0x0027) { # '  
1603                        
1604              ## XML5: Not a parse error.
1605              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1606            } else {
1607              
1608            }
1609    
1610            if ({
1611                 0x0022 => 1, # "
1612                 0x0027 => 1, # '
1613                 0x003C => 1, # <
1614                }->{$self->{nc}}) {
1615              
1616              ## XML5: Not a parse error.
1617            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');
1618          } else {          } else {
1619                        
# Line 1361  sub _get_next_token ($) { Line 1637  sub _get_next_token ($) {
1637          redo A;                  redo A;        
1638        }        }
1639      } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {      } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {
1640          ## XML5: "Tag attribute value before state".
1641    
1642        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
1643                    
1644          ## Stay in the state          ## Stay in the state
# Line 1429  sub _get_next_token ($) { Line 1707  sub _get_next_token ($) {
1707            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1708          }          }
1709          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1710            $self->{s_kwd} = '';
1711                    
1712      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1713        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1462  sub _get_next_token ($) { Line 1741  sub _get_next_token ($) {
1741            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1742          }          }
1743          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1744            $self->{s_kwd} = '';
1745          ## reconsume          ## reconsume
1746    
1747          return  ($self->{ct}); # start tag or end tag          return  ($self->{ct}); # start tag or end tag
1748    
1749          redo A;          redo A;
1750        } else {        } else {
1751          if ($self->{nc} == 0x003D) { # =          if ($self->{nc} == 0x003D or $self->{nc} == 0x003C) { # =, <
1752                        
1753              ## XML5: Not a parse error.
1754            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');
1755            } elsif ($self->{is_xml}) {
1756              
1757              ## XML5: No parse error.
1758              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO
1759          } else {          } else {
1760                        
1761          }          }
# Line 1490  sub _get_next_token ($) { Line 1775  sub _get_next_token ($) {
1775          redo A;          redo A;
1776        }        }
1777      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1778          ## XML5: "Tag attribute value double quoted state" and "DOCTYPE
1779          ## ATTLIST attribute value double quoted state".
1780          
1781        if ($self->{nc} == 0x0022) { # "        if ($self->{nc} == 0x0022) { # "
1782                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1783          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;            
1784              ## XML5: "DOCTYPE ATTLIST name after state".
1785              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1786              $self->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE;
1787            } else {
1788              
1789              ## XML5: "Tag attribute name before state".
1790              $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1791            }
1792                    
1793      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1794        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1507  sub _get_next_token ($) { Line 1803  sub _get_next_token ($) {
1803          redo A;          redo A;
1804        } elsif ($self->{nc} == 0x0026) { # &        } elsif ($self->{nc} == 0x0026) { # &
1805                    
1806            ## XML5: Not defined yet.
1807    
1808          ## NOTE: In the spec, the tokenizer is switched to the          ## NOTE: In the spec, the tokenizer is switched to the
1809          ## "entity in attribute value state".  In this implementation, the          ## "entity in attribute value state".  In this implementation, the
1810          ## tokenizer is switched to the |ENTITY_STATE|, which is an          ## tokenizer is switched to the |ENTITY_STATE|, which is an
# Line 1526  sub _get_next_token ($) { Line 1824  sub _get_next_token ($) {
1824      }      }
1825        
1826          redo A;          redo A;
1827          } elsif ($self->{is_xml} and
1828                   $is_space->{$self->{nc}}) {
1829            
1830            $self->{ca}->{value} .= ' ';
1831            ## Stay in the state.
1832            
1833        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1834          $self->{line_prev} = $self->{line};
1835          $self->{column_prev} = $self->{column};
1836          $self->{column}++;
1837          $self->{nc}
1838              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
1839        } else {
1840          $self->{set_nc}->($self);
1841        }
1842      
1843            redo A;
1844        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
1845          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed attribute value');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed attribute value');
1846          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1847                        
1848            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
1849    
1850              $self->{state} = DATA_STATE;
1851              $self->{s_kwd} = '';
1852              ## reconsume
1853              return  ($self->{ct}); # start tag
1854              redo A;
1855          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1856            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1857            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1540  sub _get_next_token ($) { Line 1861  sub _get_next_token ($) {
1861              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
1862                            
1863            }            }
1864    
1865              $self->{state} = DATA_STATE;
1866              $self->{s_kwd} = '';
1867              ## reconsume
1868              return  ($self->{ct}); # end tag
1869              redo A;
1870            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1871              ## XML5: No parse error above; not defined yet.
1872              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1873              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1874              ## Reconsume.
1875              return  ($self->{ct}); # ATTLIST
1876              redo A;
1877          } else {          } else {
1878            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1879          }          }
         $self->{state} = DATA_STATE;  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
1880        } else {        } else {
1881                    ## XML5 [ATTLIST]: Not defined yet.
1882            if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
1883              
1884              ## XML5: Not a parse error.
1885              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lt in attr value'); ## TODO: type
1886            } else {
1887              
1888            }
1889          $self->{ca}->{value} .= chr ($self->{nc});          $self->{ca}->{value} .= chr ($self->{nc});
1890          $self->{read_until}->($self->{ca}->{value},          $self->{read_until}->($self->{ca}->{value},
1891                                q["&],                                qq["&<\x09\x0C\x20],
1892                                length $self->{ca}->{value});                                length $self->{ca}->{value});
1893    
1894          ## Stay in the state          ## Stay in the state
# Line 1571  sub _get_next_token ($) { Line 1906  sub _get_next_token ($) {
1906          redo A;          redo A;
1907        }        }
1908      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1909          ## XML5: "Tag attribute value single quoted state" and "DOCTYPE
1910          ## ATTLIST attribute value single quoted state".
1911    
1912        if ($self->{nc} == 0x0027) { # '        if ($self->{nc} == 0x0027) { # '
1913                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1914          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;            
1915              ## XML5: "DOCTYPE ATTLIST name after state".
1916              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1917              $self->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE;
1918            } else {
1919              
1920              ## XML5: "Before attribute name state" (sic).
1921              $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1922            }
1923                    
1924      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1925        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1588  sub _get_next_token ($) { Line 1934  sub _get_next_token ($) {
1934          redo A;          redo A;
1935        } elsif ($self->{nc} == 0x0026) { # &        } elsif ($self->{nc} == 0x0026) { # &
1936                    
1937            ## XML5: Not defined yet.
1938    
1939          ## NOTE: In the spec, the tokenizer is switched to the          ## NOTE: In the spec, the tokenizer is switched to the
1940          ## "entity in attribute value state".  In this implementation, the          ## "entity in attribute value state".  In this implementation, the
1941          ## tokenizer is switched to the |ENTITY_STATE|, which is an          ## tokenizer is switched to the |ENTITY_STATE|, which is an
# Line 1607  sub _get_next_token ($) { Line 1955  sub _get_next_token ($) {
1955      }      }
1956        
1957          redo A;          redo A;
1958          } elsif ($self->{is_xml} and
1959                   $is_space->{$self->{nc}}) {
1960            
1961            $self->{ca}->{value} .= ' ';
1962            ## Stay in the state.
1963            
1964        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1965          $self->{line_prev} = $self->{line};
1966          $self->{column_prev} = $self->{column};
1967          $self->{column}++;
1968          $self->{nc}
1969              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
1970        } else {
1971          $self->{set_nc}->($self);
1972        }
1973      
1974            redo A;
1975        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
1976          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed attribute value');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed attribute value');
1977          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1978                        
1979            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
1980    
1981              $self->{state} = DATA_STATE;
1982              $self->{s_kwd} = '';
1983              ## reconsume
1984              return  ($self->{ct}); # start tag
1985              redo A;
1986          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1987            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1988            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1621  sub _get_next_token ($) { Line 1992  sub _get_next_token ($) {
1992              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
1993                            
1994            }            }
1995    
1996              $self->{state} = DATA_STATE;
1997              $self->{s_kwd} = '';
1998              ## reconsume
1999              return  ($self->{ct}); # end tag
2000              redo A;
2001            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
2002              ## XML5: No parse error above; not defined yet.
2003              push @{$self->{ct}->{attrdefs}}, $self->{ca};
2004              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2005              ## Reconsume.
2006              return  ($self->{ct}); # ATTLIST
2007              redo A;
2008          } else {          } else {
2009            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
2010          }          }
         $self->{state} = DATA_STATE;  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
2011        } else {        } else {
2012                    ## XML5 [ATTLIST]: Not defined yet.
2013            if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
2014              
2015              ## XML5: Not a parse error.
2016              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lt in attr value'); ## TODO: type
2017            } else {
2018              
2019            }
2020          $self->{ca}->{value} .= chr ($self->{nc});          $self->{ca}->{value} .= chr ($self->{nc});
2021          $self->{read_until}->($self->{ca}->{value},          $self->{read_until}->($self->{ca}->{value},
2022                                q['&],                                qq['&<\x09\x0C\x20],
2023                                length $self->{ca}->{value});                                length $self->{ca}->{value});
2024    
2025          ## Stay in the state          ## Stay in the state
# Line 1652  sub _get_next_token ($) { Line 2037  sub _get_next_token ($) {
2037          redo A;          redo A;
2038        }        }
2039      } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {
2040          ## XML5: "Tag attribute value unquoted state".
2041    
2042        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
2043                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
2044          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;            
2045              push @{$self->{ct}->{attrdefs}}, $self->{ca};
2046              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
2047            } else {
2048              
2049              ## XML5: "Tag attribute name before state".
2050              $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
2051            }
2052                    
2053      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2054        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1669  sub _get_next_token ($) { Line 2063  sub _get_next_token ($) {
2063          redo A;          redo A;
2064        } elsif ($self->{nc} == 0x0026) { # &        } elsif ($self->{nc} == 0x0026) { # &
2065                    
2066    
2067            ## XML5: Not defined yet.
2068    
2069          ## NOTE: In the spec, the tokenizer is switched to the          ## NOTE: In the spec, the tokenizer is switched to the
2070          ## "entity in attribute value state".  In this implementation, the          ## "entity in attribute value state".  In this implementation, the
2071          ## tokenizer is switched to the |ENTITY_STATE|, which is an          ## tokenizer is switched to the |ENTITY_STATE|, which is an
# Line 1692  sub _get_next_token ($) { Line 2089  sub _get_next_token ($) {
2089          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
2090                        
2091            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
2092    
2093              $self->{state} = DATA_STATE;
2094              $self->{s_kwd} = '';
2095              
2096        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2097          $self->{line_prev} = $self->{line};
2098          $self->{column_prev} = $self->{column};
2099          $self->{column}++;
2100          $self->{nc}
2101              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
2102        } else {
2103          $self->{set_nc}->($self);
2104        }
2105      
2106              return  ($self->{ct}); # start tag
2107              redo A;
2108          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
2109            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
2110            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1701  sub _get_next_token ($) { Line 2114  sub _get_next_token ($) {
2114              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
2115                            
2116            }            }
2117          } else {  
2118            die "$0: $self->{ct}->{type}: Unknown token type";            $self->{state} = DATA_STATE;
2119          }            $self->{s_kwd} = '';
2120          $self->{state} = DATA_STATE;            
           
2121      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2122        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
2123        $self->{column_prev} = $self->{column};        $self->{column_prev} = $self->{column};
# Line 1716  sub _get_next_token ($) { Line 2128  sub _get_next_token ($) {
2128        $self->{set_nc}->($self);        $self->{set_nc}->($self);
2129      }      }
2130        
2131              return  ($self->{ct}); # end tag
2132          return  ($self->{ct}); # start tag or end tag            redo A;
2133            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
2134          redo A;            push @{$self->{ct}->{attrdefs}}, $self->{ca};
2135              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2136              
2137        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2138          $self->{line_prev} = $self->{line};
2139          $self->{column_prev} = $self->{column};
2140          $self->{column}++;
2141          $self->{nc}
2142              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
2143        } else {
2144          $self->{set_nc}->($self);
2145        }
2146      
2147              return  ($self->{ct}); # ATTLIST
2148              redo A;
2149            } else {
2150              die "$0: $self->{ct}->{type}: Unknown token type";
2151            }
2152        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');  
2153          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
2154                        
2155              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');
2156            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
2157    
2158              $self->{state} = DATA_STATE;
2159              $self->{s_kwd} = '';
2160              ## reconsume
2161              return  ($self->{ct}); # start tag
2162              redo A;
2163          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
2164              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');
2165            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
2166            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
2167                            
# Line 1734  sub _get_next_token ($) { Line 2170  sub _get_next_token ($) {
2170              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
2171                            
2172            }            }
2173    
2174              $self->{state} = DATA_STATE;
2175              $self->{s_kwd} = '';
2176              ## reconsume
2177              return  ($self->{ct}); # end tag
2178              redo A;
2179            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
2180              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
2181              push @{$self->{ct}->{attrdefs}}, $self->{ca};
2182              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2183              ## Reconsume.
2184              return  ($self->{ct}); # ATTLIST
2185              redo A;
2186          } else {          } else {
2187            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
2188          }          }
         $self->{state} = DATA_STATE;  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
2189        } else {        } else {
2190          if ({          if ({
2191               0x0022 => 1, # "               0x0022 => 1, # "
2192               0x0027 => 1, # '               0x0027 => 1, # '
2193               0x003D => 1, # =               0x003D => 1, # =
2194                 0x003C => 1, # <
2195              }->{$self->{nc}}) {              }->{$self->{nc}}) {
2196                        
2197              ## XML5: Not a parse error.
2198            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');
2199          } else {          } else {
2200                        
2201          }          }
2202          $self->{ca}->{value} .= chr ($self->{nc});          $self->{ca}->{value} .= chr ($self->{nc});
2203          $self->{read_until}->($self->{ca}->{value},          $self->{read_until}->($self->{ca}->{value},
2204                                q["'=& >],                                qq["'=& \x09\x0C>],
2205                                length $self->{ca}->{value});                                length $self->{ca}->{value});
2206    
2207          ## Stay in the state          ## Stay in the state
# Line 1806  sub _get_next_token ($) { Line 2251  sub _get_next_token ($) {
2251            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
2252          }          }
2253          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2254            $self->{s_kwd} = '';
2255                    
2256      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2257        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1853  sub _get_next_token ($) { Line 2299  sub _get_next_token ($) {
2299            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
2300          }          }
2301          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2302            $self->{s_kwd} = '';
2303          ## Reconsume.          ## Reconsume.
2304          return  ($self->{ct}); # start tag or end tag          return  ($self->{ct}); # start tag or end tag
2305          redo A;          redo A;
# Line 1864  sub _get_next_token ($) { Line 2311  sub _get_next_token ($) {
2311          redo A;          redo A;
2312        }        }
2313      } elsif ($self->{state} == SELF_CLOSING_START_TAG_STATE) {      } elsif ($self->{state} == SELF_CLOSING_START_TAG_STATE) {
2314          ## XML5: "Empty tag state".
2315    
2316        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2317          if ($self->{ct}->{type} == END_TAG_TOKEN) {          if ($self->{ct}->{type} == END_TAG_TOKEN) {
2318                        
# Line 1883  sub _get_next_token ($) { Line 2332  sub _get_next_token ($) {
2332          }          }
2333    
2334          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2335            $self->{s_kwd} = '';
2336                    
2337      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2338        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1914  sub _get_next_token ($) { Line 2364  sub _get_next_token ($) {
2364          } else {          } else {
2365            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
2366          }          }
2367            ## XML5: "Tag attribute name before state".
2368          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2369            $self->{s_kwd} = '';
2370          ## Reconsume.          ## Reconsume.
2371          return  ($self->{ct}); # start tag or end tag          return  ($self->{ct}); # start tag or end tag
2372          redo A;          redo A;
# Line 1927  sub _get_next_token ($) { Line 2379  sub _get_next_token ($) {
2379          redo A;          redo A;
2380        }        }
2381      } elsif ($self->{state} == BOGUS_COMMENT_STATE) {      } elsif ($self->{state} == BOGUS_COMMENT_STATE) {
2382        ## (only happen if PCDATA state)        ## XML5: "Bogus comment state" and "DOCTYPE bogus comment state".
2383    
2384        ## NOTE: Unlike spec's "bogus comment state", this implementation        ## NOTE: Unlike spec's "bogus comment state", this implementation
2385        ## consumes characters on a one-by-one basis.        ## consumes characters on a one-by-one basis.
2386                
2387        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2388                    if ($self->{in_subset}) {
2389          $self->{state} = DATA_STATE;            
2390              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2391            } else {
2392              
2393              $self->{state} = DATA_STATE;
2394              $self->{s_kwd} = '';
2395            }
2396                    
2397      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2398        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1950  sub _get_next_token ($) { Line 2408  sub _get_next_token ($) {
2408          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
2409          redo A;          redo A;
2410        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
2411                    if ($self->{in_subset}) {
2412          $self->{state} = DATA_STATE;            
2413              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2414            } else {
2415              
2416              $self->{state} = DATA_STATE;
2417              $self->{s_kwd} = '';
2418            }
2419          ## reconsume          ## reconsume
2420    
2421          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 1978  sub _get_next_token ($) { Line 2442  sub _get_next_token ($) {
2442          redo A;          redo A;
2443        }        }
2444      } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {      } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {
2445        ## (only happen if PCDATA state)        ## XML5: "Markup declaration state".
2446                
2447        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2448                    
# Line 2000  sub _get_next_token ($) { Line 2464  sub _get_next_token ($) {
2464          ## ASCII case-insensitive.          ## ASCII case-insensitive.
2465                    
2466          $self->{state} = MD_DOCTYPE_STATE;          $self->{state} = MD_DOCTYPE_STATE;
2467          $self->{s_kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
2468                    
2469      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2470        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2019  sub _get_next_token ($) { Line 2483  sub _get_next_token ($) {
2483                 $self->{nc} == 0x005B) { # [                 $self->{nc} == 0x005B) { # [
2484                                                    
2485          $self->{state} = MD_CDATA_STATE;          $self->{state} = MD_CDATA_STATE;
2486          $self->{s_kwd} = '[';          $self->{kwd} = '[';
2487                    
2488      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2489        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2053  sub _get_next_token ($) { Line 2517  sub _get_next_token ($) {
2517                                    line => $self->{line_prev},                                    line => $self->{line_prev},
2518                                    column => $self->{column_prev} - 2,                                    column => $self->{column_prev} - 2,
2519                                   };                                   };
2520          $self->{state} = COMMENT_START_STATE;          $self->{state} = COMMENT_START_STATE; ## XML5: "comment state".
2521                    
2522      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2523        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2089  sub _get_next_token ($) { Line 2553  sub _get_next_token ($) {
2553              0x0054, # T              0x0054, # T
2554              0x0059, # Y              0x0059, # Y
2555              0x0050, # P              0x0050, # P
2556            ]->[length $self->{s_kwd}] or            ]->[length $self->{kwd}] or
2557            $self->{nc} == [            $self->{nc} == [
2558              undef,              undef,
2559              0x006F, # o              0x006F, # o
# Line 2097  sub _get_next_token ($) { Line 2561  sub _get_next_token ($) {
2561              0x0074, # t              0x0074, # t
2562              0x0079, # y              0x0079, # y
2563              0x0070, # p              0x0070, # p
2564            ]->[length $self->{s_kwd}]) {            ]->[length $self->{kwd}]) {
2565                    
2566          ## Stay in the state.          ## Stay in the state.
2567          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
2568                    
2569      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2570        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2113  sub _get_next_token ($) { Line 2577  sub _get_next_token ($) {
2577      }      }
2578        
2579          redo A;          redo A;
2580        } elsif ((length $self->{s_kwd}) == 6 and        } elsif ((length $self->{kwd}) == 6 and
2581                 ($self->{nc} == 0x0045 or # E                 ($self->{nc} == 0x0045 or # E
2582                  $self->{nc} == 0x0065)) { # e                  $self->{nc} == 0x0065)) { # e
2583                    if ($self->{is_xml} and
2584                ($self->{kwd} ne 'DOCTYP' or $self->{nc} == 0x0065)) {
2585              
2586              ## XML5: case-sensitive.
2587              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO
2588                              text => 'DOCTYPE',
2589                              line => $self->{line_prev},
2590                              column => $self->{column_prev} - 5);
2591            } else {
2592              
2593            }
2594          $self->{state} = DOCTYPE_STATE;          $self->{state} = DOCTYPE_STATE;
2595          $self->{ct} = {type => DOCTYPE_TOKEN,          $self->{ct} = {type => DOCTYPE_TOKEN,
2596                                    quirks => 1,                                    quirks => 1,
# Line 2139  sub _get_next_token ($) { Line 2613  sub _get_next_token ($) {
2613                                    
2614          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
2615                          line => $self->{line_prev},                          line => $self->{line_prev},
2616                          column => $self->{column_prev} - 1 - length $self->{s_kwd});                          column => $self->{column_prev} - 1 - length $self->{kwd});
2617          $self->{state} = BOGUS_COMMENT_STATE;          $self->{state} = BOGUS_COMMENT_STATE;
2618          ## Reconsume.          ## Reconsume.
2619          $self->{ct} = {type => COMMENT_TOKEN,          $self->{ct} = {type => COMMENT_TOKEN,
2620                                    data => $self->{s_kwd},                                    data => $self->{kwd},
2621                                    line => $self->{line_prev},                                    line => $self->{line_prev},
2622                                    column => $self->{column_prev} - 1 - length $self->{s_kwd},                                    column => $self->{column_prev} - 1 - length $self->{kwd},
2623                                   };                                   };
2624          redo A;          redo A;
2625        }        }
# Line 2156  sub _get_next_token ($) { Line 2630  sub _get_next_token ($) {
2630              '[CD' => 0x0041, # A              '[CD' => 0x0041, # A
2631              '[CDA' => 0x0054, # T              '[CDA' => 0x0054, # T
2632              '[CDAT' => 0x0041, # A              '[CDAT' => 0x0041, # A
2633            }->{$self->{s_kwd}}) {            }->{$self->{kwd}}) {
2634                    
2635          ## Stay in the state.          ## Stay in the state.
2636          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
2637                    
2638      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2639        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2172  sub _get_next_token ($) { Line 2646  sub _get_next_token ($) {
2646      }      }
2647        
2648          redo A;          redo A;
2649        } elsif ($self->{s_kwd} eq '[CDATA' and        } elsif ($self->{kwd} eq '[CDATA' and
2650                 $self->{nc} == 0x005B) { # [                 $self->{nc} == 0x005B) { # [
2651                    if ($self->{is_xml} and
2652                not $self->{tainted} and
2653                @{$self->{open_elements} or []} == 0) {
2654              
2655              $self->{parse_error}->(level => $self->{level}->{must}, type => 'cdata outside of root element',
2656                              line => $self->{line_prev},
2657                              column => $self->{column_prev} - 7);
2658              $self->{tainted} = 1;
2659            } else {
2660              
2661            }
2662    
2663          $self->{ct} = {type => CHARACTER_TOKEN,          $self->{ct} = {type => CHARACTER_TOKEN,
2664                                    data => '',                                    data => '',
2665                                    line => $self->{line_prev},                                    line => $self->{line_prev},
# Line 2196  sub _get_next_token ($) { Line 2681  sub _get_next_token ($) {
2681                    
2682          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
2683                          line => $self->{line_prev},                          line => $self->{line_prev},
2684                          column => $self->{column_prev} - 1 - length $self->{s_kwd});                          column => $self->{column_prev} - 1 - length $self->{kwd});
2685          $self->{state} = BOGUS_COMMENT_STATE;          $self->{state} = BOGUS_COMMENT_STATE;
2686          ## Reconsume.          ## Reconsume.
2687          $self->{ct} = {type => COMMENT_TOKEN,          $self->{ct} = {type => COMMENT_TOKEN,
2688                                    data => $self->{s_kwd},                                    data => $self->{kwd},
2689                                    line => $self->{line_prev},                                    line => $self->{line_prev},
2690                                    column => $self->{column_prev} - 1 - length $self->{s_kwd},                                    column => $self->{column_prev} - 1 - length $self->{kwd},
2691                                   };                                   };
2692          redo A;          redo A;
2693        }        }
# Line 2223  sub _get_next_token ($) { Line 2708  sub _get_next_token ($) {
2708        
2709          redo A;          redo A;
2710        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
2711          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');
2712          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2713              
2714              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2715            } else {
2716              
2717              $self->{state} = DATA_STATE;
2718              $self->{s_kwd} = '';
2719            }
2720                    
2721      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2722        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2242  sub _get_next_token ($) { Line 2733  sub _get_next_token ($) {
2733    
2734          redo A;          redo A;
2735        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2736          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2737          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2738              
2739              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2740            } else {
2741              
2742              $self->{state} = DATA_STATE;
2743              $self->{s_kwd} = '';
2744            }
2745          ## reconsume          ## reconsume
2746    
2747          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2285  sub _get_next_token ($) { Line 2782  sub _get_next_token ($) {
2782        
2783          redo A;          redo A;
2784        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
2785          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');
2786          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2787              
2788              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2789            } else {
2790              
2791              $self->{state} = DATA_STATE;
2792              $self->{s_kwd} = '';
2793            }
2794                    
2795      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2796        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2304  sub _get_next_token ($) { Line 2807  sub _get_next_token ($) {
2807    
2808          redo A;          redo A;
2809        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2810          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2811          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2812              
2813              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2814            } else {
2815              
2816              $self->{state} = DATA_STATE;
2817              $self->{s_kwd} = '';
2818            }
2819          ## reconsume          ## reconsume
2820    
2821          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2331  sub _get_next_token ($) { Line 2840  sub _get_next_token ($) {
2840          redo A;          redo A;
2841        }        }
2842      } elsif ($self->{state} == COMMENT_STATE) {      } elsif ($self->{state} == COMMENT_STATE) {
2843          ## XML5: "Comment state" and "DOCTYPE comment state".
2844    
2845        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2846                    
2847          $self->{state} = COMMENT_END_DASH_STATE;          $self->{state} = COMMENT_END_DASH_STATE;
# Line 2347  sub _get_next_token ($) { Line 2858  sub _get_next_token ($) {
2858        
2859          redo A;          redo A;
2860        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2861          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2862          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2863              
2864              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2865            } else {
2866              
2867              $self->{state} = DATA_STATE;
2868              $self->{s_kwd} = '';
2869            }
2870          ## reconsume          ## reconsume
2871    
2872          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2377  sub _get_next_token ($) { Line 2894  sub _get_next_token ($) {
2894          redo A;          redo A;
2895        }        }
2896      } elsif ($self->{state} == COMMENT_END_DASH_STATE) {      } elsif ($self->{state} == COMMENT_END_DASH_STATE) {
2897          ## XML5: "Comment dash state" and "DOCTYPE comment dash state".
2898    
2899        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2900                    
2901          $self->{state} = COMMENT_END_STATE;          $self->{state} = COMMENT_END_STATE;
# Line 2393  sub _get_next_token ($) { Line 2912  sub _get_next_token ($) {
2912        
2913          redo A;          redo A;
2914        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2915          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2916          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2917              
2918              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2919            } else {
2920              
2921              $self->{state} = DATA_STATE;
2922              $self->{s_kwd} = '';
2923            }
2924          ## reconsume          ## reconsume
2925    
2926          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2418  sub _get_next_token ($) { Line 2943  sub _get_next_token ($) {
2943        
2944          redo A;          redo A;
2945        }        }
2946      } elsif ($self->{state} == COMMENT_END_STATE) {      } elsif ($self->{state} == COMMENT_END_STATE or
2947                 $self->{state} == COMMENT_END_BANG_STATE) {
2948          ## XML5: "Comment end state" and "DOCTYPE comment end state".
2949          ## (No comment end bang state.)
2950    
2951        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2952                    if ($self->{in_subset}) {
2953          $self->{state} = DATA_STATE;            
2954              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2955            } else {
2956              
2957              $self->{state} = DATA_STATE;
2958              $self->{s_kwd} = '';
2959            }
2960                    
2961      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2962        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2438  sub _get_next_token ($) { Line 2973  sub _get_next_token ($) {
2973    
2974          redo A;          redo A;
2975        } elsif ($self->{nc} == 0x002D) { # -        } elsif ($self->{nc} == 0x002D) { # -
2976            if ($self->{state} == COMMENT_END_BANG_STATE) {
2977              
2978              $self->{ct}->{data} .= '--!'; # comment
2979              $self->{state} = COMMENT_END_DASH_STATE;
2980            } else {
2981              
2982              ## XML5: Not a parse error.
2983              $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',
2984                              line => $self->{line_prev},
2985                              column => $self->{column_prev});
2986              $self->{ct}->{data} .= '-'; # comment
2987              ## Stay in the state
2988            }
2989                    
2990          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2991                          line => $self->{line_prev},        $self->{line_prev} = $self->{line};
2992                          column => $self->{column_prev});        $self->{column_prev} = $self->{column};
2993          $self->{ct}->{data} .= '-'; # comment        $self->{column}++;
2994          ## Stay in the state        $self->{nc}
2995              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
2996        } else {
2997          $self->{set_nc}->($self);
2998        }
2999      
3000            redo A;
3001          } elsif ($self->{nc} == 0x0021 and # !
3002                   $self->{state} != COMMENT_END_BANG_STATE) {
3003            $self->{parse_error}->(level => $self->{level}->{must}, type => 'comment end bang'); # XXX error type
3004            $self->{state} = COMMENT_END_BANG_STATE;
3005                    
3006      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3007        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2457  sub _get_next_token ($) { Line 3015  sub _get_next_token ($) {
3015        
3016          redo A;          redo A;
3017        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
3018          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
3019          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
3020          ## reconsume            
3021              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3022            } else {
3023              
3024              $self->{state} = DATA_STATE;
3025              $self->{s_kwd} = '';
3026            }
3027            ## Reconsume.
3028    
3029          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
3030    
3031          redo A;          redo A;
3032        } else {        } else {
3033                    
3034          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',          if ($self->{state} == COMMENT_END_BANG_STATE) {
3035                          line => $self->{line_prev},            $self->{ct}->{data} .= '--!' . chr ($self->{nc}); # comment
3036                          column => $self->{column_prev});          } else {
3037          $self->{ct}->{data} .= '--' . chr ($self->{nc}); # comment            $self->{ct}->{data} .= '--' . chr ($self->{nc}); # comment
3038            }
3039          $self->{state} = COMMENT_STATE;          $self->{state} = COMMENT_STATE;
3040                    
3041      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 2501  sub _get_next_token ($) { Line 3066  sub _get_next_token ($) {
3066      }      }
3067        
3068          redo A;          redo A;
3069          } elsif ($self->{nc} == -1) {
3070            
3071            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3072            $self->{ct}->{quirks} = 1;
3073    
3074            $self->{state} = DATA_STATE;
3075            ## Reconsume.
3076            return  ($self->{ct}); # DOCTYPE (quirks)
3077    
3078            redo A;
3079        } else {        } else {
3080                    
3081            ## XML5: Switch to the bogus comment state.
3082          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before DOCTYPE name');
3083          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
3084          ## reconsume          ## reconsume
3085          redo A;          redo A;
3086        }        }
3087      } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {
3088          ## XML5: "DOCTYPE root name before state".
3089    
3090        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
3091                    
3092          ## Stay in the state          ## Stay in the state
# Line 2526  sub _get_next_token ($) { Line 3104  sub _get_next_token ($) {
3104          redo A;          redo A;
3105        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3106                    
3107            ## XML5: No parse error.
3108          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');
3109          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3110            $self->{s_kwd} = '';
3111                    
3112      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3113        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2543  sub _get_next_token ($) { Line 3123  sub _get_next_token ($) {
3123          return  ($self->{ct}); # DOCTYPE (quirks)          return  ($self->{ct}); # DOCTYPE (quirks)
3124    
3125          redo A;          redo A;
3126          } elsif (0x0041 <= $self->{nc} and $self->{nc} <= 0x005A) { # A..Z
3127            
3128            $self->{ct}->{name} # DOCTYPE
3129                = chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020));
3130            delete $self->{ct}->{quirks};
3131            $self->{state} = DOCTYPE_NAME_STATE;
3132            
3133        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3134          $self->{line_prev} = $self->{line};
3135          $self->{column_prev} = $self->{column};
3136          $self->{column}++;
3137          $self->{nc}
3138              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3139        } else {
3140          $self->{set_nc}->($self);
3141        }
3142      
3143            redo A;
3144        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3145                    
3146          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');
3147          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3148            $self->{s_kwd} = '';
3149          ## reconsume          ## reconsume
3150    
3151          return  ($self->{ct}); # DOCTYPE (quirks)          return  ($self->{ct}); # DOCTYPE (quirks)
3152    
3153          redo A;          redo A;
3154          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
3155            
3156            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');
3157            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3158            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3159            $self->{in_subset} = 1;
3160            
3161        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3162          $self->{line_prev} = $self->{line};
3163          $self->{column_prev} = $self->{column};
3164          $self->{column}++;
3165          $self->{nc}
3166              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3167        } else {
3168          $self->{set_nc}->($self);
3169        }
3170      
3171            return  ($self->{ct}); # DOCTYPE
3172            redo A;
3173        } else {        } else {
3174                    
3175          $self->{ct}->{name} = chr $self->{nc};          $self->{ct}->{name} = chr $self->{nc};
# Line 2571  sub _get_next_token ($) { Line 3189  sub _get_next_token ($) {
3189          redo A;          redo A;
3190        }        }
3191      } elsif ($self->{state} == DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == DOCTYPE_NAME_STATE) {
3192  ## ISSUE: Redundant "First," in the spec.        ## XML5: "DOCTYPE root name state".
3193    
3194          ## ISSUE: Redundant "First," in the spec.
3195    
3196        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
3197                    
3198          $self->{state} = AFTER_DOCTYPE_NAME_STATE;          $self->{state} = AFTER_DOCTYPE_NAME_STATE;
# Line 2590  sub _get_next_token ($) { Line 3211  sub _get_next_token ($) {
3211        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3212                    
3213          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3214            $self->{s_kwd} = '';
3215                    
3216      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3217        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2605  sub _get_next_token ($) { Line 3227  sub _get_next_token ($) {
3227          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
3228    
3229          redo A;          redo A;
3230          } elsif (0x0041 <= $self->{nc} and $self->{nc} <= 0x005A) { # A..Z
3231            
3232            $self->{ct}->{name} # DOCTYPE
3233                .= chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020));
3234            delete $self->{ct}->{quirks};
3235            ## Stay in the state.
3236            
3237        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3238          $self->{line_prev} = $self->{line};
3239          $self->{column_prev} = $self->{column};
3240          $self->{column}++;
3241          $self->{nc}
3242              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3243        } else {
3244          $self->{set_nc}->($self);
3245        }
3246      
3247            redo A;
3248        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3249                    
3250          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3251          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3252            $self->{s_kwd} = '';
3253          ## reconsume          ## reconsume
3254    
3255          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
3256          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
3257    
3258          redo A;          redo A;
3259          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
3260            
3261            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3262            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3263            $self->{in_subset} = 1;
3264            
3265        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3266          $self->{line_prev} = $self->{line};
3267          $self->{column_prev} = $self->{column};
3268          $self->{column}++;
3269          $self->{nc}
3270              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3271        } else {
3272          $self->{set_nc}->($self);
3273        }
3274      
3275            return  ($self->{ct}); # DOCTYPE
3276            redo A;
3277        } else {        } else {
3278                    
3279          $self->{ct}->{name}          $self->{ct}->{name} .= chr ($self->{nc}); # DOCTYPE
3280            .= chr ($self->{nc}); # DOCTYPE          ## Stay in the state.
         ## Stay in the state  
3281                    
3282      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3283        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2634  sub _get_next_token ($) { Line 3292  sub _get_next_token ($) {
3292          redo A;          redo A;
3293        }        }
3294      } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {
3295          ## XML5: Corresponding to XML5's "DOCTYPE root name after
3296          ## state", but implemented differently.
3297    
3298        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
3299                    
3300          ## Stay in the state          ## Stay in the state
# Line 2650  sub _get_next_token ($) { Line 3311  sub _get_next_token ($) {
3311        
3312          redo A;          redo A;
3313        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3314            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3315              
3316              $self->{state} = DATA_STATE;
3317              $self->{s_kwd} = '';
3318            } else {
3319              
3320              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md def'); ## TODO: type
3321              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3322            }
3323                    
         $self->{state} = DATA_STATE;  
3324                    
3325      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3326        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2663  sub _get_next_token ($) { Line 3332  sub _get_next_token ($) {
3332        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3333      }      }
3334        
3335            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         return  ($self->{ct}); # DOCTYPE  
   
3336          redo A;          redo A;
3337        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3338            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3339              
3340              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3341              $self->{state} = DATA_STATE;
3342              $self->{s_kwd} = '';
3343              $self->{ct}->{quirks} = 1;
3344            } else {
3345              
3346              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3347              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3348            }
3349                    
3350          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          ## Reconsume.
3351          $self->{state} = DATA_STATE;          return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         ## reconsume  
   
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3352          redo A;          redo A;
3353        } elsif ($self->{nc} == 0x0050 or # P        } elsif ($self->{nc} == 0x0050 or # P
3354                 $self->{nc} == 0x0070) { # p                 $self->{nc} == 0x0070) { # p
3355            
3356          $self->{state} = PUBLIC_STATE;          $self->{state} = PUBLIC_STATE;
3357          $self->{s_kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
3358                    
3359      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3360        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2695  sub _get_next_token ($) { Line 3369  sub _get_next_token ($) {
3369          redo A;          redo A;
3370        } elsif ($self->{nc} == 0x0053 or # S        } elsif ($self->{nc} == 0x0053 or # S
3371                 $self->{nc} == 0x0073) { # s                 $self->{nc} == 0x0073) { # s
3372            
3373          $self->{state} = SYSTEM_STATE;          $self->{state} = SYSTEM_STATE;
3374          $self->{s_kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
3375                    
3376      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3377        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2709  sub _get_next_token ($) { Line 3384  sub _get_next_token ($) {
3384      }      }
3385        
3386          redo A;          redo A;
3387        } else {        } elsif ($self->{nc} == 0x0022 and # "
3388                   ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN or
3389                    $self->{ct}->{type} == PARAMETER_ENTITY_TOKEN)) {
3390                    
3391          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name');          $self->{state} = DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE;
3392          $self->{ct}->{quirks} = 1;          $self->{ct}->{value} = ''; # ENTITY
3393            
3394        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3395          $self->{line_prev} = $self->{line};
3396          $self->{column_prev} = $self->{column};
3397          $self->{column}++;
3398          $self->{nc}
3399              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3400        } else {
3401          $self->{set_nc}->($self);
3402        }
3403      
3404            redo A;
3405          } elsif ($self->{nc} == 0x0027 and # '
3406                   ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN or
3407                    $self->{ct}->{type} == PARAMETER_ENTITY_TOKEN)) {
3408            
3409            $self->{state} = DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE;
3410            $self->{ct}->{value} = ''; # ENTITY
3411            
3412        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3413          $self->{line_prev} = $self->{line};
3414          $self->{column_prev} = $self->{column};
3415          $self->{column}++;
3416          $self->{nc}
3417              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3418        } else {
3419          $self->{set_nc}->($self);
3420        }
3421      
3422            redo A;
3423          } elsif ($self->{is_xml} and
3424                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3425                   $self->{nc} == 0x005B) { # [
3426            
3427            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3428            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3429            $self->{in_subset} = 1;
3430            
3431        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3432          $self->{line_prev} = $self->{line};
3433          $self->{column_prev} = $self->{column};
3434          $self->{column}++;
3435          $self->{nc}
3436              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3437        } else {
3438          $self->{set_nc}->($self);
3439        }
3440      
3441            return  ($self->{ct}); # DOCTYPE
3442            redo A;
3443          } else {
3444            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name'); ## TODO: type
3445    
3446            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3447              
3448              $self->{ct}->{quirks} = 1;
3449              $self->{state} = BOGUS_DOCTYPE_STATE;
3450            } else {
3451              
3452              $self->{state} = BOGUS_MD_STATE;
3453            }
3454    
         $self->{state} = BOGUS_DOCTYPE_STATE;  
3455                    
3456      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3457        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2736  sub _get_next_token ($) { Line 3473  sub _get_next_token ($) {
3473              0x0042, # B              0x0042, # B
3474              0x004C, # L              0x004C, # L
3475              0x0049, # I              0x0049, # I
3476            ]->[length $self->{s_kwd}] or            ]->[length $self->{kwd}] or
3477            $self->{nc} == [            $self->{nc} == [
3478              undef,              undef,
3479              0x0075, # u              0x0075, # u
3480              0x0062, # b              0x0062, # b
3481              0x006C, # l              0x006C, # l
3482              0x0069, # i              0x0069, # i
3483            ]->[length $self->{s_kwd}]) {            ]->[length $self->{kwd}]) {
3484                    
3485          ## Stay in the state.          ## Stay in the state.
3486          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
3487                    
3488      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3489        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2759  sub _get_next_token ($) { Line 3496  sub _get_next_token ($) {
3496      }      }
3497        
3498          redo A;          redo A;
3499        } elsif ((length $self->{s_kwd}) == 5 and        } elsif ((length $self->{kwd}) == 5 and
3500                 ($self->{nc} == 0x0043 or # C                 ($self->{nc} == 0x0043 or # C
3501                  $self->{nc} == 0x0063)) { # c                  $self->{nc} == 0x0063)) { # c
3502                    if ($self->{is_xml} and
3503                ($self->{kwd} ne 'PUBLI' or $self->{nc} == 0x0063)) { # c
3504              
3505              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
3506                              text => 'PUBLIC',
3507                              line => $self->{line_prev},
3508                              column => $self->{column_prev} - 4);
3509            } else {
3510              
3511            }
3512          $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;          $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
3513                    
3514      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 2777  sub _get_next_token ($) { Line 3523  sub _get_next_token ($) {
3523        
3524          redo A;          redo A;
3525        } else {        } else {
3526                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name', ## TODO: type
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name',  
3527                          line => $self->{line_prev},                          line => $self->{line_prev},
3528                          column => $self->{column_prev} + 1 - length $self->{s_kwd});                          column => $self->{column_prev} + 1 - length $self->{kwd});
3529          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3530              
3531          $self->{state} = BOGUS_DOCTYPE_STATE;            $self->{ct}->{quirks} = 1;
3532              $self->{state} = BOGUS_DOCTYPE_STATE;
3533            } else {
3534              
3535              $self->{state} = BOGUS_MD_STATE;
3536            }
3537          ## Reconsume.          ## Reconsume.
3538          redo A;          redo A;
3539        }        }
# Line 2795  sub _get_next_token ($) { Line 3545  sub _get_next_token ($) {
3545              0x0053, # S              0x0053, # S
3546              0x0054, # T              0x0054, # T
3547              0x0045, # E              0x0045, # E
3548            ]->[length $self->{s_kwd}] or            ]->[length $self->{kwd}] or
3549            $self->{nc} == [            $self->{nc} == [
3550              undef,              undef,
3551              0x0079, # y              0x0079, # y
3552              0x0073, # s              0x0073, # s
3553              0x0074, # t              0x0074, # t
3554              0x0065, # e              0x0065, # e
3555            ]->[length $self->{s_kwd}]) {            ]->[length $self->{kwd}]) {
3556                    
3557          ## Stay in the state.          ## Stay in the state.
3558          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
3559                    
3560      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3561        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2818  sub _get_next_token ($) { Line 3568  sub _get_next_token ($) {
3568      }      }
3569        
3570          redo A;          redo A;
3571        } elsif ((length $self->{s_kwd}) == 5 and        } elsif ((length $self->{kwd}) == 5 and
3572                 ($self->{nc} == 0x004D or # M                 ($self->{nc} == 0x004D or # M
3573                  $self->{nc} == 0x006D)) { # m                  $self->{nc} == 0x006D)) { # m
3574                    if ($self->{is_xml} and
3575                ($self->{kwd} ne 'SYSTE' or $self->{nc} == 0x006D)) { # m
3576              
3577              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
3578                              text => 'SYSTEM',
3579                              line => $self->{line_prev},
3580                              column => $self->{column_prev} - 4);
3581            } else {
3582              
3583            }
3584          $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;          $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
3585                    
3586      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 2836  sub _get_next_token ($) { Line 3595  sub _get_next_token ($) {
3595        
3596          redo A;          redo A;
3597        } else {        } else {
3598                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name', ## TODO: type
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name',  
3599                          line => $self->{line_prev},                          line => $self->{line_prev},
3600                          column => $self->{column_prev} + 1 - length $self->{s_kwd});                          column => $self->{column_prev} + 1 - length $self->{kwd});
3601          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3602              
3603          $self->{state} = BOGUS_DOCTYPE_STATE;            $self->{ct}->{quirks} = 1;
3604              $self->{state} = BOGUS_DOCTYPE_STATE;
3605            } else {
3606              
3607              $self->{state} = BOGUS_MD_STATE;
3608            }
3609          ## Reconsume.          ## Reconsume.
3610          redo A;          redo A;
3611        }        }
# Line 2895  sub _get_next_token ($) { Line 3658  sub _get_next_token ($) {
3658        
3659          redo A;          redo A;
3660        } elsif ($self->{nc} eq 0x003E) { # >        } elsif ($self->{nc} eq 0x003E) { # >
           
3661          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');
3662            
3663          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3664              
3665              $self->{state} = DATA_STATE;
3666              $self->{s_kwd} = '';
3667              $self->{ct}->{quirks} = 1;
3668            } else {
3669              
3670              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3671            }
3672            
3673                    
3674      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3675        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2910  sub _get_next_token ($) { Line 3681  sub _get_next_token ($) {
3681        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3682      }      }
3683        
3684            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3685          redo A;          redo A;
3686        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3687            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3688              
3689              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3690              $self->{state} = DATA_STATE;
3691              $self->{s_kwd} = '';
3692              $self->{ct}->{quirks} = 1;
3693            } else {
3694              
3695              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3696              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3697            }
3698                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');  
   
         $self->{state} = DATA_STATE;  
3699          ## reconsume          ## reconsume
   
         $self->{ct}->{quirks} = 1;  
3700          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
3701          redo A;          redo A;
3702        } else {        } elsif ($self->{is_xml} and
3703                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3704                   $self->{nc} == 0x005B) { # [
3705                    
3706            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');
3707            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3708            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3709            $self->{in_subset} = 1;
3710            
3711        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3712          $self->{line_prev} = $self->{line};
3713          $self->{column_prev} = $self->{column};
3714          $self->{column}++;
3715          $self->{nc}
3716              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3717        } else {
3718          $self->{set_nc}->($self);
3719        }
3720      
3721            return  ($self->{ct}); # DOCTYPE
3722            redo A;
3723          } else {
3724          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC');
         $self->{ct}->{quirks} = 1;  
3725    
3726          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3727              
3728              $self->{ct}->{quirks} = 1;
3729              $self->{state} = BOGUS_DOCTYPE_STATE;
3730            } else {
3731              
3732              $self->{state} = BOGUS_MD_STATE;
3733            }
3734    
3735                    
3736      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3737        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2962  sub _get_next_token ($) { Line 3762  sub _get_next_token ($) {
3762        
3763          redo A;          redo A;
3764        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
3765          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3766    
3767          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3768              
3769              $self->{state} = DATA_STATE;
3770              $self->{s_kwd} = '';
3771              $self->{ct}->{quirks} = 1;
3772            } else {
3773              
3774              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3775            }
3776    
3777                    
3778      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3779        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2977  sub _get_next_token ($) { Line 3785  sub _get_next_token ($) {
3785        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3786      }      }
3787        
3788            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3789          redo A;          redo A;
3790        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
3791          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3792    
3793          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3794          ## reconsume            
3795              $self->{state} = DATA_STATE;
3796          $self->{ct}->{quirks} = 1;            $self->{s_kwd} = '';
3797              $self->{ct}->{quirks} = 1;
3798            } else {
3799              
3800              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3801            }
3802            
3803            ## Reconsume.
3804          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
3805          redo A;          redo A;
3806        } else {        } else {
3807                    
3808          $self->{ct}->{pubid} # DOCTYPE          $self->{ct}->{pubid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
3809          $self->{read_until}->($self->{ct}->{pubid}, q[">],          $self->{read_until}->($self->{ct}->{pubid}, q[">],
3810                                length $self->{ct}->{pubid});                                length $self->{ct}->{pubid});
3811    
# Line 3031  sub _get_next_token ($) { Line 3840  sub _get_next_token ($) {
3840        
3841          redo A;          redo A;
3842        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
3843          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3844    
3845          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3846              
3847              $self->{state} = DATA_STATE;
3848              $self->{s_kwd} = '';
3849              $self->{ct}->{quirks} = 1;
3850            } else {
3851              
3852              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3853            }
3854    
3855                    
3856      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3857        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3046  sub _get_next_token ($) { Line 3863  sub _get_next_token ($) {
3863        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3864      }      }
3865        
3866            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3867          redo A;          redo A;
3868        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
3869          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3870    
3871          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3872              
3873              $self->{state} = DATA_STATE;
3874              $self->{s_kwd} = '';
3875              $self->{ct}->{quirks} = 1;
3876            } else {
3877              
3878              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3879            }
3880          
3881          ## reconsume          ## reconsume
3882            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3883          redo A;          redo A;
3884        } else {        } else {
3885                    
3886          $self->{ct}->{pubid} # DOCTYPE          $self->{ct}->{pubid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
3887          $self->{read_until}->($self->{ct}->{pubid}, q['>],          $self->{read_until}->($self->{ct}->{pubid}, q['>],
3888                                length $self->{ct}->{pubid});                                length $self->{ct}->{pubid});
3889    
# Line 3101  sub _get_next_token ($) { Line 3919  sub _get_next_token ($) {
3919          redo A;          redo A;
3920        } elsif ($self->{nc} == 0x0022) { # "        } elsif ($self->{nc} == 0x0022) { # "
3921                    
3922          $self->{ct}->{sysid} = ''; # DOCTYPE          $self->{ct}->{sysid} = ''; # DOCTYPE/ENTITY/NOTATION
3923          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
3924                    
3925      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3117  sub _get_next_token ($) { Line 3935  sub _get_next_token ($) {
3935          redo A;          redo A;
3936        } elsif ($self->{nc} == 0x0027) { # '        } elsif ($self->{nc} == 0x0027) { # '
3937                    
3938          $self->{ct}->{sysid} = ''; # DOCTYPE          $self->{ct}->{sysid} = ''; # DOCTYPE/ENTITY/NOTATION
3939          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
3940                    
3941      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3132  sub _get_next_token ($) { Line 3950  sub _get_next_token ($) {
3950        
3951          redo A;          redo A;
3952        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3953            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3954              if ($self->{is_xml}) {
3955                
3956                $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
3957              } else {
3958                
3959              }
3960              $self->{state} = DATA_STATE;
3961              $self->{s_kwd} = '';
3962            } else {
3963              if ($self->{ct}->{type} == NOTATION_TOKEN) {
3964                
3965              } else {
3966                
3967                $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');            
3968              }
3969              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3970            }
3971                    
         $self->{state} = DATA_STATE;  
3972                    
3973      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3974        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3145  sub _get_next_token ($) { Line 3980  sub _get_next_token ($) {
3980        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3981      }      }
3982        
3983            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         return  ($self->{ct}); # DOCTYPE  
   
3984          redo A;          redo A;
3985        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3986            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3987              
3988              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3989              
3990              $self->{state} = DATA_STATE;
3991              $self->{s_kwd} = '';
3992              $self->{ct}->{quirks} = 1;
3993            } else {
3994              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3995              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3996            }
3997                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');  
   
         $self->{state} = DATA_STATE;  
3998          ## reconsume          ## reconsume
3999            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4000          $self->{ct}->{quirks} = 1;          redo A;
4001          } elsif ($self->{is_xml} and
4002                   $self->{ct}->{type} == DOCTYPE_TOKEN and
4003                   $self->{nc} == 0x005B) { # [
4004            
4005            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
4006            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4007            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
4008            $self->{in_subset} = 1;
4009            
4010        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4011          $self->{line_prev} = $self->{line};
4012          $self->{column_prev} = $self->{column};
4013          $self->{column}++;
4014          $self->{nc}
4015              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4016        } else {
4017          $self->{set_nc}->($self);
4018        }
4019      
4020          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
4021          redo A;          redo A;
4022        } else {        } else {
           
4023          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC literal');
         $self->{ct}->{quirks} = 1;  
4024    
4025          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4026              
4027              $self->{ct}->{quirks} = 1;
4028              $self->{state} = BOGUS_DOCTYPE_STATE;
4029            } else {
4030              
4031              $self->{state} = BOGUS_MD_STATE;
4032            }
4033    
4034                    
4035      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4036        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3228  sub _get_next_token ($) { Line 4093  sub _get_next_token ($) {
4093        
4094          redo A;          redo A;
4095        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
4096          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
         $self->{state} = DATA_STATE;  
4097                    
4098      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4099        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3243  sub _get_next_token ($) { Line 4106  sub _get_next_token ($) {
4106      }      }
4107        
4108    
4109          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4110          return  ($self->{ct}); # DOCTYPE            
4111              $self->{state} = DATA_STATE;
4112              $self->{s_kwd} = '';
4113              $self->{ct}->{quirks} = 1;
4114            } else {
4115              
4116              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4117            }
4118    
4119            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4120          redo A;          redo A;
4121        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4122            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4123              
4124              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
4125              $self->{state} = DATA_STATE;
4126              $self->{s_kwd} = '';
4127              $self->{ct}->{quirks} = 1;
4128            } else {
4129              
4130              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
4131              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4132            }
4133                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');  
   
         $self->{state} = DATA_STATE;  
4134          ## reconsume          ## reconsume
4135            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4136            redo A;
4137          } elsif ($self->{is_xml} and
4138                   $self->{ct}->{type} == DOCTYPE_TOKEN and
4139                   $self->{nc} == 0x005B) { # [
4140            
4141            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
4142    
4143          $self->{ct}->{quirks} = 1;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4144            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
4145            $self->{in_subset} = 1;
4146            
4147        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4148          $self->{line_prev} = $self->{line};
4149          $self->{column_prev} = $self->{column};
4150          $self->{column}++;
4151          $self->{nc}
4152              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4153        } else {
4154          $self->{set_nc}->($self);
4155        }
4156      
4157          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
4158          redo A;          redo A;
4159        } else {        } else {
           
4160          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM');
         $self->{ct}->{quirks} = 1;  
4161    
4162          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4163                        
4164              $self->{ct}->{quirks} = 1;
4165              $self->{state} = BOGUS_DOCTYPE_STATE;
4166            } else {
4167              
4168              $self->{state} = BOGUS_MD_STATE;
4169            }
4170    
4171                    
4172      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4173        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3293  sub _get_next_token ($) { Line 4197  sub _get_next_token ($) {
4197      }      }
4198        
4199          redo A;          redo A;
4200        } elsif ($self->{nc} == 0x003E) { # >        } elsif (not $self->{is_xml} and $self->{nc} == 0x003E) { # >
           
4201          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4202    
4203          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4204              
4205              $self->{state} = DATA_STATE;
4206              $self->{s_kwd} = '';
4207              $self->{ct}->{quirks} = 1;
4208            } else {
4209              
4210              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4211            }
4212            
4213                    
4214      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4215        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3309  sub _get_next_token ($) { Line 4221  sub _get_next_token ($) {
4221        $self->{set_nc}->($self);        $self->{set_nc}->($self);
4222      }      }
4223        
4224            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
4225          redo A;          redo A;
4226        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
4227          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4228    
4229          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4230              
4231              $self->{state} = DATA_STATE;
4232              $self->{s_kwd} = '';
4233              $self->{ct}->{quirks} = 1;
4234            } else {
4235              
4236              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4237            }
4238            
4239          ## reconsume          ## reconsume
4240            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
4241          redo A;          redo A;
4242        } else {        } else {
4243                    
4244          $self->{ct}->{sysid} # DOCTYPE          $self->{ct}->{sysid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
4245          $self->{read_until}->($self->{ct}->{sysid}, q[">],          $self->{read_until}->($self->{ct}->{sysid}, q[">],
4246                                length $self->{ct}->{sysid});                                length $self->{ct}->{sysid});
4247    
# Line 3362  sub _get_next_token ($) { Line 4275  sub _get_next_token ($) {
4275      }      }
4276        
4277          redo A;          redo A;
4278        } elsif ($self->{nc} == 0x003E) { # >        } elsif (not $self->{is_xml} and $self->{nc} == 0x003E) { # >
4279                    
4280          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4281    
4282          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
4283            $self->{s_kwd} = '';
4284                    
4285      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4286        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3384  sub _get_next_token ($) { Line 4298  sub _get_next_token ($) {
4298    
4299          redo A;          redo A;
4300        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
4301          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4302    
4303          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4304          ## reconsume            
4305              $self->{state} = DATA_STATE;
4306          $self->{ct}->{quirks} = 1;            $self->{s_kwd} = '';
4307          return  ($self->{ct}); # DOCTYPE            $self->{ct}->{quirks} = 1;
4308            } else {
4309              
4310              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4311            }
4312    
4313            ## reconsume
4314            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4315          redo A;          redo A;
4316        } else {        } else {
4317                    
4318          $self->{ct}->{sysid} # DOCTYPE          $self->{ct}->{sysid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
4319          $self->{read_until}->($self->{ct}->{sysid}, q['>],          $self->{read_until}->($self->{ct}->{sysid}, q['>],
4320                                length $self->{ct}->{sysid});                                length $self->{ct}->{sysid});
4321    
# Line 3417  sub _get_next_token ($) { Line 4335  sub _get_next_token ($) {
4335        }        }
4336      } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {      } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
4337        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
4338                    if ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN) {
4339          ## Stay in the state            
4340              $self->{state} = BEFORE_NDATA_STATE;
4341            } else {
4342              
4343              ## Stay in the state
4344            }
4345                    
4346      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4347        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3432  sub _get_next_token ($) { Line 4355  sub _get_next_token ($) {
4355        
4356          redo A;          redo A;
4357        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
4358            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4359              
4360              $self->{state} = DATA_STATE;
4361              $self->{s_kwd} = '';
4362            } else {
4363              
4364              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4365            }
4366    
4367                    
4368          $self->{state} = DATA_STATE;      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4369          $self->{line_prev} = $self->{line};
4370          $self->{column_prev} = $self->{column};
4371          $self->{column}++;
4372          $self->{nc}
4373              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4374        } else {
4375          $self->{set_nc}->($self);
4376        }
4377      
4378            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4379            redo A;
4380          } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
4381                   ($self->{nc} == 0x004E or # N
4382                    $self->{nc} == 0x006E)) { # n
4383            
4384            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before NDATA'); ## TODO: type
4385            $self->{state} = NDATA_STATE;
4386            $self->{kwd} = chr $self->{nc};
4387                    
4388      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4389        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3445  sub _get_next_token ($) { Line 4395  sub _get_next_token ($) {
4395        $self->{set_nc}->($self);        $self->{set_nc}->($self);
4396      }      }
4397        
4398            redo A;
4399          } elsif ($self->{nc} == -1) {
4400            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4401              
4402              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
4403              $self->{state} = DATA_STATE;
4404              $self->{s_kwd} = '';
4405              $self->{ct}->{quirks} = 1;
4406            } else {
4407              
4408              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
4409              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4410            }
4411    
4412            ## reconsume
4413            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4414            redo A;
4415          } elsif ($self->{is_xml} and
4416                   $self->{ct}->{type} == DOCTYPE_TOKEN and
4417                   $self->{nc} == 0x005B) { # [
4418            
4419            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4420            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
4421            $self->{in_subset} = 1;
4422            
4423        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4424          $self->{line_prev} = $self->{line};
4425          $self->{column_prev} = $self->{column};
4426          $self->{column}++;
4427          $self->{nc}
4428              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4429        } else {
4430          $self->{set_nc}->($self);
4431        }
4432      
4433          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
4434            redo A;
4435          } else {
4436            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');
4437    
4438            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4439              
4440              #$self->{ct}->{quirks} = 1;
4441              $self->{state} = BOGUS_DOCTYPE_STATE;
4442            } else {
4443              
4444              $self->{state} = BOGUS_MD_STATE;
4445            }
4446    
4447            
4448        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4449          $self->{line_prev} = $self->{line};
4450          $self->{column_prev} = $self->{column};
4451          $self->{column}++;
4452          $self->{nc}
4453              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4454        } else {
4455          $self->{set_nc}->($self);
4456        }
4457      
4458            redo A;
4459          }
4460        } elsif ($self->{state} == BEFORE_NDATA_STATE) {
4461          if ($is_space->{$self->{nc}}) {
4462            
4463            ## Stay in the state.
4464            
4465        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4466          $self->{line_prev} = $self->{line};
4467          $self->{column_prev} = $self->{column};
4468          $self->{column}++;
4469          $self->{nc}
4470              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4471        } else {
4472          $self->{set_nc}->($self);
4473        }
4474      
4475            redo A;
4476          } elsif ($self->{nc} == 0x003E) { # >
4477            
4478            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4479            
4480        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4481          $self->{line_prev} = $self->{line};
4482          $self->{column_prev} = $self->{column};
4483          $self->{column}++;
4484          $self->{nc}
4485              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4486        } else {
4487          $self->{set_nc}->($self);
4488        }
4489      
4490            return  ($self->{ct}); # ENTITY
4491            redo A;
4492          } elsif ($self->{nc} == 0x004E or # N
4493                   $self->{nc} == 0x006E) { # n
4494            
4495            $self->{state} = NDATA_STATE;
4496            $self->{kwd} = chr $self->{nc};
4497            
4498        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4499          $self->{line_prev} = $self->{line};
4500          $self->{column_prev} = $self->{column};
4501          $self->{column}++;
4502          $self->{nc}
4503              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4504        } else {
4505          $self->{set_nc}->($self);
4506        }
4507      
4508          redo A;          redo A;
4509        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4510                    
4511          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
4512          $self->{state} = DATA_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4513          ## reconsume          ## reconsume
4514            return  ($self->{ct}); # ENTITY
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
4515          redo A;          redo A;
4516        } else {        } else {
4517                    
4518          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');
4519          #$self->{ct}->{quirks} = 1;          $self->{state} = BOGUS_MD_STATE;
   
         $self->{state} = BOGUS_DOCTYPE_STATE;  
4520                    
4521      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4522        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3482  sub _get_next_token ($) { Line 4534  sub _get_next_token ($) {
4534        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
4535                    
4536          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
4537            $self->{s_kwd} = '';
4538                    
4539      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4540        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3497  sub _get_next_token ($) { Line 4550  sub _get_next_token ($) {
4550          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
4551    
4552          redo A;          redo A;
4553          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
4554            
4555            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4556            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
4557            $self->{in_subset} = 1;
4558            
4559        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4560          $self->{line_prev} = $self->{line};
4561          $self->{column_prev} = $self->{column};
4562          $self->{column}++;
4563          $self->{nc}
4564              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4565        } else {
4566          $self->{set_nc}->($self);
4567        }
4568      
4569            return  ($self->{ct}); # DOCTYPE
4570            redo A;
4571        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4572                    
4573          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
4574            $self->{s_kwd} = '';
4575          ## reconsume          ## reconsume
4576    
4577          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
# Line 3508  sub _get_next_token ($) { Line 4580  sub _get_next_token ($) {
4580        } else {        } else {
4581                    
4582          my $s = '';          my $s = '';
4583          $self->{read_until}->($s, q[>], 0);          $self->{read_until}->($s, q{>[}, 0);
4584    
4585          ## Stay in the state          ## Stay in the state
4586                    
# Line 3528  sub _get_next_token ($) { Line 4600  sub _get_next_token ($) {
4600        ## NOTE: "CDATA section state" in the state is jointly implemented        ## NOTE: "CDATA section state" in the state is jointly implemented
4601        ## by three states, |CDATA_SECTION_STATE|, |CDATA_SECTION_MSE1_STATE|,        ## by three states, |CDATA_SECTION_STATE|, |CDATA_SECTION_MSE1_STATE|,
4602        ## and |CDATA_SECTION_MSE2_STATE|.        ## and |CDATA_SECTION_MSE2_STATE|.
4603    
4604          ## XML5: "CDATA state".
4605                
4606        if ($self->{nc} == 0x005D) { # ]        if ($self->{nc} == 0x005D) { # ]
4607                    
# Line 3545  sub _get_next_token ($) { Line 4619  sub _get_next_token ($) {
4619        
4620          redo A;          redo A;
4621        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4622            if ($self->{is_xml}) {
4623              
4624              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no mse'); ## TODO: type
4625            } else {
4626              
4627            }
4628    
4629          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
4630                    $self->{s_kwd} = '';
4631      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {          ## Reconsume.
       $self->{line_prev} = $self->{line};  
       $self->{column_prev} = $self->{column};  
       $self->{column}++;  
       $self->{nc}  
           = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);  
     } else {  
       $self->{set_nc}->($self);  
     }  
     
4632          if (length $self->{ct}->{data}) { # character          if (length $self->{ct}->{data}) { # character
4633                        
4634            return  ($self->{ct}); # character            return  ($self->{ct}); # character
# Line 3589  sub _get_next_token ($) { Line 4661  sub _get_next_token ($) {
4661    
4662        ## ISSUE: "text tokens" in spec.        ## ISSUE: "text tokens" in spec.
4663      } elsif ($self->{state} == CDATA_SECTION_MSE1_STATE) {      } elsif ($self->{state} == CDATA_SECTION_MSE1_STATE) {
4664          ## XML5: "CDATA bracket state".
4665    
4666        if ($self->{nc} == 0x005D) { # ]        if ($self->{nc} == 0x005D) { # ]
4667                    
4668          $self->{state} = CDATA_SECTION_MSE2_STATE;          $self->{state} = CDATA_SECTION_MSE2_STATE;
# Line 3606  sub _get_next_token ($) { Line 4680  sub _get_next_token ($) {
4680          redo A;          redo A;
4681        } else {        } else {
4682                    
4683            ## XML5: If EOF, "]" is not appended and changed to the data state.
4684          $self->{ct}->{data} .= ']';          $self->{ct}->{data} .= ']';
4685          $self->{state} = CDATA_SECTION_STATE;          $self->{state} = CDATA_SECTION_STATE; ## XML5: Stay in the state.
4686          ## Reconsume.          ## Reconsume.
4687          redo A;          redo A;
4688        }        }
4689      } elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) {      } elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) {
4690          ## XML5: "CDATA end state".
4691    
4692        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
4693          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
4694            $self->{s_kwd} = '';
4695                    
4696      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4697        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3653  sub _get_next_token ($) { Line 4731  sub _get_next_token ($) {
4731                    
4732          $self->{ct}->{data} .= ']]'; # character          $self->{ct}->{data} .= ']]'; # character
4733          $self->{state} = CDATA_SECTION_STATE;          $self->{state} = CDATA_SECTION_STATE;
4734          ## Reconsume.          ## Reconsume. ## XML5: Emit.
4735          redo A;          redo A;
4736        }        }
4737      } elsif ($self->{state} == ENTITY_STATE) {      } elsif ($self->{state} == ENTITY_STATE) {
# Line 3662  sub _get_next_token ($) { Line 4740  sub _get_next_token ($) {
4740              0x003C => 1, 0x0026 => 1, -1 => 1, # <, &              0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
4741              $self->{entity_add} => 1,              $self->{entity_add} => 1,
4742            }->{$self->{nc}}) {            }->{$self->{nc}}) {
4743                    if ($self->{is_xml}) {
4744              
4745              $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero',
4746                              line => $self->{line_prev},
4747                              column => $self->{column_prev}
4748                                  + ($self->{nc} == -1 ? 1 : 0));
4749            } else {
4750              
4751              ## No error
4752            }
4753          ## Don't consume          ## Don't consume
         ## No error  
4754          ## Return nothing.          ## Return nothing.
4755          #          #
4756        } elsif ($self->{nc} == 0x0023) { # #        } elsif ($self->{nc} == 0x0023) { # #
4757                    
4758          $self->{state} = ENTITY_HASH_STATE;          $self->{state} = ENTITY_HASH_STATE;
4759          $self->{s_kwd} = '#';          $self->{kwd} = '#';
4760                    
4761      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4762        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3683  sub _get_next_token ($) { Line 4769  sub _get_next_token ($) {
4769      }      }
4770        
4771          redo A;          redo A;
4772        } elsif ((0x0041 <= $self->{nc} and        } elsif ($self->{is_xml} or
4773                   (0x0041 <= $self->{nc} and
4774                  $self->{nc} <= 0x005A) or # A..Z                  $self->{nc} <= 0x005A) or # A..Z
4775                 (0x0061 <= $self->{nc} and                 (0x0061 <= $self->{nc} and
4776                  $self->{nc} <= 0x007A)) { # a..z                  $self->{nc} <= 0x007A)) { # a..z
4777                    
4778          require Whatpm::_NamedEntityList;          require Whatpm::_NamedEntityList;
4779          $self->{state} = ENTITY_NAME_STATE;          $self->{state} = ENTITY_NAME_STATE;
4780          $self->{s_kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
4781          $self->{entity__value} = $self->{s_kwd};          $self->{entity__value} = $self->{kwd};
4782          $self->{entity__match} = 0;          $self->{entity__match} = 0;
4783                    
4784      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3721  sub _get_next_token ($) { Line 4808  sub _get_next_token ($) {
4808        if ($self->{prev_state} == DATA_STATE) {        if ($self->{prev_state} == DATA_STATE) {
4809                    
4810          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
4811            $self->{s_kwd} = '';
4812          ## Reconsume.          ## Reconsume.
4813          return  ({type => CHARACTER_TOKEN, data => '&',          return  ({type => CHARACTER_TOKEN, data => '&',
4814                    line => $self->{line_prev},                    line => $self->{line_prev},
# Line 3731  sub _get_next_token ($) { Line 4819  sub _get_next_token ($) {
4819                    
4820          $self->{ca}->{value} .= '&';          $self->{ca}->{value} .= '&';
4821          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
4822            $self->{s_kwd} = '';
4823          ## Reconsume.          ## Reconsume.
4824          redo A;          redo A;
4825        }        }
4826      } elsif ($self->{state} == ENTITY_HASH_STATE) {      } elsif ($self->{state} == ENTITY_HASH_STATE) {
4827        if ($self->{nc} == 0x0078 or # x        if ($self->{nc} == 0x0078) { # x
           $self->{nc} == 0x0058) { # X  
4828                    
4829          $self->{state} = HEXREF_X_STATE;          $self->{state} = HEXREF_X_STATE;
4830          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
4831            
4832        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4833          $self->{line_prev} = $self->{line};
4834          $self->{column_prev} = $self->{column};
4835          $self->{column}++;
4836          $self->{nc}
4837              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4838        } else {
4839          $self->{set_nc}->($self);
4840        }
4841      
4842            redo A;
4843          } elsif ($self->{nc} == 0x0058) { # X
4844            
4845            if ($self->{is_xml}) {
4846              $self->{parse_error}->(level => $self->{level}->{must}, type => 'uppercase hcro'); ## TODO: type
4847            }
4848            $self->{state} = HEXREF_X_STATE;
4849            $self->{kwd} .= chr $self->{nc};
4850                    
4851      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4852        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3756  sub _get_next_token ($) { Line 4863  sub _get_next_token ($) {
4863                 $self->{nc} <= 0x0039) { # 0..9                 $self->{nc} <= 0x0039) { # 0..9
4864                    
4865          $self->{state} = NCR_NUM_STATE;          $self->{state} = NCR_NUM_STATE;
4866          $self->{s_kwd} = $self->{nc} - 0x0030;          $self->{kwd} = $self->{nc} - 0x0030;
4867                    
4868      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4869        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3781  sub _get_next_token ($) { Line 4888  sub _get_next_token ($) {
4888          if ($self->{prev_state} == DATA_STATE) {          if ($self->{prev_state} == DATA_STATE) {
4889                        
4890            $self->{state} = $self->{prev_state};            $self->{state} = $self->{prev_state};
4891              $self->{s_kwd} = '';
4892            ## Reconsume.            ## Reconsume.
4893            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
4894                      data => '&#',                      data => '&#',
# Line 3792  sub _get_next_token ($) { Line 4900  sub _get_next_token ($) {
4900                        
4901            $self->{ca}->{value} .= '&#';            $self->{ca}->{value} .= '&#';
4902            $self->{state} = $self->{prev_state};            $self->{state} = $self->{prev_state};
4903              $self->{s_kwd} = '';
4904            ## Reconsume.            ## Reconsume.
4905            redo A;            redo A;
4906          }          }
# Line 3800  sub _get_next_token ($) { Line 4909  sub _get_next_token ($) {
4909        if (0x0030 <= $self->{nc} and        if (0x0030 <= $self->{nc} and
4910            $self->{nc} <= 0x0039) { # 0..9            $self->{nc} <= 0x0039) { # 0..9
4911                    
4912          $self->{s_kwd} *= 10;          $self->{kwd} *= 10;
4913          $self->{s_kwd} += $self->{nc} - 0x0030;          $self->{kwd} += $self->{nc} - 0x0030;
4914                    
4915          ## Stay in the state.          ## Stay in the state.
4916                    
# Line 3837  sub _get_next_token ($) { Line 4946  sub _get_next_token ($) {
4946          #          #
4947        }        }
4948    
4949        my $code = $self->{s_kwd};        my $code = $self->{kwd};
4950        my $l = $self->{line_prev};        my $l = $self->{line_prev};
4951        my $c = $self->{column_prev};        my $c = $self->{column_prev};
4952        if ($charref_map->{$code}) {        if ((not $self->{is_xml} and $charref_map->{$code}) or
4953              ($self->{is_xml} and 0xD800 <= $code and $code <= 0xDFFF) or
4954              ($self->{is_xml} and $code == 0x0000)) {
4955                    
4956          $self->{parse_error}->(level => $self->{level}->{must}, type => 'invalid character reference',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'invalid character reference',
4957                          text => (sprintf 'U+%04X', $code),                          text => (sprintf 'U+%04X', $code),
# Line 3857  sub _get_next_token ($) { Line 4968  sub _get_next_token ($) {
4968        if ($self->{prev_state} == DATA_STATE) {        if ($self->{prev_state} == DATA_STATE) {
4969                    
4970          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
4971            $self->{s_kwd} = '';
4972          ## Reconsume.          ## Reconsume.
4973          return  ({type => CHARACTER_TOKEN, data => chr $code,          return  ({type => CHARACTER_TOKEN, data => chr $code,
4974                      has_reference => 1,
4975                    line => $l, column => $c,                    line => $l, column => $c,
4976                   });                   });
4977          redo A;          redo A;
# Line 3867  sub _get_next_token ($) { Line 4980  sub _get_next_token ($) {
4980          $self->{ca}->{value} .= chr $code;          $self->{ca}->{value} .= chr $code;
4981          $self->{ca}->{has_reference} = 1;          $self->{ca}->{has_reference} = 1;
4982          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
4983            $self->{s_kwd} = '';
4984          ## Reconsume.          ## Reconsume.
4985          redo A;          redo A;
4986        }        }
# Line 3877  sub _get_next_token ($) { Line 4991  sub _get_next_token ($) {
4991          # 0..9, A..F, a..f          # 0..9, A..F, a..f
4992                    
4993          $self->{state} = HEXREF_HEX_STATE;          $self->{state} = HEXREF_HEX_STATE;
4994          $self->{s_kwd} = 0;          $self->{kwd} = 0;
4995          ## Reconsume.          ## Reconsume.
4996          redo A;          redo A;
4997        } else {        } else {
# Line 3892  sub _get_next_token ($) { Line 5006  sub _get_next_token ($) {
5006          if ($self->{prev_state} == DATA_STATE) {          if ($self->{prev_state} == DATA_STATE) {
5007                        
5008            $self->{state} = $self->{prev_state};            $self->{state} = $self->{prev_state};
5009              $self->{s_kwd} = '';
5010            ## Reconsume.            ## Reconsume.
5011            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
5012                      data => '&' . $self->{s_kwd},                      data => '&' . $self->{kwd},
5013                      line => $self->{line_prev},                      line => $self->{line_prev},
5014                      column => $self->{column_prev} - length $self->{s_kwd},                      column => $self->{column_prev} - length $self->{kwd},
5015                     });                     });
5016            redo A;            redo A;
5017          } else {          } else {
5018                        
5019            $self->{ca}->{value} .= '&' . $self->{s_kwd};            $self->{ca}->{value} .= '&' . $self->{kwd};
5020            $self->{state} = $self->{prev_state};            $self->{state} = $self->{prev_state};
5021              $self->{s_kwd} = '';
5022            ## Reconsume.            ## Reconsume.
5023            redo A;            redo A;
5024          }          }
# Line 3911  sub _get_next_token ($) { Line 5027  sub _get_next_token ($) {
5027        if (0x0030 <= $self->{nc} and $self->{nc} <= 0x0039) {        if (0x0030 <= $self->{nc} and $self->{nc} <= 0x0039) {
5028          # 0..9          # 0..9
5029                    
5030          $self->{s_kwd} *= 0x10;          $self->{kwd} *= 0x10;
5031          $self->{s_kwd} += $self->{nc} - 0x0030;          $self->{kwd} += $self->{nc} - 0x0030;
5032          ## Stay in the state.          ## Stay in the state.
5033                    
5034      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3929  sub _get_next_token ($) { Line 5045  sub _get_next_token ($) {
5045        } elsif (0x0061 <= $self->{nc} and        } elsif (0x0061 <= $self->{nc} and
5046                 $self->{nc} <= 0x0066) { # a..f                 $self->{nc} <= 0x0066) { # a..f
5047                    
5048          $self->{s_kwd} *= 0x10;          $self->{kwd} *= 0x10;
5049          $self->{s_kwd} += $self->{nc} - 0x0060 + 9;          $self->{kwd} += $self->{nc} - 0x0060 + 9;
5050          ## Stay in the state.          ## Stay in the state.
5051                    
5052      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3947  sub _get_next_token ($) { Line 5063  sub _get_next_token ($) {
5063        } elsif (0x0041 <= $self->{nc} and        } elsif (0x0041 <= $self->{nc} and
5064                 $self->{nc} <= 0x0046) { # A..F                 $self->{nc} <= 0x0046) { # A..F
5065                    
5066          $self->{s_kwd} *= 0x10;          $self->{kwd} *= 0x10;
5067          $self->{s_kwd} += $self->{nc} - 0x0040 + 9;          $self->{kwd} += $self->{nc} - 0x0040 + 9;
5068          ## Stay in the state.          ## Stay in the state.
5069                    
5070      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3985  sub _get_next_token ($) { Line 5101  sub _get_next_token ($) {
5101          #          #
5102        }        }
5103    
5104        my $code = $self->{s_kwd};        my $code = $self->{kwd};
5105        my $l = $self->{line_prev};        my $l = $self->{line_prev};
5106        my $c = $self->{column_prev};        my $c = $self->{column_prev};
5107        if ($charref_map->{$code}) {        if ((not $self->{is_xml} and $charref_map->{$code}) or
5108              ($self->{is_xml} and 0xD800 <= $code and $code <= 0xDFFF) or
5109              ($self->{is_xml} and $code == 0x0000)) {
5110                    
5111          $self->{parse_error}->(level => $self->{level}->{must}, type => 'invalid character reference',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'invalid character reference',
5112                          text => (sprintf 'U+%04X', $code),                          text => (sprintf 'U+%04X', $code),
# Line 4005  sub _get_next_token ($) { Line 5123  sub _get_next_token ($) {
5123        if ($self->{prev_state} == DATA_STATE) {        if ($self->{prev_state} == DATA_STATE) {
5124                    
5125          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
5126            $self->{s_kwd} = '';
5127          ## Reconsume.          ## Reconsume.
5128          return  ({type => CHARACTER_TOKEN, data => chr $code,          return  ({type => CHARACTER_TOKEN, data => chr $code,
5129                      has_reference => 1,
5130                    line => $l, column => $c,                    line => $l, column => $c,
5131                   });                   });
5132          redo A;          redo A;
# Line 4015  sub _get_next_token ($) { Line 5135  sub _get_next_token ($) {
5135          $self->{ca}->{value} .= chr $code;          $self->{ca}->{value} .= chr $code;
5136          $self->{ca}->{has_reference} = 1;          $self->{ca}->{has_reference} = 1;
5137          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
5138            $self->{s_kwd} = '';
5139          ## Reconsume.          ## Reconsume.
5140          redo A;          redo A;
5141        }        }
5142      } elsif ($self->{state} == ENTITY_NAME_STATE) {      } elsif ($self->{state} == ENTITY_NAME_STATE) {
5143        if (length $self->{s_kwd} < 30 and        if ((0x0041 <= $self->{nc} and # A
5144            ## NOTE: Some number greater than the maximum length of entity name             $self->{nc} <= 0x005A) or # Z
5145            ((0x0041 <= $self->{nc} and # A            (0x0061 <= $self->{nc} and # a
5146              $self->{nc} <= 0x005A) or # Z             $self->{nc} <= 0x007A) or # z
5147             (0x0061 <= $self->{nc} and # a            (0x0030 <= $self->{nc} and # 0
5148              $self->{nc} <= 0x007A) or # z             $self->{nc} <= 0x0039) or # 9
5149             (0x0030 <= $self->{nc} and # 0            $self->{nc} == 0x003B or # ;
5150              $self->{nc} <= 0x0039) or # 9            ($self->{is_xml} and
5151             $self->{nc} == 0x003B)) { # ;             not ($is_space->{$self->{nc}} or
5152                    {
5153                      0x003C => 1, 0x0026 => 1, -1 => 1, # <, &, EOF
5154                      $self->{entity_add} => 1,
5155                    }->{$self->{nc}}))) {
5156          our $EntityChar;          our $EntityChar;
5157          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
5158          if (defined $EntityChar->{$self->{s_kwd}}) {          if (defined $EntityChar->{$self->{kwd}} or
5159                $self->{ge}->{$self->{kwd}}) {
5160            if ($self->{nc} == 0x003B) { # ;            if ($self->{nc} == 0x003B) { # ;
5161                            if (defined $self->{ge}->{$self->{kwd}}) {
5162              $self->{entity__value} = $EntityChar->{$self->{s_kwd}};                if ($self->{ge}->{$self->{kwd}}->{only_text}) {
5163                    
5164                    $self->{entity__value} = $self->{ge}->{$self->{kwd}}->{value};
5165                  } else {
5166                    if (defined $self->{ge}->{$self->{kwd}}->{notation}) {
5167                      
5168                      $self->{parse_error}->(level => $self->{level}->{must}, type => 'unparsed entity', ## TODO: type
5169                                      value => $self->{kwd});
5170                    } else {
5171                      
5172                    }
5173                    $self->{entity__value} = '&' . $self->{kwd}; ## TODO: expand
5174                  }
5175                } else {
5176                  if ($self->{is_xml}) {
5177                    
5178                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'entity not declared', ## TODO: type
5179                                    value => $self->{kwd},
5180                                    level => {
5181                                              'amp;' => $self->{level}->{warn},
5182                                              'quot;' => $self->{level}->{warn},
5183                                              'lt;' => $self->{level}->{warn},
5184                                              'gt;' => $self->{level}->{warn},
5185                                              'apos;' => $self->{level}->{warn},
5186                                             }->{$self->{kwd}} ||
5187                                             $self->{level}->{must});
5188                  } else {
5189                    
5190                  }
5191                  $self->{entity__value} = $EntityChar->{$self->{kwd}};
5192                }
5193              $self->{entity__match} = 1;              $self->{entity__match} = 1;
5194                            
5195      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 4049  sub _get_next_token ($) { Line 5205  sub _get_next_token ($) {
5205              #              #
5206            } else {            } else {
5207                            
5208              $self->{entity__value} = $EntityChar->{$self->{s_kwd}};              $self->{entity__value} = $EntityChar->{$self->{kwd}};
5209              $self->{entity__match} = -1;              $self->{entity__match} = -1;
5210              ## Stay in the state.              ## Stay in the state.
5211                            
# Line 4097  sub _get_next_token ($) { Line 5253  sub _get_next_token ($) {
5253          if ($self->{prev_state} != DATA_STATE and # in attribute          if ($self->{prev_state} != DATA_STATE and # in attribute
5254              $self->{entity__match} < -1) {              $self->{entity__match} < -1) {
5255                        
5256            $data = '&' . $self->{s_kwd};            $data = '&' . $self->{kwd};
5257            #            #
5258          } else {          } else {
5259                        
# Line 4109  sub _get_next_token ($) { Line 5265  sub _get_next_token ($) {
5265                    
5266          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero',
5267                          line => $self->{line_prev},                          line => $self->{line_prev},
5268                          column => $self->{column_prev} - length $self->{s_kwd});                          column => $self->{column_prev} - length $self->{kwd});
5269          $data = '&' . $self->{s_kwd};          $data = '&' . $self->{kwd};
5270          #          #
5271        }        }
5272        
# Line 4127  sub _get_next_token ($) { Line 5283  sub _get_next_token ($) {
5283        if ($self->{prev_state} == DATA_STATE) {        if ($self->{prev_state} == DATA_STATE) {
5284                    
5285          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
5286            $self->{s_kwd} = '';
5287          ## Reconsume.          ## Reconsume.
5288          return  ({type => CHARACTER_TOKEN,          return  ({type => CHARACTER_TOKEN,
5289                    data => $data,                    data => $data,
5290                      has_reference => $has_ref,
5291                    line => $self->{line_prev},                    line => $self->{line_prev},
5292                    column => $self->{column_prev} + 1 - length $self->{s_kwd},                    column => $self->{column_prev} + 1 - length $self->{kwd},
5293                   });                   });
5294          redo A;          redo A;
5295        } else {        } else {
# Line 4139  sub _get_next_token ($) { Line 5297  sub _get_next_token ($) {
5297          $self->{ca}->{value} .= $data;          $self->{ca}->{value} .= $data;
5298          $self->{ca}->{has_reference} = 1 if $has_ref;          $self->{ca}->{has_reference} = 1 if $has_ref;
5299          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
5300            $self->{s_kwd} = '';
5301            ## Reconsume.
5302            redo A;
5303          }
5304    
5305        ## XML-only states
5306    
5307        } elsif ($self->{state} == PI_STATE) {
5308          ## XML5: "Pi state" and "DOCTYPE pi state".
5309    
5310          if ($is_space->{$self->{nc}} or
5311              $self->{nc} == 0x003F or # ?
5312              $self->{nc} == -1) {
5313            ## XML5: U+003F: "pi state": Same as "Anything else"; "DOCTYPE
5314            ## pi state": Switch to the "DOCTYPE pi after state".  EOF:
5315            ## "DOCTYPE pi state": Parse error, switch to the "data
5316            ## state".
5317            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare pio', ## TODO: type
5318                            line => $self->{line_prev},
5319                            column => $self->{column_prev}
5320                                - 1 * ($self->{nc} != -1));
5321            $self->{state} = BOGUS_COMMENT_STATE;
5322            ## Reconsume.
5323            $self->{ct} = {type => COMMENT_TOKEN,
5324                           data => '?',
5325                           line => $self->{line_prev},
5326                           column => $self->{column_prev}
5327                               - 1 * ($self->{nc} != -1),
5328                          };
5329            redo A;
5330          } else {
5331            ## XML5: "DOCTYPE pi state": Stay in the state.
5332            $self->{ct} = {type => PI_TOKEN,
5333                           target => chr $self->{nc},
5334                           data => '',
5335                           line => $self->{line_prev},
5336                           column => $self->{column_prev} - 1,
5337                          };
5338            $self->{state} = PI_TARGET_STATE;
5339            
5340        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5341          $self->{line_prev} = $self->{line};
5342          $self->{column_prev} = $self->{column};
5343          $self->{column}++;
5344          $self->{nc}
5345              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5346        } else {
5347          $self->{set_nc}->($self);
5348        }
5349      
5350            redo A;
5351          }
5352        } elsif ($self->{state} == PI_TARGET_STATE) {
5353          if ($is_space->{$self->{nc}}) {
5354            $self->{state} = PI_TARGET_AFTER_STATE;
5355            
5356        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5357          $self->{line_prev} = $self->{line};
5358          $self->{column_prev} = $self->{column};
5359          $self->{column}++;
5360          $self->{nc}
5361              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5362        } else {
5363          $self->{set_nc}->($self);
5364        }
5365      
5366            redo A;
5367          } elsif ($self->{nc} == -1) {
5368            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type
5369            if ($self->{in_subset}) {
5370              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5371            } else {
5372              $self->{state} = DATA_STATE;
5373              $self->{s_kwd} = '';
5374            }
5375            ## Reconsume.
5376            return  ($self->{ct}); # pi
5377            redo A;
5378          } elsif ($self->{nc} == 0x003F) { # ?
5379            $self->{state} = PI_AFTER_STATE;
5380            
5381        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5382          $self->{line_prev} = $self->{line};
5383          $self->{column_prev} = $self->{column};
5384          $self->{column}++;
5385          $self->{nc}
5386              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5387        } else {
5388          $self->{set_nc}->($self);
5389        }
5390      
5391            redo A;
5392          } else {
5393            ## XML5: typo ("tag name" -> "target")
5394            $self->{ct}->{target} .= chr $self->{nc}; # pi
5395            
5396        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5397          $self->{line_prev} = $self->{line};
5398          $self->{column_prev} = $self->{column};
5399          $self->{column}++;
5400          $self->{nc}
5401              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5402        } else {
5403          $self->{set_nc}->($self);
5404        }
5405      
5406            redo A;
5407          }
5408        } elsif ($self->{state} == PI_TARGET_AFTER_STATE) {
5409          if ($is_space->{$self->{nc}}) {
5410            ## Stay in the state.
5411            
5412        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5413          $self->{line_prev} = $self->{line};
5414          $self->{column_prev} = $self->{column};
5415          $self->{column}++;
5416          $self->{nc}
5417              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5418        } else {
5419          $self->{set_nc}->($self);
5420        }
5421      
5422            redo A;
5423          } else {
5424            $self->{state} = PI_DATA_STATE;
5425            ## Reprocess.
5426            redo A;
5427          }
5428        } elsif ($self->{state} == PI_DATA_STATE) {
5429          if ($self->{nc} == 0x003F) { # ?
5430            $self->{state} = PI_DATA_AFTER_STATE;
5431            
5432        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5433          $self->{line_prev} = $self->{line};
5434          $self->{column_prev} = $self->{column};
5435          $self->{column}++;
5436          $self->{nc}
5437              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5438        } else {
5439          $self->{set_nc}->($self);
5440        }
5441      
5442            redo A;
5443          } elsif ($self->{nc} == -1) {
5444            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type
5445            if ($self->{in_subset}) {
5446              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state"
5447            } else {
5448              $self->{state} = DATA_STATE;
5449              $self->{s_kwd} = '';
5450            }
5451            ## Reprocess.
5452            return  ($self->{ct}); # pi
5453            redo A;
5454          } else {
5455            $self->{ct}->{data} .= chr $self->{nc}; # pi
5456            $self->{read_until}->($self->{ct}->{data}, q[?],
5457                                  length $self->{ct}->{data});
5458            ## Stay in the state.
5459            
5460        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5461          $self->{line_prev} = $self->{line};
5462          $self->{column_prev} = $self->{column};
5463          $self->{column}++;
5464          $self->{nc}
5465              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5466        } else {
5467          $self->{set_nc}->($self);
5468        }
5469      
5470            ## Reprocess.
5471            redo A;
5472          }
5473        } elsif ($self->{state} == PI_AFTER_STATE) {
5474          ## XML5: Part of "Pi after state".
5475    
5476          if ($self->{nc} == 0x003E) { # >
5477            if ($self->{in_subset}) {
5478              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5479            } else {
5480              $self->{state} = DATA_STATE;
5481              $self->{s_kwd} = '';
5482            }
5483            
5484        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5485          $self->{line_prev} = $self->{line};
5486          $self->{column_prev} = $self->{column};
5487          $self->{column}++;
5488          $self->{nc}
5489              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5490        } else {
5491          $self->{set_nc}->($self);
5492        }
5493      
5494            return  ($self->{ct}); # pi
5495            redo A;
5496          } elsif ($self->{nc} == 0x003F) { # ?
5497            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no s after target', ## TODO: type
5498                            line => $self->{line_prev},
5499                            column => $self->{column_prev}); ## XML5: no error
5500            $self->{ct}->{data} .= '?';
5501            $self->{state} = PI_DATA_AFTER_STATE;
5502            
5503        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5504          $self->{line_prev} = $self->{line};
5505          $self->{column_prev} = $self->{column};
5506          $self->{column}++;
5507          $self->{nc}
5508              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5509        } else {
5510          $self->{set_nc}->($self);
5511        }
5512      
5513            redo A;
5514          } else {
5515            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no s after target', ## TODO: type
5516                            line => $self->{line_prev},
5517                            column => $self->{column_prev}
5518                                + 1 * ($self->{nc} == -1)); ## XML5: no error
5519            $self->{ct}->{data} .= '?'; ## XML5: not appended
5520            $self->{state} = PI_DATA_STATE;
5521            ## Reprocess.
5522            redo A;
5523          }
5524        } elsif ($self->{state} == PI_DATA_AFTER_STATE) {
5525          ## XML5: Same as "pi after state" and "DOCTYPE pi after state".
5526    
5527          if ($self->{nc} == 0x003E) { # >
5528            if ($self->{in_subset}) {
5529              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5530            } else {
5531              $self->{state} = DATA_STATE;
5532              $self->{s_kwd} = '';
5533            }
5534            
5535        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5536          $self->{line_prev} = $self->{line};
5537          $self->{column_prev} = $self->{column};
5538          $self->{column}++;
5539          $self->{nc}
5540              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5541        } else {
5542          $self->{set_nc}->($self);
5543        }
5544      
5545            return  ($self->{ct}); # pi
5546            redo A;
5547          } elsif ($self->{nc} == 0x003F) { # ?
5548            $self->{ct}->{data} .= '?';
5549            ## Stay in the state.
5550            
5551        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5552          $self->{line_prev} = $self->{line};
5553          $self->{column_prev} = $self->{column};
5554          $self->{column}++;
5555          $self->{nc}
5556              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5557        } else {
5558          $self->{set_nc}->($self);
5559        }
5560      
5561            redo A;
5562          } else {
5563            $self->{ct}->{data} .= '?'; ## XML5: not appended
5564            $self->{state} = PI_DATA_STATE;
5565            ## Reprocess.
5566            redo A;
5567          }
5568    
5569        } elsif ($self->{state} == DOCTYPE_INTERNAL_SUBSET_STATE) {
5570          if ($self->{nc} == 0x003C) { # <
5571            $self->{state} = DOCTYPE_TAG_STATE;
5572            
5573        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5574          $self->{line_prev} = $self->{line};
5575          $self->{column_prev} = $self->{column};
5576          $self->{column}++;
5577          $self->{nc}
5578              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5579        } else {
5580          $self->{set_nc}->($self);
5581        }
5582      
5583            redo A;
5584          } elsif ($self->{nc} == 0x0025) { # %
5585            ## XML5: Not defined yet.
5586    
5587            ## TODO:
5588    
5589            if (not $self->{stop_processing} and
5590                not $self->{document}->xml_standalone) {
5591              $self->{parse_error}->(level => $self->{level}->{must}, type => 'stop processing', ## TODO: type
5592                              level => $self->{level}->{info});
5593              $self->{stop_processing} = 1;
5594            }
5595    
5596            
5597        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5598          $self->{line_prev} = $self->{line};
5599          $self->{column_prev} = $self->{column};
5600          $self->{column}++;
5601          $self->{nc}
5602              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5603        } else {
5604          $self->{set_nc}->($self);
5605        }
5606      
5607            redo A;
5608          } elsif ($self->{nc} == 0x005D) { # ]
5609            delete $self->{in_subset};
5610            $self->{state} = DOCTYPE_INTERNAL_SUBSET_AFTER_STATE;
5611            
5612        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5613          $self->{line_prev} = $self->{line};
5614          $self->{column_prev} = $self->{column};
5615          $self->{column}++;
5616          $self->{nc}
5617              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5618        } else {
5619          $self->{set_nc}->($self);
5620        }
5621      
5622            redo A;
5623          } elsif ($is_space->{$self->{nc}}) {
5624            ## Stay in the state.
5625            
5626        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5627          $self->{line_prev} = $self->{line};
5628          $self->{column_prev} = $self->{column};
5629          $self->{column}++;
5630          $self->{nc}
5631              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5632        } else {
5633          $self->{set_nc}->($self);
5634        }
5635      
5636            redo A;
5637          } elsif ($self->{nc} == -1) {
5638            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed internal subset'); ## TODO: type
5639            delete $self->{in_subset};
5640            $self->{state} = DATA_STATE;
5641            $self->{s_kwd} = '';
5642            ## Reconsume.
5643            return  ({type => END_OF_DOCTYPE_TOKEN});
5644            redo A;
5645          } else {
5646            unless ($self->{internal_subset_tainted}) {
5647              ## XML5: No parse error.
5648              $self->{parse_error}->(level => $self->{level}->{must}, type => 'string in internal subset');
5649              $self->{internal_subset_tainted} = 1;
5650            }
5651            ## Stay in the state.
5652            
5653        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5654          $self->{line_prev} = $self->{line};
5655          $self->{column_prev} = $self->{column};
5656          $self->{column}++;
5657          $self->{nc}
5658              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5659        } else {
5660          $self->{set_nc}->($self);
5661        }
5662      
5663            redo A;
5664          }
5665        } elsif ($self->{state} == DOCTYPE_INTERNAL_SUBSET_AFTER_STATE) {
5666          if ($self->{nc} == 0x003E) { # >
5667            $self->{state} = DATA_STATE;
5668            $self->{s_kwd} = '';
5669            
5670        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5671          $self->{line_prev} = $self->{line};
5672          $self->{column_prev} = $self->{column};
5673          $self->{column}++;
5674          $self->{nc}
5675              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5676        } else {
5677          $self->{set_nc}->($self);
5678        }
5679      
5680            return  ({type => END_OF_DOCTYPE_TOKEN});
5681            redo A;
5682          } elsif ($self->{nc} == -1) {
5683            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
5684            $self->{state} = DATA_STATE;
5685            $self->{s_kwd} = '';
5686            ## Reconsume.
5687            return  ({type => END_OF_DOCTYPE_TOKEN});
5688            redo A;
5689          } else {
5690            ## XML5: No parse error and stay in the state.
5691            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after internal subset'); ## TODO: type
5692    
5693            $self->{state} = BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE;
5694            
5695        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5696          $self->{line_prev} = $self->{line};
5697          $self->{column_prev} = $self->{column};
5698          $self->{column}++;
5699          $self->{nc}
5700              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5701        } else {
5702          $self->{set_nc}->($self);
5703        }
5704      
5705            redo A;
5706          }
5707        } elsif ($self->{state} == BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE) {
5708          if ($self->{nc} == 0x003E) { # >
5709            $self->{state} = DATA_STATE;
5710            $self->{s_kwd} = '';
5711            
5712        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5713          $self->{line_prev} = $self->{line};
5714          $self->{column_prev} = $self->{column};
5715          $self->{column}++;
5716          $self->{nc}
5717              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5718        } else {
5719          $self->{set_nc}->($self);
5720        }
5721      
5722            return  ({type => END_OF_DOCTYPE_TOKEN});
5723            redo A;
5724          } elsif ($self->{nc} == -1) {
5725            $self->{state} = DATA_STATE;
5726            $self->{s_kwd} = '';
5727            ## Reconsume.
5728            return  ({type => END_OF_DOCTYPE_TOKEN});
5729            redo A;
5730          } else {
5731            ## Stay in the state.
5732            
5733        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5734          $self->{line_prev} = $self->{line};
5735          $self->{column_prev} = $self->{column};
5736          $self->{column}++;
5737          $self->{nc}
5738              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5739        } else {
5740          $self->{set_nc}->($self);
5741        }
5742      
5743            redo A;
5744          }
5745        } elsif ($self->{state} == DOCTYPE_TAG_STATE) {
5746          if ($self->{nc} == 0x0021) { # !
5747            $self->{state} = DOCTYPE_MARKUP_DECLARATION_OPEN_STATE;
5748            
5749        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5750          $self->{line_prev} = $self->{line};
5751          $self->{column_prev} = $self->{column};
5752          $self->{column}++;
5753          $self->{nc}
5754              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5755        } else {
5756          $self->{set_nc}->($self);
5757        }
5758      
5759            redo A;
5760          } elsif ($self->{nc} == 0x003F) { # ?
5761            $self->{state} = PI_STATE;
5762            
5763        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5764          $self->{line_prev} = $self->{line};
5765          $self->{column_prev} = $self->{column};
5766          $self->{column}++;
5767          $self->{nc}
5768              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5769        } else {
5770          $self->{set_nc}->($self);
5771        }
5772      
5773            redo A;
5774          } elsif ($self->{nc} == -1) {
5775            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago');
5776            $self->{state} = DATA_STATE;
5777            $self->{s_kwd} = '';
5778            ## Reconsume.
5779            redo A;
5780          } else {
5781            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago', ## XML5: Not a parse error.
5782                            line => $self->{line_prev},
5783                            column => $self->{column_prev});
5784            $self->{state} = BOGUS_COMMENT_STATE;
5785            $self->{ct} = {type => COMMENT_TOKEN,
5786                           data => '',
5787                          }; ## NOTE: Will be discarded.
5788            
5789        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5790          $self->{line_prev} = $self->{line};
5791          $self->{column_prev} = $self->{column};
5792          $self->{column}++;
5793          $self->{nc}
5794              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5795        } else {
5796          $self->{set_nc}->($self);
5797        }
5798      
5799            redo A;
5800          }
5801        } elsif ($self->{state} == DOCTYPE_MARKUP_DECLARATION_OPEN_STATE) {
5802          ## XML5: "DOCTYPE markup declaration state".
5803          
5804          if ($self->{nc} == 0x002D) { # -
5805            $self->{state} = MD_HYPHEN_STATE;
5806            
5807        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5808          $self->{line_prev} = $self->{line};
5809          $self->{column_prev} = $self->{column};
5810          $self->{column}++;
5811          $self->{nc}
5812              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5813        } else {
5814          $self->{set_nc}->($self);
5815        }
5816      
5817            redo A;
5818          } elsif ($self->{nc} == 0x0045 or # E
5819                   $self->{nc} == 0x0065) { # e
5820            $self->{state} = MD_E_STATE;
5821            $self->{kwd} = chr $self->{nc};
5822            
5823        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5824          $self->{line_prev} = $self->{line};
5825          $self->{column_prev} = $self->{column};
5826          $self->{column}++;
5827          $self->{nc}
5828              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5829        } else {
5830          $self->{set_nc}->($self);
5831        }
5832      
5833            redo A;
5834          } elsif ($self->{nc} == 0x0041 or # A
5835                   $self->{nc} == 0x0061) { # a
5836            $self->{state} = MD_ATTLIST_STATE;
5837            $self->{kwd} = chr $self->{nc};
5838            
5839        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5840          $self->{line_prev} = $self->{line};
5841          $self->{column_prev} = $self->{column};
5842          $self->{column}++;
5843          $self->{nc}
5844              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5845        } else {
5846          $self->{set_nc}->($self);
5847        }
5848      
5849            redo A;
5850          } elsif ($self->{nc} == 0x004E or # N
5851                   $self->{nc} == 0x006E) { # n
5852            $self->{state} = MD_NOTATION_STATE;
5853            $self->{kwd} = chr $self->{nc};
5854            
5855        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5856          $self->{line_prev} = $self->{line};
5857          $self->{column_prev} = $self->{column};
5858          $self->{column}++;
5859          $self->{nc}
5860              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5861        } else {
5862          $self->{set_nc}->($self);
5863        }
5864      
5865            redo A;
5866          } else {
5867            #
5868          }
5869          
5870          ## XML5: No parse error.
5871          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5872                          line => $self->{line_prev},
5873                          column => $self->{column_prev} - 1);
5874          ## Reconsume.
5875          $self->{state} = BOGUS_COMMENT_STATE;
5876          $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded.
5877          redo A;
5878        } elsif ($self->{state} == MD_E_STATE) {
5879          if ($self->{nc} == 0x004E or # N
5880              $self->{nc} == 0x006E) { # n
5881            $self->{state} = MD_ENTITY_STATE;
5882            $self->{kwd} .= chr $self->{nc};
5883            
5884        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5885          $self->{line_prev} = $self->{line};
5886          $self->{column_prev} = $self->{column};
5887          $self->{column}++;
5888          $self->{nc}
5889              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5890        } else {
5891          $self->{set_nc}->($self);
5892        }
5893      
5894            redo A;
5895          } elsif ($self->{nc} == 0x004C or # L
5896                   $self->{nc} == 0x006C) { # l
5897            ## XML5: <!ELEMENT> not supported.
5898            $self->{state} = MD_ELEMENT_STATE;
5899            $self->{kwd} .= chr $self->{nc};
5900            
5901        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5902          $self->{line_prev} = $self->{line};
5903          $self->{column_prev} = $self->{column};
5904          $self->{column}++;
5905          $self->{nc}
5906              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5907        } else {
5908          $self->{set_nc}->($self);
5909        }
5910      
5911            redo A;
5912          } else {
5913            ## XML5: No parse error.
5914            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5915                            line => $self->{line_prev},
5916                            column => $self->{column_prev} - 2
5917                                + 1 * ($self->{nc} == -1));
5918            ## Reconsume.
5919            $self->{state} = BOGUS_COMMENT_STATE;
5920            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5921            redo A;
5922          }
5923        } elsif ($self->{state} == MD_ENTITY_STATE) {
5924          if ($self->{nc} == [
5925                undef,
5926                undef,
5927                0x0054, # T
5928                0x0049, # I
5929                0x0054, # T
5930              ]->[length $self->{kwd}] or
5931              $self->{nc} == [
5932                undef,
5933                undef,
5934                0x0074, # t
5935                0x0069, # i
5936                0x0074, # t
5937              ]->[length $self->{kwd}]) {
5938            ## Stay in the state.
5939            $self->{kwd} .= chr $self->{nc};
5940            
5941        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5942          $self->{line_prev} = $self->{line};
5943          $self->{column_prev} = $self->{column};
5944          $self->{column}++;
5945          $self->{nc}
5946              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5947        } else {
5948          $self->{set_nc}->($self);
5949        }
5950      
5951            redo A;
5952          } elsif ((length $self->{kwd}) == 5 and
5953                   ($self->{nc} == 0x0059 or # Y
5954                    $self->{nc} == 0x0079)) { # y
5955            if ($self->{kwd} ne 'ENTIT' or $self->{nc} == 0x0079) {
5956              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
5957                              text => 'ENTITY',
5958                              line => $self->{line_prev},
5959                              column => $self->{column_prev} - 4);
5960            }
5961            $self->{ct} = {type => GENERAL_ENTITY_TOKEN, name => '',
5962                           line => $self->{line_prev},
5963                           column => $self->{column_prev} - 6};
5964            $self->{state} = DOCTYPE_MD_STATE;
5965            
5966        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5967          $self->{line_prev} = $self->{line};
5968          $self->{column_prev} = $self->{column};
5969          $self->{column}++;
5970          $self->{nc}
5971              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5972        } else {
5973          $self->{set_nc}->($self);
5974        }
5975      
5976            redo A;
5977          } else {
5978            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5979                            line => $self->{line_prev},
5980                            column => $self->{column_prev} - 1
5981                                - (length $self->{kwd})
5982                                + 1 * ($self->{nc} == -1));
5983            $self->{state} = BOGUS_COMMENT_STATE;
5984            ## Reconsume.
5985            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5986            redo A;
5987          }
5988        } elsif ($self->{state} == MD_ELEMENT_STATE) {
5989          if ($self->{nc} == [
5990               undef,
5991               undef,
5992               0x0045, # E
5993               0x004D, # M
5994               0x0045, # E
5995               0x004E, # N
5996              ]->[length $self->{kwd}] or
5997              $self->{nc} == [
5998               undef,
5999               undef,
6000               0x0065, # e
6001               0x006D, # m
6002               0x0065, # e
6003               0x006E, # n
6004              ]->[length $self->{kwd}]) {
6005            ## Stay in the state.
6006            $self->{kwd} .= chr $self->{nc};
6007            
6008        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6009          $self->{line_prev} = $self->{line};
6010          $self->{column_prev} = $self->{column};
6011          $self->{column}++;
6012          $self->{nc}
6013              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6014        } else {
6015          $self->{set_nc}->($self);
6016        }
6017      
6018            redo A;
6019          } elsif ((length $self->{kwd}) == 6 and
6020                   ($self->{nc} == 0x0054 or # T
6021                    $self->{nc} == 0x0074)) { # t
6022            if ($self->{kwd} ne 'ELEMEN' or $self->{nc} == 0x0074) {
6023              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
6024                              text => 'ELEMENT',
6025                              line => $self->{line_prev},
6026                              column => $self->{column_prev} - 5);
6027            }
6028            $self->{ct} = {type => ELEMENT_TOKEN, name => '',
6029                           line => $self->{line_prev},
6030                           column => $self->{column_prev} - 7};
6031            $self->{state} = DOCTYPE_MD_STATE;
6032            
6033        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6034          $self->{line_prev} = $self->{line};
6035          $self->{column_prev} = $self->{column};
6036          $self->{column}++;
6037          $self->{nc}
6038              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6039        } else {
6040          $self->{set_nc}->($self);
6041        }
6042      
6043            redo A;
6044          } else {
6045            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
6046                            line => $self->{line_prev},
6047                            column => $self->{column_prev} - 1
6048                                - (length $self->{kwd})
6049                                + 1 * ($self->{nc} == -1));
6050            $self->{state} = BOGUS_COMMENT_STATE;
6051            ## Reconsume.
6052            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
6053            redo A;
6054          }
6055        } elsif ($self->{state} == MD_ATTLIST_STATE) {
6056          if ($self->{nc} == [
6057               undef,
6058               0x0054, # T
6059               0x0054, # T
6060               0x004C, # L
6061               0x0049, # I
6062               0x0053, # S
6063              ]->[length $self->{kwd}] or
6064              $self->{nc} == [
6065               undef,
6066               0x0074, # t
6067               0x0074, # t
6068               0x006C, # l
6069               0x0069, # i
6070               0x0073, # s
6071              ]->[length $self->{kwd}]) {
6072            ## Stay in the state.
6073            $self->{kwd} .= chr $self->{nc};
6074            
6075        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6076          $self->{line_prev} = $self->{line};
6077          $self->{column_prev} = $self->{column};
6078          $self->{column}++;
6079          $self->{nc}
6080              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6081        } else {
6082          $self->{set_nc}->($self);
6083        }
6084      
6085            redo A;
6086          } elsif ((length $self->{kwd}) == 6 and
6087                   ($self->{nc} == 0x0054 or # T
6088                    $self->{nc} == 0x0074)) { # t
6089            if ($self->{kwd} ne 'ATTLIS' or $self->{nc} == 0x0074) {
6090              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
6091                              text => 'ATTLIST',
6092                              line => $self->{line_prev},
6093                              column => $self->{column_prev} - 5);
6094            }
6095            $self->{ct} = {type => ATTLIST_TOKEN, name => '',
6096                           attrdefs => [],
6097                           line => $self->{line_prev},
6098                           column => $self->{column_prev} - 7};
6099            $self->{state} = DOCTYPE_MD_STATE;
6100            
6101        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6102          $self->{line_prev} = $self->{line};
6103          $self->{column_prev} = $self->{column};
6104          $self->{column}++;
6105          $self->{nc}
6106              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6107        } else {
6108          $self->{set_nc}->($self);
6109        }
6110      
6111            redo A;
6112          } else {
6113            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
6114                            line => $self->{line_prev},
6115                            column => $self->{column_prev} - 1
6116                                 - (length $self->{kwd})
6117                                 + 1 * ($self->{nc} == -1));
6118            $self->{state} = BOGUS_COMMENT_STATE;
6119            ## Reconsume.
6120            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
6121            redo A;
6122          }
6123        } elsif ($self->{state} == MD_NOTATION_STATE) {
6124          if ($self->{nc} == [
6125               undef,
6126               0x004F, # O
6127               0x0054, # T
6128               0x0041, # A
6129               0x0054, # T
6130               0x0049, # I
6131               0x004F, # O
6132              ]->[length $self->{kwd}] or
6133              $self->{nc} == [
6134               undef,
6135               0x006F, # o
6136               0x0074, # t
6137               0x0061, # a
6138               0x0074, # t
6139               0x0069, # i
6140               0x006F, # o
6141              ]->[length $self->{kwd}]) {
6142            ## Stay in the state.
6143            $self->{kwd} .= chr $self->{nc};
6144            
6145        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6146          $self->{line_prev} = $self->{line};
6147          $self->{column_prev} = $self->{column};
6148          $self->{column}++;
6149          $self->{nc}
6150              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6151        } else {
6152          $self->{set_nc}->($self);
6153        }
6154      
6155            redo A;
6156          } elsif ((length $self->{kwd}) == 7 and
6157                   ($self->{nc} == 0x004E or # N
6158                    $self->{nc} == 0x006E)) { # n
6159            if ($self->{kwd} ne 'NOTATIO' or $self->{nc} == 0x006E) {
6160              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
6161                              text => 'NOTATION',
6162                              line => $self->{line_prev},
6163                              column => $self->{column_prev} - 6);
6164            }
6165            $self->{ct} = {type => NOTATION_TOKEN, name => '',
6166                           line => $self->{line_prev},
6167                           column => $self->{column_prev} - 8};
6168            $self->{state} = DOCTYPE_MD_STATE;
6169            
6170        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6171          $self->{line_prev} = $self->{line};
6172          $self->{column_prev} = $self->{column};
6173          $self->{column}++;
6174          $self->{nc}
6175              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6176        } else {
6177          $self->{set_nc}->($self);
6178        }
6179      
6180            redo A;
6181          } else {
6182            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
6183                            line => $self->{line_prev},
6184                            column => $self->{column_prev} - 1
6185                                - (length $self->{kwd})
6186                                + 1 * ($self->{nc} == -1));
6187            $self->{state} = BOGUS_COMMENT_STATE;
6188            ## Reconsume.
6189            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
6190            redo A;
6191          }
6192        } elsif ($self->{state} == DOCTYPE_MD_STATE) {
6193          ## XML5: "DOCTYPE ENTITY state", "DOCTYPE ATTLIST state", and
6194          ## "DOCTYPE NOTATION state".
6195    
6196          if ($is_space->{$self->{nc}}) {
6197            ## XML5: [NOTATION] Switch to the "DOCTYPE NOTATION identifier state".
6198            $self->{state} = BEFORE_MD_NAME_STATE;
6199            
6200        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6201          $self->{line_prev} = $self->{line};
6202          $self->{column_prev} = $self->{column};
6203          $self->{column}++;
6204          $self->{nc}
6205              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6206        } else {
6207          $self->{set_nc}->($self);
6208        }
6209      
6210            redo A;
6211          } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
6212                   $self->{nc} == 0x0025) { # %
6213            ## XML5: Switch to the "DOCTYPE bogus comment state".
6214            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before md name'); ## TODO: type
6215            $self->{state} = DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE;
6216            
6217        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6218          $self->{line_prev} = $self->{line};
6219          $self->{column_prev} = $self->{column};
6220          $self->{column}++;
6221          $self->{nc}
6222              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6223        } else {
6224          $self->{set_nc}->($self);
6225        }
6226      
6227            redo A;
6228          } elsif ($self->{nc} == -1) {
6229            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6230            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6231            ## Reconsume.
6232            redo A;
6233          } elsif ($self->{nc} == 0x003E) { # >
6234            ## XML5: Switch to the "DOCTYPE bogus comment state".
6235            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md name'); ## TODO: type
6236            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6237            
6238        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6239          $self->{line_prev} = $self->{line};
6240          $self->{column_prev} = $self->{column};
6241          $self->{column}++;
6242          $self->{nc}
6243              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6244        } else {
6245          $self->{set_nc}->($self);
6246        }
6247      
6248            redo A;
6249          } else {
6250            ## XML5: Switch to the "DOCTYPE bogus comment state".
6251            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before md name'); ## TODO: type
6252            $self->{state} = BEFORE_MD_NAME_STATE;
6253            redo A;
6254          }
6255        } elsif ($self->{state} == BEFORE_MD_NAME_STATE) {
6256          ## XML5: "DOCTYPE ENTITY parameter state", "DOCTYPE ENTITY type
6257          ## before state", "DOCTYPE ATTLIST name before state".
6258    
6259          if ($is_space->{$self->{nc}}) {
6260            ## Stay in the state.
6261            
6262        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6263          $self->{line_prev} = $self->{line};
6264          $self->{column_prev} = $self->{column};
6265          $self->{column}++;
6266          $self->{nc}
6267              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6268        } else {
6269          $self->{set_nc}->($self);
6270        }
6271      
6272            redo A;
6273          } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
6274                   $self->{nc} == 0x0025) { # %
6275            $self->{state} = DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE;
6276            
6277        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6278          $self->{line_prev} = $self->{line};
6279          $self->{column_prev} = $self->{column};
6280          $self->{column}++;
6281          $self->{nc}
6282              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6283        } else {
6284          $self->{set_nc}->($self);
6285        }
6286      
6287            redo A;
6288          } elsif ($self->{nc} == 0x003E) { # >
6289            ## XML5: Same as "Anything else".
6290            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md name'); ## TODO: type
6291            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6292            
6293        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6294          $self->{line_prev} = $self->{line};
6295          $self->{column_prev} = $self->{column};
6296          $self->{column}++;
6297          $self->{nc}
6298              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6299        } else {
6300          $self->{set_nc}->($self);
6301        }
6302      
6303            redo A;
6304          } elsif ($self->{nc} == -1) {
6305            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6306            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6307            ## Reconsume.
6308            redo A;
6309          } else {
6310            ## XML5: [ATTLIST] Not defined yet.
6311            $self->{ct}->{name} .= chr $self->{nc};
6312            $self->{state} = MD_NAME_STATE;
6313            
6314        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6315          $self->{line_prev} = $self->{line};
6316          $self->{column_prev} = $self->{column};
6317          $self->{column}++;
6318          $self->{nc}
6319              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6320        } else {
6321          $self->{set_nc}->($self);
6322        }
6323      
6324            redo A;
6325          }
6326        } elsif ($self->{state} == DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE) {
6327          if ($is_space->{$self->{nc}}) {
6328            ## XML5: Switch to the "DOCTYPE ENTITY parameter state".
6329            $self->{ct}->{type} = PARAMETER_ENTITY_TOKEN;
6330            $self->{state} = BEFORE_MD_NAME_STATE;
6331            
6332        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6333          $self->{line_prev} = $self->{line};
6334          $self->{column_prev} = $self->{column};
6335          $self->{column}++;
6336          $self->{nc}
6337              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6338        } else {
6339          $self->{set_nc}->($self);
6340        }
6341      
6342            redo A;
6343          } elsif ($self->{nc} == 0x003E) { # >
6344            ## XML5: Same as "Anything else".
6345            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md name'); ## TODO: type
6346            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6347            
6348        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6349          $self->{line_prev} = $self->{line};
6350          $self->{column_prev} = $self->{column};
6351          $self->{column}++;
6352          $self->{nc}
6353              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6354        } else {
6355          $self->{set_nc}->($self);
6356        }
6357      
6358            redo A;
6359          } elsif ($self->{nc} == -1) {
6360            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md');
6361            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6362            ## Reconsume.
6363            redo A;
6364          } else {
6365            ## XML5: No parse error.
6366            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space after ENTITY percent'); ## TODO: type
6367            $self->{state} = BOGUS_COMMENT_STATE;
6368            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
6369            ## Reconsume.
6370            redo A;
6371          }
6372        } elsif ($self->{state} == MD_NAME_STATE) {
6373          ## XML5: "DOCTYPE ENTITY name state" and "DOCTYPE ATTLIST name state".
6374          
6375          if ($is_space->{$self->{nc}}) {
6376            if ($self->{ct}->{type} == ATTLIST_TOKEN) {
6377              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
6378            } elsif ($self->{ct}->{type} == ELEMENT_TOKEN) {
6379              $self->{state} = AFTER_ELEMENT_NAME_STATE;
6380            } else { # ENTITY/NOTATION
6381              $self->{state} = AFTER_DOCTYPE_NAME_STATE;
6382            }
6383            
6384        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6385          $self->{line_prev} = $self->{line};
6386          $self->{column_prev} = $self->{column};
6387          $self->{column}++;
6388          $self->{nc}
6389              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6390        } else {
6391          $self->{set_nc}->($self);
6392        }
6393      
6394            redo A;
6395          } elsif ($self->{nc} == 0x003E) { # >
6396            if ($self->{ct}->{type} == ATTLIST_TOKEN) {
6397              #
6398            } else {
6399              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md def'); ## TODO: type
6400            }
6401            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6402            
6403        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6404          $self->{line_prev} = $self->{line};
6405          $self->{column_prev} = $self->{column};
6406          $self->{column}++;
6407          $self->{nc}
6408              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6409        } else {
6410          $self->{set_nc}->($self);
6411        }
6412      
6413            return  ($self->{ct}); # ELEMENT/ENTITY/ATTLIST/NOTATION
6414            redo A;
6415          } elsif ($self->{nc} == -1) {
6416            ## XML5: [ATTLIST] No parse error.
6417            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md');
6418            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6419            ## Reconsume.
6420            return  ($self->{ct}); # ELEMENT/ENTITY/ATTLIST/NOTATION
6421            redo A;
6422          } else {
6423            ## XML5: [ATTLIST] Not defined yet.
6424            $self->{ct}->{name} .= chr $self->{nc};
6425            ## Stay in the state.
6426            
6427        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6428          $self->{line_prev} = $self->{line};
6429          $self->{column_prev} = $self->{column};
6430          $self->{column}++;
6431          $self->{nc}
6432              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6433        } else {
6434          $self->{set_nc}->($self);
6435        }
6436      
6437            redo A;
6438          }
6439        } elsif ($self->{state} == DOCTYPE_ATTLIST_NAME_AFTER_STATE) {
6440          if ($is_space->{$self->{nc}}) {
6441            ## Stay in the state.
6442            
6443        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6444          $self->{line_prev} = $self->{line};
6445          $self->{column_prev} = $self->{column};
6446          $self->{column}++;
6447          $self->{nc}
6448              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6449        } else {
6450          $self->{set_nc}->($self);
6451        }
6452      
6453            redo A;
6454          } elsif ($self->{nc} == 0x003E) { # >
6455            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6456            
6457        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6458          $self->{line_prev} = $self->{line};
6459          $self->{column_prev} = $self->{column};
6460          $self->{column}++;
6461          $self->{nc}
6462              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6463        } else {
6464          $self->{set_nc}->($self);
6465        }
6466      
6467            return  ($self->{ct}); # ATTLIST
6468            redo A;
6469          } elsif ($self->{nc} == -1) {
6470            ## XML5: No parse error.
6471            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6472            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6473            return  ($self->{ct});
6474            redo A;
6475          } else {
6476            ## XML5: Not defined yet.
6477            $self->{ca} = {name => chr ($self->{nc}), # attrdef
6478                           tokens => [],
6479                           line => $self->{line}, column => $self->{column}};
6480            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE;
6481            
6482        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6483          $self->{line_prev} = $self->{line};
6484          $self->{column_prev} = $self->{column};
6485          $self->{column}++;
6486          $self->{nc}
6487              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6488        } else {
6489          $self->{set_nc}->($self);
6490        }
6491      
6492            redo A;
6493          }
6494        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE) {
6495          if ($is_space->{$self->{nc}}) {
6496            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE;
6497            
6498        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6499          $self->{line_prev} = $self->{line};
6500          $self->{column_prev} = $self->{column};
6501          $self->{column}++;
6502          $self->{nc}
6503              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6504        } else {
6505          $self->{set_nc}->($self);
6506        }
6507      
6508            redo A;
6509          } elsif ($self->{nc} == 0x003E) { # >
6510            ## XML5: Same as "anything else".
6511            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr type'); ## TODO: type
6512            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6513            
6514        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6515          $self->{line_prev} = $self->{line};
6516          $self->{column_prev} = $self->{column};
6517          $self->{column}++;
6518          $self->{nc}
6519              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6520        } else {
6521          $self->{set_nc}->($self);
6522        }
6523      
6524            return  ($self->{ct}); # ATTLIST
6525            redo A;
6526          } elsif ($self->{nc} == 0x0028) { # (
6527            ## XML5: Same as "anything else".
6528            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before paren'); ## TODO: type
6529            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6530            
6531        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6532          $self->{line_prev} = $self->{line};
6533          $self->{column_prev} = $self->{column};
6534          $self->{column}++;
6535          $self->{nc}
6536              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6537        } else {
6538          $self->{set_nc}->($self);
6539        }
6540      
6541            redo A;
6542          } elsif ($self->{nc} == -1) {
6543            ## XML5: No parse error.
6544            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6545            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6546            
6547        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6548          $self->{line_prev} = $self->{line};
6549          $self->{column_prev} = $self->{column};
6550          $self->{column}++;
6551          $self->{nc}
6552              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6553        } else {
6554          $self->{set_nc}->($self);
6555        }
6556      
6557            return  ($self->{ct}); # ATTLIST
6558            redo A;
6559          } else {
6560            ## XML5: Not defined yet.
6561            $self->{ca}->{name} .= chr $self->{nc};
6562            ## Stay in the state.
6563            
6564        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6565          $self->{line_prev} = $self->{line};
6566          $self->{column_prev} = $self->{column};
6567          $self->{column}++;
6568          $self->{nc}
6569              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6570        } else {
6571          $self->{set_nc}->($self);
6572        }
6573      
6574            redo A;
6575          }
6576        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE) {
6577          if ($is_space->{$self->{nc}}) {
6578            ## Stay in the state.
6579            
6580        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6581          $self->{line_prev} = $self->{line};
6582          $self->{column_prev} = $self->{column};
6583          $self->{column}++;
6584          $self->{nc}
6585              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6586        } else {
6587          $self->{set_nc}->($self);
6588        }
6589      
6590            redo A;
6591          } elsif ($self->{nc} == 0x003E) { # >
6592            ## XML5: Same as "anything else".
6593            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr type'); ## TODO: type
6594            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6595            
6596        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6597          $self->{line_prev} = $self->{line};
6598          $self->{column_prev} = $self->{column};
6599          $self->{column}++;
6600          $self->{nc}
6601              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6602        } else {
6603          $self->{set_nc}->($self);
6604        }
6605      
6606            return  ($self->{ct}); # ATTLIST
6607            redo A;
6608          } elsif ($self->{nc} == 0x0028) { # (
6609            ## XML5: Same as "anything else".
6610            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6611            
6612        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6613          $self->{line_prev} = $self->{line};
6614          $self->{column_prev} = $self->{column};
6615          $self->{column}++;
6616          $self->{nc}
6617              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6618        } else {
6619          $self->{set_nc}->($self);
6620        }
6621      
6622            redo A;
6623          } elsif ($self->{nc} == -1) {
6624            ## XML5: No parse error.
6625            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6626            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6627            
6628        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6629          $self->{line_prev} = $self->{line};
6630          $self->{column_prev} = $self->{column};
6631          $self->{column}++;
6632          $self->{nc}
6633              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6634        } else {
6635          $self->{set_nc}->($self);
6636        }
6637      
6638            return  ($self->{ct});
6639            redo A;
6640          } else {
6641            ## XML5: Not defined yet.
6642            $self->{ca}->{type} = chr $self->{nc};
6643            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE;
6644            
6645        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6646          $self->{line_prev} = $self->{line};
6647          $self->{column_prev} = $self->{column};
6648          $self->{column}++;
6649          $self->{nc}
6650              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6651        } else {
6652          $self->{set_nc}->($self);
6653        }
6654      
6655            redo A;
6656          }
6657        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE) {
6658          if ($is_space->{$self->{nc}}) {
6659            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE;
6660            
6661        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6662          $self->{line_prev} = $self->{line};
6663          $self->{column_prev} = $self->{column};
6664          $self->{column}++;
6665          $self->{nc}
6666              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6667        } else {
6668          $self->{set_nc}->($self);
6669        }
6670      
6671            redo A;
6672          } elsif ($self->{nc} == 0x0023) { # #
6673            ## XML5: Same as "anything else".
6674            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6675            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6676            
6677        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6678          $self->{line_prev} = $self->{line};
6679          $self->{column_prev} = $self->{column};
6680          $self->{column}++;
6681          $self->{nc}
6682              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6683        } else {
6684          $self->{set_nc}->($self);
6685        }
6686      
6687            redo A;
6688          } elsif ($self->{nc} == 0x0022) { # "
6689            ## XML5: Same as "anything else".
6690            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6691            $self->{ca}->{value} = '';
6692            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6693            
6694        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6695          $self->{line_prev} = $self->{line};
6696          $self->{column_prev} = $self->{column};
6697          $self->{column}++;
6698          $self->{nc}
6699              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6700        } else {
6701          $self->{set_nc}->($self);
6702        }
6703      
6704            redo A;
6705          } elsif ($self->{nc} == 0x0027) { # '
6706            ## XML5: Same as "anything else".
6707            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6708            $self->{ca}->{value} = '';
6709            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6710            
6711        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6712          $self->{line_prev} = $self->{line};
6713          $self->{column_prev} = $self->{column};
6714          $self->{column}++;
6715          $self->{nc}
6716              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6717        } else {
6718          $self->{set_nc}->($self);
6719        }
6720      
6721            redo A;
6722          } elsif ($self->{nc} == 0x003E) { # >
6723            ## XML5: Same as "anything else".
6724            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6725            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6726            
6727        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6728          $self->{line_prev} = $self->{line};
6729          $self->{column_prev} = $self->{column};
6730          $self->{column}++;
6731          $self->{nc}
6732              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6733        } else {
6734          $self->{set_nc}->($self);
6735        }
6736      
6737            return  ($self->{ct}); # ATTLIST
6738            redo A;
6739          } elsif ($self->{nc} == 0x0028) { # (
6740            ## XML5: Same as "anything else".
6741            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before paren'); ## TODO: type
6742            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6743            
6744        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6745          $self->{line_prev} = $self->{line};
6746          $self->{column_prev} = $self->{column};
6747          $self->{column}++;
6748          $self->{nc}
6749              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6750        } else {
6751          $self->{set_nc}->($self);
6752        }
6753      
6754            redo A;
6755          } elsif ($self->{nc} == -1) {
6756            ## XML5: No parse error.
6757            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6758            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6759            
6760        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6761          $self->{line_prev} = $self->{line};
6762          $self->{column_prev} = $self->{column};
6763          $self->{column}++;
6764          $self->{nc}
6765              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6766        } else {
6767          $self->{set_nc}->($self);
6768        }
6769      
6770            return  ($self->{ct});
6771            redo A;
6772          } else {
6773            ## XML5: Not defined yet.
6774            $self->{ca}->{type} .= chr $self->{nc};
6775            ## Stay in the state.
6776            
6777        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6778          $self->{line_prev} = $self->{line};
6779          $self->{column_prev} = $self->{column};
6780          $self->{column}++;
6781          $self->{nc}
6782              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6783        } else {
6784          $self->{set_nc}->($self);
6785        }
6786      
6787            redo A;
6788          }
6789        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE) {
6790          if ($is_space->{$self->{nc}}) {
6791            ## Stay in the state.
6792            
6793        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6794          $self->{line_prev} = $self->{line};
6795          $self->{column_prev} = $self->{column};
6796          $self->{column}++;
6797          $self->{nc}
6798              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6799        } else {
6800          $self->{set_nc}->($self);
6801        }
6802      
6803            redo A;
6804          } elsif ($self->{nc} == 0x0028) { # (
6805            ## XML5: Same as "anything else".
6806            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6807            
6808        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6809          $self->{line_prev} = $self->{line};
6810          $self->{column_prev} = $self->{column};
6811          $self->{column}++;
6812          $self->{nc}
6813              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6814        } else {
6815          $self->{set_nc}->($self);
6816        }
6817      
6818            redo A;
6819          } elsif ($self->{nc} == 0x0023) { # #
6820            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6821            
6822        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6823          $self->{line_prev} = $self->{line};
6824          $self->{column_prev} = $self->{column};
6825          $self->{column}++;
6826          $self->{nc}
6827              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6828        } else {
6829          $self->{set_nc}->($self);
6830        }
6831      
6832            redo A;
6833          } elsif ($self->{nc} == 0x0022) { # "
6834            ## XML5: Same as "anything else".
6835            $self->{ca}->{value} = '';
6836            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6837            
6838        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6839          $self->{line_prev} = $self->{line};
6840          $self->{column_prev} = $self->{column};
6841          $self->{column}++;
6842          $self->{nc}
6843              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6844        } else {
6845          $self->{set_nc}->($self);
6846        }
6847      
6848            redo A;
6849          } elsif ($self->{nc} == 0x0027) { # '
6850            ## XML5: Same as "anything else".
6851            $self->{ca}->{value} = '';
6852            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6853            
6854        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6855          $self->{line_prev} = $self->{line};
6856          $self->{column_prev} = $self->{column};
6857          $self->{column}++;
6858          $self->{nc}
6859              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6860        } else {
6861          $self->{set_nc}->($self);
6862        }
6863      
6864            redo A;
6865          } elsif ($self->{nc} == 0x003E) { # >
6866            ## XML5: Same as "anything else".
6867            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6868            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6869            
6870        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6871          $self->{line_prev} = $self->{line};
6872          $self->{column_prev} = $self->{column};
6873          $self->{column}++;
6874          $self->{nc}
6875              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6876        } else {
6877          $self->{set_nc}->($self);
6878        }
6879      
6880            return  ($self->{ct}); # ATTLIST
6881            redo A;
6882          } elsif ($self->{nc} == -1) {
6883            ## XML5: No parse error.
6884            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6885            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6886            
6887        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6888          $self->{line_prev} = $self->{line};
6889          $self->{column_prev} = $self->{column};
6890          $self->{column}++;
6891          $self->{nc}
6892              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6893        } else {
6894          $self->{set_nc}->($self);
6895        }
6896      
6897            return  ($self->{ct});
6898            redo A;
6899          } else {
6900            ## XML5: Switch to the "DOCTYPE bogus comment state".
6901            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
6902            $self->{ca}->{value} = '';
6903            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
6904            ## Reconsume.
6905            redo A;
6906          }
6907        } elsif ($self->{state} == BEFORE_ALLOWED_TOKEN_STATE) {
6908          if ($is_space->{$self->{nc}}) {
6909            ## Stay in the state.
6910            
6911        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6912          $self->{line_prev} = $self->{line};
6913          $self->{column_prev} = $self->{column};
6914          $self->{column}++;
6915          $self->{nc}
6916              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6917        } else {
6918          $self->{set_nc}->($self);
6919        }
6920      
6921            redo A;
6922          } elsif ($self->{nc} == 0x007C) { # |
6923            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty allowed token'); ## TODO: type
6924            ## Stay in the state.
6925            
6926        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6927          $self->{line_prev} = $self->{line};
6928          $self->{column_prev} = $self->{column};
6929          $self->{column}++;
6930          $self->{nc}
6931              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6932        } else {
6933          $self->{set_nc}->($self);
6934        }
6935      
6936            redo A;
6937          } elsif ($self->{nc} == 0x0029) { # )
6938            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty allowed token'); ## TODO: type
6939            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
6940            
6941        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6942          $self->{line_prev} = $self->{line};
6943          $self->{column_prev} = $self->{column};
6944          $self->{column}++;
6945          $self->{nc}
6946              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6947        } else {
6948          $self->{set_nc}->($self);
6949        }
6950      
6951            redo A;
6952          } elsif ($self->{nc} == 0x003E) { # >
6953            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
6954            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6955            
6956        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6957          $self->{line_prev} = $self->{line};
6958          $self->{column_prev} = $self->{column};
6959          $self->{column}++;
6960          $self->{nc}
6961              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6962        } else {
6963          $self->{set_nc}->($self);
6964        }
6965      
6966            return  ($self->{ct}); # ATTLIST
6967            redo A;
6968          } elsif ($self->{nc} == -1) {
6969            ## XML5: No parse error.
6970            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6971            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6972            
6973        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6974          $self->{line_prev} = $self->{line};
6975          $self->{column_prev} = $self->{column};
6976          $self->{column}++;
6977          $self->{nc}
6978              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6979        } else {
6980          $self->{set_nc}->($self);
6981        }
6982      
6983            return  ($self->{ct});
6984            redo A;
6985          } else {
6986            push @{$self->{ca}->{tokens}}, chr $self->{nc};
6987            $self->{state} = ALLOWED_TOKEN_STATE;
6988            
6989        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6990          $self->{line_prev} = $self->{line};
6991          $self->{column_prev} = $self->{column};
6992          $self->{column}++;
6993          $self->{nc}
6994              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6995        } else {
6996          $self->{set_nc}->($self);
6997        }
6998      
6999            redo A;
7000          }
7001        } elsif ($self->{state} == ALLOWED_TOKEN_STATE) {
7002          if ($is_space->{$self->{nc}}) {
7003            $self->{state} = AFTER_ALLOWED_TOKEN_STATE;
7004            
7005        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7006          $self->{line_prev} = $self->{line};
7007          $self->{column_prev} = $self->{column};
7008          $self->{column}++;
7009          $self->{nc}
7010              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7011        } else {
7012          $self->{set_nc}->($self);
7013        }
7014      
7015            redo A;
7016          } elsif ($self->{nc} == 0x007C) { # |
7017            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
7018            
7019        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7020          $self->{line_prev} = $self->{line};
7021          $self->{column_prev} = $self->{column};
7022          $self->{column}++;
7023          $self->{nc}
7024              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7025        } else {
7026          $self->{set_nc}->($self);
7027        }
7028      
7029            redo A;
7030          } elsif ($self->{nc} == 0x0029) { # )
7031            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
7032            
7033        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7034          $self->{line_prev} = $self->{line};
7035          $self->{column_prev} = $self->{column};
7036          $self->{column}++;
7037          $self->{nc}
7038              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7039        } else {
7040          $self->{set_nc}->($self);
7041        }
7042      
7043            redo A;
7044          } elsif ($self->{nc} == 0x003E) { # >
7045            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
7046            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7047            
7048        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7049          $self->{line_prev} = $self->{line};
7050          $self->{column_prev} = $self->{column};
7051          $self->{column}++;
7052          $self->{nc}
7053              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7054        } else {
7055          $self->{set_nc}->($self);
7056        }
7057      
7058            return  ($self->{ct}); # ATTLIST
7059            redo A;
7060          } elsif ($self->{nc} == -1) {
7061            ## XML5: No parse error.
7062            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7063            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7064            
7065        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7066          $self->{line_prev} = $self->{line};
7067          $self->{column_prev} = $self->{column};
7068          $self->{column}++;
7069          $self->{nc}
7070              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7071        } else {
7072          $self->{set_nc}->($self);
7073        }
7074      
7075            return  ($self->{ct});
7076            redo A;
7077          } else {
7078            $self->{ca}->{tokens}->[-1] .= chr $self->{nc};
7079            ## Stay in the state.
7080            
7081        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7082          $self->{line_prev} = $self->{line};
7083          $self->{column_prev} = $self->{column};
7084          $self->{column}++;
7085          $self->{nc}
7086              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7087        } else {
7088          $self->{set_nc}->($self);
7089        }
7090      
7091            redo A;
7092          }
7093        } elsif ($self->{state} == AFTER_ALLOWED_TOKEN_STATE) {
7094          if ($is_space->{$self->{nc}}) {
7095            ## Stay in the state.
7096            
7097        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7098          $self->{line_prev} = $self->{line};
7099          $self->{column_prev} = $self->{column};
7100          $self->{column}++;
7101          $self->{nc}
7102              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7103        } else {
7104          $self->{set_nc}->($self);
7105        }
7106      
7107            redo A;
7108          } elsif ($self->{nc} == 0x007C) { # |
7109            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
7110            
7111        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7112          $self->{line_prev} = $self->{line};
7113          $self->{column_prev} = $self->{column};
7114          $self->{column}++;
7115          $self->{nc}
7116              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7117        } else {
7118          $self->{set_nc}->($self);
7119        }
7120      
7121            redo A;
7122          } elsif ($self->{nc} == 0x0029) { # )
7123            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
7124            
7125        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7126          $self->{line_prev} = $self->{line};
7127          $self->{column_prev} = $self->{column};
7128          $self->{column}++;
7129          $self->{nc}
7130              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7131        } else {
7132          $self->{set_nc}->($self);
7133        }
7134      
7135            redo A;
7136          } elsif ($self->{nc} == 0x003E) { # >
7137            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
7138            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7139            
7140        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7141          $self->{line_prev} = $self->{line};
7142          $self->{column_prev} = $self->{column};
7143          $self->{column}++;
7144          $self->{nc}
7145              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7146        } else {
7147          $self->{set_nc}->($self);
7148        }
7149      
7150            return  ($self->{ct}); # ATTLIST
7151            redo A;
7152          } elsif ($self->{nc} == -1) {
7153            ## XML5: No parse error.
7154            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7155            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7156            
7157        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7158          $self->{line_prev} = $self->{line};
7159          $self->{column_prev} = $self->{column};
7160          $self->{column}++;
7161          $self->{nc}
7162              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7163        } else {
7164          $self->{set_nc}->($self);
7165        }
7166      
7167            return  ($self->{ct});
7168            redo A;
7169          } else {
7170            $self->{parse_error}->(level => $self->{level}->{must}, type => 'space in allowed token', ## TODO: type
7171                            line => $self->{line_prev},
7172                            column => $self->{column_prev});
7173            $self->{ca}->{tokens}->[-1] .= ' ' . chr $self->{nc};
7174            $self->{state} = ALLOWED_TOKEN_STATE;
7175            
7176        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7177          $self->{line_prev} = $self->{line};
7178          $self->{column_prev} = $self->{column};
7179          $self->{column}++;
7180          $self->{nc}
7181              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7182        } else {
7183          $self->{set_nc}->($self);
7184        }
7185      
7186            redo A;
7187          }
7188        } elsif ($self->{state} == AFTER_ALLOWED_TOKENS_STATE) {
7189          if ($is_space->{$self->{nc}}) {
7190            $self->{state} = BEFORE_ATTR_DEFAULT_STATE;
7191            
7192        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7193          $self->{line_prev} = $self->{line};
7194          $self->{column_prev} = $self->{column};
7195          $self->{column}++;
7196          $self->{nc}
7197              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7198        } else {
7199          $self->{set_nc}->($self);
7200        }
7201      
7202            redo A;
7203          } elsif ($self->{nc} == 0x0023) { # #
7204            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7205            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
7206            
7207        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7208          $self->{line_prev} = $self->{line};
7209          $self->{column_prev} = $self->{column};
7210          $self->{column}++;
7211          $self->{nc}
7212              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7213        } else {
7214          $self->{set_nc}->($self);
7215        }
7216      
7217            redo A;
7218          } elsif ($self->{nc} == 0x0022) { # "
7219            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7220            $self->{ca}->{value} = '';
7221            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7222            
7223        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7224          $self->{line_prev} = $self->{line};
7225          $self->{column_prev} = $self->{column};
7226          $self->{column}++;
7227          $self->{nc}
7228              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7229        } else {
7230          $self->{set_nc}->($self);
7231        }
7232      
7233            redo A;
7234          } elsif ($self->{nc} == 0x0027) { # '
7235            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7236            $self->{ca}->{value} = '';
7237            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7238            
7239        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7240          $self->{line_prev} = $self->{line};
7241          $self->{column_prev} = $self->{column};
7242          $self->{column}++;
7243          $self->{nc}
7244              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7245        } else {
7246          $self->{set_nc}->($self);
7247        }
7248      
7249            redo A;
7250          } elsif ($self->{nc} == 0x003E) { # >
7251            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
7252            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7253            
7254        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7255          $self->{line_prev} = $self->{line};
7256          $self->{column_prev} = $self->{column};
7257          $self->{column}++;
7258          $self->{nc}
7259              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7260        } else {
7261          $self->{set_nc}->($self);
7262        }
7263      
7264            return  ($self->{ct}); # ATTLIST
7265            redo A;
7266          } elsif ($self->{nc} == -1) {
7267            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7268            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7269            
7270        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7271          $self->{line_prev} = $self->{line};
7272          $self->{column_prev} = $self->{column};
7273          $self->{column}++;
7274          $self->{nc}
7275              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7276        } else {
7277          $self->{set_nc}->($self);
7278        }
7279      
7280            return  ($self->{ct});
7281            redo A;
7282          } else {
7283            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
7284            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
7285            ## Reconsume.
7286            redo A;
7287          }
7288        } elsif ($self->{state} == BEFORE_ATTR_DEFAULT_STATE) {
7289          if ($is_space->{$self->{nc}}) {
7290            ## Stay in the state.
7291            
7292        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7293          $self->{line_prev} = $self->{line};
7294          $self->{column_prev} = $self->{column};
7295          $self->{column}++;
7296          $self->{nc}
7297              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7298        } else {
7299          $self->{set_nc}->($self);
7300        }
7301      
7302            redo A;
7303          } elsif ($self->{nc} == 0x0023) { # #
7304            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
7305            
7306        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7307          $self->{line_prev} = $self->{line};
7308          $self->{column_prev} = $self->{column};
7309          $self->{column}++;
7310          $self->{nc}
7311              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7312        } else {
7313          $self->{set_nc}->($self);
7314        }
7315      
7316            redo A;
7317          } elsif ($self->{nc} == 0x0022) { # "
7318            $self->{ca}->{value} = '';
7319            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7320            
7321        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7322          $self->{line_prev} = $self->{line};
7323          $self->{column_prev} = $self->{column};
7324          $self->{column}++;
7325          $self->{nc}
7326              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7327        } else {
7328          $self->{set_nc}->($self);
7329        }
7330      
7331            redo A;
7332          } elsif ($self->{nc} == 0x0027) { # '
7333            $self->{ca}->{value} = '';
7334            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7335            
7336        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7337          $self->{line_prev} = $self->{line};
7338          $self->{column_prev} = $self->{column};
7339          $self->{column}++;
7340          $self->{nc}
7341              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7342        } else {
7343          $self->{set_nc}->($self);
7344        }
7345      
7346            redo A;
7347          } elsif ($self->{nc} == 0x003E) { # >
7348            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
7349            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7350            
7351        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7352          $self->{line_prev} = $self->{line};
7353          $self->{column_prev} = $self->{column};
7354          $self->{column}++;
7355          $self->{nc}
7356              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7357        } else {
7358          $self->{set_nc}->($self);
7359        }
7360      
7361            return  ($self->{ct}); # ATTLIST
7362            redo A;
7363          } elsif ($self->{nc} == -1) {
7364            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7365            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7366            
7367        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7368          $self->{line_prev} = $self->{line};
7369          $self->{column_prev} = $self->{column};
7370          $self->{column}++;
7371          $self->{nc}
7372              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7373        } else {
7374          $self->{set_nc}->($self);
7375        }
7376      
7377            return  ($self->{ct});
7378            redo A;
7379          } else {
7380            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
7381            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
7382            ## Reconsume.
7383            redo A;
7384          }
7385        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE) {
7386          if ($is_space->{$self->{nc}}) {
7387            ## XML5: No parse error.
7388            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no default type'); ## TODO: type
7389            $self->{state} = BOGUS_MD_STATE;
7390            ## Reconsume.
7391            redo A;
7392          } elsif ($self->{nc} == 0x0022) { # "
7393            ## XML5: Same as "anything else".
7394            $self->{ca}->{value} = '';
7395            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7396            
7397        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7398          $self->{line_prev} = $self->{line};
7399          $self->{column_prev} = $self->{column};
7400          $self->{column}++;
7401          $self->{nc}
7402              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7403        } else {
7404          $self->{set_nc}->($self);
7405        }
7406      
7407            redo A;
7408          } elsif ($self->{nc} == 0x0027) { # '
7409            ## XML5: Same as "anything else".
7410            $self->{ca}->{value} = '';
7411            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7412            
7413        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7414          $self->{line_prev} = $self->{line};
7415          $self->{column_prev} = $self->{column};
7416          $self->{column}++;
7417          $self->{nc}
7418              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7419        } else {
7420          $self->{set_nc}->($self);
7421        }
7422      
7423            redo A;
7424          } elsif ($self->{nc} == 0x003E) { # >
7425            ## XML5: Same as "anything else".
7426            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
7427            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7428            
7429        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7430          $self->{line_prev} = $self->{line};
7431          $self->{column_prev} = $self->{column};
7432          $self->{column}++;
7433          $self->{nc}
7434              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7435        } else {
7436          $self->{set_nc}->($self);
7437        }
7438      
7439            return  ($self->{ct}); # ATTLIST
7440            redo A;
7441          } elsif ($self->{nc} == -1) {
7442            ## XML5: No parse error.
7443            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7444            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7445            
7446        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7447          $self->{line_prev} = $self->{line};
7448          $self->{column_prev} = $self->{column};
7449          $self->{column}++;
7450          $self->{nc}
7451              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7452        } else {
7453          $self->{set_nc}->($self);
7454        }
7455      
7456            return  ($self->{ct});
7457            redo A;
7458          } else {
7459            $self->{ca}->{default} = chr $self->{nc};
7460            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE;
7461            
7462        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7463          $self->{line_prev} = $self->{line};
7464          $self->{column_prev} = $self->{column};
7465          $self->{column}++;
7466          $self->{nc}
7467              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7468        } else {
7469          $self->{set_nc}->($self);
7470        }
7471      
7472            redo A;
7473          }
7474        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE) {
7475          if ($is_space->{$self->{nc}}) {
7476            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE;
7477            
7478        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7479          $self->{line_prev} = $self->{line};
7480          $self->{column_prev} = $self->{column};
7481          $self->{column}++;
7482          $self->{nc}
7483              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7484        } else {
7485          $self->{set_nc}->($self);
7486        }
7487      
7488            redo A;
7489          } elsif ($self->{nc} == 0x0022) { # "
7490            ## XML5: Same as "anything else".
7491            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7492            $self->{ca}->{value} = '';
7493            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7494            
7495        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7496          $self->{line_prev} = $self->{line};
7497          $self->{column_prev} = $self->{column};
7498          $self->{column}++;
7499          $self->{nc}
7500              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7501        } else {
7502          $self->{set_nc}->($self);
7503        }
7504      
7505            redo A;
7506          } elsif ($self->{nc} == 0x0027) { # '
7507            ## XML5: Same as "anything else".
7508            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7509            $self->{ca}->{value} = '';
7510            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7511            
7512        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7513          $self->{line_prev} = $self->{line};
7514          $self->{column_prev} = $self->{column};
7515          $self->{column}++;
7516          $self->{nc}
7517              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7518        } else {
7519          $self->{set_nc}->($self);
7520        }
7521      
7522            redo A;
7523          } elsif ($self->{nc} == 0x003E) { # >
7524            ## XML5: Same as "anything else".
7525            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7526            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7527            
7528        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7529          $self->{line_prev} = $self->{line};
7530          $self->{column_prev} = $self->{column};
7531          $self->{column}++;
7532          $self->{nc}
7533              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7534        } else {
7535          $self->{set_nc}->($self);
7536        }
7537      
7538            return  ($self->{ct}); # ATTLIST
7539            redo A;
7540          } elsif ($self->{nc} == -1) {
7541            ## XML5: No parse error.
7542            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7543            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7544            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7545            
7546        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7547          $self->{line_prev} = $self->{line};
7548          $self->{column_prev} = $self->{column};
7549          $self->{column}++;
7550          $self->{nc}
7551              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7552        } else {
7553          $self->{set_nc}->($self);
7554        }
7555      
7556            return  ($self->{ct});
7557            redo A;
7558          } else {
7559            $self->{ca}->{default} .= chr $self->{nc};
7560            ## Stay in the state.
7561            
7562        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7563          $self->{line_prev} = $self->{line};
7564          $self->{column_prev} = $self->{column};
7565          $self->{column}++;
7566          $self->{nc}
7567              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7568        } else {
7569          $self->{set_nc}->($self);
7570        }
7571      
7572            redo A;
7573          }
7574        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE) {
7575          if ($is_space->{$self->{nc}}) {
7576            ## Stay in the state.
7577            
7578        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7579          $self->{line_prev} = $self->{line};
7580          $self->{column_prev} = $self->{column};
7581          $self->{column}++;
7582          $self->{nc}
7583              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7584        } else {
7585          $self->{set_nc}->($self);
7586        }
7587      
7588            redo A;
7589          } elsif ($self->{nc} == 0x0022) { # "
7590            $self->{ca}->{value} = '';
7591            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7592            
7593        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7594          $self->{line_prev} = $self->{line};
7595          $self->{column_prev} = $self->{column};
7596          $self->{column}++;
7597          $self->{nc}
7598              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7599        } else {
7600          $self->{set_nc}->($self);
7601        }
7602      
7603            redo A;
7604          } elsif ($self->{nc} == 0x0027) { # '
7605            $self->{ca}->{value} = '';
7606            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7607            
7608        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7609          $self->{line_prev} = $self->{line};
7610          $self->{column_prev} = $self->{column};
7611          $self->{column}++;
7612          $self->{nc}
7613              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7614        } else {
7615          $self->{set_nc}->($self);
7616        }
7617      
7618            redo A;
7619          } elsif ($self->{nc} == 0x003E) { # >
7620            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7621            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7622            
7623        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7624          $self->{line_prev} = $self->{line};
7625          $self->{column_prev} = $self->{column};
7626          $self->{column}++;
7627          $self->{nc}
7628              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7629        } else {
7630          $self->{set_nc}->($self);
7631        }
7632      
7633            return  ($self->{ct}); # ATTLIST
7634            redo A;
7635          } elsif ($self->{nc} == -1) {
7636            ## XML5: No parse error.
7637            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7638            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7639            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7640            
7641        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7642          $self->{line_prev} = $self->{line};
7643          $self->{column_prev} = $self->{column};
7644          $self->{column}++;
7645          $self->{nc}
7646              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7647        } else {
7648          $self->{set_nc}->($self);
7649        }
7650      
7651            return  ($self->{ct});
7652            redo A;
7653          } else {
7654            ## XML5: Not defined yet.
7655            if ($self->{ca}->{default} eq 'FIXED') {
7656              $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
7657            } else {
7658              push @{$self->{ct}->{attrdefs}}, $self->{ca};
7659              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7660            }
7661            ## Reconsume.
7662            redo A;
7663          }
7664        } elsif ($self->{state} == AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE) {
7665          if ($is_space->{$self->{nc}} or
7666              $self->{nc} == -1 or
7667              $self->{nc} == 0x003E) { # >
7668            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7669            ## Reconsume.
7670            redo A;
7671          } else {
7672            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before attr name'); ## TODO: type
7673            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7674            ## Reconsume.
7675            redo A;
7676          }
7677        } elsif ($self->{state} == NDATA_STATE) {
7678          ## ASCII case-insensitive
7679          if ($self->{nc} == [
7680                undef,
7681                0x0044, # D
7682                0x0041, # A
7683                0x0054, # T
7684              ]->[length $self->{kwd}] or
7685              $self->{nc} == [
7686                undef,
7687                0x0064, # d
7688                0x0061, # a
7689                0x0074, # t
7690              ]->[length $self->{kwd}]) {
7691            
7692            ## Stay in the state.
7693            $self->{kwd} .= chr $self->{nc};
7694            
7695        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7696          $self->{line_prev} = $self->{line};
7697          $self->{column_prev} = $self->{column};
7698          $self->{column}++;
7699          $self->{nc}
7700              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7701        } else {
7702          $self->{set_nc}->($self);
7703        }
7704      
7705            redo A;
7706          } elsif ((length $self->{kwd}) == 4 and
7707                   ($self->{nc} == 0x0041 or # A
7708                    $self->{nc} == 0x0061)) { # a
7709            if ($self->{kwd} ne 'NDAT' or $self->{nc} == 0x0061) { # a
7710              
7711              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
7712                              text => 'NDATA',
7713                              line => $self->{line_prev},
7714                              column => $self->{column_prev} - 4);
7715            } else {
7716              
7717            }
7718            $self->{state} = AFTER_NDATA_STATE;
7719            
7720        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7721          $self->{line_prev} = $self->{line};
7722          $self->{column_prev} = $self->{column};
7723          $self->{column}++;
7724          $self->{nc}
7725              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7726        } else {
7727          $self->{set_nc}->($self);
7728        }
7729      
7730            redo A;
7731          } else {
7732            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after literal', ## TODO: type
7733                            line => $self->{line_prev},
7734                            column => $self->{column_prev} + 1
7735                                - length $self->{kwd});
7736            
7737            $self->{state} = BOGUS_MD_STATE;
7738            ## Reconsume.
7739            redo A;
7740          }
7741        } elsif ($self->{state} == AFTER_NDATA_STATE) {
7742          if ($is_space->{$self->{nc}}) {
7743            $self->{state} = BEFORE_NOTATION_NAME_STATE;
7744            
7745        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7746          $self->{line_prev} = $self->{line};
7747          $self->{column_prev} = $self->{column};
7748          $self->{column}++;
7749          $self->{nc}
7750              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7751        } else {
7752          $self->{set_nc}->($self);
7753        }
7754      
7755            redo A;
7756          } elsif ($self->{nc} == 0x003E) { # >
7757            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no notation name'); ## TODO: type
7758            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7759            
7760        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7761          $self->{line_prev} = $self->{line};
7762          $self->{column_prev} = $self->{column};
7763          $self->{column}++;
7764          $self->{nc}
7765              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7766        } else {
7767          $self->{set_nc}->($self);
7768        }
7769      
7770            return  ($self->{ct}); # ENTITY
7771            redo A;
7772          } elsif ($self->{nc} == -1) {
7773            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7774            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7775            
7776        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7777          $self->{line_prev} = $self->{line};
7778          $self->{column_prev} = $self->{column};
7779          $self->{column}++;
7780          $self->{nc}
7781              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7782        } else {
7783          $self->{set_nc}->($self);
7784        }
7785      
7786            return  ($self->{ct}); # ENTITY
7787            redo A;
7788          } else {
7789            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after literal', ## TODO: type
7790                            line => $self->{line_prev},
7791                            column => $self->{column_prev} + 1
7792                                - length $self->{kwd});
7793            $self->{state} = BOGUS_MD_STATE;
7794            ## Reconsume.
7795            redo A;
7796          }
7797        } elsif ($self->{state} == BEFORE_NOTATION_NAME_STATE) {
7798          if ($is_space->{$self->{nc}}) {
7799            ## Stay in the state.
7800            
7801        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7802          $self->{line_prev} = $self->{line};
7803          $self->{column_prev} = $self->{column};
7804          $self->{column}++;
7805          $self->{nc}
7806              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7807        } else {
7808          $self->{set_nc}->($self);
7809        }
7810      
7811            redo A;
7812          } elsif ($self->{nc} == 0x003E) { # >
7813            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no notation name'); ## TODO: type
7814            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7815            
7816        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7817          $self->{line_prev} = $self->{line};
7818          $self->{column_prev} = $self->{column};
7819          $self->{column}++;
7820          $self->{nc}
7821              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7822        } else {
7823          $self->{set_nc}->($self);
7824        }
7825      
7826            return  ($self->{ct}); # ENTITY
7827            redo A;
7828          } elsif ($self->{nc} == -1) {
7829            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7830            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7831            
7832        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7833          $self->{line_prev} = $self->{line};
7834          $self->{column_prev} = $self->{column};
7835          $self->{column}++;
7836          $self->{nc}
7837              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7838        } else {
7839          $self->{set_nc}->($self);
7840        }
7841      
7842            return  ($self->{ct}); # ENTITY
7843            redo A;
7844          } else {
7845            $self->{ct}->{notation} = chr $self->{nc}; # ENTITY
7846            $self->{state} = NOTATION_NAME_STATE;
7847            
7848        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7849          $self->{line_prev} = $self->{line};
7850          $self->{column_prev} = $self->{column};
7851          $self->{column}++;
7852          $self->{nc}
7853              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7854        } else {
7855          $self->{set_nc}->($self);
7856        }
7857      
7858            redo A;
7859          }
7860        } elsif ($self->{state} == NOTATION_NAME_STATE) {
7861          if ($is_space->{$self->{nc}}) {
7862            $self->{state} = AFTER_MD_DEF_STATE;
7863            
7864        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7865          $self->{line_prev} = $self->{line};
7866          $self->{column_prev} = $self->{column};
7867          $self->{column}++;
7868          $self->{nc}
7869              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7870        } else {
7871          $self->{set_nc}->($self);
7872        }
7873      
7874            redo A;
7875          } elsif ($self->{nc} == 0x003E) { # >
7876            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7877            
7878        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7879          $self->{line_prev} = $self->{line};
7880          $self->{column_prev} = $self->{column};
7881          $self->{column}++;
7882          $self->{nc}
7883              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7884        } else {
7885          $self->{set_nc}->($self);
7886        }
7887      
7888            return  ($self->{ct}); # ENTITY
7889            redo A;
7890          } elsif ($self->{nc} == -1) {
7891            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7892            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7893            
7894        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7895          $self->{line_prev} = $self->{line};
7896          $self->{column_prev} = $self->{column};
7897          $self->{column}++;
7898          $self->{nc}
7899              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7900        } else {
7901          $self->{set_nc}->($self);
7902        }
7903      
7904            return  ($self->{ct}); # ENTITY
7905            redo A;
7906          } else {
7907            $self->{ct}->{notation} .= chr $self->{nc}; # ENTITY
7908            ## Stay in the state.
7909            
7910        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7911          $self->{line_prev} = $self->{line};
7912          $self->{column_prev} = $self->{column};
7913          $self->{column}++;
7914          $self->{nc}
7915              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7916        } else {
7917          $self->{set_nc}->($self);
7918        }
7919      
7920            redo A;
7921          }
7922        } elsif ($self->{state} == DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE) {
7923          if ($self->{nc} == 0x0022) { # "
7924            $self->{state} = AFTER_MD_DEF_STATE;
7925            
7926        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7927          $self->{line_prev} = $self->{line};
7928          $self->{column_prev} = $self->{column};
7929          $self->{column}++;
7930          $self->{nc}
7931              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7932        } else {
7933          $self->{set_nc}->($self);
7934        }
7935      
7936            redo A;
7937          } elsif ($self->{nc} == 0x0026) { # &
7938            $self->{prev_state} = $self->{state};
7939            $self->{state} = ENTITY_VALUE_ENTITY_STATE;
7940            $self->{entity_add} = 0x0022; # "
7941            
7942        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7943          $self->{line_prev} = $self->{line};
7944          $self->{column_prev} = $self->{column};
7945          $self->{column}++;
7946          $self->{nc}
7947              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7948        } else {
7949          $self->{set_nc}->($self);
7950        }
7951      
7952            redo A;
7953    ## TODO: %
7954          } elsif ($self->{nc} == -1) {
7955            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed entity value'); ## TODO: type
7956            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7957            ## Reconsume.
7958            return  ($self->{ct}); # ENTITY
7959            redo A;
7960          } else {
7961            $self->{ct}->{value} .= chr $self->{nc}; # ENTITY
7962            
7963        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7964          $self->{line_prev} = $self->{line};
7965          $self->{column_prev} = $self->{column};
7966          $self->{column}++;
7967          $self->{nc}
7968              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7969        } else {
7970          $self->{set_nc}->($self);
7971        }
7972      
7973            redo A;
7974          }
7975        } elsif ($self->{state} == DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE) {
7976          if ($self->{nc} == 0x0027) { # '
7977            $self->{state} = AFTER_MD_DEF_STATE;
7978            
7979        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7980          $self->{line_prev} = $self->{line};
7981          $self->{column_prev} = $self->{column};
7982          $self->{column}++;
7983          $self->{nc}
7984              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7985        } else {
7986          $self->{set_nc}->($self);
7987        }
7988      
7989            redo A;
7990          } elsif ($self->{nc} == 0x0026) { # &
7991            $self->{prev_state} = $self->{state};
7992            $self->{state} = ENTITY_VALUE_ENTITY_STATE;
7993            $self->{entity_add} = 0x0027; # '
7994            
7995        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7996          $self->{line_prev} = $self->{line};
7997          $self->{column_prev} = $self->{column};
7998          $self->{column}++;
7999          $self->{nc}
8000              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8001        } else {
8002          $self->{set_nc}->($self);
8003        }
8004      
8005            redo A;
8006    ## TODO: %
8007          } elsif ($self->{nc} == -1) {
8008            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed entity value'); ## TODO: type
8009            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8010            ## Reconsume.
8011            return  ($self->{ct}); # ENTITY
8012            redo A;
8013          } else {
8014            $self->{ct}->{value} .= chr $self->{nc}; # ENTITY
8015            
8016        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8017          $self->{line_prev} = $self->{line};
8018          $self->{column_prev} = $self->{column};
8019          $self->{column}++;
8020          $self->{nc}
8021              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8022        } else {
8023          $self->{set_nc}->($self);
8024        }
8025      
8026            redo A;
8027          }
8028        } elsif ($self->{state} == ENTITY_VALUE_ENTITY_STATE) {
8029          if ($is_space->{$self->{nc}} or
8030              {
8031                0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
8032                $self->{entity_add} => 1,
8033              }->{$self->{nc}}) {
8034            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero',
8035                            line => $self->{line_prev},
8036                            column => $self->{column_prev}
8037                                + ($self->{nc} == -1 ? 1 : 0));
8038            ## Don't consume
8039            ## Return nothing.
8040            #
8041          } elsif ($self->{nc} == 0x0023) { # #
8042            $self->{ca} = $self->{ct};
8043            $self->{state} = ENTITY_HASH_STATE;
8044            $self->{kwd} = '#';
8045            
8046        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8047          $self->{line_prev} = $self->{line};
8048          $self->{column_prev} = $self->{column};
8049          $self->{column}++;
8050          $self->{nc}
8051              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8052        } else {
8053          $self->{set_nc}->($self);
8054        }
8055      
8056            redo A;
8057          } else {
8058            #
8059          }
8060    
8061          $self->{ct}->{value} .= '&';
8062          $self->{state} = $self->{prev_state};
8063          ## Reconsume.
8064          redo A;
8065        } elsif ($self->{state} == AFTER_ELEMENT_NAME_STATE) {
8066          if ($is_space->{$self->{nc}}) {
8067            $self->{state} = BEFORE_ELEMENT_CONTENT_STATE;
8068            
8069        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8070          $self->{line_prev} = $self->{line};
8071          $self->{column_prev} = $self->{column};
8072          $self->{column}++;
8073          $self->{nc}
8074              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8075        } else {
8076          $self->{set_nc}->($self);
8077        }
8078      
8079            redo A;
8080          } elsif ($self->{nc} == 0x0028) { # (
8081            $self->{state} = AFTER_CM_GROUP_OPEN_STATE;
8082            $self->{ct}->{content} = ['('];
8083            $self->{group_depth} = 1;
8084            
8085        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8086          $self->{line_prev} = $self->{line};
8087          $self->{column_prev} = $self->{column};
8088          $self->{column}++;
8089          $self->{nc}
8090              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8091        } else {
8092          $self->{set_nc}->($self);
8093        }
8094      
8095            redo A;
8096          } elsif ($self->{nc} == 0x003E) { # >
8097            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md def'); ## TODO: type
8098            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8099            
8100        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8101          $self->{line_prev} = $self->{line};
8102          $self->{column_prev} = $self->{column};
8103          $self->{column}++;
8104          $self->{nc}
8105              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8106        } else {
8107          $self->{set_nc}->($self);
8108        }
8109      
8110            return  ($self->{ct}); # ELEMENT
8111            redo A;
8112          } elsif ($self->{nc} == -1) {
8113            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8114            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8115            
8116        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8117          $self->{line_prev} = $self->{line};
8118          $self->{column_prev} = $self->{column};
8119          $self->{column}++;
8120          $self->{nc}
8121              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8122        } else {
8123          $self->{set_nc}->($self);
8124        }
8125      
8126            return  ($self->{ct}); # ELEMENT
8127            redo A;
8128          } else {
8129            $self->{ct}->{content} = [chr $self->{nc}];
8130            $self->{state} = CONTENT_KEYWORD_STATE;
8131            
8132        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8133          $self->{line_prev} = $self->{line};
8134          $self->{column_prev} = $self->{column};
8135          $self->{column}++;
8136          $self->{nc}
8137              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8138        } else {
8139          $self->{set_nc}->($self);
8140        }
8141      
8142            redo A;
8143          }
8144        } elsif ($self->{state} == CONTENT_KEYWORD_STATE) {
8145          if ($is_space->{$self->{nc}}) {
8146            $self->{state} = AFTER_MD_DEF_STATE;
8147            
8148        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8149          $self->{line_prev} = $self->{line};
8150          $self->{column_prev} = $self->{column};
8151          $self->{column}++;
8152          $self->{nc}
8153              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8154        } else {
8155          $self->{set_nc}->($self);
8156        }
8157      
8158            redo A;
8159          } elsif ($self->{nc} == 0x003E) { # >
8160            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8161            
8162        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8163          $self->{line_prev} = $self->{line};
8164          $self->{column_prev} = $self->{column};
8165          $self->{column}++;
8166          $self->{nc}
8167              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8168        } else {
8169          $self->{set_nc}->($self);
8170        }
8171      
8172            return  ($self->{ct}); # ELEMENT
8173            redo A;
8174          } elsif ($self->{nc} == -1) {
8175            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8176            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8177            
8178        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8179          $self->{line_prev} = $self->{line};
8180          $self->{column_prev} = $self->{column};
8181          $self->{column}++;
8182          $self->{nc}
8183              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8184        } else {
8185          $self->{set_nc}->($self);
8186        }
8187      
8188            return  ($self->{ct}); # ELEMENT
8189            redo A;
8190          } else {
8191            $self->{ct}->{content}->[-1] .= chr $self->{nc}; # ELEMENT
8192            ## Stay in the state.
8193            
8194        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8195          $self->{line_prev} = $self->{line};
8196          $self->{column_prev} = $self->{column};
8197          $self->{column}++;
8198          $self->{nc}
8199              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8200        } else {
8201          $self->{set_nc}->($self);
8202        }
8203      
8204            redo A;
8205          }
8206        } elsif ($self->{state} == AFTER_CM_GROUP_OPEN_STATE) {
8207          if ($is_space->{$self->{nc}}) {
8208            ## Stay in the state.
8209            
8210        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8211          $self->{line_prev} = $self->{line};
8212          $self->{column_prev} = $self->{column};
8213          $self->{column}++;
8214          $self->{nc}
8215              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8216        } else {
8217          $self->{set_nc}->($self);
8218        }
8219      
8220            redo A;
8221          } elsif ($self->{nc} == 0x0028) { # (
8222            $self->{group_depth}++;
8223            push @{$self->{ct}->{content}}, chr $self->{nc};
8224            ## Stay in the state.
8225            
8226        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8227          $self->{line_prev} = $self->{line};
8228          $self->{column_prev} = $self->{column};
8229          $self->{column}++;
8230          $self->{nc}
8231              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8232        } else {
8233          $self->{set_nc}->($self);
8234        }
8235      
8236            redo A;
8237          } elsif ($self->{nc} == 0x007C or # |
8238                   $self->{nc} == 0x002C) { # ,
8239            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty element name'); ## TODO: type
8240            ## Stay in the state.
8241            
8242        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8243          $self->{line_prev} = $self->{line};
8244          $self->{column_prev} = $self->{column};
8245          $self->{column}++;
8246          $self->{nc}
8247              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8248        } else {
8249          $self->{set_nc}->($self);
8250        }
8251      
8252            redo A;
8253          } elsif ($self->{nc} == 0x0029) { # )
8254            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty element name'); ## TODO: type
8255            push @{$self->{ct}->{content}}, chr $self->{nc};
8256            $self->{group_depth}--;
8257            $self->{state} = AFTER_CM_GROUP_CLOSE_STATE;
8258            
8259        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8260          $self->{line_prev} = $self->{line};
8261          $self->{column_prev} = $self->{column};
8262          $self->{column}++;
8263          $self->{nc}
8264              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8265        } else {
8266          $self->{set_nc}->($self);
8267        }
8268      
8269            redo A;
8270          } elsif ($self->{nc} == 0x003E) { # >
8271            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8272            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8273            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8274            
8275        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8276          $self->{line_prev} = $self->{line};
8277          $self->{column_prev} = $self->{column};
8278          $self->{column}++;
8279          $self->{nc}
8280              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8281        } else {
8282          $self->{set_nc}->($self);
8283        }
8284      
8285            return  ($self->{ct}); # ELEMENT
8286            redo A;
8287          } elsif ($self->{nc} == -1) {
8288            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8289            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8290            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8291            
8292        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8293          $self->{line_prev} = $self->{line};
8294          $self->{column_prev} = $self->{column};
8295          $self->{column}++;
8296          $self->{nc}
8297              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8298        } else {
8299          $self->{set_nc}->($self);
8300        }
8301      
8302            return  ($self->{ct}); # ELEMENT
8303            redo A;
8304          } else {
8305            push @{$self->{ct}->{content}}, chr $self->{nc};
8306            $self->{state} = CM_ELEMENT_NAME_STATE;
8307            
8308        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8309          $self->{line_prev} = $self->{line};
8310          $self->{column_prev} = $self->{column};
8311          $self->{column}++;
8312          $self->{nc}
8313              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8314        } else {
8315          $self->{set_nc}->($self);
8316        }
8317      
8318            redo A;
8319          }
8320        } elsif ($self->{state} == CM_ELEMENT_NAME_STATE) {
8321          if ($is_space->{$self->{nc}}) {
8322            $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8323            
8324        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8325          $self->{line_prev} = $self->{line};
8326          $self->{column_prev} = $self->{column};
8327          $self->{column}++;
8328          $self->{nc}
8329              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8330        } else {
8331          $self->{set_nc}->($self);
8332        }
8333      
8334            redo A;
8335          } elsif ($self->{nc} == 0x002A or # *
8336                   $self->{nc} == 0x002B or # +
8337                   $self->{nc} == 0x003F) { # ?
8338            push @{$self->{ct}->{content}}, chr $self->{nc};
8339            $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8340            
8341        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8342          $self->{line_prev} = $self->{line};
8343          $self->{column_prev} = $self->{column};
8344          $self->{column}++;
8345          $self->{nc}
8346              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8347        } else {
8348          $self->{set_nc}->($self);
8349        }
8350      
8351            redo A;
8352          } elsif ($self->{nc} == 0x007C or # |
8353                   $self->{nc} == 0x002C) { # ,
8354            push @{$self->{ct}->{content}}, $self->{nc} == 0x007C ? ' | ' : ', ';
8355            $self->{state} = AFTER_CM_GROUP_OPEN_STATE;
8356            
8357        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8358          $self->{line_prev} = $self->{line};
8359          $self->{column_prev} = $self->{column};
8360          $self->{column}++;
8361          $self->{nc}
8362              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8363        } else {
8364          $self->{set_nc}->($self);
8365        }
8366      
8367            redo A;
8368          } elsif ($self->{nc} == 0x0029) { # )
8369            $self->{group_depth}--;
8370            push @{$self->{ct}->{content}}, chr $self->{nc};
8371            $self->{state} = AFTER_CM_GROUP_CLOSE_STATE;
8372            
8373        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8374          $self->{line_prev} = $self->{line};
8375          $self->{column_prev} = $self->{column};
8376          $self->{column}++;
8377          $self->{nc}
8378              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8379        } else {
8380          $self->{set_nc}->($self);
8381        }
8382      
8383            redo A;
8384          } elsif ($self->{nc} == 0x003E) { # >
8385            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8386            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8387            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8388            
8389        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8390          $self->{line_prev} = $self->{line};
8391          $self->{column_prev} = $self->{column};
8392          $self->{column}++;
8393          $self->{nc}
8394              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8395        } else {
8396          $self->{set_nc}->($self);
8397        }
8398      
8399            return  ($self->{ct}); # ELEMENT
8400            redo A;
8401          } elsif ($self->{nc} == -1) {
8402            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8403            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8404            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8405            
8406        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8407          $self->{line_prev} = $self->{line};
8408          $self->{column_prev} = $self->{column};
8409          $self->{column}++;
8410          $self->{nc}
8411              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8412        } else {
8413          $self->{set_nc}->($self);
8414        }
8415      
8416            return  ($self->{ct}); # ELEMENT
8417            redo A;
8418          } else {
8419            $self->{ct}->{content}->[-1] .= chr $self->{nc};
8420            ## Stay in the state.
8421            
8422        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8423          $self->{line_prev} = $self->{line};
8424          $self->{column_prev} = $self->{column};
8425          $self->{column}++;
8426          $self->{nc}
8427              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8428        } else {
8429          $self->{set_nc}->($self);
8430        }
8431      
8432            redo A;
8433          }
8434        } elsif ($self->{state} == AFTER_CM_ELEMENT_NAME_STATE) {
8435          if ($is_space->{$self->{nc}}) {
8436            ## Stay in the state.
8437            
8438        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8439          $self->{line_prev} = $self->{line};
8440          $self->{column_prev} = $self->{column};
8441          $self->{column}++;
8442          $self->{nc}
8443              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8444        } else {
8445          $self->{set_nc}->($self);
8446        }
8447      
8448            redo A;
8449          } elsif ($self->{nc} == 0x007C or # |
8450                   $self->{nc} == 0x002C) { # ,
8451            push @{$self->{ct}->{content}}, $self->{nc} == 0x007C ? ' | ' : ', ';
8452            $self->{state} = AFTER_CM_GROUP_OPEN_STATE;
8453            
8454        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8455          $self->{line_prev} = $self->{line};
8456          $self->{column_prev} = $self->{column};
8457          $self->{column}++;
8458          $self->{nc}
8459              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8460        } else {
8461          $self->{set_nc}->($self);
8462        }
8463      
8464            redo A;
8465          } elsif ($self->{nc} == 0x0029) { # )
8466            $self->{group_depth}--;
8467            push @{$self->{ct}->{content}}, chr $self->{nc};
8468            $self->{state} = AFTER_CM_GROUP_CLOSE_STATE;
8469            
8470        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8471          $self->{line_prev} = $self->{line};
8472          $self->{column_prev} = $self->{column};
8473          $self->{column}++;
8474          $self->{nc}
8475              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8476        } else {
8477          $self->{set_nc}->($self);
8478        }
8479      
8480            redo A;
8481          } elsif ($self->{nc} == 0x003E) { # >
8482            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8483            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8484            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8485            
8486        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8487          $self->{line_prev} = $self->{line};
8488          $self->{column_prev} = $self->{column};
8489          $self->{column}++;
8490          $self->{nc}
8491              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8492        } else {
8493          $self->{set_nc}->($self);
8494        }
8495      
8496            return  ($self->{ct}); # ELEMENT
8497            redo A;
8498          } elsif ($self->{nc} == -1) {
8499            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8500            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8501            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8502            
8503        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8504          $self->{line_prev} = $self->{line};
8505          $self->{column_prev} = $self->{column};
8506          $self->{column}++;
8507          $self->{nc}
8508              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8509        } else {
8510          $self->{set_nc}->($self);
8511        }
8512      
8513            return  ($self->{ct}); # ELEMENT
8514            redo A;
8515          } else {
8516            $self->{parse_error}->(level => $self->{level}->{must}, type => 'after element name'); ## TODO: type
8517            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8518            $self->{state} = BOGUS_MD_STATE;
8519            
8520        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8521          $self->{line_prev} = $self->{line};
8522          $self->{column_prev} = $self->{column};
8523          $self->{column}++;
8524          $self->{nc}
8525              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8526        } else {
8527          $self->{set_nc}->($self);
8528        }
8529      
8530            redo A;
8531          }
8532        } elsif ($self->{state} == AFTER_CM_GROUP_CLOSE_STATE) {
8533          if ($is_space->{$self->{nc}}) {
8534            if ($self->{group_depth}) {
8535              $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8536            } else {
8537              $self->{state} = AFTER_MD_DEF_STATE;
8538            }
8539            
8540        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8541          $self->{line_prev} = $self->{line};
8542          $self->{column_prev} = $self->{column};
8543          $self->{column}++;
8544          $self->{nc}
8545              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8546        } else {
8547          $self->{set_nc}->($self);
8548        }
8549      
8550            redo A;
8551          } elsif ($self->{nc} == 0x002A or # *
8552                   $self->{nc} == 0x002B or # +
8553                   $self->{nc} == 0x003F) { # ?
8554            push @{$self->{ct}->{content}}, chr $self->{nc};
8555            if ($self->{group_depth}) {
8556              $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8557            } else {
8558              $self->{state} = AFTER_MD_DEF_STATE;
8559            }
8560            
8561        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8562          $self->{line_prev} = $self->{line};
8563          $self->{column_prev} = $self->{column};
8564          $self->{column}++;
8565          $self->{nc}
8566              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8567        } else {
8568          $self->{set_nc}->($self);
8569        }
8570      
8571            redo A;
8572          } elsif ($self->{nc} == 0x0029) { # )
8573            if ($self->{group_depth}) {
8574              $self->{group_depth}--;
8575              push @{$self->{ct}->{content}}, chr $self->{nc};
8576              ## Stay in the state.
8577              
8578        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8579          $self->{line_prev} = $self->{line};
8580          $self->{column_prev} = $self->{column};
8581          $self->{column}++;
8582          $self->{nc}
8583              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8584        } else {
8585          $self->{set_nc}->($self);
8586        }
8587      
8588              redo A;
8589            } else {
8590              $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after md def'); ## TODO: type
8591              $self->{state} = BOGUS_MD_STATE;
8592              ## Reconsume.
8593              redo A;
8594            }
8595          } elsif ($self->{nc} == 0x003E) { # >
8596            if ($self->{group_depth}) {
8597              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8598              push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8599            }
8600            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8601            
8602        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8603          $self->{line_prev} = $self->{line};
8604          $self->{column_prev} = $self->{column};
8605          $self->{column}++;
8606          $self->{nc}
8607              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8608        } else {
8609          $self->{set_nc}->($self);
8610        }
8611      
8612            return  ($self->{ct}); # ELEMENT
8613            redo A;
8614          } elsif ($self->{nc} == -1) {
8615            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8616            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8617            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8618            
8619        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8620          $self->{line_prev} = $self->{line};
8621          $self->{column_prev} = $self->{column};
8622          $self->{column}++;
8623          $self->{nc}
8624              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8625        } else {
8626          $self->{set_nc}->($self);
8627        }
8628      
8629            return  ($self->{ct}); # ELEMENT
8630            redo A;
8631          } else {
8632            if ($self->{group_depth}) {
8633              $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8634            } else {
8635              $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after md def'); ## TODO: type
8636              $self->{state} = BOGUS_MD_STATE;
8637            }
8638          ## Reconsume.          ## Reconsume.
8639          redo A;          redo A;
8640        }        }
8641        } elsif ($self->{state} == AFTER_MD_DEF_STATE) {
8642          if ($is_space->{$self->{nc}}) {
8643            ## Stay in the state.
8644            
8645        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8646          $self->{line_prev} = $self->{line};
8647          $self->{column_prev} = $self->{column};
8648          $self->{column}++;
8649          $self->{nc}
8650              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8651        } else {
8652          $self->{set_nc}->($self);
8653        }
8654      
8655            redo A;
8656          } elsif ($self->{nc} == 0x003E) { # >
8657            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8658            
8659        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8660          $self->{line_prev} = $self->{line};
8661          $self->{column_prev} = $self->{column};
8662          $self->{column}++;
8663          $self->{nc}
8664              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8665        } else {
8666          $self->{set_nc}->($self);
8667        }
8668      
8669            return  ($self->{ct}); # ENTITY/ELEMENT
8670            redo A;
8671          } elsif ($self->{nc} == -1) {
8672            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8673            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8674            
8675        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8676          $self->{line_prev} = $self->{line};
8677          $self->{column_prev} = $self->{column};
8678          $self->{column}++;
8679          $self->{nc}
8680              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8681        } else {
8682          $self->{set_nc}->($self);
8683        }
8684      
8685            return  ($self->{ct}); # ENTITY/ELEMENT
8686            redo A;
8687          } else {
8688            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after md def'); ## TODO: type
8689            $self->{state} = BOGUS_MD_STATE;
8690            ## Reconsume.
8691            redo A;
8692          }
8693        } elsif ($self->{state} == BOGUS_MD_STATE) {
8694          if ($self->{nc} == 0x003E) { # >
8695            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8696            
8697        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8698          $self->{line_prev} = $self->{line};
8699          $self->{column_prev} = $self->{column};
8700          $self->{column}++;
8701          $self->{nc}
8702              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8703        } else {
8704          $self->{set_nc}->($self);
8705        }
8706      
8707            return  ($self->{ct}); # ATTLIST/ENTITY/NOTATION
8708            redo A;
8709          } elsif ($self->{nc} == -1) {
8710            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8711            ## Reconsume.
8712            return  ($self->{ct}); # ATTLIST/ENTITY/NOTATION
8713            redo A;
8714          } else {
8715            ## Stay in the state.
8716            
8717        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8718          $self->{line_prev} = $self->{line};
8719          $self->{column_prev} = $self->{column};
8720          $self->{column}++;
8721          $self->{nc}
8722              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8723        } else {
8724          $self->{set_nc}->($self);
8725        }
8726      
8727            redo A;
8728          }
8729      } else {      } else {
8730        die "$0: $self->{state}: Unknown state";        die "$0: $self->{state}: Unknown state";
8731      }      }
# Line 4152  sub _get_next_token ($) { Line 8736  sub _get_next_token ($) {
8736    
8737  1;  1;
8738  ## $Date$  ## $Date$
8739                                    

Legend:
Removed from v.1.4  
changed lines
  Added in v.1.31

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24