/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm

Parent Directory | Revision Log | View Patch

revision 1.5 by wakaba, Tue Oct 14 14:38:59 2008 UTC revision 1.23 by wakaba, Sun Oct 19 13:43:55 2008 UTC
# Line 15  BEGIN { Line 15  BEGIN {
15      CHARACTER_TOKEN      CHARACTER_TOKEN
16      PI_TOKEN      PI_TOKEN
17      ABORT_TOKEN      ABORT_TOKEN
18        END_OF_DOCTYPE_TOKEN
19        ATTLIST_TOKEN
20        ELEMENT_TOKEN
21        GENERAL_ENTITY_TOKEN
22        PARAMETER_ENTITY_TOKEN
23        NOTATION_TOKEN
24    );    );
25        
26    our %EXPORT_TAGS = (    our %EXPORT_TAGS = (
# Line 27  BEGIN { Line 33  BEGIN {
33        CHARACTER_TOKEN        CHARACTER_TOKEN
34        PI_TOKEN        PI_TOKEN
35        ABORT_TOKEN        ABORT_TOKEN
36          END_OF_DOCTYPE_TOKEN
37          ATTLIST_TOKEN
38          ELEMENT_TOKEN
39          GENERAL_ENTITY_TOKEN
40          PARAMETER_ENTITY_TOKEN
41          NOTATION_TOKEN
42      )],      )],
43    );    );
44  }  }
45    
46    ## NOTE: Differences from the XML5 draft are marked as "XML5:".
47    
48  ## Token types  ## Token types
49    
50  sub DOCTYPE_TOKEN () { 1 }  sub DOCTYPE_TOKEN () { 1 } ## XML5: No DOCTYPE token.
51  sub COMMENT_TOKEN () { 2 }  sub COMMENT_TOKEN () { 2 }
52  sub START_TAG_TOKEN () { 3 }  sub START_TAG_TOKEN () { 3 }
53  sub END_TAG_TOKEN () { 4 }  sub END_TAG_TOKEN () { 4 }
54  sub END_OF_FILE_TOKEN () { 5 }  sub END_OF_FILE_TOKEN () { 5 }
55  sub CHARACTER_TOKEN () { 6 }  sub CHARACTER_TOKEN () { 6 }
56  sub PI_TOKEN () { 7 } # XML5  sub PI_TOKEN () { 7 } ## NOTE: XML only.
57  sub ABORT_TOKEN () { 8 } # Not a token actually  sub ABORT_TOKEN () { 8 } ## NOTE: For internal processing.
58    sub END_OF_DOCTYPE_TOKEN () { 9 } ## NOTE: XML only.
59    sub ATTLIST_TOKEN () { 10 } ## NOTE: XML only.
60    sub ELEMENT_TOKEN () { 11 } ## NOTE: XML only.
61    sub GENERAL_ENTITY_TOKEN () { 12 } ## NOTE: XML only.
62    sub PARAMETER_ENTITY_TOKEN () { 13 } ## NOTE: XML only.
63    sub NOTATION_TOKEN () { 14 } ## NOTE: XML only.
64    
65    ## XML5: XML5 has "empty tag token".  In this implementation, it is
66    ## represented as a start tag token with $self->{self_closing} flag
67    ## set to true.
68    
69    ## XML5: XML5 has "short end tag token".  In this implementation, it
70    ## is represented as an end tag token with $token->{tag_name} set
71    ## to an empty string.
72    
73  package Whatpm::HTML;  package Whatpm::HTML;
74    
# Line 114  sub HEXREF_HEX_STATE () { 48 } Line 142  sub HEXREF_HEX_STATE () { 48 }
142  sub ENTITY_NAME_STATE () { 49 }  sub ENTITY_NAME_STATE () { 49 }
143  sub PCDATA_STATE () { 50 } # "data state" in the spec  sub PCDATA_STATE () { 50 } # "data state" in the spec
144    
145    ## XML-only states
146    sub PI_STATE () { 51 }
147    sub PI_TARGET_STATE () { 52 }
148    sub PI_TARGET_AFTER_STATE () { 53 }
149    sub PI_DATA_STATE () { 54 }
150    sub PI_AFTER_STATE () { 55 }
151    sub PI_DATA_AFTER_STATE () { 56 }
152    sub DOCTYPE_INTERNAL_SUBSET_STATE () { 57 }
153    sub DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 58 }
154    sub BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 59 }
155    sub DOCTYPE_TAG_STATE () { 60 }
156    sub DOCTYPE_MARKUP_DECLARATION_OPEN_STATE () { 61 }
157    sub MD_ATTLIST_STATE () { 62 }
158    sub MD_E_STATE () { 63 }
159    sub MD_ELEMENT_STATE () { 64 }
160    sub MD_ENTITY_STATE () { 65 }
161    sub MD_NOTATION_STATE () { 66 }
162    sub DOCTYPE_MD_STATE () { 67 }
163    sub BEFORE_MD_NAME_STATE () { 68 }
164    sub MD_NAME_STATE () { 69 }
165    sub DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE () { 70 }
166    sub DOCTYPE_ATTLIST_NAME_AFTER_STATE () { 71 }
167    sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE () { 72 }
168    sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE () { 73 }
169    sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE () { 74 }
170    sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE () { 75 }
171    sub BEFORE_ALLOWED_TOKEN_STATE () { 76 }
172    sub ALLOWED_TOKEN_STATE () { 77 }
173    sub AFTER_ALLOWED_TOKEN_STATE () { 78 }
174    sub AFTER_ALLOWED_TOKENS_STATE () { 79 }
175    sub BEFORE_ATTR_DEFAULT_STATE () { 80 }
176    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE () { 81 }
177    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE () { 82 }
178    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE () { 83 }
179    sub AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE () { 84 }
180    sub BEFORE_NDATA_STATE () { 85 }
181    sub NDATA_STATE () { 86 }
182    sub AFTER_NDATA_STATE () { 87 }
183    sub BEFORE_NOTATION_NAME_STATE () { 88 }
184    sub NOTATION_NAME_STATE () { 89 }
185    sub DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE () { 90 }
186    sub DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE () { 91 }
187    sub ENTITY_VALUE_ENTITY_STATE () { 92 }
188    sub AFTER_ELEMENT_NAME_STATE () { 93 }
189    sub BEFORE_ELEMENT_CONTENT_STATE () { 94 }
190    sub CONTENT_KEYWORD_STATE () { 95 }
191    sub AFTER_CM_GROUP_OPEN_STATE () { 96 }
192    sub CM_ELEMENT_NAME_STATE () { 97 }
193    sub AFTER_CM_ELEMENT_NAME_STATE () { 98 }
194    sub AFTER_CM_GROUP_CLOSE_STATE () { 99 }
195    sub AFTER_MD_DEF_STATE () { 100 }
196    sub BOGUS_MD_STATE () { 101 }
197    
198  ## Tree constructor state constants (see Whatpm::HTML for the full  ## Tree constructor state constants (see Whatpm::HTML for the full
199  ## list and descriptions)  ## list and descriptions)
200    
# Line 178  sub _initialize_tokenizer ($) { Line 259  sub _initialize_tokenizer ($) {
259    #$self->{is_xml} (if XML)    #$self->{is_xml} (if XML)
260    
261    $self->{state} = DATA_STATE; # MUST    $self->{state} = DATA_STATE; # MUST
262    $self->{s_kwd} = ''; # state keyword    $self->{s_kwd} = ''; # Data state keyword
263      #$self->{kwd} = ''; # State-dependent keyword; initialized when used
264    #$self->{entity__value}; # initialized when used    #$self->{entity__value}; # initialized when used
265    #$self->{entity__match}; # initialized when used    #$self->{entity__match}; # initialized when used
266    $self->{content_model} = PCDATA_CONTENT_MODEL; # be    $self->{content_model} = PCDATA_CONTENT_MODEL; # be
# Line 208  sub _initialize_tokenizer ($) { Line 290  sub _initialize_tokenizer ($) {
290    
291  ## A token has:  ## A token has:
292  ##   ->{type} == DOCTYPE_TOKEN, START_TAG_TOKEN, END_TAG_TOKEN, COMMENT_TOKEN,  ##   ->{type} == DOCTYPE_TOKEN, START_TAG_TOKEN, END_TAG_TOKEN, COMMENT_TOKEN,
293  ##       CHARACTER_TOKEN, or END_OF_FILE_TOKEN  ##       CHARACTER_TOKEN, END_OF_FILE_TOKEN, PI_TOKEN, or ABORT_TOKEN
294  ##   ->{name} (DOCTYPE_TOKEN)  ##   ->{name} (DOCTYPE_TOKEN)
295  ##   ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN)  ##   ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN)
296    ##   ->{target} (PI_TOKEN)
297  ##   ->{pubid} (DOCTYPE_TOKEN)  ##   ->{pubid} (DOCTYPE_TOKEN)
298  ##   ->{sysid} (DOCTYPE_TOKEN)  ##   ->{sysid} (DOCTYPE_TOKEN)
299  ##   ->{quirks} == 1 or 0 (DOCTYPE_TOKEN): "force-quirks" flag  ##   ->{quirks} == 1 or 0 (DOCTYPE_TOKEN): "force-quirks" flag
# Line 218  sub _initialize_tokenizer ($) { Line 301  sub _initialize_tokenizer ($) {
301  ##        ->{name}  ##        ->{name}
302  ##        ->{value}  ##        ->{value}
303  ##        ->{has_reference} == 1 or 0  ##        ->{has_reference} == 1 or 0
304  ##   ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN)  ##        ->{index}: Index of the attribute in a tag.
305    ##   ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN, PI_TOKEN)
306    ##   ->{has_reference} == 1 or 0 (CHARACTER_TOKEN)
307    ##   ->{last_index} (ELEMENT_TOKEN): Next attribute's index - 1.
308    ##   ->{has_internal_subset} == 1 or 0 (DOCTYPE_TOKEN)
309    
310  ## NOTE: The "self-closing flag" is held as |$self->{self_closing}|.  ## NOTE: The "self-closing flag" is held as |$self->{self_closing}|.
311  ##     |->{self_closing}| is used to save the value of |$self->{self_closing}|  ##     |->{self_closing}| is used to save the value of |$self->{self_closing}|
312  ##     while the token is pushed back to the stack.  ##     while the token is pushed back to the stack.
# Line 238  my $is_space = { Line 326  my $is_space = {
326    0x0009 => 1, # CHARACTER TABULATION (HT)    0x0009 => 1, # CHARACTER TABULATION (HT)
327    0x000A => 1, # LINE FEED (LF)    0x000A => 1, # LINE FEED (LF)
328    #0x000B => 0, # LINE TABULATION (VT)    #0x000B => 0, # LINE TABULATION (VT)
329    0x000C => 1, # FORM FEED (FF)    0x000C => 1, # FORM FEED (FF) ## XML5: Not a space character.
330    #0x000D => 1, # CARRIAGE RETURN (CR)    #0x000D => 1, # CARRIAGE RETURN (CR)
331    0x0020 => 1, # SPACE (SP)    0x0020 => 1, # SPACE (SP)
332  };  };
# Line 498  sub _get_next_token ($) { Line 586  sub _get_next_token ($) {
586        return  ($token);        return  ($token);
587        redo A;        redo A;
588      } elsif ($self->{state} == TAG_OPEN_STATE) {      } elsif ($self->{state} == TAG_OPEN_STATE) {
589          ## XML5: "tag state".
590    
591        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
592          if ($self->{nc} == 0x002F) { # /          if ($self->{nc} == 0x002F) { # /
593                        
# Line 516  sub _get_next_token ($) { Line 606  sub _get_next_token ($) {
606            redo A;            redo A;
607          } elsif ($self->{nc} == 0x0021) { # !          } elsif ($self->{nc} == 0x0021) { # !
608                        
609            $self->{s_kwd} = '<' unless $self->{escape};            $self->{s_kwd} = $self->{escaped} ? '' : '<';
610            #            #
611          } else {          } else {
612                        
613              $self->{s_kwd} = '';
614            #            #
615          }          }
616    
617          ## reconsume          ## reconsume
618          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
         $self->{s_kwd} = '';  
619          return  ({type => CHARACTER_TOKEN, data => '<',          return  ({type => CHARACTER_TOKEN, data => '<',
620                    line => $self->{line_prev},                    line => $self->{line_prev},
621                    column => $self->{column_prev},                    column => $self->{column_prev},
# Line 629  sub _get_next_token ($) { Line 719  sub _get_next_token ($) {
719    
720            redo A;            redo A;
721          } elsif ($self->{nc} == 0x003F) { # ?          } elsif ($self->{nc} == 0x003F) { # ?
722                        if ($self->{is_xml}) {
723            $self->{parse_error}->(level => $self->{level}->{must}, type => 'pio',              
724                            line => $self->{line_prev},              $self->{state} = PI_STATE;
725                            column => $self->{column_prev});              
726            $self->{state} = BOGUS_COMMENT_STATE;      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
727            $self->{ct} = {type => COMMENT_TOKEN, data => '',        $self->{line_prev} = $self->{line};
728                                      line => $self->{line_prev},        $self->{column_prev} = $self->{column};
729                                      column => $self->{column_prev},        $self->{column}++;
730                                     };        $self->{nc}
731            ## $self->{nc} is intentionally left as is            = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
732            redo A;      } else {
733          } else {        $self->{set_nc}->($self);
734        }
735      
736                redo A;
737              } else {
738                
739                $self->{parse_error}->(level => $self->{level}->{must}, type => 'pio',
740                                line => $self->{line_prev},
741                                column => $self->{column_prev});
742                $self->{state} = BOGUS_COMMENT_STATE;
743                $self->{ct} = {type => COMMENT_TOKEN, data => '',
744                               line => $self->{line_prev},
745                               column => $self->{column_prev},
746                              };
747                ## $self->{nc} is intentionally left as is
748                redo A;
749              }
750            } elsif (not $self->{is_xml} or $is_space->{$self->{nc}}) {
751                        
752            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago',            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago',
753                            line => $self->{line_prev},                            line => $self->{line_prev},
# Line 655  sub _get_next_token ($) { Line 762  sub _get_next_token ($) {
762                     });                     });
763    
764            redo A;            redo A;
765            } else {
766              ## XML5: "<:" is a parse error.
767              
768              $self->{ct} = {type => START_TAG_TOKEN,
769                                        tag_name => chr ($self->{nc}),
770                                        line => $self->{line_prev},
771                                        column => $self->{column_prev}};
772              $self->{state} = TAG_NAME_STATE;
773              
774        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
775          $self->{line_prev} = $self->{line};
776          $self->{column_prev} = $self->{column};
777          $self->{column}++;
778          $self->{nc}
779              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
780        } else {
781          $self->{set_nc}->($self);
782        }
783      
784              redo A;
785          }          }
786        } else {        } else {
787          die "$0: $self->{content_model} in tag open";          die "$0: $self->{content_model} in tag open";
# Line 663  sub _get_next_token ($) { Line 790  sub _get_next_token ($) {
790        ## NOTE: The "close tag open state" in the spec is implemented as        ## NOTE: The "close tag open state" in the spec is implemented as
791        ## |CLOSE_TAG_OPEN_STATE| and |CDATA_RCDATA_CLOSE_TAG_STATE|.        ## |CLOSE_TAG_OPEN_STATE| and |CDATA_RCDATA_CLOSE_TAG_STATE|.
792    
793          ## XML5: "end tag state".
794    
795        my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"        my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"
796        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
797          if (defined $self->{last_stag_name}) {          if (defined $self->{last_stag_name}) {
798            $self->{state} = CDATA_RCDATA_CLOSE_TAG_STATE;            $self->{state} = CDATA_RCDATA_CLOSE_TAG_STATE;
799            $self->{s_kwd} = '';            $self->{kwd} = '';
800            ## Reconsume.            ## Reconsume.
801            redo A;            redo A;
802          } else {          } else {
# Line 724  sub _get_next_token ($) { Line 853  sub _get_next_token ($) {
853        
854          redo A;          redo A;
855        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
856          $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty end tag',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty end tag',
857                          line => $self->{line_prev}, ## "<" in "</>"                          line => $self->{line_prev}, ## "<" in "</>"
858                          column => $self->{column_prev} - 1);                          column => $self->{column_prev} - 1);
859          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
860          $self->{s_kwd} = '';          $self->{s_kwd} = '';
861                    if ($self->{is_xml}) {
862              
863              ## XML5: No parse error.
864              
865              ## NOTE: This parser raises a parse error, since it supports
866              ## XML1, not XML5.
867    
868              ## NOTE: A short end tag token.
869              my $ct = {type => END_TAG_TOKEN,
870                        tag_name => '',
871                        line => $self->{line_prev},
872                        column => $self->{column_prev} - 1,
873                       };
874              
875        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
876          $self->{line_prev} = $self->{line};
877          $self->{column_prev} = $self->{column};
878          $self->{column}++;
879          $self->{nc}
880              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
881        } else {
882          $self->{set_nc}->($self);
883        }
884      
885              return  ($ct);
886            } else {
887              
888              
889      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
890        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
891        $self->{column_prev} = $self->{column};        $self->{column_prev} = $self->{column};
# Line 741  sub _get_next_token ($) { Line 896  sub _get_next_token ($) {
896        $self->{set_nc}->($self);        $self->{set_nc}->($self);
897      }      }
898        
899            }
900          redo A;          redo A;
901        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
902                    
# Line 754  sub _get_next_token ($) { Line 910  sub _get_next_token ($) {
910                   });                   });
911    
912          redo A;          redo A;
913        } else {        } elsif (not $self->{is_xml} or
914                   $is_space->{$self->{nc}}) {
915                    
916          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus end tag');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus end tag',
917                            line => $self->{line_prev}, # "<" of "</"
918                            column => $self->{column_prev} - 1);
919          $self->{state} = BOGUS_COMMENT_STATE;          $self->{state} = BOGUS_COMMENT_STATE;
920          $self->{ct} = {type => COMMENT_TOKEN, data => '',          $self->{ct} = {type => COMMENT_TOKEN, data => '',
921                                    line => $self->{line_prev}, # "<" of "</"                                    line => $self->{line_prev}, # "<" of "</"
# Line 769  sub _get_next_token ($) { Line 928  sub _get_next_token ($) {
928          ## generated from the bogus end tag, as defined in the          ## generated from the bogus end tag, as defined in the
929          ## "bogus comment state" entry.          ## "bogus comment state" entry.
930          redo A;          redo A;
931          } else {
932            ## XML5: "</:" is a parse error.
933            
934            $self->{ct} = {type => END_TAG_TOKEN,
935                           tag_name => chr ($self->{nc}),
936                           line => $l, column => $c};
937            $self->{state} = TAG_NAME_STATE; ## XML5: "end tag name state".
938            
939        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
940          $self->{line_prev} = $self->{line};
941          $self->{column_prev} = $self->{column};
942          $self->{column}++;
943          $self->{nc}
944              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
945        } else {
946          $self->{set_nc}->($self);
947        }
948      
949            redo A;
950        }        }
951      } elsif ($self->{state} == CDATA_RCDATA_CLOSE_TAG_STATE) {      } elsif ($self->{state} == CDATA_RCDATA_CLOSE_TAG_STATE) {
952        my $ch = substr $self->{last_stag_name}, length $self->{s_kwd}, 1;        my $ch = substr $self->{last_stag_name}, length $self->{kwd}, 1;
953        if (length $ch) {        if (length $ch) {
954          my $CH = $ch;          my $CH = $ch;
955          $ch =~ tr/a-z/A-Z/;          $ch =~ tr/a-z/A-Z/;
# Line 779  sub _get_next_token ($) { Line 957  sub _get_next_token ($) {
957          if ($nch eq $ch or $nch eq $CH) {          if ($nch eq $ch or $nch eq $CH) {
958                        
959            ## Stay in the state.            ## Stay in the state.
960            $self->{s_kwd} .= $nch;            $self->{kwd} .= $nch;
961                        
962      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
963        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 798  sub _get_next_token ($) { Line 976  sub _get_next_token ($) {
976            $self->{s_kwd} = '';            $self->{s_kwd} = '';
977            ## Reconsume.            ## Reconsume.
978            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
979                      data => '</' . $self->{s_kwd},                      data => '</' . $self->{kwd},
980                      line => $self->{line_prev},                      line => $self->{line_prev},
981                      column => $self->{column_prev} - 1 - length $self->{s_kwd},                      column => $self->{column_prev} - 1 - length $self->{kwd},
982                     });                     });
983            redo A;            redo A;
984          }          }
# Line 816  sub _get_next_token ($) { Line 994  sub _get_next_token ($) {
994            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
995            $self->{s_kwd} = '';            $self->{s_kwd} = '';
996            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
997                      data => '</' . $self->{s_kwd},                      data => '</' . $self->{kwd},
998                      line => $self->{line_prev},                      line => $self->{line_prev},
999                      column => $self->{column_prev} - 1 - length $self->{s_kwd},                      column => $self->{column_prev} - 1 - length $self->{kwd},
1000                     });                     });
1001            redo A;            redo A;
1002          } else {          } else {
# Line 827  sub _get_next_token ($) { Line 1005  sub _get_next_token ($) {
1005                = {type => END_TAG_TOKEN,                = {type => END_TAG_TOKEN,
1006                   tag_name => $self->{last_stag_name},                   tag_name => $self->{last_stag_name},
1007                   line => $self->{line_prev},                   line => $self->{line_prev},
1008                   column => $self->{column_prev} - 1 - length $self->{s_kwd}};                   column => $self->{column_prev} - 1 - length $self->{kwd}};
1009            $self->{state} = TAG_NAME_STATE;            $self->{state} = TAG_NAME_STATE;
1010            ## Reconsume.            ## Reconsume.
1011            redo A;            redo A;
# Line 959  sub _get_next_token ($) { Line 1137  sub _get_next_token ($) {
1137          redo A;          redo A;
1138        }        }
1139      } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {      } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {
1140          ## XML5: "Tag attribute name before state".
1141    
1142        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
1143                    
1144          ## Stay in the state          ## Stay in the state
# Line 1071  sub _get_next_token ($) { Line 1251  sub _get_next_token ($) {
1251               0x003D => 1, # =               0x003D => 1, # =
1252              }->{$self->{nc}}) {              }->{$self->{nc}}) {
1253                        
1254              ## XML5: Not a parse error.
1255            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');
1256          } else {          } else {
1257                        
1258              ## XML5: ":" raises a parse error and is ignored.
1259          }          }
1260          $self->{ca}          $self->{ca}
1261              = {name => chr ($self->{nc}),              = {name => chr ($self->{nc}),
# Line 1094  sub _get_next_token ($) { Line 1276  sub _get_next_token ($) {
1276          redo A;          redo A;
1277        }        }
1278      } elsif ($self->{state} == ATTRIBUTE_NAME_STATE) {      } elsif ($self->{state} == ATTRIBUTE_NAME_STATE) {
1279          ## XML5: "Tag attribute name state".
1280    
1281        my $before_leave = sub {        my $before_leave = sub {
1282          if (exists $self->{ct}->{attributes} # start tag or end tag          if (exists $self->{ct}->{attributes} # start tag or end tag
1283              ->{$self->{ca}->{name}}) { # MUST              ->{$self->{ca}->{name}}) { # MUST
# Line 1104  sub _get_next_token ($) { Line 1288  sub _get_next_token ($) {
1288                        
1289            $self->{ct}->{attributes}->{$self->{ca}->{name}}            $self->{ct}->{attributes}->{$self->{ca}->{name}}
1290              = $self->{ca};              = $self->{ca};
1291              $self->{ca}->{index} = ++$self->{ct}->{last_index};
1292          }          }
1293        }; # $before_leave        }; # $before_leave
1294    
# Line 1140  sub _get_next_token ($) { Line 1325  sub _get_next_token ($) {
1325        
1326          redo A;          redo A;
1327        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
1328            if ($self->{is_xml}) {
1329              
1330              ## XML5: Not a parse error.
1331              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1332            } else {
1333              
1334            }
1335    
1336          $before_leave->();          $before_leave->();
1337          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1338                        
# Line 1189  sub _get_next_token ($) { Line 1382  sub _get_next_token ($) {
1382        
1383          redo A;          redo A;
1384        } elsif ($self->{nc} == 0x002F) { # /        } elsif ($self->{nc} == 0x002F) { # /
1385            if ($self->{is_xml}) {
1386              
1387              ## XML5: Not a parse error.
1388              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1389            } else {
1390              
1391            }
1392                    
1393          $before_leave->();          $before_leave->();
1394          $self->{state} = SELF_CLOSING_START_TAG_STATE;          $self->{state} = SELF_CLOSING_START_TAG_STATE;
# Line 1233  sub _get_next_token ($) { Line 1433  sub _get_next_token ($) {
1433          if ($self->{nc} == 0x0022 or # "          if ($self->{nc} == 0x0022 or # "
1434              $self->{nc} == 0x0027) { # '              $self->{nc} == 0x0027) { # '
1435                        
1436              ## XML5: Not a parse error.
1437            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');
1438          } else {          } else {
1439                        
# Line 1253  sub _get_next_token ($) { Line 1454  sub _get_next_token ($) {
1454          redo A;          redo A;
1455        }        }
1456      } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {      } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {
1457          ## XML5: "Tag attribute name after state".
1458          
1459        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
1460                    
1461          ## Stay in the state          ## Stay in the state
# Line 1284  sub _get_next_token ($) { Line 1487  sub _get_next_token ($) {
1487        
1488          redo A;          redo A;
1489        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
1490            if ($self->{is_xml}) {
1491              
1492              ## XML5: Not a parse error.
1493              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1494            } else {
1495              
1496            }
1497    
1498          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1499                        
1500            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
# Line 1337  sub _get_next_token ($) { Line 1548  sub _get_next_token ($) {
1548        
1549          redo A;          redo A;
1550        } elsif ($self->{nc} == 0x002F) { # /        } elsif ($self->{nc} == 0x002F) { # /
1551            if ($self->{is_xml}) {
1552              
1553              ## XML5: Not a parse error.
1554              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1555            } else {
1556              
1557            }
1558                    
1559          $self->{state} = SELF_CLOSING_START_TAG_STATE;          $self->{state} = SELF_CLOSING_START_TAG_STATE;
1560                    
# Line 1376  sub _get_next_token ($) { Line 1594  sub _get_next_token ($) {
1594    
1595          redo A;          redo A;
1596        } else {        } else {
1597            if ($self->{is_xml}) {
1598              
1599              ## XML5: Not a parse error.
1600              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1601            } else {
1602              
1603            }
1604    
1605          if ($self->{nc} == 0x0022 or # "          if ($self->{nc} == 0x0022 or # "
1606              $self->{nc} == 0x0027) { # '              $self->{nc} == 0x0027) { # '
1607                        
1608              ## XML5: Not a parse error.
1609            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');
1610          } else {          } else {
1611                        
# Line 1402  sub _get_next_token ($) { Line 1629  sub _get_next_token ($) {
1629          redo A;                  redo A;        
1630        }        }
1631      } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {      } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {
1632          ## XML5: "Tag attribute value before state".
1633    
1634        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
1635                    
1636          ## Stay in the state          ## Stay in the state
# Line 1513  sub _get_next_token ($) { Line 1742  sub _get_next_token ($) {
1742        } else {        } else {
1743          if ($self->{nc} == 0x003D) { # =          if ($self->{nc} == 0x003D) { # =
1744                        
1745              ## XML5: Not a parse error.
1746            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');
1747            } elsif ($self->{is_xml}) {
1748              
1749              ## XML5: No parse error.
1750              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO
1751          } else {          } else {
1752                        
1753          }          }
# Line 1533  sub _get_next_token ($) { Line 1767  sub _get_next_token ($) {
1767          redo A;          redo A;
1768        }        }
1769      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1770          ## XML5: "Tag attribute value double quoted state" and "DOCTYPE
1771          ## ATTLIST attribute value double quoted state".
1772          
1773        if ($self->{nc} == 0x0022) { # "        if ($self->{nc} == 0x0022) { # "
1774                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1775          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;            
1776              ## XML5: "DOCTYPE ATTLIST name after state".
1777              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1778              $self->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE;
1779            } else {
1780              
1781              ## XML5: "Tag attribute name before state".
1782              $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1783            }
1784                    
1785      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1786        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1550  sub _get_next_token ($) { Line 1795  sub _get_next_token ($) {
1795          redo A;          redo A;
1796        } elsif ($self->{nc} == 0x0026) { # &        } elsif ($self->{nc} == 0x0026) { # &
1797                    
1798            ## XML5: Not defined yet.
1799    
1800          ## NOTE: In the spec, the tokenizer is switched to the          ## NOTE: In the spec, the tokenizer is switched to the
1801          ## "entity in attribute value state".  In this implementation, the          ## "entity in attribute value state".  In this implementation, the
1802          ## tokenizer is switched to the |ENTITY_STATE|, which is an          ## tokenizer is switched to the |ENTITY_STATE|, which is an
# Line 1574  sub _get_next_token ($) { Line 1821  sub _get_next_token ($) {
1821          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1822                        
1823            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
1824    
1825              $self->{state} = DATA_STATE;
1826              $self->{s_kwd} = '';
1827              ## reconsume
1828              return  ($self->{ct}); # start tag
1829              redo A;
1830          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1831            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1832            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1583  sub _get_next_token ($) { Line 1836  sub _get_next_token ($) {
1836              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
1837                            
1838            }            }
1839    
1840              $self->{state} = DATA_STATE;
1841              $self->{s_kwd} = '';
1842              ## reconsume
1843              return  ($self->{ct}); # end tag
1844              redo A;
1845            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1846              ## XML5: No parse error above; not defined yet.
1847              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1848              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1849              ## Reconsume.
1850              return  ($self->{ct}); # ATTLIST
1851              redo A;
1852          } else {          } else {
1853            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1854          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
1855        } else {        } else {
1856                    ## XML5 [ATTLIST]: Not defined yet.
1857            if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
1858              
1859              ## XML5: Not a parse error.
1860              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lt in attr value'); ## TODO: type
1861            } else {
1862              
1863            }
1864          $self->{ca}->{value} .= chr ($self->{nc});          $self->{ca}->{value} .= chr ($self->{nc});
1865          $self->{read_until}->($self->{ca}->{value},          $self->{read_until}->($self->{ca}->{value},
1866                                q["&],                                q["&<],
1867                                length $self->{ca}->{value});                                length $self->{ca}->{value});
1868    
1869          ## Stay in the state          ## Stay in the state
# Line 1615  sub _get_next_token ($) { Line 1881  sub _get_next_token ($) {
1881          redo A;          redo A;
1882        }        }
1883      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1884          ## XML5: "Tag attribute value single quoted state" and "DOCTYPE
1885          ## ATTLIST attribute value single quoted state".
1886    
1887        if ($self->{nc} == 0x0027) { # '        if ($self->{nc} == 0x0027) { # '
1888                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1889          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;            
1890              ## XML5: "DOCTYPE ATTLIST name after state".
1891              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1892              $self->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE;
1893            } else {
1894              
1895              ## XML5: "Before attribute name state" (sic).
1896              $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1897            }
1898                    
1899      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1900        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1632  sub _get_next_token ($) { Line 1909  sub _get_next_token ($) {
1909          redo A;          redo A;
1910        } elsif ($self->{nc} == 0x0026) { # &        } elsif ($self->{nc} == 0x0026) { # &
1911                    
1912            ## XML5: Not defined yet.
1913    
1914          ## NOTE: In the spec, the tokenizer is switched to the          ## NOTE: In the spec, the tokenizer is switched to the
1915          ## "entity in attribute value state".  In this implementation, the          ## "entity in attribute value state".  In this implementation, the
1916          ## tokenizer is switched to the |ENTITY_STATE|, which is an          ## tokenizer is switched to the |ENTITY_STATE|, which is an
# Line 1656  sub _get_next_token ($) { Line 1935  sub _get_next_token ($) {
1935          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1936                        
1937            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
1938    
1939              $self->{state} = DATA_STATE;
1940              $self->{s_kwd} = '';
1941              ## reconsume
1942              return  ($self->{ct}); # start tag
1943              redo A;
1944          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1945            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1946            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1665  sub _get_next_token ($) { Line 1950  sub _get_next_token ($) {
1950              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
1951                            
1952            }            }
1953    
1954              $self->{state} = DATA_STATE;
1955              $self->{s_kwd} = '';
1956              ## reconsume
1957              return  ($self->{ct}); # end tag
1958              redo A;
1959            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1960              ## XML5: No parse error above; not defined yet.
1961              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1962              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1963              ## Reconsume.
1964              return  ($self->{ct}); # ATTLIST
1965              redo A;
1966          } else {          } else {
1967            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1968          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
1969        } else {        } else {
1970                    ## XML5 [ATTLIST]: Not defined yet.
1971            if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
1972              
1973              ## XML5: Not a parse error.
1974              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lt in attr value'); ## TODO: type
1975            } else {
1976              
1977            }
1978          $self->{ca}->{value} .= chr ($self->{nc});          $self->{ca}->{value} .= chr ($self->{nc});
1979          $self->{read_until}->($self->{ca}->{value},          $self->{read_until}->($self->{ca}->{value},
1980                                q['&],                                q['&<],
1981                                length $self->{ca}->{value});                                length $self->{ca}->{value});
1982    
1983          ## Stay in the state          ## Stay in the state
# Line 1697  sub _get_next_token ($) { Line 1995  sub _get_next_token ($) {
1995          redo A;          redo A;
1996        }        }
1997      } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {
1998          ## XML5: "Tag attribute value unquoted state".
1999    
2000        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
2001                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
2002          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;            
2003              push @{$self->{ct}->{attrdefs}}, $self->{ca};
2004              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
2005            } else {
2006              
2007              ## XML5: "Tag attribute name before state".
2008              $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
2009            }
2010                    
2011      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2012        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1714  sub _get_next_token ($) { Line 2021  sub _get_next_token ($) {
2021          redo A;          redo A;
2022        } elsif ($self->{nc} == 0x0026) { # &        } elsif ($self->{nc} == 0x0026) { # &
2023                    
2024    
2025            ## XML5: Not defined yet.
2026    
2027          ## NOTE: In the spec, the tokenizer is switched to the          ## NOTE: In the spec, the tokenizer is switched to the
2028          ## "entity in attribute value state".  In this implementation, the          ## "entity in attribute value state".  In this implementation, the
2029          ## tokenizer is switched to the |ENTITY_STATE|, which is an          ## tokenizer is switched to the |ENTITY_STATE|, which is an
# Line 1737  sub _get_next_token ($) { Line 2047  sub _get_next_token ($) {
2047          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
2048                        
2049            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
2050    
2051              $self->{state} = DATA_STATE;
2052              $self->{s_kwd} = '';
2053              
2054        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2055          $self->{line_prev} = $self->{line};
2056          $self->{column_prev} = $self->{column};
2057          $self->{column}++;
2058          $self->{nc}
2059              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
2060        } else {
2061          $self->{set_nc}->($self);
2062        }
2063      
2064              return  ($self->{ct}); # start tag
2065              redo A;
2066          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
2067            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
2068            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1746  sub _get_next_token ($) { Line 2072  sub _get_next_token ($) {
2072              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
2073                            
2074            }            }
2075          } else {  
2076            die "$0: $self->{ct}->{type}: Unknown token type";            $self->{state} = DATA_STATE;
2077          }            $self->{s_kwd} = '';
2078          $self->{state} = DATA_STATE;            
         $self->{s_kwd} = '';  
           
2079      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2080        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
2081        $self->{column_prev} = $self->{column};        $self->{column_prev} = $self->{column};
# Line 1762  sub _get_next_token ($) { Line 2086  sub _get_next_token ($) {
2086        $self->{set_nc}->($self);        $self->{set_nc}->($self);
2087      }      }
2088        
2089              return  ($self->{ct}); # end tag
2090          return  ($self->{ct}); # start tag or end tag            redo A;
2091            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
2092          redo A;            push @{$self->{ct}->{attrdefs}}, $self->{ca};
2093              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2094              
2095        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2096          $self->{line_prev} = $self->{line};
2097          $self->{column_prev} = $self->{column};
2098          $self->{column}++;
2099          $self->{nc}
2100              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
2101        } else {
2102          $self->{set_nc}->($self);
2103        }
2104      
2105              return  ($self->{ct}); # ATTLIST
2106              redo A;
2107            } else {
2108              die "$0: $self->{ct}->{type}: Unknown token type";
2109            }
2110        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');  
2111          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
2112                        
2113              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');
2114            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
2115    
2116              $self->{state} = DATA_STATE;
2117              $self->{s_kwd} = '';
2118              ## reconsume
2119              return  ($self->{ct}); # start tag
2120              redo A;
2121          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
2122              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');
2123            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
2124            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
2125                            
# Line 1780  sub _get_next_token ($) { Line 2128  sub _get_next_token ($) {
2128              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
2129                            
2130            }            }
2131    
2132              $self->{state} = DATA_STATE;
2133              $self->{s_kwd} = '';
2134              ## reconsume
2135              return  ($self->{ct}); # end tag
2136              redo A;
2137            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
2138              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
2139              push @{$self->{ct}->{attrdefs}}, $self->{ca};
2140              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2141              ## Reconsume.
2142              return  ($self->{ct}); # ATTLIST
2143              redo A;
2144          } else {          } else {
2145            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
2146          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
2147        } else {        } else {
2148          if ({          if ({
2149               0x0022 => 1, # "               0x0022 => 1, # "
# Line 1797  sub _get_next_token ($) { Line 2151  sub _get_next_token ($) {
2151               0x003D => 1, # =               0x003D => 1, # =
2152              }->{$self->{nc}}) {              }->{$self->{nc}}) {
2153                        
2154              ## XML5: Not a parse error.
2155            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');
2156          } else {          } else {
2157                        
# Line 1913  sub _get_next_token ($) { Line 2268  sub _get_next_token ($) {
2268          redo A;          redo A;
2269        }        }
2270      } elsif ($self->{state} == SELF_CLOSING_START_TAG_STATE) {      } elsif ($self->{state} == SELF_CLOSING_START_TAG_STATE) {
2271          ## XML5: "Empty tag state".
2272    
2273        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2274          if ($self->{ct}->{type} == END_TAG_TOKEN) {          if ($self->{ct}->{type} == END_TAG_TOKEN) {
2275                        
# Line 1964  sub _get_next_token ($) { Line 2321  sub _get_next_token ($) {
2321          } else {          } else {
2322            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
2323          }          }
2324            ## XML5: "Tag attribute name before state".
2325          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2326          $self->{s_kwd} = '';          $self->{s_kwd} = '';
2327          ## Reconsume.          ## Reconsume.
# Line 1978  sub _get_next_token ($) { Line 2336  sub _get_next_token ($) {
2336          redo A;          redo A;
2337        }        }
2338      } elsif ($self->{state} == BOGUS_COMMENT_STATE) {      } elsif ($self->{state} == BOGUS_COMMENT_STATE) {
2339        ## (only happen if PCDATA state)        ## XML5: "Bogus comment state" and "DOCTYPE bogus comment state".
2340    
2341        ## NOTE: Unlike spec's "bogus comment state", this implementation        ## NOTE: Unlike spec's "bogus comment state", this implementation
2342        ## consumes characters one-by-one basis.        ## consumes characters one-by-one basis.
2343                
2344        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2345                    if ($self->{in_subset}) {
2346          $self->{state} = DATA_STATE;            
2347          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2348            } else {
2349              
2350              $self->{state} = DATA_STATE;
2351              $self->{s_kwd} = '';
2352            }
2353                    
2354      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2355        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2002  sub _get_next_token ($) { Line 2365  sub _get_next_token ($) {
2365          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
2366          redo A;          redo A;
2367        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
2368                    if ($self->{in_subset}) {
2369          $self->{state} = DATA_STATE;            
2370          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2371            } else {
2372              
2373              $self->{state} = DATA_STATE;
2374              $self->{s_kwd} = '';
2375            }
2376          ## reconsume          ## reconsume
2377    
2378          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2031  sub _get_next_token ($) { Line 2399  sub _get_next_token ($) {
2399          redo A;          redo A;
2400        }        }
2401      } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {      } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {
2402        ## (only happen if PCDATA state)        ## XML5: "Markup declaration state".
2403                
2404        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2405                    
# Line 2053  sub _get_next_token ($) { Line 2421  sub _get_next_token ($) {
2421          ## ASCII case-insensitive.          ## ASCII case-insensitive.
2422                    
2423          $self->{state} = MD_DOCTYPE_STATE;          $self->{state} = MD_DOCTYPE_STATE;
2424          $self->{s_kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
2425                    
2426      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2427        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2072  sub _get_next_token ($) { Line 2440  sub _get_next_token ($) {
2440                 $self->{nc} == 0x005B) { # [                 $self->{nc} == 0x005B) { # [
2441                                                    
2442          $self->{state} = MD_CDATA_STATE;          $self->{state} = MD_CDATA_STATE;
2443          $self->{s_kwd} = '[';          $self->{kwd} = '[';
2444                    
2445      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2446        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2106  sub _get_next_token ($) { Line 2474  sub _get_next_token ($) {
2474                                    line => $self->{line_prev},                                    line => $self->{line_prev},
2475                                    column => $self->{column_prev} - 2,                                    column => $self->{column_prev} - 2,
2476                                   };                                   };
2477          $self->{state} = COMMENT_START_STATE;          $self->{state} = COMMENT_START_STATE; ## XML5: "comment state".
2478                    
2479      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2480        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2142  sub _get_next_token ($) { Line 2510  sub _get_next_token ($) {
2510              0x0054, # T              0x0054, # T
2511              0x0059, # Y              0x0059, # Y
2512              0x0050, # P              0x0050, # P
2513            ]->[length $self->{s_kwd}] or            ]->[length $self->{kwd}] or
2514            $self->{nc} == [            $self->{nc} == [
2515              undef,              undef,
2516              0x006F, # o              0x006F, # o
# Line 2150  sub _get_next_token ($) { Line 2518  sub _get_next_token ($) {
2518              0x0074, # t              0x0074, # t
2519              0x0079, # y              0x0079, # y
2520              0x0070, # p              0x0070, # p
2521            ]->[length $self->{s_kwd}]) {            ]->[length $self->{kwd}]) {
2522                    
2523          ## Stay in the state.          ## Stay in the state.
2524          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
2525                    
2526      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2527        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2166  sub _get_next_token ($) { Line 2534  sub _get_next_token ($) {
2534      }      }
2535        
2536          redo A;          redo A;
2537        } elsif ((length $self->{s_kwd}) == 6 and        } elsif ((length $self->{kwd}) == 6 and
2538                 ($self->{nc} == 0x0045 or # E                 ($self->{nc} == 0x0045 or # E
2539                  $self->{nc} == 0x0065)) { # e                  $self->{nc} == 0x0065)) { # e
2540                    if ($self->{is_xml} and
2541                ($self->{kwd} ne 'DOCTYP' or $self->{nc} == 0x0065)) {
2542              
2543              ## XML5: case-sensitive.
2544              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO
2545                              text => 'DOCTYPE',
2546                              line => $self->{line_prev},
2547                              column => $self->{column_prev} - 5);
2548            } else {
2549              
2550            }
2551          $self->{state} = DOCTYPE_STATE;          $self->{state} = DOCTYPE_STATE;
2552          $self->{ct} = {type => DOCTYPE_TOKEN,          $self->{ct} = {type => DOCTYPE_TOKEN,
2553                                    quirks => 1,                                    quirks => 1,
# Line 2192  sub _get_next_token ($) { Line 2570  sub _get_next_token ($) {
2570                                    
2571          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
2572                          line => $self->{line_prev},                          line => $self->{line_prev},
2573                          column => $self->{column_prev} - 1 - length $self->{s_kwd});                          column => $self->{column_prev} - 1 - length $self->{kwd});
2574          $self->{state} = BOGUS_COMMENT_STATE;          $self->{state} = BOGUS_COMMENT_STATE;
2575          ## Reconsume.          ## Reconsume.
2576          $self->{ct} = {type => COMMENT_TOKEN,          $self->{ct} = {type => COMMENT_TOKEN,
2577                                    data => $self->{s_kwd},                                    data => $self->{kwd},
2578                                    line => $self->{line_prev},                                    line => $self->{line_prev},
2579                                    column => $self->{column_prev} - 1 - length $self->{s_kwd},                                    column => $self->{column_prev} - 1 - length $self->{kwd},
2580                                   };                                   };
2581          redo A;          redo A;
2582        }        }
# Line 2209  sub _get_next_token ($) { Line 2587  sub _get_next_token ($) {
2587              '[CD' => 0x0041, # A              '[CD' => 0x0041, # A
2588              '[CDA' => 0x0054, # T              '[CDA' => 0x0054, # T
2589              '[CDAT' => 0x0041, # A              '[CDAT' => 0x0041, # A
2590            }->{$self->{s_kwd}}) {            }->{$self->{kwd}}) {
2591                    
2592          ## Stay in the state.          ## Stay in the state.
2593          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
2594                    
2595      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2596        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2225  sub _get_next_token ($) { Line 2603  sub _get_next_token ($) {
2603      }      }
2604        
2605          redo A;          redo A;
2606        } elsif ($self->{s_kwd} eq '[CDATA' and        } elsif ($self->{kwd} eq '[CDATA' and
2607                 $self->{nc} == 0x005B) { # [                 $self->{nc} == 0x005B) { # [
2608                    if ($self->{is_xml} and
2609                not $self->{tainted} and
2610                @{$self->{open_elements} or []} == 0) {
2611              
2612              $self->{parse_error}->(level => $self->{level}->{must}, type => 'cdata outside of root element',
2613                              line => $self->{line_prev},
2614                              column => $self->{column_prev} - 7);
2615              $self->{tainted} = 1;
2616            } else {
2617              
2618            }
2619    
2620          $self->{ct} = {type => CHARACTER_TOKEN,          $self->{ct} = {type => CHARACTER_TOKEN,
2621                                    data => '',                                    data => '',
2622                                    line => $self->{line_prev},                                    line => $self->{line_prev},
# Line 2249  sub _get_next_token ($) { Line 2638  sub _get_next_token ($) {
2638                    
2639          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
2640                          line => $self->{line_prev},                          line => $self->{line_prev},
2641                          column => $self->{column_prev} - 1 - length $self->{s_kwd});                          column => $self->{column_prev} - 1 - length $self->{kwd});
2642          $self->{state} = BOGUS_COMMENT_STATE;          $self->{state} = BOGUS_COMMENT_STATE;
2643          ## Reconsume.          ## Reconsume.
2644          $self->{ct} = {type => COMMENT_TOKEN,          $self->{ct} = {type => COMMENT_TOKEN,
2645                                    data => $self->{s_kwd},                                    data => $self->{kwd},
2646                                    line => $self->{line_prev},                                    line => $self->{line_prev},
2647                                    column => $self->{column_prev} - 1 - length $self->{s_kwd},                                    column => $self->{column_prev} - 1 - length $self->{kwd},
2648                                   };                                   };
2649          redo A;          redo A;
2650        }        }
# Line 2276  sub _get_next_token ($) { Line 2665  sub _get_next_token ($) {
2665        
2666          redo A;          redo A;
2667        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
2668          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');
2669          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2670          $self->{s_kwd} = '';            
2671              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2672            } else {
2673              
2674              $self->{state} = DATA_STATE;
2675              $self->{s_kwd} = '';
2676            }
2677                    
2678      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2679        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2296  sub _get_next_token ($) { Line 2690  sub _get_next_token ($) {
2690    
2691          redo A;          redo A;
2692        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2693          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2694          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2695          $self->{s_kwd} = '';            
2696              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2697            } else {
2698              
2699              $self->{state} = DATA_STATE;
2700              $self->{s_kwd} = '';
2701            }
2702          ## reconsume          ## reconsume
2703    
2704          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2340  sub _get_next_token ($) { Line 2739  sub _get_next_token ($) {
2739        
2740          redo A;          redo A;
2741        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
2742          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');
2743          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2744          $self->{s_kwd} = '';            
2745              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2746            } else {
2747              
2748              $self->{state} = DATA_STATE;
2749              $self->{s_kwd} = '';
2750            }
2751                    
2752      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2753        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2360  sub _get_next_token ($) { Line 2764  sub _get_next_token ($) {
2764    
2765          redo A;          redo A;
2766        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2767          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2768          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2769          $self->{s_kwd} = '';            
2770              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2771            } else {
2772              
2773              $self->{state} = DATA_STATE;
2774              $self->{s_kwd} = '';
2775            }
2776          ## reconsume          ## reconsume
2777    
2778          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2388  sub _get_next_token ($) { Line 2797  sub _get_next_token ($) {
2797          redo A;          redo A;
2798        }        }
2799      } elsif ($self->{state} == COMMENT_STATE) {      } elsif ($self->{state} == COMMENT_STATE) {
2800          ## XML5: "Comment state" and "DOCTYPE comment state".
2801    
2802        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2803                    
2804          $self->{state} = COMMENT_END_DASH_STATE;          $self->{state} = COMMENT_END_DASH_STATE;
# Line 2404  sub _get_next_token ($) { Line 2815  sub _get_next_token ($) {
2815        
2816          redo A;          redo A;
2817        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2818          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2819          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2820          $self->{s_kwd} = '';            
2821              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2822            } else {
2823              
2824              $self->{state} = DATA_STATE;
2825              $self->{s_kwd} = '';
2826            }
2827          ## reconsume          ## reconsume
2828    
2829          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2435  sub _get_next_token ($) { Line 2851  sub _get_next_token ($) {
2851          redo A;          redo A;
2852        }        }
2853      } elsif ($self->{state} == COMMENT_END_DASH_STATE) {      } elsif ($self->{state} == COMMENT_END_DASH_STATE) {
2854          ## XML5: "Comment dash state" and "DOCTYPE comment dash state".
2855    
2856        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2857                    
2858          $self->{state} = COMMENT_END_STATE;          $self->{state} = COMMENT_END_STATE;
# Line 2451  sub _get_next_token ($) { Line 2869  sub _get_next_token ($) {
2869        
2870          redo A;          redo A;
2871        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2872          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2873          $self->{s_kwd} = '';          if ($self->{in_subset}) {
2874          $self->{state} = DATA_STATE;            
2875          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2876            } else {
2877              
2878              $self->{state} = DATA_STATE;
2879              $self->{s_kwd} = '';
2880            }
2881          ## reconsume          ## reconsume
2882    
2883          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2479  sub _get_next_token ($) { Line 2901  sub _get_next_token ($) {
2901          redo A;          redo A;
2902        }        }
2903      } elsif ($self->{state} == COMMENT_END_STATE) {      } elsif ($self->{state} == COMMENT_END_STATE) {
2904          ## XML5: "Comment end state" and "DOCTYPE comment end state".
2905    
2906        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2907                    if ($self->{in_subset}) {
2908          $self->{state} = DATA_STATE;            
2909          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2910            } else {
2911              
2912              $self->{state} = DATA_STATE;
2913              $self->{s_kwd} = '';
2914            }
2915                    
2916      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2917        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2500  sub _get_next_token ($) { Line 2929  sub _get_next_token ($) {
2929          redo A;          redo A;
2930        } elsif ($self->{nc} == 0x002D) { # -        } elsif ($self->{nc} == 0x002D) { # -
2931                    
2932            ## XML5: Not a parse error.
2933          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',
2934                          line => $self->{line_prev},                          line => $self->{line_prev},
2935                          column => $self->{column_prev});                          column => $self->{column_prev});
# Line 2518  sub _get_next_token ($) { Line 2948  sub _get_next_token ($) {
2948        
2949          redo A;          redo A;
2950        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2951          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2952          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2953          $self->{s_kwd} = '';            
2954              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2955            } else {
2956              
2957              $self->{state} = DATA_STATE;
2958              $self->{s_kwd} = '';
2959            }
2960          ## reconsume          ## reconsume
2961    
2962          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2529  sub _get_next_token ($) { Line 2964  sub _get_next_token ($) {
2964          redo A;          redo A;
2965        } else {        } else {
2966                    
2967            ## XML5: Not a parse error.
2968          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',
2969                          line => $self->{line_prev},                          line => $self->{line_prev},
2970                          column => $self->{column_prev});                          column => $self->{column_prev});
# Line 2565  sub _get_next_token ($) { Line 3001  sub _get_next_token ($) {
3001          redo A;          redo A;
3002        } else {        } else {
3003                    
3004            ## XML5: Unless EOF, switch to the bogus comment state.
3005          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before DOCTYPE name');
3006          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
3007          ## reconsume          ## reconsume
3008          redo A;          redo A;
3009        }        }
3010      } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {
3011          ## XML5: "DOCTYPE root name before state".
3012    
3013        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
3014                    
3015          ## Stay in the state          ## Stay in the state
# Line 2588  sub _get_next_token ($) { Line 3027  sub _get_next_token ($) {
3027          redo A;          redo A;
3028        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3029                    
3030            ## XML5: No parse error.
3031          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');
3032          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3033          $self->{s_kwd} = '';          $self->{s_kwd} = '';
# Line 2616  sub _get_next_token ($) { Line 3056  sub _get_next_token ($) {
3056          return  ($self->{ct}); # DOCTYPE (quirks)          return  ($self->{ct}); # DOCTYPE (quirks)
3057    
3058          redo A;          redo A;
3059          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
3060            
3061            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');
3062            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3063            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3064            $self->{in_subset} = 1;
3065            
3066        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3067          $self->{line_prev} = $self->{line};
3068          $self->{column_prev} = $self->{column};
3069          $self->{column}++;
3070          $self->{nc}
3071              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3072        } else {
3073          $self->{set_nc}->($self);
3074        }
3075      
3076            return  ($self->{ct}); # DOCTYPE
3077            redo A;
3078        } else {        } else {
3079                    
3080          $self->{ct}->{name} = chr $self->{nc};          $self->{ct}->{name} = chr $self->{nc};
# Line 2635  sub _get_next_token ($) { Line 3094  sub _get_next_token ($) {
3094          redo A;          redo A;
3095        }        }
3096      } elsif ($self->{state} == DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == DOCTYPE_NAME_STATE) {
3097  ## ISSUE: Redundant "First," in the spec.        ## XML5: "DOCTYPE root name state".
3098    
3099          ## ISSUE: Redundant "First," in the spec.
3100    
3101        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
3102                    
3103          $self->{state} = AFTER_DOCTYPE_NAME_STATE;          $self->{state} = AFTER_DOCTYPE_NAME_STATE;
# Line 2681  sub _get_next_token ($) { Line 3143  sub _get_next_token ($) {
3143          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
3144    
3145          redo A;          redo A;
3146          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
3147            
3148            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3149            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3150            $self->{in_subset} = 1;
3151            
3152        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3153          $self->{line_prev} = $self->{line};
3154          $self->{column_prev} = $self->{column};
3155          $self->{column}++;
3156          $self->{nc}
3157              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3158        } else {
3159          $self->{set_nc}->($self);
3160        }
3161      
3162            return  ($self->{ct}); # DOCTYPE
3163            redo A;
3164        } else {        } else {
3165                    
3166          $self->{ct}->{name}          $self->{ct}->{name}
# Line 2700  sub _get_next_token ($) { Line 3180  sub _get_next_token ($) {
3180          redo A;          redo A;
3181        }        }
3182      } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {
3183          ## XML5: Corresponding to XML5's "DOCTYPE root name after
3184          ## state", but implemented differently.
3185    
3186        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
3187                    
3188          ## Stay in the state          ## Stay in the state
# Line 2716  sub _get_next_token ($) { Line 3199  sub _get_next_token ($) {
3199        
3200          redo A;          redo A;
3201        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3202            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3203              
3204              $self->{state} = DATA_STATE;
3205              $self->{s_kwd} = '';
3206            } else {
3207              
3208              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md def'); ## TODO: type
3209              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3210            }
3211                    
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3212                    
3213      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3214        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2730  sub _get_next_token ($) { Line 3220  sub _get_next_token ($) {
3220        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3221      }      }
3222        
3223            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         return  ($self->{ct}); # DOCTYPE  
   
3224          redo A;          redo A;
3225        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3226            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3227              
3228              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3229              $self->{state} = DATA_STATE;
3230              $self->{s_kwd} = '';
3231              $self->{ct}->{quirks} = 1;
3232            } else {
3233              
3234              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3235              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3236            }
3237                    
3238          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          ## Reconsume.
3239          $self->{state} = DATA_STATE;          return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{s_kwd} = '';  
         ## reconsume  
   
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3240          redo A;          redo A;
3241        } elsif ($self->{nc} == 0x0050 or # P        } elsif ($self->{nc} == 0x0050 or # P
3242                 $self->{nc} == 0x0070) { # p                 $self->{nc} == 0x0070) { # p
3243            
3244          $self->{state} = PUBLIC_STATE;          $self->{state} = PUBLIC_STATE;
3245          $self->{s_kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
3246                    
3247      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3248        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2763  sub _get_next_token ($) { Line 3257  sub _get_next_token ($) {
3257          redo A;          redo A;
3258        } elsif ($self->{nc} == 0x0053 or # S        } elsif ($self->{nc} == 0x0053 or # S
3259                 $self->{nc} == 0x0073) { # s                 $self->{nc} == 0x0073) { # s
3260            
3261          $self->{state} = SYSTEM_STATE;          $self->{state} = SYSTEM_STATE;
3262          $self->{s_kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
3263                    
3264      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3265        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2777  sub _get_next_token ($) { Line 3272  sub _get_next_token ($) {
3272      }      }
3273        
3274          redo A;          redo A;
3275        } else {        } elsif ($self->{nc} == 0x0022 and # "
3276                   ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN or
3277                    $self->{ct}->{type} == PARAMETER_ENTITY_TOKEN)) {
3278                    
3279          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name');          $self->{state} = DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE;
3280          $self->{ct}->{quirks} = 1;          $self->{ct}->{value} = ''; # ENTITY
3281            
3282        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3283          $self->{line_prev} = $self->{line};
3284          $self->{column_prev} = $self->{column};
3285          $self->{column}++;
3286          $self->{nc}
3287              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3288        } else {
3289          $self->{set_nc}->($self);
3290        }
3291      
3292            redo A;
3293          } elsif ($self->{nc} == 0x0027 and # '
3294                   ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN or
3295                    $self->{ct}->{type} == PARAMETER_ENTITY_TOKEN)) {
3296            
3297            $self->{state} = DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE;
3298            $self->{ct}->{value} = ''; # ENTITY
3299            
3300        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3301          $self->{line_prev} = $self->{line};
3302          $self->{column_prev} = $self->{column};
3303          $self->{column}++;
3304          $self->{nc}
3305              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3306        } else {
3307          $self->{set_nc}->($self);
3308        }
3309      
3310            redo A;
3311          } elsif ($self->{is_xml} and
3312                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3313                   $self->{nc} == 0x005B) { # [
3314            
3315            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3316            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3317            $self->{in_subset} = 1;
3318            
3319        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3320          $self->{line_prev} = $self->{line};
3321          $self->{column_prev} = $self->{column};
3322          $self->{column}++;
3323          $self->{nc}
3324              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3325        } else {
3326          $self->{set_nc}->($self);
3327        }
3328      
3329            return  ($self->{ct}); # DOCTYPE
3330            redo A;
3331          } else {
3332            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name'); ## TODO: type
3333    
3334            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3335              
3336              $self->{ct}->{quirks} = 1;
3337              $self->{state} = BOGUS_DOCTYPE_STATE;
3338            } else {
3339              
3340              $self->{state} = BOGUS_MD_STATE;
3341            }
3342    
         $self->{state} = BOGUS_DOCTYPE_STATE;  
3343                    
3344      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3345        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2804  sub _get_next_token ($) { Line 3361  sub _get_next_token ($) {
3361              0x0042, # B              0x0042, # B
3362              0x004C, # L              0x004C, # L
3363              0x0049, # I              0x0049, # I
3364            ]->[length $self->{s_kwd}] or            ]->[length $self->{kwd}] or
3365            $self->{nc} == [            $self->{nc} == [
3366              undef,              undef,
3367              0x0075, # u              0x0075, # u
3368              0x0062, # b              0x0062, # b
3369              0x006C, # l              0x006C, # l
3370              0x0069, # i              0x0069, # i
3371            ]->[length $self->{s_kwd}]) {            ]->[length $self->{kwd}]) {
3372                    
3373          ## Stay in the state.          ## Stay in the state.
3374          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
3375                    
3376      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3377        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2827  sub _get_next_token ($) { Line 3384  sub _get_next_token ($) {
3384      }      }
3385        
3386          redo A;          redo A;
3387        } elsif ((length $self->{s_kwd}) == 5 and        } elsif ((length $self->{kwd}) == 5 and
3388                 ($self->{nc} == 0x0043 or # C                 ($self->{nc} == 0x0043 or # C
3389                  $self->{nc} == 0x0063)) { # c                  $self->{nc} == 0x0063)) { # c
3390                    if ($self->{is_xml} and
3391                ($self->{kwd} ne 'PUBLI' or $self->{nc} == 0x0063)) { # c
3392              
3393              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
3394                              text => 'PUBLIC',
3395                              line => $self->{line_prev},
3396                              column => $self->{column_prev} - 4);
3397            } else {
3398              
3399            }
3400          $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;          $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
3401                    
3402      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 2845  sub _get_next_token ($) { Line 3411  sub _get_next_token ($) {
3411        
3412          redo A;          redo A;
3413        } else {        } else {
3414                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name', ## TODO: type
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name',  
3415                          line => $self->{line_prev},                          line => $self->{line_prev},
3416                          column => $self->{column_prev} + 1 - length $self->{s_kwd});                          column => $self->{column_prev} + 1 - length $self->{kwd});
3417          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3418              
3419          $self->{state} = BOGUS_DOCTYPE_STATE;            $self->{ct}->{quirks} = 1;
3420              $self->{state} = BOGUS_DOCTYPE_STATE;
3421            } else {
3422              
3423              $self->{state} = BOGUS_MD_STATE;
3424            }
3425          ## Reconsume.          ## Reconsume.
3426          redo A;          redo A;
3427        }        }
# Line 2863  sub _get_next_token ($) { Line 3433  sub _get_next_token ($) {
3433              0x0053, # S              0x0053, # S
3434              0x0054, # T              0x0054, # T
3435              0x0045, # E              0x0045, # E
3436            ]->[length $self->{s_kwd}] or            ]->[length $self->{kwd}] or
3437            $self->{nc} == [            $self->{nc} == [
3438              undef,              undef,
3439              0x0079, # y              0x0079, # y
3440              0x0073, # s              0x0073, # s
3441              0x0074, # t              0x0074, # t
3442              0x0065, # e              0x0065, # e
3443            ]->[length $self->{s_kwd}]) {            ]->[length $self->{kwd}]) {
3444                    
3445          ## Stay in the state.          ## Stay in the state.
3446          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
3447                    
3448      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3449        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2886  sub _get_next_token ($) { Line 3456  sub _get_next_token ($) {
3456      }      }
3457        
3458          redo A;          redo A;
3459        } elsif ((length $self->{s_kwd}) == 5 and        } elsif ((length $self->{kwd}) == 5 and
3460                 ($self->{nc} == 0x004D or # M                 ($self->{nc} == 0x004D or # M
3461                  $self->{nc} == 0x006D)) { # m                  $self->{nc} == 0x006D)) { # m
3462                    if ($self->{is_xml} and
3463                ($self->{kwd} ne 'SYSTE' or $self->{nc} == 0x006D)) { # m
3464              
3465              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
3466                              text => 'SYSTEM',
3467                              line => $self->{line_prev},
3468                              column => $self->{column_prev} - 4);
3469            } else {
3470              
3471            }
3472          $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;          $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
3473                    
3474      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 2904  sub _get_next_token ($) { Line 3483  sub _get_next_token ($) {
3483        
3484          redo A;          redo A;
3485        } else {        } else {
3486                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name', ## TODO: type
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name',  
3487                          line => $self->{line_prev},                          line => $self->{line_prev},
3488                          column => $self->{column_prev} + 1 - length $self->{s_kwd});                          column => $self->{column_prev} + 1 - length $self->{kwd});
3489          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3490              
3491          $self->{state} = BOGUS_DOCTYPE_STATE;            $self->{ct}->{quirks} = 1;
3492              $self->{state} = BOGUS_DOCTYPE_STATE;
3493            } else {
3494              
3495              $self->{state} = BOGUS_MD_STATE;
3496            }
3497          ## Reconsume.          ## Reconsume.
3498          redo A;          redo A;
3499        }        }
# Line 2963  sub _get_next_token ($) { Line 3546  sub _get_next_token ($) {
3546        
3547          redo A;          redo A;
3548        } elsif ($self->{nc} eq 0x003E) { # >        } elsif ($self->{nc} eq 0x003E) { # >
           
3549          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');
3550            
3551          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3552          $self->{s_kwd} = '';            
3553              $self->{state} = DATA_STATE;
3554              $self->{s_kwd} = '';
3555              $self->{ct}->{quirks} = 1;
3556            } else {
3557              
3558              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3559            }
3560            
3561                    
3562      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3563        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2979  sub _get_next_token ($) { Line 3569  sub _get_next_token ($) {
3569        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3570      }      }
3571        
3572            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3573          redo A;          redo A;
3574        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3575            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3576              
3577              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3578              $self->{state} = DATA_STATE;
3579              $self->{s_kwd} = '';
3580              $self->{ct}->{quirks} = 1;
3581            } else {
3582              
3583              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3584              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3585            }
3586                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');  
   
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3587          ## reconsume          ## reconsume
   
         $self->{ct}->{quirks} = 1;  
3588          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
3589          redo A;          redo A;
3590        } else {        } elsif ($self->{is_xml} and
3591                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3592                   $self->{nc} == 0x005B) { # [
3593                    
3594            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');
3595            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3596            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3597            $self->{in_subset} = 1;
3598            
3599        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3600          $self->{line_prev} = $self->{line};
3601          $self->{column_prev} = $self->{column};
3602          $self->{column}++;
3603          $self->{nc}
3604              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3605        } else {
3606          $self->{set_nc}->($self);
3607        }
3608      
3609            return  ($self->{ct}); # DOCTYPE
3610            redo A;
3611          } else {
3612          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC');
         $self->{ct}->{quirks} = 1;  
3613    
3614          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3615              
3616              $self->{ct}->{quirks} = 1;
3617              $self->{state} = BOGUS_DOCTYPE_STATE;
3618            } else {
3619              
3620              $self->{state} = BOGUS_MD_STATE;
3621            }
3622    
3623                    
3624      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3625        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3032  sub _get_next_token ($) { Line 3650  sub _get_next_token ($) {
3650        
3651          redo A;          redo A;
3652        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
3653          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3654    
3655          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3656          $self->{s_kwd} = '';            
3657              $self->{state} = DATA_STATE;
3658              $self->{s_kwd} = '';
3659              $self->{ct}->{quirks} = 1;
3660            } else {
3661              
3662              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3663            }
3664    
3665                    
3666      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3667        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3048  sub _get_next_token ($) { Line 3673  sub _get_next_token ($) {
3673        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3674      }      }
3675        
3676            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3677          redo A;          redo A;
3678        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
3679          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3680    
3681          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3682          $self->{s_kwd} = '';            
3683          ## reconsume            $self->{state} = DATA_STATE;
3684              $self->{s_kwd} = '';
3685          $self->{ct}->{quirks} = 1;            $self->{ct}->{quirks} = 1;
3686            } else {
3687              
3688              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3689            }
3690            
3691            ## Reconsume.
3692          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
3693          redo A;          redo A;
3694        } else {        } else {
3695                    
3696          $self->{ct}->{pubid} # DOCTYPE          $self->{ct}->{pubid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
3697          $self->{read_until}->($self->{ct}->{pubid}, q[">],          $self->{read_until}->($self->{ct}->{pubid}, q[">],
3698                                length $self->{ct}->{pubid});                                length $self->{ct}->{pubid});
3699    
# Line 3103  sub _get_next_token ($) { Line 3728  sub _get_next_token ($) {
3728        
3729          redo A;          redo A;
3730        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
3731          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3732    
3733          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3734          $self->{s_kwd} = '';            
3735              $self->{state} = DATA_STATE;
3736              $self->{s_kwd} = '';
3737              $self->{ct}->{quirks} = 1;
3738            } else {
3739              
3740              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3741            }
3742    
3743                    
3744      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3745        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3119  sub _get_next_token ($) { Line 3751  sub _get_next_token ($) {
3751        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3752      }      }
3753        
3754            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3755          redo A;          redo A;
3756        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
3757          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3758    
3759          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3760          $self->{s_kwd} = '';            
3761              $self->{state} = DATA_STATE;
3762              $self->{s_kwd} = '';
3763              $self->{ct}->{quirks} = 1;
3764            } else {
3765              
3766              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3767            }
3768          
3769          ## reconsume          ## reconsume
3770            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3771          redo A;          redo A;
3772        } else {        } else {
3773                    
3774          $self->{ct}->{pubid} # DOCTYPE          $self->{ct}->{pubid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
3775          $self->{read_until}->($self->{ct}->{pubid}, q['>],          $self->{read_until}->($self->{ct}->{pubid}, q['>],
3776                                length $self->{ct}->{pubid});                                length $self->{ct}->{pubid});
3777    
# Line 3175  sub _get_next_token ($) { Line 3807  sub _get_next_token ($) {
3807          redo A;          redo A;
3808        } elsif ($self->{nc} == 0x0022) { # "        } elsif ($self->{nc} == 0x0022) { # "
3809                    
3810          $self->{ct}->{sysid} = ''; # DOCTYPE          $self->{ct}->{sysid} = ''; # DOCTYPE/ENTITY/NOTATION
3811          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
3812                    
3813      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3191  sub _get_next_token ($) { Line 3823  sub _get_next_token ($) {
3823          redo A;          redo A;
3824        } elsif ($self->{nc} == 0x0027) { # '        } elsif ($self->{nc} == 0x0027) { # '
3825                    
3826          $self->{ct}->{sysid} = ''; # DOCTYPE          $self->{ct}->{sysid} = ''; # DOCTYPE/ENTITY/NOTATION
3827          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
3828                    
3829      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3206  sub _get_next_token ($) { Line 3838  sub _get_next_token ($) {
3838        
3839          redo A;          redo A;
3840        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3841            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3842              if ($self->{is_xml}) {
3843                
3844                $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
3845              } else {
3846                
3847              }
3848              $self->{state} = DATA_STATE;
3849              $self->{s_kwd} = '';
3850            } else {
3851              if ($self->{ct}->{type} == NOTATION_TOKEN) {
3852                
3853              } else {
3854                
3855                $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');            
3856              }
3857              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3858            }
3859                    
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3860                    
3861      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3862        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3220  sub _get_next_token ($) { Line 3868  sub _get_next_token ($) {
3868        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3869      }      }
3870        
3871            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         return  ($self->{ct}); # DOCTYPE  
   
3872          redo A;          redo A;
3873        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3874            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3875              
3876              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3877              
3878              $self->{state} = DATA_STATE;
3879              $self->{s_kwd} = '';
3880              $self->{ct}->{quirks} = 1;
3881            } else {
3882              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3883              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3884            }
3885                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');  
   
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3886          ## reconsume          ## reconsume
3887            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
3888          $self->{ct}->{quirks} = 1;          redo A;
3889          } elsif ($self->{is_xml} and
3890                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3891                   $self->{nc} == 0x005B) { # [
3892            
3893            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
3894            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3895            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3896            $self->{in_subset} = 1;
3897            
3898        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3899          $self->{line_prev} = $self->{line};
3900          $self->{column_prev} = $self->{column};
3901          $self->{column}++;
3902          $self->{nc}
3903              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3904        } else {
3905          $self->{set_nc}->($self);
3906        }
3907      
3908          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
3909          redo A;          redo A;
3910        } else {        } else {
           
3911          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC literal');
         $self->{ct}->{quirks} = 1;  
3912    
3913          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3914              
3915              $self->{ct}->{quirks} = 1;
3916              $self->{state} = BOGUS_DOCTYPE_STATE;
3917            } else {
3918              
3919              $self->{state} = BOGUS_MD_STATE;
3920            }
3921    
3922                    
3923      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3924        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3304  sub _get_next_token ($) { Line 3981  sub _get_next_token ($) {
3981        
3982          redo A;          redo A;
3983        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
3984          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3985                    
3986      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3987        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3320  sub _get_next_token ($) { Line 3994  sub _get_next_token ($) {
3994      }      }
3995        
3996    
3997          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3998          return  ($self->{ct}); # DOCTYPE            
3999              $self->{state} = DATA_STATE;
4000              $self->{s_kwd} = '';
4001              $self->{ct}->{quirks} = 1;
4002            } else {
4003              
4004              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4005            }
4006    
4007            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4008          redo A;          redo A;
4009        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4010            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4011              
4012              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
4013              $self->{state} = DATA_STATE;
4014              $self->{s_kwd} = '';
4015              $self->{ct}->{quirks} = 1;
4016            } else {
4017              
4018              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
4019              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4020            }
4021                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');  
   
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
4022          ## reconsume          ## reconsume
4023            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4024            redo A;
4025          } elsif ($self->{is_xml} and
4026                   $self->{ct}->{type} == DOCTYPE_TOKEN and
4027                   $self->{nc} == 0x005B) { # [
4028            
4029            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
4030    
4031          $self->{ct}->{quirks} = 1;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4032            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
4033            $self->{in_subset} = 1;
4034            
4035        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4036          $self->{line_prev} = $self->{line};
4037          $self->{column_prev} = $self->{column};
4038          $self->{column}++;
4039          $self->{nc}
4040              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4041        } else {
4042          $self->{set_nc}->($self);
4043        }
4044      
4045          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
4046          redo A;          redo A;
4047        } else {        } else {
           
4048          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM');
         $self->{ct}->{quirks} = 1;  
4049    
4050          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4051                        
4052              $self->{ct}->{quirks} = 1;
4053              $self->{state} = BOGUS_DOCTYPE_STATE;
4054            } else {
4055              
4056              $self->{state} = BOGUS_MD_STATE;
4057            }
4058    
4059                    
4060      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4061        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3371  sub _get_next_token ($) { Line 4085  sub _get_next_token ($) {
4085      }      }
4086        
4087          redo A;          redo A;
4088        } elsif ($self->{nc} == 0x003E) { # >        } elsif (not $self->{is_xml} and $self->{nc} == 0x003E) { # >
           
4089          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4090    
4091          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4092          $self->{s_kwd} = '';            
4093              $self->{state} = DATA_STATE;
4094              $self->{s_kwd} = '';
4095              $self->{ct}->{quirks} = 1;
4096            } else {
4097              
4098              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4099            }
4100            
4101                    
4102      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4103        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3388  sub _get_next_token ($) { Line 4109  sub _get_next_token ($) {
4109        $self->{set_nc}->($self);        $self->{set_nc}->($self);
4110      }      }
4111        
4112            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
4113          redo A;          redo A;
4114        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
4115          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4116    
4117          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4118          $self->{s_kwd} = '';            
4119              $self->{state} = DATA_STATE;
4120              $self->{s_kwd} = '';
4121              $self->{ct}->{quirks} = 1;
4122            } else {
4123              
4124              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4125            }
4126            
4127          ## reconsume          ## reconsume
4128            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
4129          redo A;          redo A;
4130        } else {        } else {
4131                    
4132          $self->{ct}->{sysid} # DOCTYPE          $self->{ct}->{sysid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
4133          $self->{read_until}->($self->{ct}->{sysid}, q[">],          $self->{read_until}->($self->{ct}->{sysid}, q[">],
4134                                length $self->{ct}->{sysid});                                length $self->{ct}->{sysid});
4135    
# Line 3442  sub _get_next_token ($) { Line 4163  sub _get_next_token ($) {
4163      }      }
4164        
4165          redo A;          redo A;
4166        } elsif ($self->{nc} == 0x003E) { # >        } elsif (not $self->{is_xml} and $self->{nc} == 0x003E) { # >
4167                    
4168          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4169    
# Line 3465  sub _get_next_token ($) { Line 4186  sub _get_next_token ($) {
4186    
4187          redo A;          redo A;
4188        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
4189          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4190    
4191          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4192          $self->{s_kwd} = '';            
4193          ## reconsume            $self->{state} = DATA_STATE;
4194              $self->{s_kwd} = '';
4195          $self->{ct}->{quirks} = 1;            $self->{ct}->{quirks} = 1;
4196          return  ($self->{ct}); # DOCTYPE          } else {
4197              
4198              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4199            }
4200    
4201            ## reconsume
4202            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4203          redo A;          redo A;
4204        } else {        } else {
4205                    
4206          $self->{ct}->{sysid} # DOCTYPE          $self->{ct}->{sysid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
4207          $self->{read_until}->($self->{ct}->{sysid}, q['>],          $self->{read_until}->($self->{ct}->{sysid}, q['>],
4208                                length $self->{ct}->{sysid});                                length $self->{ct}->{sysid});
4209    
# Line 3499  sub _get_next_token ($) { Line 4223  sub _get_next_token ($) {
4223        }        }
4224      } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {      } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
4225        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
4226                    if ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN) {
4227          ## Stay in the state            
4228              $self->{state} = BEFORE_NDATA_STATE;
4229            } else {
4230              
4231              ## Stay in the state
4232            }
4233                    
4234      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4235        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3514  sub _get_next_token ($) { Line 4243  sub _get_next_token ($) {
4243        
4244          redo A;          redo A;
4245        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
4246            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4247              
4248              $self->{state} = DATA_STATE;
4249              $self->{s_kwd} = '';
4250            } else {
4251              
4252              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4253            }
4254    
4255                    
4256          $self->{state} = DATA_STATE;      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4257          $self->{s_kwd} = '';        $self->{line_prev} = $self->{line};
4258          $self->{column_prev} = $self->{column};
4259          $self->{column}++;
4260          $self->{nc}
4261              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4262        } else {
4263          $self->{set_nc}->($self);
4264        }
4265      
4266            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4267            redo A;
4268          } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
4269                   ($self->{nc} == 0x004E or # N
4270                    $self->{nc} == 0x006E)) { # n
4271            
4272            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before NDATA'); ## TODO: type
4273            $self->{state} = NDATA_STATE;
4274            $self->{kwd} = chr $self->{nc};
4275                    
4276      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4277        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3528  sub _get_next_token ($) { Line 4283  sub _get_next_token ($) {
4283        $self->{set_nc}->($self);        $self->{set_nc}->($self);
4284      }      }
4285        
4286            redo A;
4287          } elsif ($self->{nc} == -1) {
4288            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4289              
4290              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
4291              $self->{state} = DATA_STATE;
4292              $self->{s_kwd} = '';
4293              $self->{ct}->{quirks} = 1;
4294            } else {
4295              
4296              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
4297              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4298            }
4299    
4300            ## reconsume
4301            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4302            redo A;
4303          } elsif ($self->{is_xml} and
4304                   $self->{ct}->{type} == DOCTYPE_TOKEN and
4305                   $self->{nc} == 0x005B) { # [
4306            
4307            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4308            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
4309            $self->{in_subset} = 1;
4310            
4311        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4312          $self->{line_prev} = $self->{line};
4313          $self->{column_prev} = $self->{column};
4314          $self->{column}++;
4315          $self->{nc}
4316              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4317        } else {
4318          $self->{set_nc}->($self);
4319        }
4320      
4321          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
4322            redo A;
4323          } else {
4324            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');
4325    
4326            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4327              
4328              #$self->{ct}->{quirks} = 1;
4329              $self->{state} = BOGUS_DOCTYPE_STATE;
4330            } else {
4331              
4332              $self->{state} = BOGUS_MD_STATE;
4333            }
4334    
4335            
4336        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4337          $self->{line_prev} = $self->{line};
4338          $self->{column_prev} = $self->{column};
4339          $self->{column}++;
4340          $self->{nc}
4341              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4342        } else {
4343          $self->{set_nc}->($self);
4344        }
4345      
4346            redo A;
4347          }
4348        } elsif ($self->{state} == BEFORE_NDATA_STATE) {
4349          if ($is_space->{$self->{nc}}) {
4350            
4351            ## Stay in the state.
4352            
4353        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4354          $self->{line_prev} = $self->{line};
4355          $self->{column_prev} = $self->{column};
4356          $self->{column}++;
4357          $self->{nc}
4358              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4359        } else {
4360          $self->{set_nc}->($self);
4361        }
4362      
4363            redo A;
4364          } elsif ($self->{nc} == 0x003E) { # >
4365            
4366            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4367            
4368        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4369          $self->{line_prev} = $self->{line};
4370          $self->{column_prev} = $self->{column};
4371          $self->{column}++;
4372          $self->{nc}
4373              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4374        } else {
4375          $self->{set_nc}->($self);
4376        }
4377      
4378            return  ($self->{ct}); # ENTITY
4379            redo A;
4380          } elsif ($self->{nc} == 0x004E or # N
4381                   $self->{nc} == 0x006E) { # n
4382            
4383            $self->{state} = NDATA_STATE;
4384            $self->{kwd} = chr $self->{nc};
4385            
4386        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4387          $self->{line_prev} = $self->{line};
4388          $self->{column_prev} = $self->{column};
4389          $self->{column}++;
4390          $self->{nc}
4391              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4392        } else {
4393          $self->{set_nc}->($self);
4394        }
4395      
4396          redo A;          redo A;
4397        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4398                    
4399          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
4400          $self->{state} = DATA_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
         $self->{s_kwd} = '';  
4401          ## reconsume          ## reconsume
4402            return  ($self->{ct}); # ENTITY
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
4403          redo A;          redo A;
4404        } else {        } else {
4405                    
4406          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');
4407          #$self->{ct}->{quirks} = 1;          $self->{state} = BOGUS_MD_STATE;
   
         $self->{state} = BOGUS_DOCTYPE_STATE;  
4408                    
4409      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4410        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3582  sub _get_next_token ($) { Line 4438  sub _get_next_token ($) {
4438          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
4439    
4440          redo A;          redo A;
4441          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
4442            
4443            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4444            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
4445            $self->{in_subset} = 1;
4446            
4447        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4448          $self->{line_prev} = $self->{line};
4449          $self->{column_prev} = $self->{column};
4450          $self->{column}++;
4451          $self->{nc}
4452              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4453        } else {
4454          $self->{set_nc}->($self);
4455        }
4456      
4457            return  ($self->{ct}); # DOCTYPE
4458            redo A;
4459        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4460                    
4461          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
# Line 3594  sub _get_next_token ($) { Line 4468  sub _get_next_token ($) {
4468        } else {        } else {
4469                    
4470          my $s = '';          my $s = '';
4471          $self->{read_until}->($s, q[>], 0);          $self->{read_until}->($s, q{>[}, 0);
4472    
4473          ## Stay in the state          ## Stay in the state
4474                    
# Line 3614  sub _get_next_token ($) { Line 4488  sub _get_next_token ($) {
4488        ## NOTE: "CDATA section state" in the spec is jointly implemented        ## NOTE: "CDATA section state" in the spec is jointly implemented
4489        ## by three states, |CDATA_SECTION_STATE|, |CDATA_SECTION_MSE1_STATE|,        ## by three states, |CDATA_SECTION_STATE|, |CDATA_SECTION_MSE1_STATE|,
4490        ## and |CDATA_SECTION_MSE2_STATE|.        ## and |CDATA_SECTION_MSE2_STATE|.
4491    
4492          ## XML5: "CDATA state".
4493                
4494        if ($self->{nc} == 0x005D) { # ]        if ($self->{nc} == 0x005D) { # ]
4495                    
# Line 3631  sub _get_next_token ($) { Line 4507  sub _get_next_token ($) {
4507        
4508          redo A;          redo A;
4509        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4510            if ($self->{is_xml}) {
4511              
4512              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no mse'); ## TODO: type
4513            } else {
4514              
4515            }
4516    
4517          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
4518          $self->{s_kwd} = '';          $self->{s_kwd} = '';
4519                    ## Reconsume.
     if ($self->{char_buffer_pos} < length $self->{char_buffer}) {  
       $self->{line_prev} = $self->{line};  
       $self->{column_prev} = $self->{column};  
       $self->{column}++;  
       $self->{nc}  
           = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);  
     } else {  
       $self->{set_nc}->($self);  
     }  
     
4520          if (length $self->{ct}->{data}) { # character          if (length $self->{ct}->{data}) { # character
4521                        
4522            return  ($self->{ct}); # character            return  ($self->{ct}); # character
# Line 3676  sub _get_next_token ($) { Line 4549  sub _get_next_token ($) {
4549    
4550        ## ISSUE: "text tokens" in spec.        ## ISSUE: "text tokens" in spec.
4551      } elsif ($self->{state} == CDATA_SECTION_MSE1_STATE) {      } elsif ($self->{state} == CDATA_SECTION_MSE1_STATE) {
4552          ## XML5: "CDATA bracket state".
4553    
4554        if ($self->{nc} == 0x005D) { # ]        if ($self->{nc} == 0x005D) { # ]
4555                    
4556          $self->{state} = CDATA_SECTION_MSE2_STATE;          $self->{state} = CDATA_SECTION_MSE2_STATE;
# Line 3693  sub _get_next_token ($) { Line 4568  sub _get_next_token ($) {
4568          redo A;          redo A;
4569        } else {        } else {
4570                    
4571            ## XML5: On EOF, "]" is not appended and the tokenizer switches to the data state.
4572          $self->{ct}->{data} .= ']';          $self->{ct}->{data} .= ']';
4573          $self->{state} = CDATA_SECTION_STATE;          $self->{state} = CDATA_SECTION_STATE; ## XML5: Stay in the state.
4574          ## Reconsume.          ## Reconsume.
4575          redo A;          redo A;
4576        }        }
4577      } elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) {      } elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) {
4578          ## XML5: "CDATA end state".
4579    
4580        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
4581          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
4582          $self->{s_kwd} = '';          $self->{s_kwd} = '';
# Line 3741  sub _get_next_token ($) { Line 4619  sub _get_next_token ($) {
4619                    
4620          $self->{ct}->{data} .= ']]'; # character          $self->{ct}->{data} .= ']]'; # character
4621          $self->{state} = CDATA_SECTION_STATE;          $self->{state} = CDATA_SECTION_STATE;
4622          ## Reconsume.          ## Reconsume. ## XML5: Emit.
4623          redo A;          redo A;
4624        }        }
4625      } elsif ($self->{state} == ENTITY_STATE) {      } elsif ($self->{state} == ENTITY_STATE) {
# Line 3750  sub _get_next_token ($) { Line 4628  sub _get_next_token ($) {
4628              0x003C => 1, 0x0026 => 1, -1 => 1, # <, &              0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
4629              $self->{entity_add} => 1,              $self->{entity_add} => 1,
4630            }->{$self->{nc}}) {            }->{$self->{nc}}) {
4631                    if ($self->{is_xml}) {
4632              
4633              $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero',
4634                              line => $self->{line_prev},
4635                              column => $self->{column_prev}
4636                                  + ($self->{nc} == -1 ? 1 : 0));
4637            } else {
4638              
4639              ## No error
4640            }
4641          ## Don't consume          ## Don't consume
         ## No error  
4642          ## Return nothing.          ## Return nothing.
4643          #          #
4644        } elsif ($self->{nc} == 0x0023) { # #        } elsif ($self->{nc} == 0x0023) { # #
4645                    
4646          $self->{state} = ENTITY_HASH_STATE;          $self->{state} = ENTITY_HASH_STATE;
4647          $self->{s_kwd} = '#';          $self->{kwd} = '#';
4648                    
4649      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4650        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3771  sub _get_next_token ($) { Line 4657  sub _get_next_token ($) {
4657      }      }
4658        
4659          redo A;          redo A;
4660        } elsif ((0x0041 <= $self->{nc} and        } elsif ($self->{is_xml} or
4661                   (0x0041 <= $self->{nc} and
4662                  $self->{nc} <= 0x005A) or # A..Z                  $self->{nc} <= 0x005A) or # A..Z
4663                 (0x0061 <= $self->{nc} and                 (0x0061 <= $self->{nc} and
4664                  $self->{nc} <= 0x007A)) { # a..z                  $self->{nc} <= 0x007A)) { # a..z
4665                    
4666          require Whatpm::_NamedEntityList;          require Whatpm::_NamedEntityList;
4667          $self->{state} = ENTITY_NAME_STATE;          $self->{state} = ENTITY_NAME_STATE;
4668          $self->{s_kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
4669          $self->{entity__value} = $self->{s_kwd};          $self->{entity__value} = $self->{kwd};
4670          $self->{entity__match} = 0;          $self->{entity__match} = 0;
4671                    
4672      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3825  sub _get_next_token ($) { Line 4712  sub _get_next_token ($) {
4712          redo A;          redo A;
4713        }        }
4714      } elsif ($self->{state} == ENTITY_HASH_STATE) {      } elsif ($self->{state} == ENTITY_HASH_STATE) {
4715        if ($self->{nc} == 0x0078 or # x        if ($self->{nc} == 0x0078) { # x
           $self->{nc} == 0x0058) { # X  
4716                    
4717          $self->{state} = HEXREF_X_STATE;          $self->{state} = HEXREF_X_STATE;
4718          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
4719            
4720        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4721          $self->{line_prev} = $self->{line};
4722          $self->{column_prev} = $self->{column};
4723          $self->{column}++;
4724          $self->{nc}
4725              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4726        } else {
4727          $self->{set_nc}->($self);
4728        }
4729      
4730            redo A;
4731          } elsif ($self->{nc} == 0x0058) { # X
4732            
4733            if ($self->{is_xml}) {
4734              $self->{parse_error}->(level => $self->{level}->{must}, type => 'uppercase hcro'); ## TODO: type
4735            }
4736            $self->{state} = HEXREF_X_STATE;
4737            $self->{kwd} .= chr $self->{nc};
4738                    
4739      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4740        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3846  sub _get_next_token ($) { Line 4751  sub _get_next_token ($) {
4751                 $self->{nc} <= 0x0039) { # 0..9                 $self->{nc} <= 0x0039) { # 0..9
4752                    
4753          $self->{state} = NCR_NUM_STATE;          $self->{state} = NCR_NUM_STATE;
4754          $self->{s_kwd} = $self->{nc} - 0x0030;          $self->{kwd} = $self->{nc} - 0x0030;
4755                    
4756      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4757        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3892  sub _get_next_token ($) { Line 4797  sub _get_next_token ($) {
4797        if (0x0030 <= $self->{nc} and        if (0x0030 <= $self->{nc} and
4798            $self->{nc} <= 0x0039) { # 0..9            $self->{nc} <= 0x0039) { # 0..9
4799                    
4800          $self->{s_kwd} *= 10;          $self->{kwd} *= 10;
4801          $self->{s_kwd} += $self->{nc} - 0x0030;          $self->{kwd} += $self->{nc} - 0x0030;
4802                    
4803          ## Stay in the state.          ## Stay in the state.
4804                    
# Line 3929  sub _get_next_token ($) { Line 4834  sub _get_next_token ($) {
4834          #          #
4835        }        }
4836    
4837        my $code = $self->{s_kwd};        my $code = $self->{kwd};
4838        my $l = $self->{line_prev};        my $l = $self->{line_prev};
4839        my $c = $self->{column_prev};        my $c = $self->{column_prev};
4840        if ($charref_map->{$code}) {        if ($charref_map->{$code}) {
# Line 3952  sub _get_next_token ($) { Line 4857  sub _get_next_token ($) {
4857          $self->{s_kwd} = '';          $self->{s_kwd} = '';
4858          ## Reconsume.          ## Reconsume.
4859          return  ({type => CHARACTER_TOKEN, data => chr $code,          return  ({type => CHARACTER_TOKEN, data => chr $code,
4860                      has_reference => 1,
4861                    line => $l, column => $c,                    line => $l, column => $c,
4862                   });                   });
4863          redo A;          redo A;
# Line 3971  sub _get_next_token ($) { Line 4877  sub _get_next_token ($) {
4877          # 0..9, A..F, a..f          # 0..9, A..F, a..f
4878                    
4879          $self->{state} = HEXREF_HEX_STATE;          $self->{state} = HEXREF_HEX_STATE;
4880          $self->{s_kwd} = 0;          $self->{kwd} = 0;
4881          ## Reconsume.          ## Reconsume.
4882          redo A;          redo A;
4883        } else {        } else {
# Line 3989  sub _get_next_token ($) { Line 4895  sub _get_next_token ($) {
4895            $self->{s_kwd} = '';            $self->{s_kwd} = '';
4896            ## Reconsume.            ## Reconsume.
4897            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
4898                      data => '&' . $self->{s_kwd},                      data => '&' . $self->{kwd},
4899                      line => $self->{line_prev},                      line => $self->{line_prev},
4900                      column => $self->{column_prev} - length $self->{s_kwd},                      column => $self->{column_prev} - length $self->{kwd},
4901                     });                     });
4902            redo A;            redo A;
4903          } else {          } else {
4904                        
4905            $self->{ca}->{value} .= '&' . $self->{s_kwd};            $self->{ca}->{value} .= '&' . $self->{kwd};
4906            $self->{state} = $self->{prev_state};            $self->{state} = $self->{prev_state};
4907            $self->{s_kwd} = '';            $self->{s_kwd} = '';
4908            ## Reconsume.            ## Reconsume.
# Line 4007  sub _get_next_token ($) { Line 4913  sub _get_next_token ($) {
4913        if (0x0030 <= $self->{nc} and $self->{nc} <= 0x0039) {        if (0x0030 <= $self->{nc} and $self->{nc} <= 0x0039) {
4914          # 0..9          # 0..9
4915                    
4916          $self->{s_kwd} *= 0x10;          $self->{kwd} *= 0x10;
4917          $self->{s_kwd} += $self->{nc} - 0x0030;          $self->{kwd} += $self->{nc} - 0x0030;
4918          ## Stay in the state.          ## Stay in the state.
4919                    
4920      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 4025  sub _get_next_token ($) { Line 4931  sub _get_next_token ($) {
4931        } elsif (0x0061 <= $self->{nc} and        } elsif (0x0061 <= $self->{nc} and
4932                 $self->{nc} <= 0x0066) { # a..f                 $self->{nc} <= 0x0066) { # a..f
4933                    
4934          $self->{s_kwd} *= 0x10;          $self->{kwd} *= 0x10;
4935          $self->{s_kwd} += $self->{nc} - 0x0060 + 9;          $self->{kwd} += $self->{nc} - 0x0060 + 9;
4936          ## Stay in the state.          ## Stay in the state.
4937                    
4938      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 4043  sub _get_next_token ($) { Line 4949  sub _get_next_token ($) {
4949        } elsif (0x0041 <= $self->{nc} and        } elsif (0x0041 <= $self->{nc} and
4950                 $self->{nc} <= 0x0046) { # A..F                 $self->{nc} <= 0x0046) { # A..F
4951                    
4952          $self->{s_kwd} *= 0x10;          $self->{kwd} *= 0x10;
4953          $self->{s_kwd} += $self->{nc} - 0x0040 + 9;          $self->{kwd} += $self->{nc} - 0x0040 + 9;
4954          ## Stay in the state.          ## Stay in the state.
4955                    
4956      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 4081  sub _get_next_token ($) { Line 4987  sub _get_next_token ($) {
4987          #          #
4988        }        }
4989    
4990        my $code = $self->{s_kwd};        my $code = $self->{kwd};
4991        my $l = $self->{line_prev};        my $l = $self->{line_prev};
4992        my $c = $self->{column_prev};        my $c = $self->{column_prev};
4993        if ($charref_map->{$code}) {        if ($charref_map->{$code}) {
# Line 4104  sub _get_next_token ($) { Line 5010  sub _get_next_token ($) {
5010          $self->{s_kwd} = '';          $self->{s_kwd} = '';
5011          ## Reconsume.          ## Reconsume.
5012          return  ({type => CHARACTER_TOKEN, data => chr $code,          return  ({type => CHARACTER_TOKEN, data => chr $code,
5013                      has_reference => 1,
5014                    line => $l, column => $c,                    line => $l, column => $c,
5015                   });                   });
5016          redo A;          redo A;
# Line 4117  sub _get_next_token ($) { Line 5024  sub _get_next_token ($) {
5024          redo A;          redo A;
5025        }        }
5026      } elsif ($self->{state} == ENTITY_NAME_STATE) {      } elsif ($self->{state} == ENTITY_NAME_STATE) {
5027        if (length $self->{s_kwd} < 30 and        if ((0x0041 <= $self->{nc} and # a
5028            ## NOTE: Some number greater than the maximum length of entity name             $self->{nc} <= 0x005A) or # x
5029            ((0x0041 <= $self->{nc} and # a            (0x0061 <= $self->{nc} and # a
5030              $self->{nc} <= 0x005A) or # x             $self->{nc} <= 0x007A) or # z
5031             (0x0061 <= $self->{nc} and # a            (0x0030 <= $self->{nc} and # 0
5032              $self->{nc} <= 0x007A) or # z             $self->{nc} <= 0x0039) or # 9
5033             (0x0030 <= $self->{nc} and # 0            $self->{nc} == 0x003B or # ;
5034              $self->{nc} <= 0x0039) or # 9            ($self->{is_xml} and
5035             $self->{nc} == 0x003B)) { # ;             not ($is_space->{$self->{nc}} or
5036                    {
5037                      0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
5038                      $self->{entity_add} => 1,
5039                    }->{$self->{nc}}))) {
5040          our $EntityChar;          our $EntityChar;
5041          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
5042          if (defined $EntityChar->{$self->{s_kwd}}) {          if (defined $EntityChar->{$self->{kwd}} or
5043                $self->{ge}->{$self->{kwd}}) {
5044            if ($self->{nc} == 0x003B) { # ;            if ($self->{nc} == 0x003B) { # ;
5045                            if (defined $self->{ge}->{$self->{kwd}}) {
5046              $self->{entity__value} = $EntityChar->{$self->{s_kwd}};                if ($self->{ge}->{$self->{kwd}}->{only_text}) {
5047                    
5048                    $self->{entity__value} = $self->{ge}->{$self->{kwd}}->{value};
5049                  } else {
5050                    if (defined $self->{ge}->{$self->{kwd}}->{notation}) {
5051                      
5052                      $self->{parse_error}->(level => $self->{level}->{must}, type => 'unparsed entity', ## TODO: type
5053                                      value => $self->{kwd});
5054                    } else {
5055                      
5056                    }
5057                    $self->{entity__value} = '&' . $self->{kwd}; ## TODO: expand
5058                  }
5059                } else {
5060                  if ($self->{is_xml}) {
5061                    
5062                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'entity not declared', ## TODO: type
5063                                    value => $self->{kwd},
5064                                    level => {
5065                                              'amp;' => $self->{level}->{warn},
5066                                              'quot;' => $self->{level}->{warn},
5067                                              'lt;' => $self->{level}->{warn},
5068                                              'gt;' => $self->{level}->{warn},
5069                                              'apos;' => $self->{level}->{warn},
5070                                             }->{$self->{kwd}} ||
5071                                             $self->{level}->{must});
5072                  } else {
5073                    
5074                  }
5075                  $self->{entity__value} = $EntityChar->{$self->{kwd}};
5076                }
5077              $self->{entity__match} = 1;              $self->{entity__match} = 1;
5078                            
5079      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 4147  sub _get_next_token ($) { Line 5089  sub _get_next_token ($) {
5089              #              #
5090            } else {            } else {
5091                            
5092              $self->{entity__value} = $EntityChar->{$self->{s_kwd}};              $self->{entity__value} = $EntityChar->{$self->{kwd}};
5093              $self->{entity__match} = -1;              $self->{entity__match} = -1;
5094              ## Stay in the state.              ## Stay in the state.
5095                            
# Line 4195  sub _get_next_token ($) { Line 5137  sub _get_next_token ($) {
5137          if ($self->{prev_state} != DATA_STATE and # in attribute          if ($self->{prev_state} != DATA_STATE and # in attribute
5138              $self->{entity__match} < -1) {              $self->{entity__match} < -1) {
5139                        
5140            $data = '&' . $self->{s_kwd};            $data = '&' . $self->{kwd};
5141            #            #
5142          } else {          } else {
5143                        
# Line 4207  sub _get_next_token ($) { Line 5149  sub _get_next_token ($) {
5149                    
5150          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero',
5151                          line => $self->{line_prev},                          line => $self->{line_prev},
5152                          column => $self->{column_prev} - length $self->{s_kwd});                          column => $self->{column_prev} - length $self->{kwd});
5153          $data = '&' . $self->{s_kwd};          $data = '&' . $self->{kwd};
5154          #          #
5155        }        }
5156        
# Line 4229  sub _get_next_token ($) { Line 5171  sub _get_next_token ($) {
5171          ## Reconsume.          ## Reconsume.
5172          return  ({type => CHARACTER_TOKEN,          return  ({type => CHARACTER_TOKEN,
5173                    data => $data,                    data => $data,
5174                      has_reference => $has_ref,
5175                    line => $self->{line_prev},                    line => $self->{line_prev},
5176                    column => $self->{column_prev} + 1 - length $self->{s_kwd},                    column => $self->{column_prev} + 1 - length $self->{kwd},
5177                   });                   });
5178          redo A;          redo A;
5179        } else {        } else {
# Line 4242  sub _get_next_token ($) { Line 5185  sub _get_next_token ($) {
5185          ## Reconsume.          ## Reconsume.
5186          redo A;          redo A;
5187        }        }
5188    
5189        ## XML-only states
5190    
5191        } elsif ($self->{state} == PI_STATE) {
5192          ## XML5: "Pi state" and "DOCTYPE pi state".
5193    
5194          if ($is_space->{$self->{nc}} or
5195              $self->{nc} == 0x003F or # ?
5196              $self->{nc} == -1) {
5197            ## XML5: U+003F: "pi state": Same as "Anything else"; "DOCTYPE
5198            ## pi state": Switch to the "DOCTYPE pi after state".  EOF:
5199            ## "DOCTYPE pi state": Parse error, switch to the "data
5200            ## state".
5201            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare pio', ## TODO: type
5202                            line => $self->{line_prev},
5203                            column => $self->{column_prev}
5204                                - 1 * ($self->{nc} != -1));
5205            $self->{state} = BOGUS_COMMENT_STATE;
5206            ## Reconsume.
5207            $self->{ct} = {type => COMMENT_TOKEN,
5208                           data => '?',
5209                           line => $self->{line_prev},
5210                           column => $self->{column_prev}
5211                               - 1 * ($self->{nc} != -1),
5212                          };
5213            redo A;
5214          } else {
5215            ## XML5: "DOCTYPE pi state": Stay in the state.
5216            $self->{ct} = {type => PI_TOKEN,
5217                           target => chr $self->{nc},
5218                           data => '',
5219                           line => $self->{line_prev},
5220                           column => $self->{column_prev} - 1,
5221                          };
5222            $self->{state} = PI_TARGET_STATE;
5223            
5224        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5225          $self->{line_prev} = $self->{line};
5226          $self->{column_prev} = $self->{column};
5227          $self->{column}++;
5228          $self->{nc}
5229              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5230        } else {
5231          $self->{set_nc}->($self);
5232        }
5233      
5234            redo A;
5235          }
5236        } elsif ($self->{state} == PI_TARGET_STATE) {
5237          if ($is_space->{$self->{nc}}) {
5238            $self->{state} = PI_TARGET_AFTER_STATE;
5239            
5240        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5241          $self->{line_prev} = $self->{line};
5242          $self->{column_prev} = $self->{column};
5243          $self->{column}++;
5244          $self->{nc}
5245              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5246        } else {
5247          $self->{set_nc}->($self);
5248        }
5249      
5250            redo A;
5251          } elsif ($self->{nc} == -1) {
5252            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type
5253            if ($self->{in_subset}) {
5254              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5255            } else {
5256              $self->{state} = DATA_STATE;
5257              $self->{s_kwd} = '';
5258            }
5259            ## Reconsume.
5260            return  ($self->{ct}); # pi
5261            redo A;
5262          } elsif ($self->{nc} == 0x003F) { # ?
5263            $self->{state} = PI_AFTER_STATE;
5264            
5265        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5266          $self->{line_prev} = $self->{line};
5267          $self->{column_prev} = $self->{column};
5268          $self->{column}++;
5269          $self->{nc}
5270              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5271        } else {
5272          $self->{set_nc}->($self);
5273        }
5274      
5275            redo A;
5276          } else {
5277            ## XML5: typo ("tag name" -> "target")
5278            $self->{ct}->{target} .= chr $self->{nc}; # pi
5279            
5280        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5281          $self->{line_prev} = $self->{line};
5282          $self->{column_prev} = $self->{column};
5283          $self->{column}++;
5284          $self->{nc}
5285              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5286        } else {
5287          $self->{set_nc}->($self);
5288        }
5289      
5290            redo A;
5291          }
5292        } elsif ($self->{state} == PI_TARGET_AFTER_STATE) {
5293          if ($is_space->{$self->{nc}}) {
5294            ## Stay in the state.
5295            
5296        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5297          $self->{line_prev} = $self->{line};
5298          $self->{column_prev} = $self->{column};
5299          $self->{column}++;
5300          $self->{nc}
5301              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5302        } else {
5303          $self->{set_nc}->($self);
5304        }
5305      
5306            redo A;
5307          } else {
5308            $self->{state} = PI_DATA_STATE;
5309            ## Reprocess.
5310            redo A;
5311          }
5312        } elsif ($self->{state} == PI_DATA_STATE) {
5313          if ($self->{nc} == 0x003F) { # ?
5314            $self->{state} = PI_DATA_AFTER_STATE;
5315            
5316        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5317          $self->{line_prev} = $self->{line};
5318          $self->{column_prev} = $self->{column};
5319          $self->{column}++;
5320          $self->{nc}
5321              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5322        } else {
5323          $self->{set_nc}->($self);
5324        }
5325      
5326            redo A;
5327          } elsif ($self->{nc} == -1) {
5328            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type
5329            if ($self->{in_subset}) {
5330              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state"
5331            } else {
5332              $self->{state} = DATA_STATE;
5333              $self->{s_kwd} = '';
5334            }
5335            ## Reprocess.
5336            return  ($self->{ct}); # pi
5337            redo A;
5338          } else {
5339            $self->{ct}->{data} .= chr $self->{nc}; # pi
5340            $self->{read_until}->($self->{ct}->{data}, q[?],
5341                                  length $self->{ct}->{data});
5342            ## Stay in the state.
5343            
5344        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5345          $self->{line_prev} = $self->{line};
5346          $self->{column_prev} = $self->{column};
5347          $self->{column}++;
5348          $self->{nc}
5349              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5350        } else {
5351          $self->{set_nc}->($self);
5352        }
5353      
5354            ## Reprocess.
5355            redo A;
5356          }
5357        } elsif ($self->{state} == PI_AFTER_STATE) {
5358          ## XML5: Part of "Pi after state".
5359    
5360          if ($self->{nc} == 0x003E) { # >
5361            if ($self->{in_subset}) {
5362              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5363            } else {
5364              $self->{state} = DATA_STATE;
5365              $self->{s_kwd} = '';
5366            }
5367            
5368        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5369          $self->{line_prev} = $self->{line};
5370          $self->{column_prev} = $self->{column};
5371          $self->{column}++;
5372          $self->{nc}
5373              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5374        } else {
5375          $self->{set_nc}->($self);
5376        }
5377      
5378            return  ($self->{ct}); # pi
5379            redo A;
5380          } elsif ($self->{nc} == 0x003F) { # ?
5381            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no s after target', ## TODO: type
5382                            line => $self->{line_prev},
5383                            column => $self->{column_prev}); ## XML5: no error
5384            $self->{ct}->{data} .= '?';
5385            $self->{state} = PI_DATA_AFTER_STATE;
5386            
5387        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5388          $self->{line_prev} = $self->{line};
5389          $self->{column_prev} = $self->{column};
5390          $self->{column}++;
5391          $self->{nc}
5392              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5393        } else {
5394          $self->{set_nc}->($self);
5395        }
5396      
5397            redo A;
5398          } else {
5399            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no s after target', ## TODO: type
5400                            line => $self->{line_prev},
5401                            column => $self->{column_prev}
5402                                + 1 * ($self->{nc} == -1)); ## XML5: no error
5403            $self->{ct}->{data} .= '?'; ## XML5: not appended
5404            $self->{state} = PI_DATA_STATE;
5405            ## Reprocess.
5406            redo A;
5407          }
5408        } elsif ($self->{state} == PI_DATA_AFTER_STATE) {
5409          ## XML5: Same as "pi after state" and "DOCTYPE pi after state".
5410    
5411          if ($self->{nc} == 0x003E) { # >
5412            if ($self->{in_subset}) {
5413              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5414            } else {
5415              $self->{state} = DATA_STATE;
5416              $self->{s_kwd} = '';
5417            }
5418            
5419        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5420          $self->{line_prev} = $self->{line};
5421          $self->{column_prev} = $self->{column};
5422          $self->{column}++;
5423          $self->{nc}
5424              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5425        } else {
5426          $self->{set_nc}->($self);
5427        }
5428      
5429            return  ($self->{ct}); # pi
5430            redo A;
5431          } elsif ($self->{nc} == 0x003F) { # ?
5432            $self->{ct}->{data} .= '?';
5433            ## Stay in the state.
5434            
5435        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5436          $self->{line_prev} = $self->{line};
5437          $self->{column_prev} = $self->{column};
5438          $self->{column}++;
5439          $self->{nc}
5440              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5441        } else {
5442          $self->{set_nc}->($self);
5443        }
5444      
5445            redo A;
5446          } else {
5447            $self->{ct}->{data} .= '?'; ## XML5: not appended
5448            $self->{state} = PI_DATA_STATE;
5449            ## Reprocess.
5450            redo A;
5451          }
5452    
5453        } elsif ($self->{state} == DOCTYPE_INTERNAL_SUBSET_STATE) {
5454          if ($self->{nc} == 0x003C) { # <
5455            $self->{state} = DOCTYPE_TAG_STATE;
5456            
5457        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5458          $self->{line_prev} = $self->{line};
5459          $self->{column_prev} = $self->{column};
5460          $self->{column}++;
5461          $self->{nc}
5462              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5463        } else {
5464          $self->{set_nc}->($self);
5465        }
5466      
5467            redo A;
5468          } elsif ($self->{nc} == 0x0025) { # %
5469            ## XML5: Not defined yet.
5470    
5471            ## TODO:
5472            
5473        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5474          $self->{line_prev} = $self->{line};
5475          $self->{column_prev} = $self->{column};
5476          $self->{column}++;
5477          $self->{nc}
5478              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5479        } else {
5480          $self->{set_nc}->($self);
5481        }
5482      
5483            redo A;
5484          } elsif ($self->{nc} == 0x005D) { # ]
5485            delete $self->{in_subset};
5486            $self->{state} = DOCTYPE_INTERNAL_SUBSET_AFTER_STATE;
5487            
5488        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5489          $self->{line_prev} = $self->{line};
5490          $self->{column_prev} = $self->{column};
5491          $self->{column}++;
5492          $self->{nc}
5493              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5494        } else {
5495          $self->{set_nc}->($self);
5496        }
5497      
5498            redo A;
5499          } elsif ($is_space->{$self->{nc}}) {
5500            ## Stay in the state.
5501            
5502        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5503          $self->{line_prev} = $self->{line};
5504          $self->{column_prev} = $self->{column};
5505          $self->{column}++;
5506          $self->{nc}
5507              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5508        } else {
5509          $self->{set_nc}->($self);
5510        }
5511      
5512            redo A;
5513          } elsif ($self->{nc} == -1) {
5514            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed internal subset'); ## TODO: type
5515            delete $self->{in_subset};
5516            $self->{state} = DATA_STATE;
5517            $self->{s_kwd} = '';
5518            ## Reconsume.
5519            return  ({type => END_OF_DOCTYPE_TOKEN});
5520            redo A;
5521          } else {
5522            unless ($self->{internal_subset_tainted}) {
5523              ## XML5: No parse error.
5524              $self->{parse_error}->(level => $self->{level}->{must}, type => 'string in internal subset');
5525              $self->{internal_subset_tainted} = 1;
5526            }
5527            ## Stay in the state.
5528            
5529        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5530          $self->{line_prev} = $self->{line};
5531          $self->{column_prev} = $self->{column};
5532          $self->{column}++;
5533          $self->{nc}
5534              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5535        } else {
5536          $self->{set_nc}->($self);
5537        }
5538      
5539            redo A;
5540          }
5541        } elsif ($self->{state} == DOCTYPE_INTERNAL_SUBSET_AFTER_STATE) {
5542          if ($self->{nc} == 0x003E) { # >
5543            $self->{state} = DATA_STATE;
5544            $self->{s_kwd} = '';
5545            
5546        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5547          $self->{line_prev} = $self->{line};
5548          $self->{column_prev} = $self->{column};
5549          $self->{column}++;
5550          $self->{nc}
5551              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5552        } else {
5553          $self->{set_nc}->($self);
5554        }
5555      
5556            return  ({type => END_OF_DOCTYPE_TOKEN});
5557            redo A;
5558          } elsif ($self->{nc} == -1) {
5559            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
5560            $self->{state} = DATA_STATE;
5561            $self->{s_kwd} = '';
5562            ## Reconsume.
5563            return  ({type => END_OF_DOCTYPE_TOKEN});
5564            redo A;
5565          } else {
5566            ## XML5: No parse error and stay in the state.
5567            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after internal subset'); ## TODO: type
5568    
5569            $self->{state} = BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE;
5570            
5571        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5572          $self->{line_prev} = $self->{line};
5573          $self->{column_prev} = $self->{column};
5574          $self->{column}++;
5575          $self->{nc}
5576              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5577        } else {
5578          $self->{set_nc}->($self);
5579        }
5580      
5581            redo A;
5582          }
5583        } elsif ($self->{state} == BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE) {
5584          if ($self->{nc} == 0x003E) { # >
5585            $self->{state} = DATA_STATE;
5586            $self->{s_kwd} = '';
5587            
5588        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5589          $self->{line_prev} = $self->{line};
5590          $self->{column_prev} = $self->{column};
5591          $self->{column}++;
5592          $self->{nc}
5593              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5594        } else {
5595          $self->{set_nc}->($self);
5596        }
5597      
5598            return  ({type => END_OF_DOCTYPE_TOKEN});
5599            redo A;
5600          } elsif ($self->{nc} == -1) {
5601            $self->{state} = DATA_STATE;
5602            $self->{s_kwd} = '';
5603            ## Reconsume.
5604            return  ({type => END_OF_DOCTYPE_TOKEN});
5605            redo A;
5606          } else {
5607            ## Stay in the state.
5608            
5609        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5610          $self->{line_prev} = $self->{line};
5611          $self->{column_prev} = $self->{column};
5612          $self->{column}++;
5613          $self->{nc}
5614              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5615        } else {
5616          $self->{set_nc}->($self);
5617        }
5618      
5619            redo A;
5620          }
5621        } elsif ($self->{state} == DOCTYPE_TAG_STATE) {
5622          if ($self->{nc} == 0x0021) { # !
5623            $self->{state} = DOCTYPE_MARKUP_DECLARATION_OPEN_STATE;
5624            
5625        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5626          $self->{line_prev} = $self->{line};
5627          $self->{column_prev} = $self->{column};
5628          $self->{column}++;
5629          $self->{nc}
5630              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5631        } else {
5632          $self->{set_nc}->($self);
5633        }
5634      
5635            redo A;
5636          } elsif ($self->{nc} == 0x003F) { # ?
5637            $self->{state} = PI_STATE;
5638            
5639        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5640          $self->{line_prev} = $self->{line};
5641          $self->{column_prev} = $self->{column};
5642          $self->{column}++;
5643          $self->{nc}
5644              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5645        } else {
5646          $self->{set_nc}->($self);
5647        }
5648      
5649            redo A;
5650          } elsif ($self->{nc} == -1) {
5651            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago');
5652            $self->{state} = DATA_STATE;
5653            $self->{s_kwd} = '';
5654            ## Reconsume.
5655            redo A;
5656          } else {
5657            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago', ## XML5: Not a parse error.
5658                            line => $self->{line_prev},
5659                            column => $self->{column_prev});
5660            $self->{state} = BOGUS_COMMENT_STATE;
5661            $self->{ct} = {type => COMMENT_TOKEN,
5662                           data => '',
5663                          }; ## NOTE: Will be discarded.
5664            
5665        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5666          $self->{line_prev} = $self->{line};
5667          $self->{column_prev} = $self->{column};
5668          $self->{column}++;
5669          $self->{nc}
5670              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5671        } else {
5672          $self->{set_nc}->($self);
5673        }
5674      
5675            redo A;
5676          }
5677        } elsif ($self->{state} == DOCTYPE_MARKUP_DECLARATION_OPEN_STATE) {
5678          ## XML5: "DOCTYPE markup declaration state".
5679          
5680          if ($self->{nc} == 0x002D) { # -
5681            $self->{state} = MD_HYPHEN_STATE;
5682            
5683        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5684          $self->{line_prev} = $self->{line};
5685          $self->{column_prev} = $self->{column};
5686          $self->{column}++;
5687          $self->{nc}
5688              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5689        } else {
5690          $self->{set_nc}->($self);
5691        }
5692      
5693            redo A;
5694          } elsif ($self->{nc} == 0x0045 or # E
5695                   $self->{nc} == 0x0065) { # e
5696            $self->{state} = MD_E_STATE;
5697            $self->{kwd} = chr $self->{nc};
5698            
5699        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5700          $self->{line_prev} = $self->{line};
5701          $self->{column_prev} = $self->{column};
5702          $self->{column}++;
5703          $self->{nc}
5704              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5705        } else {
5706          $self->{set_nc}->($self);
5707        }
5708      
5709            redo A;
5710          } elsif ($self->{nc} == 0x0041 or # A
5711                   $self->{nc} == 0x0061) { # a
5712            $self->{state} = MD_ATTLIST_STATE;
5713            $self->{kwd} = chr $self->{nc};
5714            
5715        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5716          $self->{line_prev} = $self->{line};
5717          $self->{column_prev} = $self->{column};
5718          $self->{column}++;
5719          $self->{nc}
5720              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5721        } else {
5722          $self->{set_nc}->($self);
5723        }
5724      
5725            redo A;
5726          } elsif ($self->{nc} == 0x004E or # N
5727                   $self->{nc} == 0x006E) { # n
5728            $self->{state} = MD_NOTATION_STATE;
5729            $self->{kwd} = chr $self->{nc};
5730            
5731        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5732          $self->{line_prev} = $self->{line};
5733          $self->{column_prev} = $self->{column};
5734          $self->{column}++;
5735          $self->{nc}
5736              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5737        } else {
5738          $self->{set_nc}->($self);
5739        }
5740      
5741            redo A;
5742          } else {
5743            #
5744          }
5745          
5746          ## XML5: No parse error.
5747          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5748                          line => $self->{line_prev},
5749                          column => $self->{column_prev} - 1);
5750          ## Reconsume.
5751          $self->{state} = BOGUS_COMMENT_STATE;
5752          $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded.
5753          redo A;
5754        } elsif ($self->{state} == MD_E_STATE) {
5755          if ($self->{nc} == 0x004E or # N
5756              $self->{nc} == 0x006E) { # n
5757            $self->{state} = MD_ENTITY_STATE;
5758            $self->{kwd} .= chr $self->{nc};
5759            
5760        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5761          $self->{line_prev} = $self->{line};
5762          $self->{column_prev} = $self->{column};
5763          $self->{column}++;
5764          $self->{nc}
5765              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5766        } else {
5767          $self->{set_nc}->($self);
5768        }
5769      
5770            redo A;
5771          } elsif ($self->{nc} == 0x004C or # L
5772                   $self->{nc} == 0x006C) { # l
5773            ## XML5: <!ELEMENT> not supported.
5774            $self->{state} = MD_ELEMENT_STATE;
5775            $self->{kwd} .= chr $self->{nc};
5776            
5777        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5778          $self->{line_prev} = $self->{line};
5779          $self->{column_prev} = $self->{column};
5780          $self->{column}++;
5781          $self->{nc}
5782              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5783        } else {
5784          $self->{set_nc}->($self);
5785        }
5786      
5787            redo A;
5788          } else {
5789            ## XML5: No parse error.
5790            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5791                            line => $self->{line_prev},
5792                            column => $self->{column_prev} - 2
5793                                + 1 * ($self->{nc} == -1));
5794            ## Reconsume.
5795            $self->{state} = BOGUS_COMMENT_STATE;
5796            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5797            redo A;
5798          }
5799        } elsif ($self->{state} == MD_ENTITY_STATE) {
5800          if ($self->{nc} == [
5801                undef,
5802                undef,
5803                0x0054, # T
5804                0x0049, # I
5805                0x0054, # T
5806              ]->[length $self->{kwd}] or
5807              $self->{nc} == [
5808                undef,
5809                undef,
5810                0x0074, # t
5811                0x0069, # i
5812                0x0074, # t
5813              ]->[length $self->{kwd}]) {
5814            ## Stay in the state.
5815            $self->{kwd} .= chr $self->{nc};
5816            
5817        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5818          $self->{line_prev} = $self->{line};
5819          $self->{column_prev} = $self->{column};
5820          $self->{column}++;
5821          $self->{nc}
5822              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5823        } else {
5824          $self->{set_nc}->($self);
5825        }
5826      
5827            redo A;
5828          } elsif ((length $self->{kwd}) == 5 and
5829                   ($self->{nc} == 0x0059 or # Y
5830                    $self->{nc} == 0x0079)) { # y
5831            if ($self->{kwd} ne 'ENTIT' or $self->{nc} == 0x0079) {
5832              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
5833                              text => 'ENTITY',
5834                              line => $self->{line_prev},
5835                              column => $self->{column_prev} - 4);
5836            }
5837            $self->{ct} = {type => GENERAL_ENTITY_TOKEN, name => '',
5838                           line => $self->{line_prev},
5839                           column => $self->{column_prev} - 6};
5840            $self->{state} = DOCTYPE_MD_STATE;
5841            
5842        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5843          $self->{line_prev} = $self->{line};
5844          $self->{column_prev} = $self->{column};
5845          $self->{column}++;
5846          $self->{nc}
5847              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5848        } else {
5849          $self->{set_nc}->($self);
5850        }
5851      
5852            redo A;
5853          } else {
5854            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5855                            line => $self->{line_prev},
5856                            column => $self->{column_prev} - 1
5857                                - (length $self->{kwd})
5858                                + 1 * ($self->{nc} == -1));
5859            $self->{state} = BOGUS_COMMENT_STATE;
5860            ## Reconsume.
5861            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5862            redo A;
5863          }
5864        } elsif ($self->{state} == MD_ELEMENT_STATE) {
5865          if ($self->{nc} == [
5866               undef,
5867               undef,
5868               0x0045, # E
5869               0x004D, # M
5870               0x0045, # E
5871               0x004E, # N
5872              ]->[length $self->{kwd}] or
5873              $self->{nc} == [
5874               undef,
5875               undef,
5876               0x0065, # e
5877               0x006D, # m
5878               0x0065, # e
5879               0x006E, # n
5880              ]->[length $self->{kwd}]) {
5881            ## Stay in the state.
5882            $self->{kwd} .= chr $self->{nc};
5883            
5884        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5885          $self->{line_prev} = $self->{line};
5886          $self->{column_prev} = $self->{column};
5887          $self->{column}++;
5888          $self->{nc}
5889              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5890        } else {
5891          $self->{set_nc}->($self);
5892        }
5893      
5894            redo A;
5895          } elsif ((length $self->{kwd}) == 6 and
5896                   ($self->{nc} == 0x0054 or # T
5897                    $self->{nc} == 0x0074)) { # t
5898            if ($self->{kwd} ne 'ELEMEN' or $self->{nc} == 0x0074) {
5899              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
5900                              text => 'ELEMENT',
5901                              line => $self->{line_prev},
5902                              column => $self->{column_prev} - 5);
5903            }
5904            $self->{ct} = {type => ELEMENT_TOKEN, name => '',
5905                           line => $self->{line_prev},
5906                           column => $self->{column_prev} - 7};
5907            $self->{state} = DOCTYPE_MD_STATE;
5908            
5909        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5910          $self->{line_prev} = $self->{line};
5911          $self->{column_prev} = $self->{column};
5912          $self->{column}++;
5913          $self->{nc}
5914              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5915        } else {
5916          $self->{set_nc}->($self);
5917        }
5918      
5919            redo A;
5920          } else {
5921            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5922                            line => $self->{line_prev},
5923                            column => $self->{column_prev} - 1
5924                                - (length $self->{kwd})
5925                                + 1 * ($self->{nc} == -1));
5926            $self->{state} = BOGUS_COMMENT_STATE;
5927            ## Reconsume.
5928            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5929            redo A;
5930          }
5931        } elsif ($self->{state} == MD_ATTLIST_STATE) {
5932          if ($self->{nc} == [
5933               undef,
5934               0x0054, # T
5935               0x0054, # T
5936               0x004C, # L
5937               0x0049, # I
5938               0x0053, # S
5939              ]->[length $self->{kwd}] or
5940              $self->{nc} == [
5941               undef,
5942               0x0074, # t
5943               0x0074, # t
5944               0x006C, # l
5945               0x0069, # i
5946               0x0073, # s
5947              ]->[length $self->{kwd}]) {
5948            ## Stay in the state.
5949            $self->{kwd} .= chr $self->{nc};
5950            
5951        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5952          $self->{line_prev} = $self->{line};
5953          $self->{column_prev} = $self->{column};
5954          $self->{column}++;
5955          $self->{nc}
5956              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5957        } else {
5958          $self->{set_nc}->($self);
5959        }
5960      
5961            redo A;
5962          } elsif ((length $self->{kwd}) == 6 and
5963                   ($self->{nc} == 0x0054 or # T
5964                    $self->{nc} == 0x0074)) { # t
5965            if ($self->{kwd} ne 'ATTLIS' or $self->{nc} == 0x0074) {
5966              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
5967                              text => 'ATTLIST',
5968                              line => $self->{line_prev},
5969                              column => $self->{column_prev} - 5);
5970            }
5971            $self->{ct} = {type => ATTLIST_TOKEN, name => '',
5972                           attrdefs => [],
5973                           line => $self->{line_prev},
5974                           column => $self->{column_prev} - 7};
5975            $self->{state} = DOCTYPE_MD_STATE;
5976            
5977        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5978          $self->{line_prev} = $self->{line};
5979          $self->{column_prev} = $self->{column};
5980          $self->{column}++;
5981          $self->{nc}
5982              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5983        } else {
5984          $self->{set_nc}->($self);
5985        }
5986      
5987            redo A;
5988          } else {
5989            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5990                            line => $self->{line_prev},
5991                            column => $self->{column_prev} - 1
5992                                 - (length $self->{kwd})
5993                                 + 1 * ($self->{nc} == -1));
5994            $self->{state} = BOGUS_COMMENT_STATE;
5995            ## Reconsume.
5996            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5997            redo A;
5998          }
5999        } elsif ($self->{state} == MD_NOTATION_STATE) {
6000          if ($self->{nc} == [
6001               undef,
6002               0x004F, # O
6003               0x0054, # T
6004               0x0041, # A
6005               0x0054, # T
6006               0x0049, # I
6007               0x004F, # O
6008              ]->[length $self->{kwd}] or
6009              $self->{nc} == [
6010               undef,
6011               0x006F, # o
6012               0x0074, # t
6013               0x0061, # a
6014               0x0074, # t
6015               0x0069, # i
6016               0x006F, # o
6017              ]->[length $self->{kwd}]) {
6018            ## Stay in the state.
6019            $self->{kwd} .= chr $self->{nc};
6020            
6021        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6022          $self->{line_prev} = $self->{line};
6023          $self->{column_prev} = $self->{column};
6024          $self->{column}++;
6025          $self->{nc}
6026              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6027        } else {
6028          $self->{set_nc}->($self);
6029        }
6030      
6031            redo A;
6032          } elsif ((length $self->{kwd}) == 7 and
6033                   ($self->{nc} == 0x004E or # N
6034                    $self->{nc} == 0x006E)) { # n
6035            if ($self->{kwd} ne 'NOTATIO' or $self->{nc} == 0x006E) {
6036              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
6037                              text => 'NOTATION',
6038                              line => $self->{line_prev},
6039                              column => $self->{column_prev} - 6);
6040            }
6041            $self->{ct} = {type => NOTATION_TOKEN, name => '',
6042                           line => $self->{line_prev},
6043                           column => $self->{column_prev} - 8};
6044            $self->{state} = DOCTYPE_MD_STATE;
6045            
6046        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6047          $self->{line_prev} = $self->{line};
6048          $self->{column_prev} = $self->{column};
6049          $self->{column}++;
6050          $self->{nc}
6051              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6052        } else {
6053          $self->{set_nc}->($self);
6054        }
6055      
6056            redo A;
6057          } else {
6058            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
6059                            line => $self->{line_prev},
6060                            column => $self->{column_prev} - 1
6061                                - (length $self->{kwd})
6062                                + 1 * ($self->{nc} == -1));
6063            $self->{state} = BOGUS_COMMENT_STATE;
6064            ## Reconsume.
6065            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
6066            redo A;
6067          }
6068        } elsif ($self->{state} == DOCTYPE_MD_STATE) {
6069          ## XML5: "DOCTYPE ENTITY state", "DOCTYPE ATTLIST state", and
6070          ## "DOCTYPE NOTATION state".
6071    
6072          if ($is_space->{$self->{nc}}) {
6073            ## XML5: [NOTATION] Switch to the "DOCTYPE NOTATION identifier state".
6074            $self->{state} = BEFORE_MD_NAME_STATE;
6075            
6076        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6077          $self->{line_prev} = $self->{line};
6078          $self->{column_prev} = $self->{column};
6079          $self->{column}++;
6080          $self->{nc}
6081              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6082        } else {
6083          $self->{set_nc}->($self);
6084        }
6085      
6086            redo A;
6087          } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
6088                   $self->{nc} == 0x0025) { # %
6089            ## XML5: Switch to the "DOCTYPE bogus comment state".
6090            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before md name'); ## TODO: type
6091            $self->{state} = DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE;
6092            
6093        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6094          $self->{line_prev} = $self->{line};
6095          $self->{column_prev} = $self->{column};
6096          $self->{column}++;
6097          $self->{nc}
6098              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6099        } else {
6100          $self->{set_nc}->($self);
6101        }
6102      
6103            redo A;
6104          } elsif ($self->{nc} == -1) {
6105            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6106            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6107            ## Reconsume.
6108            redo A;
6109          } elsif ($self->{nc} == 0x003E) { # >
6110            ## XML5: Switch to the "DOCTYPE bogus comment state".
6111            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md name'); ## TODO: type
6112            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6113            
6114        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6115          $self->{line_prev} = $self->{line};
6116          $self->{column_prev} = $self->{column};
6117          $self->{column}++;
6118          $self->{nc}
6119              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6120        } else {
6121          $self->{set_nc}->($self);
6122        }
6123      
6124            redo A;
6125          } else {
6126            ## XML5: Switch to the "DOCTYPE bogus comment state".
6127            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before md name'); ## TODO: type
6128            $self->{state} = BEFORE_MD_NAME_STATE;
6129            redo A;
6130          }
6131        } elsif ($self->{state} == BEFORE_MD_NAME_STATE) {
6132          ## XML5: "DOCTYPE ENTITY parameter state", "DOCTYPE ENTITY type
6133          ## before state", "DOCTYPE ATTLIST name before state".
6134    
6135          if ($is_space->{$self->{nc}}) {
6136            ## Stay in the state.
6137            
6138        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6139          $self->{line_prev} = $self->{line};
6140          $self->{column_prev} = $self->{column};
6141          $self->{column}++;
6142          $self->{nc}
6143              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6144        } else {
6145          $self->{set_nc}->($self);
6146        }
6147      
6148            redo A;
6149          } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
6150                   $self->{nc} == 0x0025) { # %
6151            $self->{state} = DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE;
6152            
6153        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6154          $self->{line_prev} = $self->{line};
6155          $self->{column_prev} = $self->{column};
6156          $self->{column}++;
6157          $self->{nc}
6158              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6159        } else {
6160          $self->{set_nc}->($self);
6161        }
6162      
6163            redo A;
6164          } elsif ($self->{nc} == 0x003E) { # >
6165            ## XML5: Same as "Anything else".
6166            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md name'); ## TODO: type
6167            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6168            
6169        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6170          $self->{line_prev} = $self->{line};
6171          $self->{column_prev} = $self->{column};
6172          $self->{column}++;
6173          $self->{nc}
6174              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6175        } else {
6176          $self->{set_nc}->($self);
6177        }
6178      
6179            redo A;
6180          } elsif ($self->{nc} == -1) {
6181            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6182            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6183            ## Reconsume.
6184            redo A;
6185          } else {
6186            ## XML5: [ATTLIST] Not defined yet.
6187            $self->{ct}->{name} .= chr $self->{nc};
6188            $self->{state} = MD_NAME_STATE;
6189            
6190        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6191          $self->{line_prev} = $self->{line};
6192          $self->{column_prev} = $self->{column};
6193          $self->{column}++;
6194          $self->{nc}
6195              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6196        } else {
6197          $self->{set_nc}->($self);
6198        }
6199      
6200            redo A;
6201          }
6202        } elsif ($self->{state} == DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE) {
6203          if ($is_space->{$self->{nc}}) {
6204            ## XML5: Switch to the "DOCTYPE ENTITY parameter state".
6205            $self->{ct}->{type} = PARAMETER_ENTITY_TOKEN;
6206            $self->{state} = BEFORE_MD_NAME_STATE;
6207            
6208        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6209          $self->{line_prev} = $self->{line};
6210          $self->{column_prev} = $self->{column};
6211          $self->{column}++;
6212          $self->{nc}
6213              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6214        } else {
6215          $self->{set_nc}->($self);
6216        }
6217      
6218            redo A;
6219          } elsif ($self->{nc} == 0x003E) { # >
6220            ## XML5: Same as "Anything else".
6221            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md name'); ## TODO: type
6222            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6223            
6224        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6225          $self->{line_prev} = $self->{line};
6226          $self->{column_prev} = $self->{column};
6227          $self->{column}++;
6228          $self->{nc}
6229              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6230        } else {
6231          $self->{set_nc}->($self);
6232        }
6233      
6234            redo A;
6235          } elsif ($self->{nc} == -1) {
6236            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md');
6237            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6238            ## Reconsume.
6239            redo A;
6240          } else {
6241            ## XML5: No parse error.
6242            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space after ENTITY percent'); ## TODO: type
6243            $self->{state} = BOGUS_COMMENT_STATE;
6244            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
6245            ## Reconsume.
6246            redo A;
6247          }
6248        } elsif ($self->{state} == MD_NAME_STATE) {
6249          ## XML5: "DOCTYPE ENTITY name state" and "DOCTYPE ATTLIST name state".
6250          
6251          if ($is_space->{$self->{nc}}) {
6252            if ($self->{ct}->{type} == ATTLIST_TOKEN) {
6253              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
6254            } elsif ($self->{ct}->{type} == ELEMENT_TOKEN) {
6255              $self->{state} = AFTER_ELEMENT_NAME_STATE;
6256            } else { # ENTITY/NOTATION
6257              $self->{state} = AFTER_DOCTYPE_NAME_STATE;
6258            }
6259            
6260        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6261          $self->{line_prev} = $self->{line};
6262          $self->{column_prev} = $self->{column};
6263          $self->{column}++;
6264          $self->{nc}
6265              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6266        } else {
6267          $self->{set_nc}->($self);
6268        }
6269      
6270            redo A;
6271          } elsif ($self->{nc} == 0x003E) { # >
6272            if ($self->{ct}->{type} == ATTLIST_TOKEN) {
6273              #
6274            } else {
6275              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md def'); ## TODO: type
6276            }
6277            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6278            
6279        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6280          $self->{line_prev} = $self->{line};
6281          $self->{column_prev} = $self->{column};
6282          $self->{column}++;
6283          $self->{nc}
6284              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6285        } else {
6286          $self->{set_nc}->($self);
6287        }
6288      
6289            return  ($self->{ct}); # ELEMENT/ENTITY/ATTLIST/NOTATION
6290            redo A;
6291          } elsif ($self->{nc} == -1) {
6292            ## XML5: [ATTLIST] No parse error.
6293            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md');
6294            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6295            ## Reconsume.
6296            return  ($self->{ct}); # ELEMENT/ENTITY/ATTLIST/NOTATION
6297            redo A;
6298          } else {
6299            ## XML5: [ATTLIST] Not defined yet.
6300            $self->{ct}->{name} .= chr $self->{nc};
6301            ## Stay in the state.
6302            
6303        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6304          $self->{line_prev} = $self->{line};
6305          $self->{column_prev} = $self->{column};
6306          $self->{column}++;
6307          $self->{nc}
6308              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6309        } else {
6310          $self->{set_nc}->($self);
6311        }
6312      
6313            redo A;
6314          }
6315        } elsif ($self->{state} == DOCTYPE_ATTLIST_NAME_AFTER_STATE) {
6316          if ($is_space->{$self->{nc}}) {
6317            ## Stay in the state.
6318            
6319        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6320          $self->{line_prev} = $self->{line};
6321          $self->{column_prev} = $self->{column};
6322          $self->{column}++;
6323          $self->{nc}
6324              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6325        } else {
6326          $self->{set_nc}->($self);
6327        }
6328      
6329            redo A;
6330          } elsif ($self->{nc} == 0x003E) { # >
6331            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6332            
6333        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6334          $self->{line_prev} = $self->{line};
6335          $self->{column_prev} = $self->{column};
6336          $self->{column}++;
6337          $self->{nc}
6338              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6339        } else {
6340          $self->{set_nc}->($self);
6341        }
6342      
6343            return  ($self->{ct}); # ATTLIST
6344            redo A;
6345          } elsif ($self->{nc} == -1) {
6346            ## XML5: No parse error.
6347            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6348            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6349            return  ($self->{ct});
6350            redo A;
6351          } else {
6352            ## XML5: Not defined yet.
6353            $self->{ca} = {name => chr ($self->{nc}), # attrdef
6354                           tokens => [],
6355                           line => $self->{line}, column => $self->{column}};
6356            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE;
6357            
6358        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6359          $self->{line_prev} = $self->{line};
6360          $self->{column_prev} = $self->{column};
6361          $self->{column}++;
6362          $self->{nc}
6363              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6364        } else {
6365          $self->{set_nc}->($self);
6366        }
6367      
6368            redo A;
6369          }
6370        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE) {
6371          if ($is_space->{$self->{nc}}) {
6372            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE;
6373            
6374        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6375          $self->{line_prev} = $self->{line};
6376          $self->{column_prev} = $self->{column};
6377          $self->{column}++;
6378          $self->{nc}
6379              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6380        } else {
6381          $self->{set_nc}->($self);
6382        }
6383      
6384            redo A;
6385          } elsif ($self->{nc} == 0x003E) { # >
6386            ## XML5: Same as "anything else".
6387            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr type'); ## TODO: type
6388            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6389            
6390        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6391          $self->{line_prev} = $self->{line};
6392          $self->{column_prev} = $self->{column};
6393          $self->{column}++;
6394          $self->{nc}
6395              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6396        } else {
6397          $self->{set_nc}->($self);
6398        }
6399      
6400            return  ($self->{ct}); # ATTLIST
6401            redo A;
6402          } elsif ($self->{nc} == 0x0028) { # (
6403            ## XML5: Same as "anything else".
6404            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before paren'); ## TODO: type
6405            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6406            
6407        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6408          $self->{line_prev} = $self->{line};
6409          $self->{column_prev} = $self->{column};
6410          $self->{column}++;
6411          $self->{nc}
6412              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6413        } else {
6414          $self->{set_nc}->($self);
6415        }
6416      
6417            redo A;
6418          } elsif ($self->{nc} == -1) {
6419            ## XML5: No parse error.
6420            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6421            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6422            
6423        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6424          $self->{line_prev} = $self->{line};
6425          $self->{column_prev} = $self->{column};
6426          $self->{column}++;
6427          $self->{nc}
6428              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6429        } else {
6430          $self->{set_nc}->($self);
6431        }
6432      
6433            return  ($self->{ct}); # ATTLIST
6434            redo A;
6435          } else {
6436            ## XML5: Not defined yet.
6437            $self->{ca}->{name} .= chr $self->{nc};
6438            ## Stay in the state.
6439            
6440        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6441          $self->{line_prev} = $self->{line};
6442          $self->{column_prev} = $self->{column};
6443          $self->{column}++;
6444          $self->{nc}
6445              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6446        } else {
6447          $self->{set_nc}->($self);
6448        }
6449      
6450            redo A;
6451          }
6452        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE) {
6453          if ($is_space->{$self->{nc}}) {
6454            ## Stay in the state.
6455            
6456        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6457          $self->{line_prev} = $self->{line};
6458          $self->{column_prev} = $self->{column};
6459          $self->{column}++;
6460          $self->{nc}
6461              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6462        } else {
6463          $self->{set_nc}->($self);
6464        }
6465      
6466            redo A;
6467          } elsif ($self->{nc} == 0x003E) { # >
6468            ## XML5: Same as "anything else".
6469            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr type'); ## TODO: type
6470            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6471            
6472        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6473          $self->{line_prev} = $self->{line};
6474          $self->{column_prev} = $self->{column};
6475          $self->{column}++;
6476          $self->{nc}
6477              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6478        } else {
6479          $self->{set_nc}->($self);
6480        }
6481      
6482            return  ($self->{ct}); # ATTLIST
6483            redo A;
6484          } elsif ($self->{nc} == 0x0028) { # (
6485            ## XML5: Same as "anything else".
6486            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6487            
6488        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6489          $self->{line_prev} = $self->{line};
6490          $self->{column_prev} = $self->{column};
6491          $self->{column}++;
6492          $self->{nc}
6493              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6494        } else {
6495          $self->{set_nc}->($self);
6496        }
6497      
6498            redo A;
6499          } elsif ($self->{nc} == -1) {
6500            ## XML5: No parse error.
6501            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6502            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6503            
6504        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6505          $self->{line_prev} = $self->{line};
6506          $self->{column_prev} = $self->{column};
6507          $self->{column}++;
6508          $self->{nc}
6509              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6510        } else {
6511          $self->{set_nc}->($self);
6512        }
6513      
6514            return  ($self->{ct});
6515            redo A;
6516          } else {
6517            ## XML5: Not defined yet.
6518            $self->{ca}->{type} = chr $self->{nc};
6519            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE;
6520            
6521        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6522          $self->{line_prev} = $self->{line};
6523          $self->{column_prev} = $self->{column};
6524          $self->{column}++;
6525          $self->{nc}
6526              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6527        } else {
6528          $self->{set_nc}->($self);
6529        }
6530      
6531            redo A;
6532          }
6533        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE) {
6534          if ($is_space->{$self->{nc}}) {
6535            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE;
6536            
6537        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6538          $self->{line_prev} = $self->{line};
6539          $self->{column_prev} = $self->{column};
6540          $self->{column}++;
6541          $self->{nc}
6542              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6543        } else {
6544          $self->{set_nc}->($self);
6545        }
6546      
6547            redo A;
6548          } elsif ($self->{nc} == 0x0023) { # #
6549            ## XML5: Same as "anything else".
6550            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6551            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6552            
6553        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6554          $self->{line_prev} = $self->{line};
6555          $self->{column_prev} = $self->{column};
6556          $self->{column}++;
6557          $self->{nc}
6558              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6559        } else {
6560          $self->{set_nc}->($self);
6561        }
6562      
6563            redo A;
6564          } elsif ($self->{nc} == 0x0022) { # "
6565            ## XML5: Same as "anything else".
6566            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6567            $self->{ca}->{value} = '';
6568            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6569            
6570        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6571          $self->{line_prev} = $self->{line};
6572          $self->{column_prev} = $self->{column};
6573          $self->{column}++;
6574          $self->{nc}
6575              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6576        } else {
6577          $self->{set_nc}->($self);
6578        }
6579      
6580            redo A;
6581          } elsif ($self->{nc} == 0x0027) { # '
6582            ## XML5: Same as "anything else".
6583            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6584            $self->{ca}->{value} = '';
6585            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6586            
6587        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6588          $self->{line_prev} = $self->{line};
6589          $self->{column_prev} = $self->{column};
6590          $self->{column}++;
6591          $self->{nc}
6592              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6593        } else {
6594          $self->{set_nc}->($self);
6595        }
6596      
6597            redo A;
6598          } elsif ($self->{nc} == 0x003E) { # >
6599            ## XML5: Same as "anything else".
6600            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6601            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6602            
6603        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6604          $self->{line_prev} = $self->{line};
6605          $self->{column_prev} = $self->{column};
6606          $self->{column}++;
6607          $self->{nc}
6608              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6609        } else {
6610          $self->{set_nc}->($self);
6611        }
6612      
6613            return  ($self->{ct}); # ATTLIST
6614            redo A;
6615          } elsif ($self->{nc} == 0x0028) { # (
6616            ## XML5: Same as "anything else".
6617            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before paren'); ## TODO: type
6618            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6619            
6620        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6621          $self->{line_prev} = $self->{line};
6622          $self->{column_prev} = $self->{column};
6623          $self->{column}++;
6624          $self->{nc}
6625              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6626        } else {
6627          $self->{set_nc}->($self);
6628        }
6629      
6630            redo A;
6631          } elsif ($self->{nc} == -1) {
6632            ## XML5: No parse error.
6633            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6634            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6635            
6636        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6637          $self->{line_prev} = $self->{line};
6638          $self->{column_prev} = $self->{column};
6639          $self->{column}++;
6640          $self->{nc}
6641              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6642        } else {
6643          $self->{set_nc}->($self);
6644        }
6645      
6646            return  ($self->{ct});
6647            redo A;
6648          } else {
6649            ## XML5: Not defined yet.
6650            $self->{ca}->{type} .= chr $self->{nc};
6651            ## Stay in the state.
6652            
6653        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6654          $self->{line_prev} = $self->{line};
6655          $self->{column_prev} = $self->{column};
6656          $self->{column}++;
6657          $self->{nc}
6658              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6659        } else {
6660          $self->{set_nc}->($self);
6661        }
6662      
6663            redo A;
6664          }
6665        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE) {
6666          if ($is_space->{$self->{nc}}) {
6667            ## Stay in the state.
6668            
6669        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6670          $self->{line_prev} = $self->{line};
6671          $self->{column_prev} = $self->{column};
6672          $self->{column}++;
6673          $self->{nc}
6674              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6675        } else {
6676          $self->{set_nc}->($self);
6677        }
6678      
6679            redo A;
6680          } elsif ($self->{nc} == 0x0028) { # (
6681            ## XML5: Same as "anything else".
6682            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6683            
6684        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6685          $self->{line_prev} = $self->{line};
6686          $self->{column_prev} = $self->{column};
6687          $self->{column}++;
6688          $self->{nc}
6689              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6690        } else {
6691          $self->{set_nc}->($self);
6692        }
6693      
6694            redo A;
6695          } elsif ($self->{nc} == 0x0023) { # #
6696            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6697            
6698        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6699          $self->{line_prev} = $self->{line};
6700          $self->{column_prev} = $self->{column};
6701          $self->{column}++;
6702          $self->{nc}
6703              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6704        } else {
6705          $self->{set_nc}->($self);
6706        }
6707      
6708            redo A;
6709          } elsif ($self->{nc} == 0x0022) { # "
6710            ## XML5: Same as "anything else".
6711            $self->{ca}->{value} = '';
6712            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6713            
6714        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6715          $self->{line_prev} = $self->{line};
6716          $self->{column_prev} = $self->{column};
6717          $self->{column}++;
6718          $self->{nc}
6719              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6720        } else {
6721          $self->{set_nc}->($self);
6722        }
6723      
6724            redo A;
6725          } elsif ($self->{nc} == 0x0027) { # '
6726            ## XML5: Same as "anything else".
6727            $self->{ca}->{value} = '';
6728            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6729            
6730        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6731          $self->{line_prev} = $self->{line};
6732          $self->{column_prev} = $self->{column};
6733          $self->{column}++;
6734          $self->{nc}
6735              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6736        } else {
6737          $self->{set_nc}->($self);
6738        }
6739      
6740            redo A;
6741          } elsif ($self->{nc} == 0x003E) { # >
6742            ## XML5: Same as "anything else".
6743            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6744            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6745            
6746        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6747          $self->{line_prev} = $self->{line};
6748          $self->{column_prev} = $self->{column};
6749          $self->{column}++;
6750          $self->{nc}
6751              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6752        } else {
6753          $self->{set_nc}->($self);
6754        }
6755      
6756            return  ($self->{ct}); # ATTLIST
6757            redo A;
6758          } elsif ($self->{nc} == -1) {
6759            ## XML5: No parse error.
6760            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6761            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6762            
6763        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6764          $self->{line_prev} = $self->{line};
6765          $self->{column_prev} = $self->{column};
6766          $self->{column}++;
6767          $self->{nc}
6768              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6769        } else {
6770          $self->{set_nc}->($self);
6771        }
6772      
6773            return  ($self->{ct});
6774            redo A;
6775          } else {
6776            ## XML5: Switch to the "DOCTYPE bogus comment state".
6777            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
6778            $self->{ca}->{value} = '';
6779            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
6780            ## Reconsume.
6781            redo A;
6782          }
6783        } elsif ($self->{state} == BEFORE_ALLOWED_TOKEN_STATE) {
6784          if ($is_space->{$self->{nc}}) {
6785            ## Stay in the state.
6786            
6787        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6788          $self->{line_prev} = $self->{line};
6789          $self->{column_prev} = $self->{column};
6790          $self->{column}++;
6791          $self->{nc}
6792              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6793        } else {
6794          $self->{set_nc}->($self);
6795        }
6796      
6797            redo A;
6798          } elsif ($self->{nc} == 0x007C) { # |
6799            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty allowed token'); ## TODO: type
6800            ## Stay in the state.
6801            
6802        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6803          $self->{line_prev} = $self->{line};
6804          $self->{column_prev} = $self->{column};
6805          $self->{column}++;
6806          $self->{nc}
6807              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6808        } else {
6809          $self->{set_nc}->($self);
6810        }
6811      
6812            redo A;
6813          } elsif ($self->{nc} == 0x0029) { # )
6814            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty allowed token'); ## TODO: type
6815            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
6816            
6817        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6818          $self->{line_prev} = $self->{line};
6819          $self->{column_prev} = $self->{column};
6820          $self->{column}++;
6821          $self->{nc}
6822              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6823        } else {
6824          $self->{set_nc}->($self);
6825        }
6826      
6827            redo A;
6828          } elsif ($self->{nc} == 0x003E) { # >
6829            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
6830            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6831            
6832        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6833          $self->{line_prev} = $self->{line};
6834          $self->{column_prev} = $self->{column};
6835          $self->{column}++;
6836          $self->{nc}
6837              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6838        } else {
6839          $self->{set_nc}->($self);
6840        }
6841      
6842            return  ($self->{ct}); # ATTLIST
6843            redo A;
6844          } elsif ($self->{nc} == -1) {
6845            ## XML5: No parse error.
6846            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6847            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6848            
6849        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6850          $self->{line_prev} = $self->{line};
6851          $self->{column_prev} = $self->{column};
6852          $self->{column}++;
6853          $self->{nc}
6854              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6855        } else {
6856          $self->{set_nc}->($self);
6857        }
6858      
6859            return  ($self->{ct});
6860            redo A;
6861          } else {
6862            push @{$self->{ca}->{tokens}}, chr $self->{nc};
6863            $self->{state} = ALLOWED_TOKEN_STATE;
6864            
6865        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6866          $self->{line_prev} = $self->{line};
6867          $self->{column_prev} = $self->{column};
6868          $self->{column}++;
6869          $self->{nc}
6870              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6871        } else {
6872          $self->{set_nc}->($self);
6873        }
6874      
6875            redo A;
6876          }
6877        } elsif ($self->{state} == ALLOWED_TOKEN_STATE) {
6878          if ($is_space->{$self->{nc}}) {
6879            $self->{state} = AFTER_ALLOWED_TOKEN_STATE;
6880            
6881        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6882          $self->{line_prev} = $self->{line};
6883          $self->{column_prev} = $self->{column};
6884          $self->{column}++;
6885          $self->{nc}
6886              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6887        } else {
6888          $self->{set_nc}->($self);
6889        }
6890      
6891            redo A;
6892          } elsif ($self->{nc} == 0x007C) { # |
6893            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6894            
6895        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6896          $self->{line_prev} = $self->{line};
6897          $self->{column_prev} = $self->{column};
6898          $self->{column}++;
6899          $self->{nc}
6900              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6901        } else {
6902          $self->{set_nc}->($self);
6903        }
6904      
6905            redo A;
6906          } elsif ($self->{nc} == 0x0029) { # )
6907            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
6908            
6909        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6910          $self->{line_prev} = $self->{line};
6911          $self->{column_prev} = $self->{column};
6912          $self->{column}++;
6913          $self->{nc}
6914              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6915        } else {
6916          $self->{set_nc}->($self);
6917        }
6918      
6919            redo A;
6920          } elsif ($self->{nc} == 0x003E) { # >
6921            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
6922            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6923            
6924        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6925          $self->{line_prev} = $self->{line};
6926          $self->{column_prev} = $self->{column};
6927          $self->{column}++;
6928          $self->{nc}
6929              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6930        } else {
6931          $self->{set_nc}->($self);
6932        }
6933      
6934            return  ($self->{ct}); # ATTLIST
6935            redo A;
6936          } elsif ($self->{nc} == -1) {
6937            ## XML5: No parse error.
6938            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6939            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6940            
6941        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6942          $self->{line_prev} = $self->{line};
6943          $self->{column_prev} = $self->{column};
6944          $self->{column}++;
6945          $self->{nc}
6946              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6947        } else {
6948          $self->{set_nc}->($self);
6949        }
6950      
6951            return  ($self->{ct});
6952            redo A;
6953          } else {
6954            $self->{ca}->{tokens}->[-1] .= chr $self->{nc};
6955            ## Stay in the state.
6956            
6957        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6958          $self->{line_prev} = $self->{line};
6959          $self->{column_prev} = $self->{column};
6960          $self->{column}++;
6961          $self->{nc}
6962              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6963        } else {
6964          $self->{set_nc}->($self);
6965        }
6966      
6967            redo A;
6968          }
6969        } elsif ($self->{state} == AFTER_ALLOWED_TOKEN_STATE) {
6970          if ($is_space->{$self->{nc}}) {
6971            ## Stay in the state.
6972            
6973        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6974          $self->{line_prev} = $self->{line};
6975          $self->{column_prev} = $self->{column};
6976          $self->{column}++;
6977          $self->{nc}
6978              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6979        } else {
6980          $self->{set_nc}->($self);
6981        }
6982      
6983            redo A;
6984          } elsif ($self->{nc} == 0x007C) { # |
6985            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6986            
6987        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6988          $self->{line_prev} = $self->{line};
6989          $self->{column_prev} = $self->{column};
6990          $self->{column}++;
6991          $self->{nc}
6992              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6993        } else {
6994          $self->{set_nc}->($self);
6995        }
6996      
6997            redo A;
6998          } elsif ($self->{nc} == 0x0029) { # )
6999            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
7000            
7001        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7002          $self->{line_prev} = $self->{line};
7003          $self->{column_prev} = $self->{column};
7004          $self->{column}++;
7005          $self->{nc}
7006              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7007        } else {
7008          $self->{set_nc}->($self);
7009        }
7010      
7011            redo A;
7012          } elsif ($self->{nc} == 0x003E) { # >
7013            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
7014            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7015            
7016        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7017          $self->{line_prev} = $self->{line};
7018          $self->{column_prev} = $self->{column};
7019          $self->{column}++;
7020          $self->{nc}
7021              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7022        } else {
7023          $self->{set_nc}->($self);
7024        }
7025      
7026            return  ($self->{ct}); # ATTLIST
7027            redo A;
7028          } elsif ($self->{nc} == -1) {
7029            ## XML5: No parse error.
7030            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7031            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7032            
7033        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7034          $self->{line_prev} = $self->{line};
7035          $self->{column_prev} = $self->{column};
7036          $self->{column}++;
7037          $self->{nc}
7038              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7039        } else {
7040          $self->{set_nc}->($self);
7041        }
7042      
7043            return  ($self->{ct});
7044            redo A;
7045          } else {
7046            $self->{parse_error}->(level => $self->{level}->{must}, type => 'space in allowed token', ## TODO: type
7047                            line => $self->{line_prev},
7048                            column => $self->{column_prev});
7049            $self->{ca}->{tokens}->[-1] .= ' ' . chr $self->{nc};
7050            $self->{state} = ALLOWED_TOKEN_STATE;
7051            
7052        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7053          $self->{line_prev} = $self->{line};
7054          $self->{column_prev} = $self->{column};
7055          $self->{column}++;
7056          $self->{nc}
7057              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7058        } else {
7059          $self->{set_nc}->($self);
7060        }
7061      
7062            redo A;
7063          }
7064        } elsif ($self->{state} == AFTER_ALLOWED_TOKENS_STATE) {
7065          if ($is_space->{$self->{nc}}) {
7066            $self->{state} = BEFORE_ATTR_DEFAULT_STATE;
7067            
7068        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7069          $self->{line_prev} = $self->{line};
7070          $self->{column_prev} = $self->{column};
7071          $self->{column}++;
7072          $self->{nc}
7073              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7074        } else {
7075          $self->{set_nc}->($self);
7076        }
7077      
7078            redo A;
7079          } elsif ($self->{nc} == 0x0023) { # #
7080            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7081            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
7082            
7083        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7084          $self->{line_prev} = $self->{line};
7085          $self->{column_prev} = $self->{column};
7086          $self->{column}++;
7087          $self->{nc}
7088              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7089        } else {
7090          $self->{set_nc}->($self);
7091        }
7092      
7093            redo A;
7094          } elsif ($self->{nc} == 0x0022) { # "
7095            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7096            $self->{ca}->{value} = '';
7097            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7098            
7099        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7100          $self->{line_prev} = $self->{line};
7101          $self->{column_prev} = $self->{column};
7102          $self->{column}++;
7103          $self->{nc}
7104              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7105        } else {
7106          $self->{set_nc}->($self);
7107        }
7108      
7109            redo A;
7110          } elsif ($self->{nc} == 0x0027) { # '
7111            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7112            $self->{ca}->{value} = '';
7113            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7114            
7115        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7116          $self->{line_prev} = $self->{line};
7117          $self->{column_prev} = $self->{column};
7118          $self->{column}++;
7119          $self->{nc}
7120              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7121        } else {
7122          $self->{set_nc}->($self);
7123        }
7124      
7125            redo A;
7126          } elsif ($self->{nc} == 0x003E) { # >
7127            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
7128            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7129            
7130        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7131          $self->{line_prev} = $self->{line};
7132          $self->{column_prev} = $self->{column};
7133          $self->{column}++;
7134          $self->{nc}
7135              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7136        } else {
7137          $self->{set_nc}->($self);
7138        }
7139      
7140            return  ($self->{ct}); # ATTLIST
7141            redo A;
7142          } elsif ($self->{nc} == -1) {
7143            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7144            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7145            
7146        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7147          $self->{line_prev} = $self->{line};
7148          $self->{column_prev} = $self->{column};
7149          $self->{column}++;
7150          $self->{nc}
7151              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7152        } else {
7153          $self->{set_nc}->($self);
7154        }
7155      
7156            return  ($self->{ct});
7157            redo A;
7158          } else {
7159            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
7160            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
7161            ## Reconsume.
7162            redo A;
7163          }
7164        } elsif ($self->{state} == BEFORE_ATTR_DEFAULT_STATE) {
7165          if ($is_space->{$self->{nc}}) {
7166            ## Stay in the state.
7167            
7168        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7169          $self->{line_prev} = $self->{line};
7170          $self->{column_prev} = $self->{column};
7171          $self->{column}++;
7172          $self->{nc}
7173              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7174        } else {
7175          $self->{set_nc}->($self);
7176        }
7177      
7178            redo A;
7179          } elsif ($self->{nc} == 0x0023) { # #
7180            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
7181            
7182        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7183          $self->{line_prev} = $self->{line};
7184          $self->{column_prev} = $self->{column};
7185          $self->{column}++;
7186          $self->{nc}
7187              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7188        } else {
7189          $self->{set_nc}->($self);
7190        }
7191      
7192            redo A;
7193          } elsif ($self->{nc} == 0x0022) { # "
7194            $self->{ca}->{value} = '';
7195            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7196            
7197        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7198          $self->{line_prev} = $self->{line};
7199          $self->{column_prev} = $self->{column};
7200          $self->{column}++;
7201          $self->{nc}
7202              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7203        } else {
7204          $self->{set_nc}->($self);
7205        }
7206      
7207            redo A;
7208          } elsif ($self->{nc} == 0x0027) { # '
7209            $self->{ca}->{value} = '';
7210            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7211            
7212        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7213          $self->{line_prev} = $self->{line};
7214          $self->{column_prev} = $self->{column};
7215          $self->{column}++;
7216          $self->{nc}
7217              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7218        } else {
7219          $self->{set_nc}->($self);
7220        }
7221      
7222            redo A;
7223          } elsif ($self->{nc} == 0x003E) { # >
7224            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
7225            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7226            
7227        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7228          $self->{line_prev} = $self->{line};
7229          $self->{column_prev} = $self->{column};
7230          $self->{column}++;
7231          $self->{nc}
7232              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7233        } else {
7234          $self->{set_nc}->($self);
7235        }
7236      
7237            return  ($self->{ct}); # ATTLIST
7238            redo A;
7239          } elsif ($self->{nc} == -1) {
7240            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7241            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7242            
7243        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7244          $self->{line_prev} = $self->{line};
7245          $self->{column_prev} = $self->{column};
7246          $self->{column}++;
7247          $self->{nc}
7248              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7249        } else {
7250          $self->{set_nc}->($self);
7251        }
7252      
7253            return  ($self->{ct});
7254            redo A;
7255          } else {
7256            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
7257            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
7258            ## Reconsume.
7259            redo A;
7260          }
7261        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE) {
7262          if ($is_space->{$self->{nc}}) {
7263            ## XML5: No parse error.
7264            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no default type'); ## TODO: type
7265            $self->{state} = BOGUS_MD_STATE;
7266            ## Reconsume.
7267            redo A;
7268          } elsif ($self->{nc} == 0x0022) { # "
7269            ## XML5: Same as "anything else".
7270            $self->{ca}->{value} = '';
7271            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7272            
7273        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7274          $self->{line_prev} = $self->{line};
7275          $self->{column_prev} = $self->{column};
7276          $self->{column}++;
7277          $self->{nc}
7278              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7279        } else {
7280          $self->{set_nc}->($self);
7281        }
7282      
7283            redo A;
7284          } elsif ($self->{nc} == 0x0027) { # '
7285            ## XML5: Same as "anything else".
7286            $self->{ca}->{value} = '';
7287            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7288            
7289        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7290          $self->{line_prev} = $self->{line};
7291          $self->{column_prev} = $self->{column};
7292          $self->{column}++;
7293          $self->{nc}
7294              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7295        } else {
7296          $self->{set_nc}->($self);
7297        }
7298      
7299            redo A;
7300          } elsif ($self->{nc} == 0x003E) { # >
7301            ## XML5: Same as "anything else".
7302            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
7303            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7304            
7305        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7306          $self->{line_prev} = $self->{line};
7307          $self->{column_prev} = $self->{column};
7308          $self->{column}++;
7309          $self->{nc}
7310              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7311        } else {
7312          $self->{set_nc}->($self);
7313        }
7314      
7315            return  ($self->{ct}); # ATTLIST
7316            redo A;
7317          } elsif ($self->{nc} == -1) {
7318            ## XML5: No parse error.
7319            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7320            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7321            
7322        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7323          $self->{line_prev} = $self->{line};
7324          $self->{column_prev} = $self->{column};
7325          $self->{column}++;
7326          $self->{nc}
7327              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7328        } else {
7329          $self->{set_nc}->($self);
7330        }
7331      
7332            return  ($self->{ct});
7333            redo A;
7334          } else {
7335            $self->{ca}->{default} = chr $self->{nc};
7336            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE;
7337            
7338        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7339          $self->{line_prev} = $self->{line};
7340          $self->{column_prev} = $self->{column};
7341          $self->{column}++;
7342          $self->{nc}
7343              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7344        } else {
7345          $self->{set_nc}->($self);
7346        }
7347      
7348            redo A;
7349          }
7350        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE) {
7351          if ($is_space->{$self->{nc}}) {
7352            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE;
7353            
7354        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7355          $self->{line_prev} = $self->{line};
7356          $self->{column_prev} = $self->{column};
7357          $self->{column}++;
7358          $self->{nc}
7359              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7360        } else {
7361          $self->{set_nc}->($self);
7362        }
7363      
7364            redo A;
7365          } elsif ($self->{nc} == 0x0022) { # "
7366            ## XML5: Same as "anything else".
7367            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7368            $self->{ca}->{value} = '';
7369            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7370            
7371        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7372          $self->{line_prev} = $self->{line};
7373          $self->{column_prev} = $self->{column};
7374          $self->{column}++;
7375          $self->{nc}
7376              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7377        } else {
7378          $self->{set_nc}->($self);
7379        }
7380      
7381            redo A;
7382          } elsif ($self->{nc} == 0x0027) { # '
7383            ## XML5: Same as "anything else".
7384            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7385            $self->{ca}->{value} = '';
7386            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7387            
7388        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7389          $self->{line_prev} = $self->{line};
7390          $self->{column_prev} = $self->{column};
7391          $self->{column}++;
7392          $self->{nc}
7393              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7394        } else {
7395          $self->{set_nc}->($self);
7396        }
7397      
7398            redo A;
7399          } elsif ($self->{nc} == 0x003E) { # >
7400            ## XML5: Same as "anything else".
7401            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7402            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7403            
7404        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7405          $self->{line_prev} = $self->{line};
7406          $self->{column_prev} = $self->{column};
7407          $self->{column}++;
7408          $self->{nc}
7409              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7410        } else {
7411          $self->{set_nc}->($self);
7412        }
7413      
7414            return  ($self->{ct}); # ATTLIST
7415            redo A;
7416          } elsif ($self->{nc} == -1) {
7417            ## XML5: No parse error.
7418            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7419            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7420            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7421            
7422        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7423          $self->{line_prev} = $self->{line};
7424          $self->{column_prev} = $self->{column};
7425          $self->{column}++;
7426          $self->{nc}
7427              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7428        } else {
7429          $self->{set_nc}->($self);
7430        }
7431      
7432            return  ($self->{ct});
7433            redo A;
7434          } else {
7435            $self->{ca}->{default} .= chr $self->{nc};
7436            ## Stay in the state.
7437            
7438        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7439          $self->{line_prev} = $self->{line};
7440          $self->{column_prev} = $self->{column};
7441          $self->{column}++;
7442          $self->{nc}
7443              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7444        } else {
7445          $self->{set_nc}->($self);
7446        }
7447      
7448            redo A;
7449          }
7450        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE) {
7451          if ($is_space->{$self->{nc}}) {
7452            ## Stay in the state.
7453            
7454        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7455          $self->{line_prev} = $self->{line};
7456          $self->{column_prev} = $self->{column};
7457          $self->{column}++;
7458          $self->{nc}
7459              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7460        } else {
7461          $self->{set_nc}->($self);
7462        }
7463      
7464            redo A;
7465          } elsif ($self->{nc} == 0x0022) { # "
7466            $self->{ca}->{value} = '';
7467            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7468            
7469        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7470          $self->{line_prev} = $self->{line};
7471          $self->{column_prev} = $self->{column};
7472          $self->{column}++;
7473          $self->{nc}
7474              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7475        } else {
7476          $self->{set_nc}->($self);
7477        }
7478      
7479            redo A;
7480          } elsif ($self->{nc} == 0x0027) { # '
7481            $self->{ca}->{value} = '';
7482            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7483            
7484        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7485          $self->{line_prev} = $self->{line};
7486          $self->{column_prev} = $self->{column};
7487          $self->{column}++;
7488          $self->{nc}
7489              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7490        } else {
7491          $self->{set_nc}->($self);
7492        }
7493      
7494            redo A;
7495          } elsif ($self->{nc} == 0x003E) { # >
7496            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7497            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7498            
7499        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7500          $self->{line_prev} = $self->{line};
7501          $self->{column_prev} = $self->{column};
7502          $self->{column}++;
7503          $self->{nc}
7504              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7505        } else {
7506          $self->{set_nc}->($self);
7507        }
7508      
7509            return  ($self->{ct}); # ATTLIST
7510            redo A;
7511          } elsif ($self->{nc} == -1) {
7512            ## XML5: No parse error.
7513            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7514            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7515            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7516            
7517        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7518          $self->{line_prev} = $self->{line};
7519          $self->{column_prev} = $self->{column};
7520          $self->{column}++;
7521          $self->{nc}
7522              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7523        } else {
7524          $self->{set_nc}->($self);
7525        }
7526      
7527            return  ($self->{ct});
7528            redo A;
7529          } else {
7530            ## XML5: Not defined yet.
7531            if ($self->{ca}->{default} eq 'FIXED') {
7532              $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
7533            } else {
7534              push @{$self->{ct}->{attrdefs}}, $self->{ca};
7535              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7536            }
7537            ## Reconsume.
7538            redo A;
7539          }
7540        } elsif ($self->{state} == AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE) {
7541          if ($is_space->{$self->{nc}} or
7542              $self->{nc} == -1 or
7543              $self->{nc} == 0x003E) { # >
7544            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7545            ## Reconsume.
7546            redo A;
7547          } else {
7548            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before attr name'); ## TODO: type
7549            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7550            ## Reconsume.
7551            redo A;
7552          }
7553        } elsif ($self->{state} == NDATA_STATE) {
7554          ## ASCII case-insensitive
7555          if ($self->{nc} == [
7556                undef,
7557                0x0044, # D
7558                0x0041, # A
7559                0x0054, # T
7560              ]->[length $self->{kwd}] or
7561              $self->{nc} == [
7562                undef,
7563                0x0064, # d
7564                0x0061, # a
7565                0x0074, # t
7566              ]->[length $self->{kwd}]) {
7567            
7568            ## Stay in the state.
7569            $self->{kwd} .= chr $self->{nc};
7570            
7571        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7572          $self->{line_prev} = $self->{line};
7573          $self->{column_prev} = $self->{column};
7574          $self->{column}++;
7575          $self->{nc}
7576              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7577        } else {
7578          $self->{set_nc}->($self);
7579        }
7580      
7581            redo A;
7582          } elsif ((length $self->{kwd}) == 4 and
7583                   ($self->{nc} == 0x0041 or # A
7584                    $self->{nc} == 0x0061)) { # a
7585            if ($self->{kwd} ne 'NDAT' or $self->{nc} == 0x0061) { # a
7586              
7587              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
7588                              text => 'NDATA',
7589                              line => $self->{line_prev},
7590                              column => $self->{column_prev} - 4);
7591            } else {
7592              
7593            }
7594            $self->{state} = AFTER_NDATA_STATE;
7595            
7596        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7597          $self->{line_prev} = $self->{line};
7598          $self->{column_prev} = $self->{column};
7599          $self->{column}++;
7600          $self->{nc}
7601              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7602        } else {
7603          $self->{set_nc}->($self);
7604        }
7605      
7606            redo A;
7607          } else {
7608            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after literal', ## TODO: type
7609                            line => $self->{line_prev},
7610                            column => $self->{column_prev} + 1
7611                                - length $self->{kwd});
7612            
7613            $self->{state} = BOGUS_MD_STATE;
7614            ## Reconsume.
7615            redo A;
7616          }
7617        } elsif ($self->{state} == AFTER_NDATA_STATE) {
7618          if ($is_space->{$self->{nc}}) {
7619            $self->{state} = BEFORE_NOTATION_NAME_STATE;
7620            
7621        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7622          $self->{line_prev} = $self->{line};
7623          $self->{column_prev} = $self->{column};
7624          $self->{column}++;
7625          $self->{nc}
7626              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7627        } else {
7628          $self->{set_nc}->($self);
7629        }
7630      
7631            redo A;
7632          } elsif ($self->{nc} == 0x003E) { # >
7633            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no notation name'); ## TODO: type
7634            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7635            
7636        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7637          $self->{line_prev} = $self->{line};
7638          $self->{column_prev} = $self->{column};
7639          $self->{column}++;
7640          $self->{nc}
7641              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7642        } else {
7643          $self->{set_nc}->($self);
7644        }
7645      
7646            return  ($self->{ct}); # ENTITY
7647            redo A;
7648          } elsif ($self->{nc} == -1) {
7649            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7650            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7651            
7652        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7653          $self->{line_prev} = $self->{line};
7654          $self->{column_prev} = $self->{column};
7655          $self->{column}++;
7656          $self->{nc}
7657              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7658        } else {
7659          $self->{set_nc}->($self);
7660        }
7661      
7662            return  ($self->{ct}); # ENTITY
7663            redo A;
7664          } else {
7665            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after literal', ## TODO: type
7666                            line => $self->{line_prev},
7667                            column => $self->{column_prev} + 1
7668                                - length $self->{kwd});
7669            $self->{state} = BOGUS_MD_STATE;
7670            ## Reconsume.
7671            redo A;
7672          }
7673        } elsif ($self->{state} == BEFORE_NOTATION_NAME_STATE) {
7674          if ($is_space->{$self->{nc}}) {
7675            ## Stay in the state.
7676            
7677        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7678          $self->{line_prev} = $self->{line};
7679          $self->{column_prev} = $self->{column};
7680          $self->{column}++;
7681          $self->{nc}
7682              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7683        } else {
7684          $self->{set_nc}->($self);
7685        }
7686      
7687            redo A;
7688          } elsif ($self->{nc} == 0x003E) { # >
7689            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no notation name'); ## TODO: type
7690            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7691            
7692        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7693          $self->{line_prev} = $self->{line};
7694          $self->{column_prev} = $self->{column};
7695          $self->{column}++;
7696          $self->{nc}
7697              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7698        } else {
7699          $self->{set_nc}->($self);
7700        }
7701      
7702            return  ($self->{ct}); # ENTITY
7703            redo A;
7704          } elsif ($self->{nc} == -1) {
7705            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7706            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7707            
7708        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7709          $self->{line_prev} = $self->{line};
7710          $self->{column_prev} = $self->{column};
7711          $self->{column}++;
7712          $self->{nc}
7713              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7714        } else {
7715          $self->{set_nc}->($self);
7716        }
7717      
7718            return  ($self->{ct}); # ENTITY
7719            redo A;
7720          } else {
7721            $self->{ct}->{notation} = chr $self->{nc}; # ENTITY
7722            $self->{state} = NOTATION_NAME_STATE;
7723            
7724        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7725          $self->{line_prev} = $self->{line};
7726          $self->{column_prev} = $self->{column};
7727          $self->{column}++;
7728          $self->{nc}
7729              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7730        } else {
7731          $self->{set_nc}->($self);
7732        }
7733      
7734            redo A;
7735          }
7736        } elsif ($self->{state} == NOTATION_NAME_STATE) {
7737          if ($is_space->{$self->{nc}}) {
7738            $self->{state} = AFTER_MD_DEF_STATE;
7739            
7740        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7741          $self->{line_prev} = $self->{line};
7742          $self->{column_prev} = $self->{column};
7743          $self->{column}++;
7744          $self->{nc}
7745              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7746        } else {
7747          $self->{set_nc}->($self);
7748        }
7749      
7750            redo A;
7751          } elsif ($self->{nc} == 0x003E) { # >
7752            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7753            
7754        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7755          $self->{line_prev} = $self->{line};
7756          $self->{column_prev} = $self->{column};
7757          $self->{column}++;
7758          $self->{nc}
7759              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7760        } else {
7761          $self->{set_nc}->($self);
7762        }
7763      
7764            return  ($self->{ct}); # ENTITY
7765            redo A;
7766          } elsif ($self->{nc} == -1) {
7767            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7768            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7769            
7770        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7771          $self->{line_prev} = $self->{line};
7772          $self->{column_prev} = $self->{column};
7773          $self->{column}++;
7774          $self->{nc}
7775              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7776        } else {
7777          $self->{set_nc}->($self);
7778        }
7779      
7780            return  ($self->{ct}); # ENTITY
7781            redo A;
7782          } else {
7783            $self->{ct}->{notation} .= chr $self->{nc}; # ENTITY
7784            ## Stay in the state.
7785            
7786        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7787          $self->{line_prev} = $self->{line};
7788          $self->{column_prev} = $self->{column};
7789          $self->{column}++;
7790          $self->{nc}
7791              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7792        } else {
7793          $self->{set_nc}->($self);
7794        }
7795      
7796            redo A;
7797          }
7798        } elsif ($self->{state} == DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE) {
7799          if ($self->{nc} == 0x0022) { # "
7800            $self->{state} = AFTER_MD_DEF_STATE;
7801            
7802        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7803          $self->{line_prev} = $self->{line};
7804          $self->{column_prev} = $self->{column};
7805          $self->{column}++;
7806          $self->{nc}
7807              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7808        } else {
7809          $self->{set_nc}->($self);
7810        }
7811      
7812            redo A;
7813          } elsif ($self->{nc} == 0x0026) { # &
7814            $self->{prev_state} = $self->{state};
7815            $self->{state} = ENTITY_VALUE_ENTITY_STATE;
7816            $self->{entity_add} = 0x0022; # "
7817            
7818        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7819          $self->{line_prev} = $self->{line};
7820          $self->{column_prev} = $self->{column};
7821          $self->{column}++;
7822          $self->{nc}
7823              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7824        } else {
7825          $self->{set_nc}->($self);
7826        }
7827      
7828            redo A;
7829    ## TODO: %
7830          } elsif ($self->{nc} == -1) {
7831            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed entity value'); ## TODO: type
7832            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7833            ## Reconsume.
7834            return  ($self->{ct}); # ENTITY
7835            redo A;
7836          } else {
7837            $self->{ct}->{value} .= chr $self->{nc}; # ENTITY
7838            
7839        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7840          $self->{line_prev} = $self->{line};
7841          $self->{column_prev} = $self->{column};
7842          $self->{column}++;
7843          $self->{nc}
7844              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7845        } else {
7846          $self->{set_nc}->($self);
7847        }
7848      
7849            redo A;
7850          }
7851        } elsif ($self->{state} == DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE) {
7852          if ($self->{nc} == 0x0027) { # '
7853            $self->{state} = AFTER_MD_DEF_STATE;
7854            
7855        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7856          $self->{line_prev} = $self->{line};
7857          $self->{column_prev} = $self->{column};
7858          $self->{column}++;
7859          $self->{nc}
7860              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7861        } else {
7862          $self->{set_nc}->($self);
7863        }
7864      
7865            redo A;
7866          } elsif ($self->{nc} == 0x0026) { # &
7867            $self->{prev_state} = $self->{state};
7868            $self->{state} = ENTITY_VALUE_ENTITY_STATE;
7869            $self->{entity_add} = 0x0027; # '
7870            
7871        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7872          $self->{line_prev} = $self->{line};
7873          $self->{column_prev} = $self->{column};
7874          $self->{column}++;
7875          $self->{nc}
7876              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7877        } else {
7878          $self->{set_nc}->($self);
7879        }
7880      
7881            redo A;
7882    ## TODO: %
7883          } elsif ($self->{nc} == -1) {
7884            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed entity value'); ## TODO: type
7885            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7886            ## Reconsume.
7887            return  ($self->{ct}); # ENTITY
7888            redo A;
7889          } else {
7890            $self->{ct}->{value} .= chr $self->{nc}; # ENTITY
7891            
7892        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7893          $self->{line_prev} = $self->{line};
7894          $self->{column_prev} = $self->{column};
7895          $self->{column}++;
7896          $self->{nc}
7897              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7898        } else {
7899          $self->{set_nc}->($self);
7900        }
7901      
7902            redo A;
7903          }
7904        } elsif ($self->{state} == ENTITY_VALUE_ENTITY_STATE) {
7905          if ($is_space->{$self->{nc}} or
7906              {
7907                0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
7908                $self->{entity_add} => 1,
7909              }->{$self->{nc}}) {
7910            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero',
7911                            line => $self->{line_prev},
7912                            column => $self->{column_prev}
7913                                + ($self->{nc} == -1 ? 1 : 0));
7914            ## Don't consume
7915            ## Return nothing.
7916            #
7917          } elsif ($self->{nc} == 0x0023) { # #
7918            $self->{ca} = $self->{ct};
7919            $self->{state} = ENTITY_HASH_STATE;
7920            $self->{kwd} = '#';
7921            
7922        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7923          $self->{line_prev} = $self->{line};
7924          $self->{column_prev} = $self->{column};
7925          $self->{column}++;
7926          $self->{nc}
7927              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7928        } else {
7929          $self->{set_nc}->($self);
7930        }
7931      
7932            redo A;
7933          } else {
7934            #
7935          }
7936    
7937          $self->{ct}->{value} .= '&';
7938          $self->{state} = $self->{prev_state};
7939          ## Reconsume.
7940          redo A;
7941        } elsif ($self->{state} == AFTER_ELEMENT_NAME_STATE) {
7942          if ($is_space->{$self->{nc}}) {
7943            $self->{state} = BEFORE_ELEMENT_CONTENT_STATE;
7944            
7945        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7946          $self->{line_prev} = $self->{line};
7947          $self->{column_prev} = $self->{column};
7948          $self->{column}++;
7949          $self->{nc}
7950              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7951        } else {
7952          $self->{set_nc}->($self);
7953        }
7954      
7955            redo A;
7956          } elsif ($self->{nc} == 0x0028) { # (
7957            $self->{state} = AFTER_CM_GROUP_OPEN_STATE;
7958            $self->{ct}->{content} = ['('];
7959            $self->{group_depth} = 1;
7960            
7961        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7962          $self->{line_prev} = $self->{line};
7963          $self->{column_prev} = $self->{column};
7964          $self->{column}++;
7965          $self->{nc}
7966              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7967        } else {
7968          $self->{set_nc}->($self);
7969        }
7970      
7971            redo A;
7972          } elsif ($self->{nc} == 0x003E) { # >
7973            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md def'); ## TODO: type
7974            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7975            
7976        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7977          $self->{line_prev} = $self->{line};
7978          $self->{column_prev} = $self->{column};
7979          $self->{column}++;
7980          $self->{nc}
7981              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7982        } else {
7983          $self->{set_nc}->($self);
7984        }
7985      
7986            return  ($self->{ct}); # ELEMENT
7987            redo A;
7988          } elsif ($self->{nc} == -1) {
7989            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7990            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7991            
7992        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7993          $self->{line_prev} = $self->{line};
7994          $self->{column_prev} = $self->{column};
7995          $self->{column}++;
7996          $self->{nc}
7997              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7998        } else {
7999          $self->{set_nc}->($self);
8000        }
8001      
8002            return  ($self->{ct}); # ELEMENT
8003            redo A;
8004          } else {
8005            $self->{ct}->{content} = [chr $self->{nc}];
8006            $self->{state} = CONTENT_KEYWORD_STATE;
8007            
8008        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8009          $self->{line_prev} = $self->{line};
8010          $self->{column_prev} = $self->{column};
8011          $self->{column}++;
8012          $self->{nc}
8013              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8014        } else {
8015          $self->{set_nc}->($self);
8016        }
8017      
8018            redo A;
8019          }
8020        } elsif ($self->{state} == CONTENT_KEYWORD_STATE) {
8021          if ($is_space->{$self->{nc}}) {
8022            $self->{state} = AFTER_MD_DEF_STATE;
8023            
8024        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8025          $self->{line_prev} = $self->{line};
8026          $self->{column_prev} = $self->{column};
8027          $self->{column}++;
8028          $self->{nc}
8029              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8030        } else {
8031          $self->{set_nc}->($self);
8032        }
8033      
8034            redo A;
8035          } elsif ($self->{nc} == 0x003E) { # >
8036            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8037            
8038        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8039          $self->{line_prev} = $self->{line};
8040          $self->{column_prev} = $self->{column};
8041          $self->{column}++;
8042          $self->{nc}
8043              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8044        } else {
8045          $self->{set_nc}->($self);
8046        }
8047      
8048            return  ($self->{ct}); # ELEMENT
8049            redo A;
8050          } elsif ($self->{nc} == -1) {
8051            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8052            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8053            
8054        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8055          $self->{line_prev} = $self->{line};
8056          $self->{column_prev} = $self->{column};
8057          $self->{column}++;
8058          $self->{nc}
8059              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8060        } else {
8061          $self->{set_nc}->($self);
8062        }
8063      
8064            return  ($self->{ct}); # ELEMENT
8065            redo A;
8066          } else {
8067            $self->{ct}->{content}->[-1] .= chr $self->{nc}; # ELEMENT
8068            ## Stay in the state.
8069            
8070        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8071          $self->{line_prev} = $self->{line};
8072          $self->{column_prev} = $self->{column};
8073          $self->{column}++;
8074          $self->{nc}
8075              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8076        } else {
8077          $self->{set_nc}->($self);
8078        }
8079      
8080            redo A;
8081          }
8082        } elsif ($self->{state} == AFTER_CM_GROUP_OPEN_STATE) {
8083          if ($is_space->{$self->{nc}}) {
8084            ## Stay in the state.
8085            
8086        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8087          $self->{line_prev} = $self->{line};
8088          $self->{column_prev} = $self->{column};
8089          $self->{column}++;
8090          $self->{nc}
8091              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8092        } else {
8093          $self->{set_nc}->($self);
8094        }
8095      
8096            redo A;
8097          } elsif ($self->{nc} == 0x0028) { # (
8098            $self->{group_depth}++;
8099            push @{$self->{ct}->{content}}, chr $self->{nc};
8100            ## Stay in the state.
8101            
8102        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8103          $self->{line_prev} = $self->{line};
8104          $self->{column_prev} = $self->{column};
8105          $self->{column}++;
8106          $self->{nc}
8107              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8108        } else {
8109          $self->{set_nc}->($self);
8110        }
8111      
8112            redo A;
8113          } elsif ($self->{nc} == 0x007C or # |
8114                   $self->{nc} == 0x002C) { # ,
8115            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty element name'); ## TODO: type
8116            ## Stay in the state.
8117            
8118        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8119          $self->{line_prev} = $self->{line};
8120          $self->{column_prev} = $self->{column};
8121          $self->{column}++;
8122          $self->{nc}
8123              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8124        } else {
8125          $self->{set_nc}->($self);
8126        }
8127      
8128            redo A;
8129          } elsif ($self->{nc} == 0x0029) { # )
8130            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty element name'); ## TODO: type
8131            push @{$self->{ct}->{content}}, chr $self->{nc};
8132            $self->{group_depth}--;
8133            $self->{state} = AFTER_CM_GROUP_CLOSE_STATE;
8134            
8135        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8136          $self->{line_prev} = $self->{line};
8137          $self->{column_prev} = $self->{column};
8138          $self->{column}++;
8139          $self->{nc}
8140              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8141        } else {
8142          $self->{set_nc}->($self);
8143        }
8144      
8145            redo A;
8146          } elsif ($self->{nc} == 0x003E) { # >
8147            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8148            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8149            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8150            
8151        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8152          $self->{line_prev} = $self->{line};
8153          $self->{column_prev} = $self->{column};
8154          $self->{column}++;
8155          $self->{nc}
8156              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8157        } else {
8158          $self->{set_nc}->($self);
8159        }
8160      
8161            return  ($self->{ct}); # ELEMENT
8162            redo A;
8163          } elsif ($self->{nc} == -1) {
8164            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8165            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8166            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8167            
8168        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8169          $self->{line_prev} = $self->{line};
8170          $self->{column_prev} = $self->{column};
8171          $self->{column}++;
8172          $self->{nc}
8173              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8174        } else {
8175          $self->{set_nc}->($self);
8176        }
8177      
8178            return  ($self->{ct}); # ELEMENT
8179            redo A;
8180          } else {
8181            push @{$self->{ct}->{content}}, chr $self->{nc};
8182            $self->{state} = CM_ELEMENT_NAME_STATE;
8183            
8184        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8185          $self->{line_prev} = $self->{line};
8186          $self->{column_prev} = $self->{column};
8187          $self->{column}++;
8188          $self->{nc}
8189              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8190        } else {
8191          $self->{set_nc}->($self);
8192        }
8193      
8194            redo A;
8195          }
8196        } elsif ($self->{state} == CM_ELEMENT_NAME_STATE) {
8197          if ($is_space->{$self->{nc}}) {
8198            $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8199            
8200        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8201          $self->{line_prev} = $self->{line};
8202          $self->{column_prev} = $self->{column};
8203          $self->{column}++;
8204          $self->{nc}
8205              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8206        } else {
8207          $self->{set_nc}->($self);
8208        }
8209      
8210            redo A;
8211          } elsif ($self->{nc} == 0x002A or # *
8212                   $self->{nc} == 0x002B or # +
8213                   $self->{nc} == 0x003F) { # ?
8214            push @{$self->{ct}->{content}}, chr $self->{nc};
8215            $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8216            
8217        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8218          $self->{line_prev} = $self->{line};
8219          $self->{column_prev} = $self->{column};
8220          $self->{column}++;
8221          $self->{nc}
8222              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8223        } else {
8224          $self->{set_nc}->($self);
8225        }
8226      
8227            redo A;
8228          } elsif ($self->{nc} == 0x007C or # |
8229                   $self->{nc} == 0x002C) { # ,
8230            push @{$self->{ct}->{content}}, $self->{nc} == 0x007C ? ' | ' : ', ';
8231            $self->{state} = AFTER_CM_GROUP_OPEN_STATE;
8232            
8233        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8234          $self->{line_prev} = $self->{line};
8235          $self->{column_prev} = $self->{column};
8236          $self->{column}++;
8237          $self->{nc}
8238              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8239        } else {
8240          $self->{set_nc}->($self);
8241        }
8242      
8243            redo A;
8244          } elsif ($self->{nc} == 0x0029) { # )
8245            $self->{group_depth}--;
8246            push @{$self->{ct}->{content}}, chr $self->{nc};
8247            $self->{state} = AFTER_CM_GROUP_CLOSE_STATE;
8248            
8249        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8250          $self->{line_prev} = $self->{line};
8251          $self->{column_prev} = $self->{column};
8252          $self->{column}++;
8253          $self->{nc}
8254              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8255        } else {
8256          $self->{set_nc}->($self);
8257        }
8258      
8259            redo A;
8260          } elsif ($self->{nc} == 0x003E) { # >
8261            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8262            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8263            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8264            
8265        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8266          $self->{line_prev} = $self->{line};
8267          $self->{column_prev} = $self->{column};
8268          $self->{column}++;
8269          $self->{nc}
8270              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8271        } else {
8272          $self->{set_nc}->($self);
8273        }
8274      
8275            return  ($self->{ct}); # ELEMENT
8276            redo A;
8277          } elsif ($self->{nc} == -1) {
8278            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8279            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8280            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8281            
8282        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8283          $self->{line_prev} = $self->{line};
8284          $self->{column_prev} = $self->{column};
8285          $self->{column}++;
8286          $self->{nc}
8287              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8288        } else {
8289          $self->{set_nc}->($self);
8290        }
8291      
8292            return  ($self->{ct}); # ELEMENT
8293            redo A;
8294          } else {
8295            $self->{ct}->{content}->[-1] .= chr $self->{nc};
8296            ## Stay in the state.
8297            
8298        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8299          $self->{line_prev} = $self->{line};
8300          $self->{column_prev} = $self->{column};
8301          $self->{column}++;
8302          $self->{nc}
8303              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8304        } else {
8305          $self->{set_nc}->($self);
8306        }
8307      
8308            redo A;
8309          }
8310        } elsif ($self->{state} == AFTER_CM_ELEMENT_NAME_STATE) {
8311          if ($is_space->{$self->{nc}}) {
8312            ## Stay in the state.
8313            
8314        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8315          $self->{line_prev} = $self->{line};
8316          $self->{column_prev} = $self->{column};
8317          $self->{column}++;
8318          $self->{nc}
8319              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8320        } else {
8321          $self->{set_nc}->($self);
8322        }
8323      
8324            redo A;
8325          } elsif ($self->{nc} == 0x007C or # |
8326                   $self->{nc} == 0x002C) { # ,
8327            push @{$self->{ct}->{content}}, $self->{nc} == 0x007C ? ' | ' : ', ';
8328            $self->{state} = AFTER_CM_GROUP_OPEN_STATE;
8329            
8330        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8331          $self->{line_prev} = $self->{line};
8332          $self->{column_prev} = $self->{column};
8333          $self->{column}++;
8334          $self->{nc}
8335              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8336        } else {
8337          $self->{set_nc}->($self);
8338        }
8339      
8340            redo A;
8341          } elsif ($self->{nc} == 0x0029) { # )
8342            $self->{group_depth}--;
8343            push @{$self->{ct}->{content}}, chr $self->{nc};
8344            $self->{state} = AFTER_CM_GROUP_CLOSE_STATE;
8345            
8346        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8347          $self->{line_prev} = $self->{line};
8348          $self->{column_prev} = $self->{column};
8349          $self->{column}++;
8350          $self->{nc}
8351              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8352        } else {
8353          $self->{set_nc}->($self);
8354        }
8355      
8356            redo A;
8357          } elsif ($self->{nc} == 0x003E) { # >
8358            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8359            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8360            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8361            
8362        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8363          $self->{line_prev} = $self->{line};
8364          $self->{column_prev} = $self->{column};
8365          $self->{column}++;
8366          $self->{nc}
8367              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8368        } else {
8369          $self->{set_nc}->($self);
8370        }
8371      
8372            return  ($self->{ct}); # ELEMENT
8373            redo A;
8374          } elsif ($self->{nc} == -1) {
8375            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8376            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8377            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8378            
8379        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8380          $self->{line_prev} = $self->{line};
8381          $self->{column_prev} = $self->{column};
8382          $self->{column}++;
8383          $self->{nc}
8384              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8385        } else {
8386          $self->{set_nc}->($self);
8387        }
8388      
8389            return  ($self->{ct}); # ELEMENT
8390            redo A;
8391          } else {
8392            $self->{parse_error}->(level => $self->{level}->{must}, type => 'after element name'); ## TODO: type
8393            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8394            $self->{state} = BOGUS_MD_STATE;
8395            
8396        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8397          $self->{line_prev} = $self->{line};
8398          $self->{column_prev} = $self->{column};
8399          $self->{column}++;
8400          $self->{nc}
8401              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8402        } else {
8403          $self->{set_nc}->($self);
8404        }
8405      
8406            redo A;
8407          }
8408        } elsif ($self->{state} == AFTER_CM_GROUP_CLOSE_STATE) {
8409          if ($is_space->{$self->{nc}}) {
8410            if ($self->{group_depth}) {
8411              $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8412            } else {
8413              $self->{state} = AFTER_MD_DEF_STATE;
8414            }
8415            
8416        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8417          $self->{line_prev} = $self->{line};
8418          $self->{column_prev} = $self->{column};
8419          $self->{column}++;
8420          $self->{nc}
8421              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8422        } else {
8423          $self->{set_nc}->($self);
8424        }
8425      
8426            redo A;
8427          } elsif ($self->{nc} == 0x002A or # *
8428                   $self->{nc} == 0x002B or # +
8429                   $self->{nc} == 0x003F) { # ?
8430            push @{$self->{ct}->{content}}, chr $self->{nc};
8431            if ($self->{group_depth}) {
8432              $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8433            } else {
8434              $self->{state} = AFTER_MD_DEF_STATE;
8435            }
8436            
8437        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8438          $self->{line_prev} = $self->{line};
8439          $self->{column_prev} = $self->{column};
8440          $self->{column}++;
8441          $self->{nc}
8442              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8443        } else {
8444          $self->{set_nc}->($self);
8445        }
8446      
8447            redo A;
8448          } elsif ($self->{nc} == 0x0029) { # )
8449            if ($self->{group_depth}) {
8450              $self->{group_depth}--;
8451              push @{$self->{ct}->{content}}, chr $self->{nc};
8452              ## Stay in the state.
8453              
8454        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8455          $self->{line_prev} = $self->{line};
8456          $self->{column_prev} = $self->{column};
8457          $self->{column}++;
8458          $self->{nc}
8459              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8460        } else {
8461          $self->{set_nc}->($self);
8462        }
8463      
8464              redo A;
8465            } else {
8466              $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after md def'); ## TODO: type
8467              $self->{state} = BOGUS_MD_STATE;
8468              ## Reconsume.
8469              redo A;
8470            }
8471          } elsif ($self->{nc} == 0x003E) { # >
8472            if ($self->{group_depth}) {
8473              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8474              push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8475            }
8476            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8477            
8478        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8479          $self->{line_prev} = $self->{line};
8480          $self->{column_prev} = $self->{column};
8481          $self->{column}++;
8482          $self->{nc}
8483              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8484        } else {
8485          $self->{set_nc}->($self);
8486        }
8487      
8488            return  ($self->{ct}); # ELEMENT
8489            redo A;
8490          } elsif ($self->{nc} == -1) {
8491            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8492            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8493            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8494            
8495        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8496          $self->{line_prev} = $self->{line};
8497          $self->{column_prev} = $self->{column};
8498          $self->{column}++;
8499          $self->{nc}
8500              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8501        } else {
8502          $self->{set_nc}->($self);
8503        }
8504      
8505            return  ($self->{ct}); # ELEMENT
8506            redo A;
8507          } else {
8508            if ($self->{group_depth}) {
8509              $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8510            } else {
8511              $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after md def'); ## TODO: type
8512              $self->{state} = BOGUS_MD_STATE;
8513            }
8514            ## Reconsume.
8515            redo A;
8516          }
8517        } elsif ($self->{state} == AFTER_MD_DEF_STATE) {
8518          if ($is_space->{$self->{nc}}) {
8519            ## Stay in the state.
8520            
8521        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8522          $self->{line_prev} = $self->{line};
8523          $self->{column_prev} = $self->{column};
8524          $self->{column}++;
8525          $self->{nc}
8526              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8527        } else {
8528          $self->{set_nc}->($self);
8529        }
8530      
8531            redo A;
8532          } elsif ($self->{nc} == 0x003E) { # >
8533            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8534            
8535        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8536          $self->{line_prev} = $self->{line};
8537          $self->{column_prev} = $self->{column};
8538          $self->{column}++;
8539          $self->{nc}
8540              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8541        } else {
8542          $self->{set_nc}->($self);
8543        }
8544      
8545            return  ($self->{ct}); # ENTITY/ELEMENT
8546            redo A;
8547          } elsif ($self->{nc} == -1) {
8548            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8549            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8550            
8551        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8552          $self->{line_prev} = $self->{line};
8553          $self->{column_prev} = $self->{column};
8554          $self->{column}++;
8555          $self->{nc}
8556              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8557        } else {
8558          $self->{set_nc}->($self);
8559        }
8560      
8561            return  ($self->{ct}); # ENTITY/ELEMENT
8562            redo A;
8563          } else {
8564            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after md def'); ## TODO: type
8565            $self->{state} = BOGUS_MD_STATE;
8566            ## Reconsume.
8567            redo A;
8568          }
8569        } elsif ($self->{state} == BOGUS_MD_STATE) {
8570          if ($self->{nc} == 0x003E) { # >
8571            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8572            
8573        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8574          $self->{line_prev} = $self->{line};
8575          $self->{column_prev} = $self->{column};
8576          $self->{column}++;
8577          $self->{nc}
8578              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8579        } else {
8580          $self->{set_nc}->($self);
8581        }
8582      
8583            return  ($self->{ct}); # ATTLIST/ENTITY/NOTATION
8584            redo A;
8585          } elsif ($self->{nc} == -1) {
8586            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8587            ## Reconsume.
8588            return  ($self->{ct}); # ATTLIST/ENTITY/NOTATION
8589            redo A;
8590          } else {
8591            ## Stay in the state.
8592            
8593        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8594          $self->{line_prev} = $self->{line};
8595          $self->{column_prev} = $self->{column};
8596          $self->{column}++;
8597          $self->{nc}
8598              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8599        } else {
8600          $self->{set_nc}->($self);
8601        }
8602      
8603            redo A;
8604          }
8605      } else {      } else {
8606        die "$0: $self->{state}: Unknown state";        die "$0: $self->{state}: Unknown state";
8607      }      }
# Line 4252  sub _get_next_token ($) { Line 8612  sub _get_next_token ($) {
8612    
8613  1;  1;
8614  ## $Date$  ## $Date$
8615                                    

Legend:
Removed from v.1.5  
changed lines
  Added in v.1.23

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24