/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm

Parent Directory | Revision Log | View Patch

revision 1.5 by wakaba, Tue Oct 14 14:38:59 2008 UTC revision 1.24 by wakaba, Sun Oct 19 14:05:20 2008 UTC
# Line 15  BEGIN { Line 15  BEGIN {
15      CHARACTER_TOKEN      CHARACTER_TOKEN
16      PI_TOKEN      PI_TOKEN
17      ABORT_TOKEN      ABORT_TOKEN
18        END_OF_DOCTYPE_TOKEN
19        ATTLIST_TOKEN
20        ELEMENT_TOKEN
21        GENERAL_ENTITY_TOKEN
22        PARAMETER_ENTITY_TOKEN
23        NOTATION_TOKEN
24    );    );
25        
26    our %EXPORT_TAGS = (    our %EXPORT_TAGS = (
# Line 27  BEGIN { Line 33  BEGIN {
33        CHARACTER_TOKEN        CHARACTER_TOKEN
34        PI_TOKEN        PI_TOKEN
35        ABORT_TOKEN        ABORT_TOKEN
36          END_OF_DOCTYPE_TOKEN
37          ATTLIST_TOKEN
38          ELEMENT_TOKEN
39          GENERAL_ENTITY_TOKEN
40          PARAMETER_ENTITY_TOKEN
41          NOTATION_TOKEN
42      )],      )],
43    );    );
44  }  }
45    
46    ## NOTE: Differences from the XML5 draft are marked as "XML5:".
47    
48  ## Token types  ## Token types
49    
50  sub DOCTYPE_TOKEN () { 1 }  sub DOCTYPE_TOKEN () { 1 } ## XML5: No DOCTYPE token.
51  sub COMMENT_TOKEN () { 2 }  sub COMMENT_TOKEN () { 2 }
52  sub START_TAG_TOKEN () { 3 }  sub START_TAG_TOKEN () { 3 }
53  sub END_TAG_TOKEN () { 4 }  sub END_TAG_TOKEN () { 4 }
54  sub END_OF_FILE_TOKEN () { 5 }  sub END_OF_FILE_TOKEN () { 5 }
55  sub CHARACTER_TOKEN () { 6 }  sub CHARACTER_TOKEN () { 6 }
56  sub PI_TOKEN () { 7 } # XML5  sub PI_TOKEN () { 7 } ## NOTE: XML only.
57  sub ABORT_TOKEN () { 8 } # Not a token actually  sub ABORT_TOKEN () { 8 } ## NOTE: For internal processing.
58    sub END_OF_DOCTYPE_TOKEN () { 9 } ## NOTE: XML only.
59    sub ATTLIST_TOKEN () { 10 } ## NOTE: XML only.
60    sub ELEMENT_TOKEN () { 11 } ## NOTE: XML only.
61    sub GENERAL_ENTITY_TOKEN () { 12 } ## NOTE: XML only.
62    sub PARAMETER_ENTITY_TOKEN () { 13 } ## NOTE: XML only.
63    sub NOTATION_TOKEN () { 14 } ## NOTE: XML only.
64    
65    ## XML5: XML5 has an "empty tag token".  In this implementation, it is
66    ## represented as a start tag token with the $self->{self_closing} flag
67    ## set to true.
68    
69    ## XML5: XML5 has a "short end tag token".  In this implementation, it
70    ## is represented as an end tag token with $token->{tag_name} set to
71    ## an empty string.
72    
73  package Whatpm::HTML;  package Whatpm::HTML;
74    
# Line 114  sub HEXREF_HEX_STATE () { 48 } Line 142  sub HEXREF_HEX_STATE () { 48 }
142  sub ENTITY_NAME_STATE () { 49 }  sub ENTITY_NAME_STATE () { 49 }
143  sub PCDATA_STATE () { 50 } # "data state" in the spec  sub PCDATA_STATE () { 50 } # "data state" in the spec
144    
145    ## XML-only states
146    sub PI_STATE () { 51 }
147    sub PI_TARGET_STATE () { 52 }
148    sub PI_TARGET_AFTER_STATE () { 53 }
149    sub PI_DATA_STATE () { 54 }
150    sub PI_AFTER_STATE () { 55 }
151    sub PI_DATA_AFTER_STATE () { 56 }
152    sub DOCTYPE_INTERNAL_SUBSET_STATE () { 57 }
153    sub DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 58 }
154    sub BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 59 }
155    sub DOCTYPE_TAG_STATE () { 60 }
156    sub DOCTYPE_MARKUP_DECLARATION_OPEN_STATE () { 61 }
157    sub MD_ATTLIST_STATE () { 62 }
158    sub MD_E_STATE () { 63 }
159    sub MD_ELEMENT_STATE () { 64 }
160    sub MD_ENTITY_STATE () { 65 }
161    sub MD_NOTATION_STATE () { 66 }
162    sub DOCTYPE_MD_STATE () { 67 }
163    sub BEFORE_MD_NAME_STATE () { 68 }
164    sub MD_NAME_STATE () { 69 }
165    sub DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE () { 70 }
166    sub DOCTYPE_ATTLIST_NAME_AFTER_STATE () { 71 }
167    sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE () { 72 }
168    sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE () { 73 }
169    sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE () { 74 }
170    sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE () { 75 }
171    sub BEFORE_ALLOWED_TOKEN_STATE () { 76 }
172    sub ALLOWED_TOKEN_STATE () { 77 }
173    sub AFTER_ALLOWED_TOKEN_STATE () { 78 }
174    sub AFTER_ALLOWED_TOKENS_STATE () { 79 }
175    sub BEFORE_ATTR_DEFAULT_STATE () { 80 }
176    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE () { 81 }
177    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE () { 82 }
178    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE () { 83 }
179    sub AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE () { 84 }
180    sub BEFORE_NDATA_STATE () { 85 }
181    sub NDATA_STATE () { 86 }
182    sub AFTER_NDATA_STATE () { 87 }
183    sub BEFORE_NOTATION_NAME_STATE () { 88 }
184    sub NOTATION_NAME_STATE () { 89 }
185    sub DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE () { 90 }
186    sub DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE () { 91 }
187    sub ENTITY_VALUE_ENTITY_STATE () { 92 }
188    sub AFTER_ELEMENT_NAME_STATE () { 93 }
189    sub BEFORE_ELEMENT_CONTENT_STATE () { 94 }
190    sub CONTENT_KEYWORD_STATE () { 95 }
191    sub AFTER_CM_GROUP_OPEN_STATE () { 96 }
192    sub CM_ELEMENT_NAME_STATE () { 97 }
193    sub AFTER_CM_ELEMENT_NAME_STATE () { 98 }
194    sub AFTER_CM_GROUP_CLOSE_STATE () { 99 }
195    sub AFTER_MD_DEF_STATE () { 100 }
196    sub BOGUS_MD_STATE () { 101 }
197    
198  ## Tree constructor state constants (see Whatpm::HTML for the full  ## Tree constructor state constants (see Whatpm::HTML for the full
199  ## list and descriptions)  ## list and descriptions)
200    
# Line 178  sub _initialize_tokenizer ($) { Line 259  sub _initialize_tokenizer ($) {
259    #$self->{is_xml} (if XML)    #$self->{is_xml} (if XML)
260    
261    $self->{state} = DATA_STATE; # MUST    $self->{state} = DATA_STATE; # MUST
262    $self->{s_kwd} = ''; # state keyword    $self->{s_kwd} = ''; # Data state keyword
263      #$self->{kwd} = ''; # State-dependent keyword; initialized when used
264    #$self->{entity__value}; # initialized when used    #$self->{entity__value}; # initialized when used
265    #$self->{entity__match}; # initialized when used    #$self->{entity__match}; # initialized when used
266    $self->{content_model} = PCDATA_CONTENT_MODEL; # be    $self->{content_model} = PCDATA_CONTENT_MODEL; # be
# Line 208  sub _initialize_tokenizer ($) { Line 290  sub _initialize_tokenizer ($) {
290    
291  ## A token has:  ## A token has:
292  ##   ->{type} == DOCTYPE_TOKEN, START_TAG_TOKEN, END_TAG_TOKEN, COMMENT_TOKEN,  ##   ->{type} == DOCTYPE_TOKEN, START_TAG_TOKEN, END_TAG_TOKEN, COMMENT_TOKEN,
293  ##       CHARACTER_TOKEN, or END_OF_FILE_TOKEN  ##       CHARACTER_TOKEN, END_OF_FILE_TOKEN, PI_TOKEN, or ABORT_TOKEN
294  ##   ->{name} (DOCTYPE_TOKEN)  ##   ->{name} (DOCTYPE_TOKEN)
295  ##   ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN)  ##   ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN)
296    ##   ->{target} (PI_TOKEN)
297  ##   ->{pubid} (DOCTYPE_TOKEN)  ##   ->{pubid} (DOCTYPE_TOKEN)
298  ##   ->{sysid} (DOCTYPE_TOKEN)  ##   ->{sysid} (DOCTYPE_TOKEN)
299  ##   ->{quirks} == 1 or 0 (DOCTYPE_TOKEN): "force-quirks" flag  ##   ->{quirks} == 1 or 0 (DOCTYPE_TOKEN): "force-quirks" flag
# Line 218  sub _initialize_tokenizer ($) { Line 301  sub _initialize_tokenizer ($) {
301  ##        ->{name}  ##        ->{name}
302  ##        ->{value}  ##        ->{value}
303  ##        ->{has_reference} == 1 or 0  ##        ->{has_reference} == 1 or 0
304  ##   ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN)  ##        ->{index}: Index of the attribute in a tag.
305    ##   ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN, PI_TOKEN)
306    ##   ->{has_reference} == 1 or 0 (CHARACTER_TOKEN)
307    ##   ->{last_index} (ELEMENT_TOKEN): Next attribute's index - 1.
308    ##   ->{has_internal_subset} = 1 or 0 (DOCTYPE_TOKEN)
309    
310  ## NOTE: The "self-closing flag" is held as |$self->{self_closing}|.  ## NOTE: The "self-closing flag" is held as |$self->{self_closing}|.
311  ##     |->{self_closing}| is used to save the value of |$self->{self_closing}|  ##     |->{self_closing}| is used to save the value of |$self->{self_closing}|
312  ##     while the token is pushed back to the stack.  ##     while the token is pushed back to the stack.
# Line 238  my $is_space = { Line 326  my $is_space = {
326    0x0009 => 1, # CHARACTER TABULATION (HT)    0x0009 => 1, # CHARACTER TABULATION (HT)
327    0x000A => 1, # LINE FEED (LF)    0x000A => 1, # LINE FEED (LF)
328    #0x000B => 0, # LINE TABULATION (VT)    #0x000B => 0, # LINE TABULATION (VT)
329    0x000C => 1, # FORM FEED (FF)    0x000C => 1, # FORM FEED (FF) ## XML5: Not a space character.
330    #0x000D => 1, # CARRIAGE RETURN (CR)    #0x000D => 1, # CARRIAGE RETURN (CR)
331    0x0020 => 1, # SPACE (SP)    0x0020 => 1, # SPACE (SP)
332  };  };
# Line 498  sub _get_next_token ($) { Line 586  sub _get_next_token ($) {
586        return  ($token);        return  ($token);
587        redo A;        redo A;
588      } elsif ($self->{state} == TAG_OPEN_STATE) {      } elsif ($self->{state} == TAG_OPEN_STATE) {
589          ## XML5: "tag state".
590    
591        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
592          if ($self->{nc} == 0x002F) { # /          if ($self->{nc} == 0x002F) { # /
593                        
# Line 516  sub _get_next_token ($) { Line 606  sub _get_next_token ($) {
606            redo A;            redo A;
607          } elsif ($self->{nc} == 0x0021) { # !          } elsif ($self->{nc} == 0x0021) { # !
608                        
609            $self->{s_kwd} = '<' unless $self->{escape};            $self->{s_kwd} = $self->{escaped} ? '' : '<';
610            #            #
611          } else {          } else {
612                        
613              $self->{s_kwd} = '';
614            #            #
615          }          }
616    
617          ## reconsume          ## reconsume
618          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
         $self->{s_kwd} = '';  
619          return  ({type => CHARACTER_TOKEN, data => '<',          return  ({type => CHARACTER_TOKEN, data => '<',
620                    line => $self->{line_prev},                    line => $self->{line_prev},
621                    column => $self->{column_prev},                    column => $self->{column_prev},
# Line 629  sub _get_next_token ($) { Line 719  sub _get_next_token ($) {
719    
720            redo A;            redo A;
721          } elsif ($self->{nc} == 0x003F) { # ?          } elsif ($self->{nc} == 0x003F) { # ?
722                        if ($self->{is_xml}) {
723            $self->{parse_error}->(level => $self->{level}->{must}, type => 'pio',              
724                            line => $self->{line_prev},              $self->{state} = PI_STATE;
725                            column => $self->{column_prev});              
726            $self->{state} = BOGUS_COMMENT_STATE;      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
727            $self->{ct} = {type => COMMENT_TOKEN, data => '',        $self->{line_prev} = $self->{line};
728                                      line => $self->{line_prev},        $self->{column_prev} = $self->{column};
729                                      column => $self->{column_prev},        $self->{column}++;
730                                     };        $self->{nc}
731            ## $self->{nc} is intentionally left as is            = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
732            redo A;      } else {
733          } else {        $self->{set_nc}->($self);
734        }
735      
736                redo A;
737              } else {
738                
739                $self->{parse_error}->(level => $self->{level}->{must}, type => 'pio',
740                                line => $self->{line_prev},
741                                column => $self->{column_prev});
742                $self->{state} = BOGUS_COMMENT_STATE;
743                $self->{ct} = {type => COMMENT_TOKEN, data => '',
744                               line => $self->{line_prev},
745                               column => $self->{column_prev},
746                              };
747                ## $self->{nc} is intentionally left as is
748                redo A;
749              }
750            } elsif (not $self->{is_xml} or $is_space->{$self->{nc}}) {
751                        
752            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago',            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago',
753                            line => $self->{line_prev},                            line => $self->{line_prev},
# Line 655  sub _get_next_token ($) { Line 762  sub _get_next_token ($) {
762                     });                     });
763    
764            redo A;            redo A;
765            } else {
766              ## XML5: "<:" is a parse error.
767              
768              $self->{ct} = {type => START_TAG_TOKEN,
769                                        tag_name => chr ($self->{nc}),
770                                        line => $self->{line_prev},
771                                        column => $self->{column_prev}};
772              $self->{state} = TAG_NAME_STATE;
773              
774        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
775          $self->{line_prev} = $self->{line};
776          $self->{column_prev} = $self->{column};
777          $self->{column}++;
778          $self->{nc}
779              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
780        } else {
781          $self->{set_nc}->($self);
782        }
783      
784              redo A;
785          }          }
786        } else {        } else {
787          die "$0: $self->{content_model} in tag open";          die "$0: $self->{content_model} in tag open";
# Line 663  sub _get_next_token ($) { Line 790  sub _get_next_token ($) {
790        ## NOTE: The "close tag open state" in the spec is implemented as        ## NOTE: The "close tag open state" in the spec is implemented as
791        ## |CLOSE_TAG_OPEN_STATE| and |CDATA_RCDATA_CLOSE_TAG_STATE|.        ## |CLOSE_TAG_OPEN_STATE| and |CDATA_RCDATA_CLOSE_TAG_STATE|.
792    
793          ## XML5: "end tag state".
794    
795        my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"        my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"
796        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
797          if (defined $self->{last_stag_name}) {          if (defined $self->{last_stag_name}) {
798            $self->{state} = CDATA_RCDATA_CLOSE_TAG_STATE;            $self->{state} = CDATA_RCDATA_CLOSE_TAG_STATE;
799            $self->{s_kwd} = '';            $self->{kwd} = '';
800            ## Reconsume.            ## Reconsume.
801            redo A;            redo A;
802          } else {          } else {
# Line 724  sub _get_next_token ($) { Line 853  sub _get_next_token ($) {
853        
854          redo A;          redo A;
855        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
856          $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty end tag',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty end tag',
857                          line => $self->{line_prev}, ## "<" in "</>"                          line => $self->{line_prev}, ## "<" in "</>"
858                          column => $self->{column_prev} - 1);                          column => $self->{column_prev} - 1);
859          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
860          $self->{s_kwd} = '';          $self->{s_kwd} = '';
861                    if ($self->{is_xml}) {
862              
863              ## XML5: No parse error.
864              
865              ## NOTE: This parser raises a parse error, since it supports
866              ## XML1, not XML5.
867    
868              ## NOTE: A short end tag token.
869              my $ct = {type => END_TAG_TOKEN,
870                        tag_name => '',
871                        line => $self->{line_prev},
872                        column => $self->{column_prev} - 1,
873                       };
874              
875        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
876          $self->{line_prev} = $self->{line};
877          $self->{column_prev} = $self->{column};
878          $self->{column}++;
879          $self->{nc}
880              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
881        } else {
882          $self->{set_nc}->($self);
883        }
884      
885              return  ($ct);
886            } else {
887              
888              
889      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
890        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
891        $self->{column_prev} = $self->{column};        $self->{column_prev} = $self->{column};
# Line 741  sub _get_next_token ($) { Line 896  sub _get_next_token ($) {
896        $self->{set_nc}->($self);        $self->{set_nc}->($self);
897      }      }
898        
899            }
900          redo A;          redo A;
901        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
902                    
# Line 754  sub _get_next_token ($) { Line 910  sub _get_next_token ($) {
910                   });                   });
911    
912          redo A;          redo A;
913        } else {        } elsif (not $self->{is_xml} or
914                   $is_space->{$self->{nc}}) {
915                    
916          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus end tag');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus end tag',
917                            line => $self->{line_prev}, # "<" of "</"
918                            column => $self->{column_prev} - 1);
919          $self->{state} = BOGUS_COMMENT_STATE;          $self->{state} = BOGUS_COMMENT_STATE;
920          $self->{ct} = {type => COMMENT_TOKEN, data => '',          $self->{ct} = {type => COMMENT_TOKEN, data => '',
921                                    line => $self->{line_prev}, # "<" of "</"                                    line => $self->{line_prev}, # "<" of "</"
# Line 769  sub _get_next_token ($) { Line 928  sub _get_next_token ($) {
928          ## generated from the bogus end tag, as defined in the          ## generated from the bogus end tag, as defined in the
929          ## "bogus comment state" entry.          ## "bogus comment state" entry.
930          redo A;          redo A;
931          } else {
932            ## XML5: "</:" is a parse error.
933            
934            $self->{ct} = {type => END_TAG_TOKEN,
935                           tag_name => chr ($self->{nc}),
936                           line => $l, column => $c};
937            $self->{state} = TAG_NAME_STATE; ## XML5: "end tag name state".
938            
939        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
940          $self->{line_prev} = $self->{line};
941          $self->{column_prev} = $self->{column};
942          $self->{column}++;
943          $self->{nc}
944              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
945        } else {
946          $self->{set_nc}->($self);
947        }
948      
949            redo A;
950        }        }
951      } elsif ($self->{state} == CDATA_RCDATA_CLOSE_TAG_STATE) {      } elsif ($self->{state} == CDATA_RCDATA_CLOSE_TAG_STATE) {
952        my $ch = substr $self->{last_stag_name}, length $self->{s_kwd}, 1;        my $ch = substr $self->{last_stag_name}, length $self->{kwd}, 1;
953        if (length $ch) {        if (length $ch) {
954          my $CH = $ch;          my $CH = $ch;
955          $ch =~ tr/a-z/A-Z/;          $ch =~ tr/a-z/A-Z/;
# Line 779  sub _get_next_token ($) { Line 957  sub _get_next_token ($) {
957          if ($nch eq $ch or $nch eq $CH) {          if ($nch eq $ch or $nch eq $CH) {
958                        
959            ## Stay in the state.            ## Stay in the state.
960            $self->{s_kwd} .= $nch;            $self->{kwd} .= $nch;
961                        
962      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
963        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 798  sub _get_next_token ($) { Line 976  sub _get_next_token ($) {
976            $self->{s_kwd} = '';            $self->{s_kwd} = '';
977            ## Reconsume.            ## Reconsume.
978            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
979                      data => '</' . $self->{s_kwd},                      data => '</' . $self->{kwd},
980                      line => $self->{line_prev},                      line => $self->{line_prev},
981                      column => $self->{column_prev} - 1 - length $self->{s_kwd},                      column => $self->{column_prev} - 1 - length $self->{kwd},
982                     });                     });
983            redo A;            redo A;
984          }          }
# Line 816  sub _get_next_token ($) { Line 994  sub _get_next_token ($) {
994            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
995            $self->{s_kwd} = '';            $self->{s_kwd} = '';
996            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
997                      data => '</' . $self->{s_kwd},                      data => '</' . $self->{kwd},
998                      line => $self->{line_prev},                      line => $self->{line_prev},
999                      column => $self->{column_prev} - 1 - length $self->{s_kwd},                      column => $self->{column_prev} - 1 - length $self->{kwd},
1000                     });                     });
1001            redo A;            redo A;
1002          } else {          } else {
# Line 827  sub _get_next_token ($) { Line 1005  sub _get_next_token ($) {
1005                = {type => END_TAG_TOKEN,                = {type => END_TAG_TOKEN,
1006                   tag_name => $self->{last_stag_name},                   tag_name => $self->{last_stag_name},
1007                   line => $self->{line_prev},                   line => $self->{line_prev},
1008                   column => $self->{column_prev} - 1 - length $self->{s_kwd}};                   column => $self->{column_prev} - 1 - length $self->{kwd}};
1009            $self->{state} = TAG_NAME_STATE;            $self->{state} = TAG_NAME_STATE;
1010            ## Reconsume.            ## Reconsume.
1011            redo A;            redo A;
# Line 959  sub _get_next_token ($) { Line 1137  sub _get_next_token ($) {
1137          redo A;          redo A;
1138        }        }
1139      } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {      } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {
1140          ## XML5: "Tag attribute name before state".
1141    
1142        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
1143                    
1144          ## Stay in the state          ## Stay in the state
# Line 1071  sub _get_next_token ($) { Line 1251  sub _get_next_token ($) {
1251               0x003D => 1, # =               0x003D => 1, # =
1252              }->{$self->{nc}}) {              }->{$self->{nc}}) {
1253                        
1254              ## XML5: Not a parse error.
1255            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');
1256          } else {          } else {
1257                        
1258              ## XML5: ":" raises a parse error and is ignored.
1259          }          }
1260          $self->{ca}          $self->{ca}
1261              = {name => chr ($self->{nc}),              = {name => chr ($self->{nc}),
# Line 1094  sub _get_next_token ($) { Line 1276  sub _get_next_token ($) {
1276          redo A;          redo A;
1277        }        }
1278      } elsif ($self->{state} == ATTRIBUTE_NAME_STATE) {      } elsif ($self->{state} == ATTRIBUTE_NAME_STATE) {
1279          ## XML5: "Tag attribute name state".
1280    
1281        my $before_leave = sub {        my $before_leave = sub {
1282          if (exists $self->{ct}->{attributes} # start tag or end tag          if (exists $self->{ct}->{attributes} # start tag or end tag
1283              ->{$self->{ca}->{name}}) { # MUST              ->{$self->{ca}->{name}}) { # MUST
# Line 1104  sub _get_next_token ($) { Line 1288  sub _get_next_token ($) {
1288                        
1289            $self->{ct}->{attributes}->{$self->{ca}->{name}}            $self->{ct}->{attributes}->{$self->{ca}->{name}}
1290              = $self->{ca};              = $self->{ca};
1291              $self->{ca}->{index} = ++$self->{ct}->{last_index};
1292          }          }
1293        }; # $before_leave        }; # $before_leave
1294    
# Line 1140  sub _get_next_token ($) { Line 1325  sub _get_next_token ($) {
1325        
1326          redo A;          redo A;
1327        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
1328            if ($self->{is_xml}) {
1329              
1330              ## XML5: Not a parse error.
1331              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1332            } else {
1333              
1334            }
1335    
1336          $before_leave->();          $before_leave->();
1337          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1338                        
# Line 1189  sub _get_next_token ($) { Line 1382  sub _get_next_token ($) {
1382        
1383          redo A;          redo A;
1384        } elsif ($self->{nc} == 0x002F) { # /        } elsif ($self->{nc} == 0x002F) { # /
1385            if ($self->{is_xml}) {
1386              
1387              ## XML5: Not a parse error.
1388              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1389            } else {
1390              
1391            }
1392                    
1393          $before_leave->();          $before_leave->();
1394          $self->{state} = SELF_CLOSING_START_TAG_STATE;          $self->{state} = SELF_CLOSING_START_TAG_STATE;
# Line 1233  sub _get_next_token ($) { Line 1433  sub _get_next_token ($) {
1433          if ($self->{nc} == 0x0022 or # "          if ($self->{nc} == 0x0022 or # "
1434              $self->{nc} == 0x0027) { # '              $self->{nc} == 0x0027) { # '
1435                        
1436              ## XML5: Not a parse error.
1437            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');
1438          } else {          } else {
1439                        
# Line 1253  sub _get_next_token ($) { Line 1454  sub _get_next_token ($) {
1454          redo A;          redo A;
1455        }        }
1456      } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {      } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {
1457          ## XML5: "Tag attribute name after state".
1458          
1459        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
1460                    
1461          ## Stay in the state          ## Stay in the state
# Line 1284  sub _get_next_token ($) { Line 1487  sub _get_next_token ($) {
1487        
1488          redo A;          redo A;
1489        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
1490            if ($self->{is_xml}) {
1491              
1492              ## XML5: Not a parse error.
1493              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1494            } else {
1495              
1496            }
1497    
1498          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1499                        
1500            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
# Line 1337  sub _get_next_token ($) { Line 1548  sub _get_next_token ($) {
1548        
1549          redo A;          redo A;
1550        } elsif ($self->{nc} == 0x002F) { # /        } elsif ($self->{nc} == 0x002F) { # /
1551            if ($self->{is_xml}) {
1552              
1553              ## XML5: Not a parse error.
1554              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1555            } else {
1556              
1557            }
1558                    
1559          $self->{state} = SELF_CLOSING_START_TAG_STATE;          $self->{state} = SELF_CLOSING_START_TAG_STATE;
1560                    
# Line 1376  sub _get_next_token ($) { Line 1594  sub _get_next_token ($) {
1594    
1595          redo A;          redo A;
1596        } else {        } else {
1597            if ($self->{is_xml}) {
1598              
1599              ## XML5: Not a parse error.
1600              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1601            } else {
1602              
1603            }
1604    
1605          if ($self->{nc} == 0x0022 or # "          if ($self->{nc} == 0x0022 or # "
1606              $self->{nc} == 0x0027) { # '              $self->{nc} == 0x0027) { # '
1607                        
1608              ## XML5: Not a parse error.
1609            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');
1610          } else {          } else {
1611                        
# Line 1402  sub _get_next_token ($) { Line 1629  sub _get_next_token ($) {
1629          redo A;                  redo A;        
1630        }        }
1631      } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {      } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {
1632          ## XML5: "Tag attribute value before state".
1633    
1634        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
1635                    
1636          ## Stay in the state          ## Stay in the state
# Line 1513  sub _get_next_token ($) { Line 1742  sub _get_next_token ($) {
1742        } else {        } else {
1743          if ($self->{nc} == 0x003D) { # =          if ($self->{nc} == 0x003D) { # =
1744                        
1745              ## XML5: Not a parse error.
1746            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');
1747            } elsif ($self->{is_xml}) {
1748              
1749              ## XML5: No parse error.
1750              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO
1751          } else {          } else {
1752                        
1753          }          }
# Line 1533  sub _get_next_token ($) { Line 1767  sub _get_next_token ($) {
1767          redo A;          redo A;
1768        }        }
1769      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1770          ## XML5: "Tag attribute value double quoted state" and "DOCTYPE
1771          ## ATTLIST attribute value double quoted state".
1772          
1773        if ($self->{nc} == 0x0022) { # "        if ($self->{nc} == 0x0022) { # "
1774                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1775          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;            
1776              ## XML5: "DOCTYPE ATTLIST name after state".
1777              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1778              $self->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE;
1779            } else {
1780              
1781              ## XML5: "Tag attribute name before state".
1782              $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1783            }
1784                    
1785      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1786        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1550  sub _get_next_token ($) { Line 1795  sub _get_next_token ($) {
1795          redo A;          redo A;
1796        } elsif ($self->{nc} == 0x0026) { # &        } elsif ($self->{nc} == 0x0026) { # &
1797                    
1798            ## XML5: Not defined yet.
1799    
1800          ## NOTE: In the spec, the tokenizer is switched to the          ## NOTE: In the spec, the tokenizer is switched to the
1801          ## "entity in attribute value state".  In this implementation, the          ## "entity in attribute value state".  In this implementation, the
1802          ## tokenizer is switched to the |ENTITY_STATE|, which is an          ## tokenizer is switched to the |ENTITY_STATE|, which is an
# Line 1574  sub _get_next_token ($) { Line 1821  sub _get_next_token ($) {
1821          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1822                        
1823            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
1824    
1825              $self->{state} = DATA_STATE;
1826              $self->{s_kwd} = '';
1827              ## reconsume
1828              return  ($self->{ct}); # start tag
1829              redo A;
1830          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1831            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1832            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1583  sub _get_next_token ($) { Line 1836  sub _get_next_token ($) {
1836              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
1837                            
1838            }            }
1839    
1840              $self->{state} = DATA_STATE;
1841              $self->{s_kwd} = '';
1842              ## reconsume
1843              return  ($self->{ct}); # end tag
1844              redo A;
1845            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1846              ## XML5: No parse error above; not defined yet.
1847              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1848              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1849              ## Reconsume.
1850              return  ($self->{ct}); # ATTLIST
1851              redo A;
1852          } else {          } else {
1853            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1854          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
1855        } else {        } else {
1856                    ## XML5 [ATTLIST]: Not defined yet.
1857            if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
1858              
1859              ## XML5: Not a parse error.
1860              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lt in attr value'); ## TODO: type
1861            } else {
1862              
1863            }
1864          $self->{ca}->{value} .= chr ($self->{nc});          $self->{ca}->{value} .= chr ($self->{nc});
1865          $self->{read_until}->($self->{ca}->{value},          $self->{read_until}->($self->{ca}->{value},
1866                                q["&],                                q["&<],
1867                                length $self->{ca}->{value});                                length $self->{ca}->{value});
1868    
1869          ## Stay in the state          ## Stay in the state
# Line 1615  sub _get_next_token ($) { Line 1881  sub _get_next_token ($) {
1881          redo A;          redo A;
1882        }        }
1883      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1884          ## XML5: "Tag attribute value single quoted state" and "DOCTYPE
1885          ## ATTLIST attribute value single quoted state".
1886    
1887        if ($self->{nc} == 0x0027) { # '        if ($self->{nc} == 0x0027) { # '
1888                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1889          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;            
1890              ## XML5: "DOCTYPE ATTLIST name after state".
1891              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1892              $self->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE;
1893            } else {
1894              
1895              ## XML5: "Before attribute name state" (sic).
1896              $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1897            }
1898                    
1899      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1900        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1632  sub _get_next_token ($) { Line 1909  sub _get_next_token ($) {
1909          redo A;          redo A;
1910        } elsif ($self->{nc} == 0x0026) { # &        } elsif ($self->{nc} == 0x0026) { # &
1911                    
1912            ## XML5: Not defined yet.
1913    
1914          ## NOTE: In the spec, the tokenizer is switched to the          ## NOTE: In the spec, the tokenizer is switched to the
1915          ## "entity in attribute value state".  In this implementation, the          ## "entity in attribute value state".  In this implementation, the
1916          ## tokenizer is switched to the |ENTITY_STATE|, which is an          ## tokenizer is switched to the |ENTITY_STATE|, which is an
# Line 1656  sub _get_next_token ($) { Line 1935  sub _get_next_token ($) {
1935          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1936                        
1937            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
1938    
1939              $self->{state} = DATA_STATE;
1940              $self->{s_kwd} = '';
1941              ## reconsume
1942              return  ($self->{ct}); # start tag
1943              redo A;
1944          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1945            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1946            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1665  sub _get_next_token ($) { Line 1950  sub _get_next_token ($) {
1950              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
1951                            
1952            }            }
1953    
1954              $self->{state} = DATA_STATE;
1955              $self->{s_kwd} = '';
1956              ## reconsume
1957              return  ($self->{ct}); # end tag
1958              redo A;
1959            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1960              ## XML5: No parse error above; not defined yet.
1961              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1962              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1963              ## Reconsume.
1964              return  ($self->{ct}); # ATTLIST
1965              redo A;
1966          } else {          } else {
1967            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1968          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
1969        } else {        } else {
1970                    ## XML5 [ATTLIST]: Not defined yet.
1971            if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
1972              
1973              ## XML5: Not a parse error.
1974              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lt in attr value'); ## TODO: type
1975            } else {
1976              
1977            }
1978          $self->{ca}->{value} .= chr ($self->{nc});          $self->{ca}->{value} .= chr ($self->{nc});
1979          $self->{read_until}->($self->{ca}->{value},          $self->{read_until}->($self->{ca}->{value},
1980                                q['&],                                q['&<],
1981                                length $self->{ca}->{value});                                length $self->{ca}->{value});
1982    
1983          ## Stay in the state          ## Stay in the state
# Line 1697  sub _get_next_token ($) { Line 1995  sub _get_next_token ($) {
1995          redo A;          redo A;
1996        }        }
1997      } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {
1998          ## XML5: "Tag attribute value unquoted state".
1999    
2000        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
2001                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
2002          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;            
2003              push @{$self->{ct}->{attrdefs}}, $self->{ca};
2004              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
2005            } else {
2006              
2007              ## XML5: "Tag attribute name before state".
2008              $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
2009            }
2010                    
2011      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2012        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1714  sub _get_next_token ($) { Line 2021  sub _get_next_token ($) {
2021          redo A;          redo A;
2022        } elsif ($self->{nc} == 0x0026) { # &        } elsif ($self->{nc} == 0x0026) { # &
2023                    
2024    
2025            ## XML5: Not defined yet.
2026    
2027          ## NOTE: In the spec, the tokenizer is switched to the          ## NOTE: In the spec, the tokenizer is switched to the
2028          ## "entity in attribute value state".  In this implementation, the          ## "entity in attribute value state".  In this implementation, the
2029          ## tokenizer is switched to the |ENTITY_STATE|, which is an          ## tokenizer is switched to the |ENTITY_STATE|, which is an
# Line 1737  sub _get_next_token ($) { Line 2047  sub _get_next_token ($) {
2047          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
2048                        
2049            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
2050    
2051              $self->{state} = DATA_STATE;
2052              $self->{s_kwd} = '';
2053              
2054        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2055          $self->{line_prev} = $self->{line};
2056          $self->{column_prev} = $self->{column};
2057          $self->{column}++;
2058          $self->{nc}
2059              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
2060        } else {
2061          $self->{set_nc}->($self);
2062        }
2063      
2064              return  ($self->{ct}); # start tag
2065              redo A;
2066          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
2067            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
2068            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1746  sub _get_next_token ($) { Line 2072  sub _get_next_token ($) {
2072              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
2073                            
2074            }            }
2075          } else {  
2076            die "$0: $self->{ct}->{type}: Unknown token type";            $self->{state} = DATA_STATE;
2077          }            $self->{s_kwd} = '';
2078          $self->{state} = DATA_STATE;            
         $self->{s_kwd} = '';  
           
2079      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2080        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
2081        $self->{column_prev} = $self->{column};        $self->{column_prev} = $self->{column};
# Line 1762  sub _get_next_token ($) { Line 2086  sub _get_next_token ($) {
2086        $self->{set_nc}->($self);        $self->{set_nc}->($self);
2087      }      }
2088        
2089              return  ($self->{ct}); # end tag
2090          return  ($self->{ct}); # start tag or end tag            redo A;
2091            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
2092          redo A;            push @{$self->{ct}->{attrdefs}}, $self->{ca};
2093              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2094              
2095        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2096          $self->{line_prev} = $self->{line};
2097          $self->{column_prev} = $self->{column};
2098          $self->{column}++;
2099          $self->{nc}
2100              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
2101        } else {
2102          $self->{set_nc}->($self);
2103        }
2104      
2105              return  ($self->{ct}); # ATTLIST
2106              redo A;
2107            } else {
2108              die "$0: $self->{ct}->{type}: Unknown token type";
2109            }
2110        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');  
2111          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
2112                        
2113              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');
2114            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
2115    
2116              $self->{state} = DATA_STATE;
2117              $self->{s_kwd} = '';
2118              ## reconsume
2119              return  ($self->{ct}); # start tag
2120              redo A;
2121          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
2122              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');
2123            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
2124            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
2125                            
# Line 1780  sub _get_next_token ($) { Line 2128  sub _get_next_token ($) {
2128              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
2129                            
2130            }            }
2131    
2132              $self->{state} = DATA_STATE;
2133              $self->{s_kwd} = '';
2134              ## reconsume
2135              return  ($self->{ct}); # end tag
2136              redo A;
2137            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
2138              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
2139              push @{$self->{ct}->{attrdefs}}, $self->{ca};
2140              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2141              ## Reconsume.
2142              return  ($self->{ct}); # ATTLIST
2143              redo A;
2144          } else {          } else {
2145            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
2146          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
2147        } else {        } else {
2148          if ({          if ({
2149               0x0022 => 1, # "               0x0022 => 1, # "
# Line 1797  sub _get_next_token ($) { Line 2151  sub _get_next_token ($) {
2151               0x003D => 1, # =               0x003D => 1, # =
2152              }->{$self->{nc}}) {              }->{$self->{nc}}) {
2153                        
2154              ## XML5: Not a parse error.
2155            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');
2156          } else {          } else {
2157                        
# Line 1913  sub _get_next_token ($) { Line 2268  sub _get_next_token ($) {
2268          redo A;          redo A;
2269        }        }
2270      } elsif ($self->{state} == SELF_CLOSING_START_TAG_STATE) {      } elsif ($self->{state} == SELF_CLOSING_START_TAG_STATE) {
2271          ## XML5: "Empty tag state".
2272    
2273        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2274          if ($self->{ct}->{type} == END_TAG_TOKEN) {          if ($self->{ct}->{type} == END_TAG_TOKEN) {
2275                        
# Line 1964  sub _get_next_token ($) { Line 2321  sub _get_next_token ($) {
2321          } else {          } else {
2322            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
2323          }          }
2324            ## XML5: "Tag attribute name before state".
2325          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2326          $self->{s_kwd} = '';          $self->{s_kwd} = '';
2327          ## Reconsume.          ## Reconsume.
# Line 1978  sub _get_next_token ($) { Line 2336  sub _get_next_token ($) {
2336          redo A;          redo A;
2337        }        }
2338      } elsif ($self->{state} == BOGUS_COMMENT_STATE) {      } elsif ($self->{state} == BOGUS_COMMENT_STATE) {
2339        ## (only happen if PCDATA state)        ## XML5: "Bogus comment state" and "DOCTYPE bogus comment state".
2340    
2341        ## NOTE: Unlike spec's "bogus comment state", this implementation        ## NOTE: Unlike spec's "bogus comment state", this implementation
2342        ## consumes characters one-by-one basis.        ## consumes characters one-by-one basis.
2343                
2344        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2345                    if ($self->{in_subset}) {
2346          $self->{state} = DATA_STATE;            
2347          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2348            } else {
2349              
2350              $self->{state} = DATA_STATE;
2351              $self->{s_kwd} = '';
2352            }
2353                    
2354      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2355        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2002  sub _get_next_token ($) { Line 2365  sub _get_next_token ($) {
2365          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
2366          redo A;          redo A;
2367        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
2368                    if ($self->{in_subset}) {
2369          $self->{state} = DATA_STATE;            
2370          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2371            } else {
2372              
2373              $self->{state} = DATA_STATE;
2374              $self->{s_kwd} = '';
2375            }
2376          ## reconsume          ## reconsume
2377    
2378          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2031  sub _get_next_token ($) { Line 2399  sub _get_next_token ($) {
2399          redo A;          redo A;
2400        }        }
2401      } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {      } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {
2402        ## (only happen if PCDATA state)        ## XML5: "Markup declaration state".
2403                
2404        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2405                    
# Line 2053  sub _get_next_token ($) { Line 2421  sub _get_next_token ($) {
2421          ## ASCII case-insensitive.          ## ASCII case-insensitive.
2422                    
2423          $self->{state} = MD_DOCTYPE_STATE;          $self->{state} = MD_DOCTYPE_STATE;
2424          $self->{s_kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
2425                    
2426      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2427        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2072  sub _get_next_token ($) { Line 2440  sub _get_next_token ($) {
2440                 $self->{nc} == 0x005B) { # [                 $self->{nc} == 0x005B) { # [
2441                                                    
2442          $self->{state} = MD_CDATA_STATE;          $self->{state} = MD_CDATA_STATE;
2443          $self->{s_kwd} = '[';          $self->{kwd} = '[';
2444                    
2445      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2446        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2106  sub _get_next_token ($) { Line 2474  sub _get_next_token ($) {
2474                                    line => $self->{line_prev},                                    line => $self->{line_prev},
2475                                    column => $self->{column_prev} - 2,                                    column => $self->{column_prev} - 2,
2476                                   };                                   };
2477          $self->{state} = COMMENT_START_STATE;          $self->{state} = COMMENT_START_STATE; ## XML5: "comment state".
2478                    
2479      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2480        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2142  sub _get_next_token ($) { Line 2510  sub _get_next_token ($) {
2510              0x0054, # T              0x0054, # T
2511              0x0059, # Y              0x0059, # Y
2512              0x0050, # P              0x0050, # P
2513            ]->[length $self->{s_kwd}] or            ]->[length $self->{kwd}] or
2514            $self->{nc} == [            $self->{nc} == [
2515              undef,              undef,
2516              0x006F, # o              0x006F, # o
# Line 2150  sub _get_next_token ($) { Line 2518  sub _get_next_token ($) {
2518              0x0074, # t              0x0074, # t
2519              0x0079, # y              0x0079, # y
2520              0x0070, # p              0x0070, # p
2521            ]->[length $self->{s_kwd}]) {            ]->[length $self->{kwd}]) {
2522                    
2523          ## Stay in the state.          ## Stay in the state.
2524          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
2525                    
2526      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2527        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2166  sub _get_next_token ($) { Line 2534  sub _get_next_token ($) {
2534      }      }
2535        
2536          redo A;          redo A;
2537        } elsif ((length $self->{s_kwd}) == 6 and        } elsif ((length $self->{kwd}) == 6 and
2538                 ($self->{nc} == 0x0045 or # E                 ($self->{nc} == 0x0045 or # E
2539                  $self->{nc} == 0x0065)) { # e                  $self->{nc} == 0x0065)) { # e
2540                    if ($self->{is_xml} and
2541                ($self->{kwd} ne 'DOCTYP' or $self->{nc} == 0x0065)) {
2542              
2543              ## XML5: case-sensitive.
2544              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO
2545                              text => 'DOCTYPE',
2546                              line => $self->{line_prev},
2547                              column => $self->{column_prev} - 5);
2548            } else {
2549              
2550            }
2551          $self->{state} = DOCTYPE_STATE;          $self->{state} = DOCTYPE_STATE;
2552          $self->{ct} = {type => DOCTYPE_TOKEN,          $self->{ct} = {type => DOCTYPE_TOKEN,
2553                                    quirks => 1,                                    quirks => 1,
# Line 2192  sub _get_next_token ($) { Line 2570  sub _get_next_token ($) {
2570                                    
2571          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
2572                          line => $self->{line_prev},                          line => $self->{line_prev},
2573                          column => $self->{column_prev} - 1 - length $self->{s_kwd});                          column => $self->{column_prev} - 1 - length $self->{kwd});
2574          $self->{state} = BOGUS_COMMENT_STATE;          $self->{state} = BOGUS_COMMENT_STATE;
2575          ## Reconsume.          ## Reconsume.
2576          $self->{ct} = {type => COMMENT_TOKEN,          $self->{ct} = {type => COMMENT_TOKEN,
2577                                    data => $self->{s_kwd},                                    data => $self->{kwd},
2578                                    line => $self->{line_prev},                                    line => $self->{line_prev},
2579                                    column => $self->{column_prev} - 1 - length $self->{s_kwd},                                    column => $self->{column_prev} - 1 - length $self->{kwd},
2580                                   };                                   };
2581          redo A;          redo A;
2582        }        }
# Line 2209  sub _get_next_token ($) { Line 2587  sub _get_next_token ($) {
2587              '[CD' => 0x0041, # A              '[CD' => 0x0041, # A
2588              '[CDA' => 0x0054, # T              '[CDA' => 0x0054, # T
2589              '[CDAT' => 0x0041, # A              '[CDAT' => 0x0041, # A
2590            }->{$self->{s_kwd}}) {            }->{$self->{kwd}}) {
2591                    
2592          ## Stay in the state.          ## Stay in the state.
2593          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
2594                    
2595      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2596        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2225  sub _get_next_token ($) { Line 2603  sub _get_next_token ($) {
2603      }      }
2604        
2605          redo A;          redo A;
2606        } elsif ($self->{s_kwd} eq '[CDATA' and        } elsif ($self->{kwd} eq '[CDATA' and
2607                 $self->{nc} == 0x005B) { # [                 $self->{nc} == 0x005B) { # [
2608                    if ($self->{is_xml} and
2609                not $self->{tainted} and
2610                @{$self->{open_elements} or []} == 0) {
2611              
2612              $self->{parse_error}->(level => $self->{level}->{must}, type => 'cdata outside of root element',
2613                              line => $self->{line_prev},
2614                              column => $self->{column_prev} - 7);
2615              $self->{tainted} = 1;
2616            } else {
2617              
2618            }
2619    
2620          $self->{ct} = {type => CHARACTER_TOKEN,          $self->{ct} = {type => CHARACTER_TOKEN,
2621                                    data => '',                                    data => '',
2622                                    line => $self->{line_prev},                                    line => $self->{line_prev},
# Line 2249  sub _get_next_token ($) { Line 2638  sub _get_next_token ($) {
2638                    
2639          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
2640                          line => $self->{line_prev},                          line => $self->{line_prev},
2641                          column => $self->{column_prev} - 1 - length $self->{s_kwd});                          column => $self->{column_prev} - 1 - length $self->{kwd});
2642          $self->{state} = BOGUS_COMMENT_STATE;          $self->{state} = BOGUS_COMMENT_STATE;
2643          ## Reconsume.          ## Reconsume.
2644          $self->{ct} = {type => COMMENT_TOKEN,          $self->{ct} = {type => COMMENT_TOKEN,
2645                                    data => $self->{s_kwd},                                    data => $self->{kwd},
2646                                    line => $self->{line_prev},                                    line => $self->{line_prev},
2647                                    column => $self->{column_prev} - 1 - length $self->{s_kwd},                                    column => $self->{column_prev} - 1 - length $self->{kwd},
2648                                   };                                   };
2649          redo A;          redo A;
2650        }        }
# Line 2276  sub _get_next_token ($) { Line 2665  sub _get_next_token ($) {
2665        
2666          redo A;          redo A;
2667        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
2668          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');
2669          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2670          $self->{s_kwd} = '';            
2671              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2672            } else {
2673              
2674              $self->{state} = DATA_STATE;
2675              $self->{s_kwd} = '';
2676            }
2677                    
2678      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2679        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2296  sub _get_next_token ($) { Line 2690  sub _get_next_token ($) {
2690    
2691          redo A;          redo A;
2692        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2693          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2694          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2695          $self->{s_kwd} = '';            
2696              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2697            } else {
2698              
2699              $self->{state} = DATA_STATE;
2700              $self->{s_kwd} = '';
2701            }
2702          ## reconsume          ## reconsume
2703    
2704          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2340  sub _get_next_token ($) { Line 2739  sub _get_next_token ($) {
2739        
2740          redo A;          redo A;
2741        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
2742          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');
2743          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2744          $self->{s_kwd} = '';            
2745              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2746            } else {
2747              
2748              $self->{state} = DATA_STATE;
2749              $self->{s_kwd} = '';
2750            }
2751                    
2752      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2753        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2360  sub _get_next_token ($) { Line 2764  sub _get_next_token ($) {
2764    
2765          redo A;          redo A;
2766        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2767          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2768          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2769          $self->{s_kwd} = '';            
2770              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2771            } else {
2772              
2773              $self->{state} = DATA_STATE;
2774              $self->{s_kwd} = '';
2775            }
2776          ## reconsume          ## reconsume
2777    
2778          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2388  sub _get_next_token ($) { Line 2797  sub _get_next_token ($) {
2797          redo A;          redo A;
2798        }        }
2799      } elsif ($self->{state} == COMMENT_STATE) {      } elsif ($self->{state} == COMMENT_STATE) {
2800          ## XML5: "Comment state" and "DOCTYPE comment state".
2801    
2802        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2803                    
2804          $self->{state} = COMMENT_END_DASH_STATE;          $self->{state} = COMMENT_END_DASH_STATE;
# Line 2404  sub _get_next_token ($) { Line 2815  sub _get_next_token ($) {
2815        
2816          redo A;          redo A;
2817        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2818          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2819          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2820          $self->{s_kwd} = '';            
2821              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2822            } else {
2823              
2824              $self->{state} = DATA_STATE;
2825              $self->{s_kwd} = '';
2826            }
2827          ## reconsume          ## reconsume
2828    
2829          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2435  sub _get_next_token ($) { Line 2851  sub _get_next_token ($) {
2851          redo A;          redo A;
2852        }        }
2853      } elsif ($self->{state} == COMMENT_END_DASH_STATE) {      } elsif ($self->{state} == COMMENT_END_DASH_STATE) {
2854          ## XML5: "Comment dash state" and "DOCTYPE comment dash state".
2855    
2856        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2857                    
2858          $self->{state} = COMMENT_END_STATE;          $self->{state} = COMMENT_END_STATE;
# Line 2451  sub _get_next_token ($) { Line 2869  sub _get_next_token ($) {
2869        
2870          redo A;          redo A;
2871        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2872          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2873          $self->{s_kwd} = '';          if ($self->{in_subset}) {
2874          $self->{state} = DATA_STATE;            
2875          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2876            } else {
2877              
2878              $self->{state} = DATA_STATE;
2879              $self->{s_kwd} = '';
2880            }
2881          ## reconsume          ## reconsume
2882    
2883          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2479  sub _get_next_token ($) { Line 2901  sub _get_next_token ($) {
2901          redo A;          redo A;
2902        }        }
2903      } elsif ($self->{state} == COMMENT_END_STATE) {      } elsif ($self->{state} == COMMENT_END_STATE) {
2904          ## XML5: "Comment end state" and "DOCTYPE comment end state".
2905    
2906        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2907                    if ($self->{in_subset}) {
2908          $self->{state} = DATA_STATE;            
2909          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2910            } else {
2911              
2912              $self->{state} = DATA_STATE;
2913              $self->{s_kwd} = '';
2914            }
2915                    
2916      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2917        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2500  sub _get_next_token ($) { Line 2929  sub _get_next_token ($) {
2929          redo A;          redo A;
2930        } elsif ($self->{nc} == 0x002D) { # -        } elsif ($self->{nc} == 0x002D) { # -
2931                    
2932            ## XML5: Not a parse error.
2933          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',
2934                          line => $self->{line_prev},                          line => $self->{line_prev},
2935                          column => $self->{column_prev});                          column => $self->{column_prev});
# Line 2518  sub _get_next_token ($) { Line 2948  sub _get_next_token ($) {
2948        
2949          redo A;          redo A;
2950        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2951          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2952          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2953          $self->{s_kwd} = '';            
2954              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2955            } else {
2956              
2957              $self->{state} = DATA_STATE;
2958              $self->{s_kwd} = '';
2959            }
2960          ## reconsume          ## reconsume
2961    
2962          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2529  sub _get_next_token ($) { Line 2964  sub _get_next_token ($) {
2964          redo A;          redo A;
2965        } else {        } else {
2966                    
2967            ## XML5: Not a parse error.
2968          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',
2969                          line => $self->{line_prev},                          line => $self->{line_prev},
2970                          column => $self->{column_prev});                          column => $self->{column_prev});
# Line 2565  sub _get_next_token ($) { Line 3001  sub _get_next_token ($) {
3001          redo A;          redo A;
3002        } else {        } else {
3003                    
3004            ## XML5: Unless EOF, switch to the bogus comment state.
3005          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before DOCTYPE name');
3006          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
3007          ## reconsume          ## reconsume
3008          redo A;          redo A;
3009        }        }
3010      } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {
3011          ## XML5: "DOCTYPE root name before state".
3012    
3013        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
3014                    
3015          ## Stay in the state          ## Stay in the state
# Line 2588  sub _get_next_token ($) { Line 3027  sub _get_next_token ($) {
3027          redo A;          redo A;
3028        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3029                    
3030            ## XML5: No parse error.
3031          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');
3032          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3033          $self->{s_kwd} = '';          $self->{s_kwd} = '';
# Line 2616  sub _get_next_token ($) { Line 3056  sub _get_next_token ($) {
3056          return  ($self->{ct}); # DOCTYPE (quirks)          return  ($self->{ct}); # DOCTYPE (quirks)
3057    
3058          redo A;          redo A;
3059          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
3060            
3061            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');
3062            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3063            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3064            $self->{in_subset} = 1;
3065            
3066        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3067          $self->{line_prev} = $self->{line};
3068          $self->{column_prev} = $self->{column};
3069          $self->{column}++;
3070          $self->{nc}
3071              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3072        } else {
3073          $self->{set_nc}->($self);
3074        }
3075      
3076            return  ($self->{ct}); # DOCTYPE
3077            redo A;
3078        } else {        } else {
3079                    
3080          $self->{ct}->{name} = chr $self->{nc};          $self->{ct}->{name} = chr $self->{nc};
# Line 2635  sub _get_next_token ($) { Line 3094  sub _get_next_token ($) {
3094          redo A;          redo A;
3095        }        }
3096      } elsif ($self->{state} == DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == DOCTYPE_NAME_STATE) {
3097  ## ISSUE: Redundant "First," in the spec.        ## XML5: "DOCTYPE root name state".
3098    
3099          ## ISSUE: Redundant "First," in the spec.
3100    
3101        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
3102                    
3103          $self->{state} = AFTER_DOCTYPE_NAME_STATE;          $self->{state} = AFTER_DOCTYPE_NAME_STATE;
# Line 2681  sub _get_next_token ($) { Line 3143  sub _get_next_token ($) {
3143          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
3144    
3145          redo A;          redo A;
3146          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
3147            
3148            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3149            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3150            $self->{in_subset} = 1;
3151            
3152        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3153          $self->{line_prev} = $self->{line};
3154          $self->{column_prev} = $self->{column};
3155          $self->{column}++;
3156          $self->{nc}
3157              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3158        } else {
3159          $self->{set_nc}->($self);
3160        }
3161      
3162            return  ($self->{ct}); # DOCTYPE
3163            redo A;
3164        } else {        } else {
3165                    
3166          $self->{ct}->{name}          $self->{ct}->{name}
# Line 2700  sub _get_next_token ($) { Line 3180  sub _get_next_token ($) {
3180          redo A;          redo A;
3181        }        }
3182      } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {
3183          ## XML5: Corresponding to XML5's "DOCTYPE root name after
3184          ## state", but implemented differently.
3185    
3186        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
3187                    
3188          ## Stay in the state          ## Stay in the state
# Line 2716  sub _get_next_token ($) { Line 3199  sub _get_next_token ($) {
3199        
3200          redo A;          redo A;
3201        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3202            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3203              
3204              $self->{state} = DATA_STATE;
3205              $self->{s_kwd} = '';
3206            } else {
3207              
3208              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md def'); ## TODO: type
3209              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3210            }
3211                    
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3212                    
3213      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3214        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2730  sub _get_next_token ($) { Line 3220  sub _get_next_token ($) {
3220        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3221      }      }
3222        
3223            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         return  ($self->{ct}); # DOCTYPE  
   
3224          redo A;          redo A;
3225        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3226            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3227              
3228              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3229              $self->{state} = DATA_STATE;
3230              $self->{s_kwd} = '';
3231              $self->{ct}->{quirks} = 1;
3232            } else {
3233              
3234              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3235              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3236            }
3237                    
3238          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          ## Reconsume.
3239          $self->{state} = DATA_STATE;          return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{s_kwd} = '';  
         ## reconsume  
   
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3240          redo A;          redo A;
3241        } elsif ($self->{nc} == 0x0050 or # P        } elsif ($self->{nc} == 0x0050 or # P
3242                 $self->{nc} == 0x0070) { # p                 $self->{nc} == 0x0070) { # p
3243            
3244          $self->{state} = PUBLIC_STATE;          $self->{state} = PUBLIC_STATE;
3245          $self->{s_kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
3246                    
3247      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3248        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2763  sub _get_next_token ($) { Line 3257  sub _get_next_token ($) {
3257          redo A;          redo A;
3258        } elsif ($self->{nc} == 0x0053 or # S        } elsif ($self->{nc} == 0x0053 or # S
3259                 $self->{nc} == 0x0073) { # s                 $self->{nc} == 0x0073) { # s
3260            
3261          $self->{state} = SYSTEM_STATE;          $self->{state} = SYSTEM_STATE;
3262          $self->{s_kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
3263                    
3264      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3265        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2777  sub _get_next_token ($) { Line 3272  sub _get_next_token ($) {
3272      }      }
3273        
3274          redo A;          redo A;
3275        } else {        } elsif ($self->{nc} == 0x0022 and # "
3276                   ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN or
3277                    $self->{ct}->{type} == PARAMETER_ENTITY_TOKEN)) {
3278                    
3279          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name');          $self->{state} = DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE;
3280          $self->{ct}->{quirks} = 1;          $self->{ct}->{value} = ''; # ENTITY
3281            
3282        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3283          $self->{line_prev} = $self->{line};
3284          $self->{column_prev} = $self->{column};
3285          $self->{column}++;
3286          $self->{nc}
3287              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3288        } else {
3289          $self->{set_nc}->($self);
3290        }
3291      
3292            redo A;
3293          } elsif ($self->{nc} == 0x0027 and # '
3294                   ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN or
3295                    $self->{ct}->{type} == PARAMETER_ENTITY_TOKEN)) {
3296            
3297            $self->{state} = DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE;
3298            $self->{ct}->{value} = ''; # ENTITY
3299            
3300        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3301          $self->{line_prev} = $self->{line};
3302          $self->{column_prev} = $self->{column};
3303          $self->{column}++;
3304          $self->{nc}
3305              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3306        } else {
3307          $self->{set_nc}->($self);
3308        }
3309      
3310            redo A;
3311          } elsif ($self->{is_xml} and
3312                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3313                   $self->{nc} == 0x005B) { # [
3314            
3315            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3316            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3317            $self->{in_subset} = 1;
3318            
3319        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3320          $self->{line_prev} = $self->{line};
3321          $self->{column_prev} = $self->{column};
3322          $self->{column}++;
3323          $self->{nc}
3324              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3325        } else {
3326          $self->{set_nc}->($self);
3327        }
3328      
3329            return  ($self->{ct}); # DOCTYPE
3330            redo A;
3331          } else {
3332            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name'); ## TODO: type
3333    
3334            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3335              
3336              $self->{ct}->{quirks} = 1;
3337              $self->{state} = BOGUS_DOCTYPE_STATE;
3338            } else {
3339              
3340              $self->{state} = BOGUS_MD_STATE;
3341            }
3342    
         $self->{state} = BOGUS_DOCTYPE_STATE;  
3343                    
3344      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3345        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2804  sub _get_next_token ($) { Line 3361  sub _get_next_token ($) {
3361              0x0042, # B              0x0042, # B
3362              0x004C, # L              0x004C, # L
3363              0x0049, # I              0x0049, # I
3364            ]->[length $self->{s_kwd}] or            ]->[length $self->{kwd}] or
3365            $self->{nc} == [            $self->{nc} == [
3366              undef,              undef,
3367              0x0075, # u              0x0075, # u
3368              0x0062, # b              0x0062, # b
3369              0x006C, # l              0x006C, # l
3370              0x0069, # i              0x0069, # i
3371            ]->[length $self->{s_kwd}]) {            ]->[length $self->{kwd}]) {
3372                    
3373          ## Stay in the state.          ## Stay in the state.
3374          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
3375                    
3376      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3377        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2827  sub _get_next_token ($) { Line 3384  sub _get_next_token ($) {
3384      }      }
3385        
3386          redo A;          redo A;
3387        } elsif ((length $self->{s_kwd}) == 5 and        } elsif ((length $self->{kwd}) == 5 and
3388                 ($self->{nc} == 0x0043 or # C                 ($self->{nc} == 0x0043 or # C
3389                  $self->{nc} == 0x0063)) { # c                  $self->{nc} == 0x0063)) { # c
3390                    if ($self->{is_xml} and
3391                ($self->{kwd} ne 'PUBLI' or $self->{nc} == 0x0063)) { # c
3392              
3393              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
3394                              text => 'PUBLIC',
3395                              line => $self->{line_prev},
3396                              column => $self->{column_prev} - 4);
3397            } else {
3398              
3399            }
3400          $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;          $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
3401                    
3402      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 2845  sub _get_next_token ($) { Line 3411  sub _get_next_token ($) {
3411        
3412          redo A;          redo A;
3413        } else {        } else {
3414                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name', ## TODO: type
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name',  
3415                          line => $self->{line_prev},                          line => $self->{line_prev},
3416                          column => $self->{column_prev} + 1 - length $self->{s_kwd});                          column => $self->{column_prev} + 1 - length $self->{kwd});
3417          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3418              
3419          $self->{state} = BOGUS_DOCTYPE_STATE;            $self->{ct}->{quirks} = 1;
3420              $self->{state} = BOGUS_DOCTYPE_STATE;
3421            } else {
3422              
3423              $self->{state} = BOGUS_MD_STATE;
3424            }
3425          ## Reconsume.          ## Reconsume.
3426          redo A;          redo A;
3427        }        }
# Line 2863  sub _get_next_token ($) { Line 3433  sub _get_next_token ($) {
3433              0x0053, # S              0x0053, # S
3434              0x0054, # T              0x0054, # T
3435              0x0045, # E              0x0045, # E
3436            ]->[length $self->{s_kwd}] or            ]->[length $self->{kwd}] or
3437            $self->{nc} == [            $self->{nc} == [
3438              undef,              undef,
3439              0x0079, # y              0x0079, # y
3440              0x0073, # s              0x0073, # s
3441              0x0074, # t              0x0074, # t
3442              0x0065, # e              0x0065, # e
3443            ]->[length $self->{s_kwd}]) {            ]->[length $self->{kwd}]) {
3444                    
3445          ## Stay in the state.          ## Stay in the state.
3446          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
3447                    
3448      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3449        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2886  sub _get_next_token ($) { Line 3456  sub _get_next_token ($) {
3456      }      }
3457        
3458          redo A;          redo A;
3459        } elsif ((length $self->{s_kwd}) == 5 and        } elsif ((length $self->{kwd}) == 5 and
3460                 ($self->{nc} == 0x004D or # M                 ($self->{nc} == 0x004D or # M
3461                  $self->{nc} == 0x006D)) { # m                  $self->{nc} == 0x006D)) { # m
3462                    if ($self->{is_xml} and
3463                ($self->{kwd} ne 'SYSTE' or $self->{nc} == 0x006D)) { # m
3464              
3465              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
3466                              text => 'SYSTEM',
3467                              line => $self->{line_prev},
3468                              column => $self->{column_prev} - 4);
3469            } else {
3470              
3471            }
3472          $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;          $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
3473                    
3474      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 2904  sub _get_next_token ($) { Line 3483  sub _get_next_token ($) {
3483        
3484          redo A;          redo A;
3485        } else {        } else {
3486                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name', ## TODO: type
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name',  
3487                          line => $self->{line_prev},                          line => $self->{line_prev},
3488                          column => $self->{column_prev} + 1 - length $self->{s_kwd});                          column => $self->{column_prev} + 1 - length $self->{kwd});
3489          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3490              
3491          $self->{state} = BOGUS_DOCTYPE_STATE;            $self->{ct}->{quirks} = 1;
3492              $self->{state} = BOGUS_DOCTYPE_STATE;
3493            } else {
3494              
3495              $self->{state} = BOGUS_MD_STATE;
3496            }
3497          ## Reconsume.          ## Reconsume.
3498          redo A;          redo A;
3499        }        }
# Line 2963  sub _get_next_token ($) { Line 3546  sub _get_next_token ($) {
3546        
3547          redo A;          redo A;
3548        } elsif ($self->{nc} eq 0x003E) { # >        } elsif ($self->{nc} eq 0x003E) { # >
           
3549          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');
3550            
3551          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3552          $self->{s_kwd} = '';            
3553              $self->{state} = DATA_STATE;
3554              $self->{s_kwd} = '';
3555              $self->{ct}->{quirks} = 1;
3556            } else {
3557              
3558              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3559            }
3560            
3561                    
3562      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3563        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2979  sub _get_next_token ($) { Line 3569  sub _get_next_token ($) {
3569        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3570      }      }
3571        
3572            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3573          redo A;          redo A;
3574        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3575            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3576              
3577              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3578              $self->{state} = DATA_STATE;
3579              $self->{s_kwd} = '';
3580              $self->{ct}->{quirks} = 1;
3581            } else {
3582              
3583              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3584              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3585            }
3586                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');  
   
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3587          ## reconsume          ## reconsume
   
         $self->{ct}->{quirks} = 1;  
3588          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
3589          redo A;          redo A;
3590        } else {        } elsif ($self->{is_xml} and
3591                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3592                   $self->{nc} == 0x005B) { # [
3593                    
3594            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');
3595            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3596            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3597            $self->{in_subset} = 1;
3598            
3599        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3600          $self->{line_prev} = $self->{line};
3601          $self->{column_prev} = $self->{column};
3602          $self->{column}++;
3603          $self->{nc}
3604              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3605        } else {
3606          $self->{set_nc}->($self);
3607        }
3608      
3609            return  ($self->{ct}); # DOCTYPE
3610            redo A;
3611          } else {
3612          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC');
         $self->{ct}->{quirks} = 1;  
3613    
3614          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3615              
3616              $self->{ct}->{quirks} = 1;
3617              $self->{state} = BOGUS_DOCTYPE_STATE;
3618            } else {
3619              
3620              $self->{state} = BOGUS_MD_STATE;
3621            }
3622    
3623                    
3624      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3625        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3032  sub _get_next_token ($) { Line 3650  sub _get_next_token ($) {
3650        
3651          redo A;          redo A;
3652        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
3653          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3654    
3655          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3656          $self->{s_kwd} = '';            
3657              $self->{state} = DATA_STATE;
3658              $self->{s_kwd} = '';
3659              $self->{ct}->{quirks} = 1;
3660            } else {
3661              
3662              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3663            }
3664    
3665                    
3666      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3667        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3048  sub _get_next_token ($) { Line 3673  sub _get_next_token ($) {
3673        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3674      }      }
3675        
3676            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3677          redo A;          redo A;
3678        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
3679          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3680    
3681          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3682          $self->{s_kwd} = '';            
3683          ## reconsume            $self->{state} = DATA_STATE;
3684              $self->{s_kwd} = '';
3685          $self->{ct}->{quirks} = 1;            $self->{ct}->{quirks} = 1;
3686            } else {
3687              
3688              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3689            }
3690            
3691            ## Reconsume.
3692          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
3693          redo A;          redo A;
3694        } else {        } else {
3695                    
3696          $self->{ct}->{pubid} # DOCTYPE          $self->{ct}->{pubid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
3697          $self->{read_until}->($self->{ct}->{pubid}, q[">],          $self->{read_until}->($self->{ct}->{pubid}, q[">],
3698                                length $self->{ct}->{pubid});                                length $self->{ct}->{pubid});
3699    
# Line 3103  sub _get_next_token ($) { Line 3728  sub _get_next_token ($) {
3728        
3729          redo A;          redo A;
3730        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
3731          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3732    
3733          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3734          $self->{s_kwd} = '';            
3735              $self->{state} = DATA_STATE;
3736              $self->{s_kwd} = '';
3737              $self->{ct}->{quirks} = 1;
3738            } else {
3739              
3740              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3741            }
3742    
3743                    
3744      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3745        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3119  sub _get_next_token ($) { Line 3751  sub _get_next_token ($) {
3751        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3752      }      }
3753        
3754            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3755          redo A;          redo A;
3756        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
3757          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3758    
3759          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3760          $self->{s_kwd} = '';            
3761              $self->{state} = DATA_STATE;
3762              $self->{s_kwd} = '';
3763              $self->{ct}->{quirks} = 1;
3764            } else {
3765              
3766              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3767            }
3768          
3769          ## reconsume          ## reconsume
3770            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3771          redo A;          redo A;
3772        } else {        } else {
3773                    
3774          $self->{ct}->{pubid} # DOCTYPE          $self->{ct}->{pubid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
3775          $self->{read_until}->($self->{ct}->{pubid}, q['>],          $self->{read_until}->($self->{ct}->{pubid}, q['>],
3776                                length $self->{ct}->{pubid});                                length $self->{ct}->{pubid});
3777    
# Line 3175  sub _get_next_token ($) { Line 3807  sub _get_next_token ($) {
3807          redo A;          redo A;
3808        } elsif ($self->{nc} == 0x0022) { # "        } elsif ($self->{nc} == 0x0022) { # "
3809                    
3810          $self->{ct}->{sysid} = ''; # DOCTYPE          $self->{ct}->{sysid} = ''; # DOCTYPE/ENTITY/NOTATION
3811          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
3812                    
3813      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3191  sub _get_next_token ($) { Line 3823  sub _get_next_token ($) {
3823          redo A;          redo A;
3824        } elsif ($self->{nc} == 0x0027) { # '        } elsif ($self->{nc} == 0x0027) { # '
3825                    
3826          $self->{ct}->{sysid} = ''; # DOCTYPE          $self->{ct}->{sysid} = ''; # DOCTYPE/ENTITY/NOTATION
3827          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
3828                    
3829      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3206  sub _get_next_token ($) { Line 3838  sub _get_next_token ($) {
3838        
3839          redo A;          redo A;
3840        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3841            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3842              if ($self->{is_xml}) {
3843                
3844                $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
3845              } else {
3846                
3847              }
3848              $self->{state} = DATA_STATE;
3849              $self->{s_kwd} = '';
3850            } else {
3851              if ($self->{ct}->{type} == NOTATION_TOKEN) {
3852                
3853              } else {
3854                
3855                $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');            
3856              }
3857              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3858            }
3859                    
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3860                    
3861      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3862        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3220  sub _get_next_token ($) { Line 3868  sub _get_next_token ($) {
3868        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3869      }      }
3870        
3871            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         return  ($self->{ct}); # DOCTYPE  
   
3872          redo A;          redo A;
3873        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3874            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3875              
3876              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3877              
3878              $self->{state} = DATA_STATE;
3879              $self->{s_kwd} = '';
3880              $self->{ct}->{quirks} = 1;
3881            } else {
3882              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3883              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3884            }
3885                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');  
   
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3886          ## reconsume          ## reconsume
3887            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
3888          $self->{ct}->{quirks} = 1;          redo A;
3889          } elsif ($self->{is_xml} and
3890                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3891                   $self->{nc} == 0x005B) { # [
3892            
3893            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
3894            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3895            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3896            $self->{in_subset} = 1;
3897            
3898        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3899          $self->{line_prev} = $self->{line};
3900          $self->{column_prev} = $self->{column};
3901          $self->{column}++;
3902          $self->{nc}
3903              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3904        } else {
3905          $self->{set_nc}->($self);
3906        }
3907      
3908          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
3909          redo A;          redo A;
3910        } else {        } else {
           
3911          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC literal');
         $self->{ct}->{quirks} = 1;  
3912    
3913          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3914              
3915              $self->{ct}->{quirks} = 1;
3916              $self->{state} = BOGUS_DOCTYPE_STATE;
3917            } else {
3918              
3919              $self->{state} = BOGUS_MD_STATE;
3920            }
3921    
3922                    
3923      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3924        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3304  sub _get_next_token ($) { Line 3981  sub _get_next_token ($) {
3981        
3982          redo A;          redo A;
3983        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
3984          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3985                    
3986      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3987        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3320  sub _get_next_token ($) { Line 3994  sub _get_next_token ($) {
3994      }      }
3995        
3996    
3997          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3998          return  ($self->{ct}); # DOCTYPE            
3999              $self->{state} = DATA_STATE;
4000              $self->{s_kwd} = '';
4001              $self->{ct}->{quirks} = 1;
4002            } else {
4003              
4004              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4005            }
4006    
4007            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4008          redo A;          redo A;
4009        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4010            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4011              
4012              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
4013              $self->{state} = DATA_STATE;
4014              $self->{s_kwd} = '';
4015              $self->{ct}->{quirks} = 1;
4016            } else {
4017              
4018              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
4019              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4020            }
4021                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');  
   
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
4022          ## reconsume          ## reconsume
4023            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4024            redo A;
4025          } elsif ($self->{is_xml} and
4026                   $self->{ct}->{type} == DOCTYPE_TOKEN and
4027                   $self->{nc} == 0x005B) { # [
4028            
4029            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
4030    
4031          $self->{ct}->{quirks} = 1;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4032            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
4033            $self->{in_subset} = 1;
4034            
4035        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4036          $self->{line_prev} = $self->{line};
4037          $self->{column_prev} = $self->{column};
4038          $self->{column}++;
4039          $self->{nc}
4040              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4041        } else {
4042          $self->{set_nc}->($self);
4043        }
4044      
4045          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
4046          redo A;          redo A;
4047        } else {        } else {
           
4048          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM');
         $self->{ct}->{quirks} = 1;  
4049    
4050          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4051                        
4052              $self->{ct}->{quirks} = 1;
4053              $self->{state} = BOGUS_DOCTYPE_STATE;
4054            } else {
4055              
4056              $self->{state} = BOGUS_MD_STATE;
4057            }
4058    
4059                    
4060      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4061        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3371  sub _get_next_token ($) { Line 4085  sub _get_next_token ($) {
4085      }      }
4086        
4087          redo A;          redo A;
4088        } elsif ($self->{nc} == 0x003E) { # >        } elsif (not $self->{is_xml} and $self->{nc} == 0x003E) { # >
           
4089          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4090    
4091          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4092          $self->{s_kwd} = '';            
4093              $self->{state} = DATA_STATE;
4094              $self->{s_kwd} = '';
4095              $self->{ct}->{quirks} = 1;
4096            } else {
4097              
4098              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4099            }
4100            
4101                    
4102      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4103        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3388  sub _get_next_token ($) { Line 4109  sub _get_next_token ($) {
4109        $self->{set_nc}->($self);        $self->{set_nc}->($self);
4110      }      }
4111        
4112            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
4113          redo A;          redo A;
4114        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
4115          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4116    
4117          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4118          $self->{s_kwd} = '';            
4119              $self->{state} = DATA_STATE;
4120              $self->{s_kwd} = '';
4121              $self->{ct}->{quirks} = 1;
4122            } else {
4123              
4124              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4125            }
4126            
4127          ## reconsume          ## reconsume
4128            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
4129          redo A;          redo A;
4130        } else {        } else {
4131                    
4132          $self->{ct}->{sysid} # DOCTYPE          $self->{ct}->{sysid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
4133          $self->{read_until}->($self->{ct}->{sysid}, q[">],          $self->{read_until}->($self->{ct}->{sysid}, q[">],
4134                                length $self->{ct}->{sysid});                                length $self->{ct}->{sysid});
4135    
# Line 3442  sub _get_next_token ($) { Line 4163  sub _get_next_token ($) {
4163      }      }
4164        
4165          redo A;          redo A;
4166        } elsif ($self->{nc} == 0x003E) { # >        } elsif (not $self->{is_xml} and $self->{nc} == 0x003E) { # >
4167                    
4168          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4169    
# Line 3465  sub _get_next_token ($) { Line 4186  sub _get_next_token ($) {
4186    
4187          redo A;          redo A;
4188        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
4189          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4190    
4191          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4192          $self->{s_kwd} = '';            
4193          ## reconsume            $self->{state} = DATA_STATE;
4194              $self->{s_kwd} = '';
4195          $self->{ct}->{quirks} = 1;            $self->{ct}->{quirks} = 1;
4196          return  ($self->{ct}); # DOCTYPE          } else {
4197              
4198              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4199            }
4200    
4201            ## reconsume
4202            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4203          redo A;          redo A;
4204        } else {        } else {
4205                    
4206          $self->{ct}->{sysid} # DOCTYPE          $self->{ct}->{sysid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
4207          $self->{read_until}->($self->{ct}->{sysid}, q['>],          $self->{read_until}->($self->{ct}->{sysid}, q['>],
4208                                length $self->{ct}->{sysid});                                length $self->{ct}->{sysid});
4209    
# Line 3499  sub _get_next_token ($) { Line 4223  sub _get_next_token ($) {
4223        }        }
4224      } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {      } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
4225        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
4226                    if ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN) {
4227          ## Stay in the state            
4228              $self->{state} = BEFORE_NDATA_STATE;
4229            } else {
4230              
4231              ## Stay in the state
4232            }
4233                    
4234      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4235        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3514  sub _get_next_token ($) { Line 4243  sub _get_next_token ($) {
4243        
4244          redo A;          redo A;
4245        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
4246            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4247              
4248              $self->{state} = DATA_STATE;
4249              $self->{s_kwd} = '';
4250            } else {
4251              
4252              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4253            }
4254    
4255                    
4256          $self->{state} = DATA_STATE;      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4257          $self->{s_kwd} = '';        $self->{line_prev} = $self->{line};
4258          $self->{column_prev} = $self->{column};
4259          $self->{column}++;
4260          $self->{nc}
4261              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4262        } else {
4263          $self->{set_nc}->($self);
4264        }
4265      
4266            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4267            redo A;
4268          } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
4269                   ($self->{nc} == 0x004E or # N
4270                    $self->{nc} == 0x006E)) { # n
4271            
4272            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before NDATA'); ## TODO: type
4273            $self->{state} = NDATA_STATE;
4274            $self->{kwd} = chr $self->{nc};
4275                    
4276      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4277        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3528  sub _get_next_token ($) { Line 4283  sub _get_next_token ($) {
4283        $self->{set_nc}->($self);        $self->{set_nc}->($self);
4284      }      }
4285        
4286            redo A;
4287          } elsif ($self->{nc} == -1) {
4288            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4289              
4290              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
4291              $self->{state} = DATA_STATE;
4292              $self->{s_kwd} = '';
4293              $self->{ct}->{quirks} = 1;
4294            } else {
4295              
4296              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
4297              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4298            }
4299    
4300            ## reconsume
4301            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4302            redo A;
4303          } elsif ($self->{is_xml} and
4304                   $self->{ct}->{type} == DOCTYPE_TOKEN and
4305                   $self->{nc} == 0x005B) { # [
4306            
4307            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4308            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
4309            $self->{in_subset} = 1;
4310            
4311        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4312          $self->{line_prev} = $self->{line};
4313          $self->{column_prev} = $self->{column};
4314          $self->{column}++;
4315          $self->{nc}
4316              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4317        } else {
4318          $self->{set_nc}->($self);
4319        }
4320      
4321          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
4322            redo A;
4323          } else {
4324            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');
4325    
4326            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4327              
4328              #$self->{ct}->{quirks} = 1;
4329              $self->{state} = BOGUS_DOCTYPE_STATE;
4330            } else {
4331              
4332              $self->{state} = BOGUS_MD_STATE;
4333            }
4334    
4335            
4336        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4337          $self->{line_prev} = $self->{line};
4338          $self->{column_prev} = $self->{column};
4339          $self->{column}++;
4340          $self->{nc}
4341              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4342        } else {
4343          $self->{set_nc}->($self);
4344        }
4345      
4346            redo A;
4347          }
4348        } elsif ($self->{state} == BEFORE_NDATA_STATE) {
4349          if ($is_space->{$self->{nc}}) {
4350            
4351            ## Stay in the state.
4352            
4353        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4354          $self->{line_prev} = $self->{line};
4355          $self->{column_prev} = $self->{column};
4356          $self->{column}++;
4357          $self->{nc}
4358              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4359        } else {
4360          $self->{set_nc}->($self);
4361        }
4362      
4363            redo A;
4364          } elsif ($self->{nc} == 0x003E) { # >
4365            
4366            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4367            
4368        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4369          $self->{line_prev} = $self->{line};
4370          $self->{column_prev} = $self->{column};
4371          $self->{column}++;
4372          $self->{nc}
4373              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4374        } else {
4375          $self->{set_nc}->($self);
4376        }
4377      
4378            return  ($self->{ct}); # ENTITY
4379            redo A;
4380          } elsif ($self->{nc} == 0x004E or # N
4381                   $self->{nc} == 0x006E) { # n
4382            
4383            $self->{state} = NDATA_STATE;
4384            $self->{kwd} = chr $self->{nc};
4385            
4386        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4387          $self->{line_prev} = $self->{line};
4388          $self->{column_prev} = $self->{column};
4389          $self->{column}++;
4390          $self->{nc}
4391              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4392        } else {
4393          $self->{set_nc}->($self);
4394        }
4395      
4396          redo A;          redo A;
4397        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4398                    
4399          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
4400          $self->{state} = DATA_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
         $self->{s_kwd} = '';  
4401          ## reconsume          ## reconsume
4402            return  ($self->{ct}); # ENTITY
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
4403          redo A;          redo A;
4404        } else {        } else {
4405                    
4406          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');
4407          #$self->{ct}->{quirks} = 1;          $self->{state} = BOGUS_MD_STATE;
   
         $self->{state} = BOGUS_DOCTYPE_STATE;  
4408                    
4409      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4410        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3582  sub _get_next_token ($) { Line 4438  sub _get_next_token ($) {
4438          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
4439    
4440          redo A;          redo A;
4441          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
4442            
4443            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4444            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
4445            $self->{in_subset} = 1;
4446            
4447        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4448          $self->{line_prev} = $self->{line};
4449          $self->{column_prev} = $self->{column};
4450          $self->{column}++;
4451          $self->{nc}
4452              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4453        } else {
4454          $self->{set_nc}->($self);
4455        }
4456      
4457            return  ($self->{ct}); # DOCTYPE
4458            redo A;
4459        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4460                    
4461          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
# Line 3594  sub _get_next_token ($) { Line 4468  sub _get_next_token ($) {
4468        } else {        } else {
4469                    
4470          my $s = '';          my $s = '';
4471          $self->{read_until}->($s, q[>], 0);          $self->{read_until}->($s, q{>[}, 0);
4472    
4473          ## Stay in the state          ## Stay in the state
4474                    
# Line 3614  sub _get_next_token ($) { Line 4488  sub _get_next_token ($) {
4488        ## NOTE: "CDATA section state" in the spec is jointly implemented        ## NOTE: "CDATA section state" in the spec is jointly implemented
4489        ## by three states, |CDATA_SECTION_STATE|, |CDATA_SECTION_MSE1_STATE|,        ## by three states, |CDATA_SECTION_STATE|, |CDATA_SECTION_MSE1_STATE|,
4490        ## and |CDATA_SECTION_MSE2_STATE|.        ## and |CDATA_SECTION_MSE2_STATE|.
4491    
4492          ## XML5: "CDATA state".
4493                
4494        if ($self->{nc} == 0x005D) { # ]        if ($self->{nc} == 0x005D) { # ]
4495                    
# Line 3631  sub _get_next_token ($) { Line 4507  sub _get_next_token ($) {
4507        
4508          redo A;          redo A;
4509        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4510            if ($self->{is_xml}) {
4511              
4512              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no mse'); ## TODO: type
4513            } else {
4514              
4515            }
4516    
4517          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
4518          $self->{s_kwd} = '';          $self->{s_kwd} = '';
4519                    ## Reconsume.
     if ($self->{char_buffer_pos} < length $self->{char_buffer}) {  
       $self->{line_prev} = $self->{line};  
       $self->{column_prev} = $self->{column};  
       $self->{column}++;  
       $self->{nc}  
           = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);  
     } else {  
       $self->{set_nc}->($self);  
     }  
     
4520          if (length $self->{ct}->{data}) { # character          if (length $self->{ct}->{data}) { # character
4521                        
4522            return  ($self->{ct}); # character            return  ($self->{ct}); # character
# Line 3676  sub _get_next_token ($) { Line 4549  sub _get_next_token ($) {
4549    
4550        ## ISSUE: "text tokens" in spec.        ## ISSUE: "text tokens" in spec.
4551      } elsif ($self->{state} == CDATA_SECTION_MSE1_STATE) {      } elsif ($self->{state} == CDATA_SECTION_MSE1_STATE) {
4552          ## XML5: "CDATA bracket state".
4553    
4554        if ($self->{nc} == 0x005D) { # ]        if ($self->{nc} == 0x005D) { # ]
4555                    
4556          $self->{state} = CDATA_SECTION_MSE2_STATE;          $self->{state} = CDATA_SECTION_MSE2_STATE;
# Line 3693  sub _get_next_token ($) { Line 4568  sub _get_next_token ($) {
4568          redo A;          redo A;
4569        } else {        } else {
4570                    
4571            ## XML5: If EOF, "]" is not appended and changed to the data state.
4572          $self->{ct}->{data} .= ']';          $self->{ct}->{data} .= ']';
4573          $self->{state} = CDATA_SECTION_STATE;          $self->{state} = CDATA_SECTION_STATE; ## XML5: Stay in the state.
4574          ## Reconsume.          ## Reconsume.
4575          redo A;          redo A;
4576        }        }
4577      } elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) {      } elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) {
4578          ## XML5: "CDATA end state".
4579    
4580        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
4581          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
4582          $self->{s_kwd} = '';          $self->{s_kwd} = '';
# Line 3741  sub _get_next_token ($) { Line 4619  sub _get_next_token ($) {
4619                    
4620          $self->{ct}->{data} .= ']]'; # character          $self->{ct}->{data} .= ']]'; # character
4621          $self->{state} = CDATA_SECTION_STATE;          $self->{state} = CDATA_SECTION_STATE;
4622          ## Reconsume.          ## Reconsume. ## XML5: Emit.
4623          redo A;          redo A;
4624        }        }
4625      } elsif ($self->{state} == ENTITY_STATE) {      } elsif ($self->{state} == ENTITY_STATE) {
# Line 3750  sub _get_next_token ($) { Line 4628  sub _get_next_token ($) {
4628              0x003C => 1, 0x0026 => 1, -1 => 1, # <, &              0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
4629              $self->{entity_add} => 1,              $self->{entity_add} => 1,
4630            }->{$self->{nc}}) {            }->{$self->{nc}}) {
4631                    if ($self->{is_xml}) {
4632              
4633              $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero',
4634                              line => $self->{line_prev},
4635                              column => $self->{column_prev}
4636                                  + ($self->{nc} == -1 ? 1 : 0));
4637            } else {
4638              
4639              ## No error
4640            }
4641          ## Don't consume          ## Don't consume
         ## No error  
4642          ## Return nothing.          ## Return nothing.
4643          #          #
4644        } elsif ($self->{nc} == 0x0023) { # #        } elsif ($self->{nc} == 0x0023) { # #
4645                    
4646          $self->{state} = ENTITY_HASH_STATE;          $self->{state} = ENTITY_HASH_STATE;
4647          $self->{s_kwd} = '#';          $self->{kwd} = '#';
4648                    
4649      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4650        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3771  sub _get_next_token ($) { Line 4657  sub _get_next_token ($) {
4657      }      }
4658        
4659          redo A;          redo A;
4660        } elsif ((0x0041 <= $self->{nc} and        } elsif ($self->{is_xml} or
4661                   (0x0041 <= $self->{nc} and
4662                  $self->{nc} <= 0x005A) or # A..Z                  $self->{nc} <= 0x005A) or # A..Z
4663                 (0x0061 <= $self->{nc} and                 (0x0061 <= $self->{nc} and
4664                  $self->{nc} <= 0x007A)) { # a..z                  $self->{nc} <= 0x007A)) { # a..z
4665                    
4666          require Whatpm::_NamedEntityList;          require Whatpm::_NamedEntityList;
4667          $self->{state} = ENTITY_NAME_STATE;          $self->{state} = ENTITY_NAME_STATE;
4668          $self->{s_kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
4669          $self->{entity__value} = $self->{s_kwd};          $self->{entity__value} = $self->{kwd};
4670          $self->{entity__match} = 0;          $self->{entity__match} = 0;
4671                    
4672      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3825  sub _get_next_token ($) { Line 4712  sub _get_next_token ($) {
4712          redo A;          redo A;
4713        }        }
4714      } elsif ($self->{state} == ENTITY_HASH_STATE) {      } elsif ($self->{state} == ENTITY_HASH_STATE) {
4715        if ($self->{nc} == 0x0078 or # x        if ($self->{nc} == 0x0078) { # x
           $self->{nc} == 0x0058) { # X  
4716                    
4717          $self->{state} = HEXREF_X_STATE;          $self->{state} = HEXREF_X_STATE;
4718          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
4719            
4720        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4721          $self->{line_prev} = $self->{line};
4722          $self->{column_prev} = $self->{column};
4723          $self->{column}++;
4724          $self->{nc}
4725              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4726        } else {
4727          $self->{set_nc}->($self);
4728        }
4729      
4730            redo A;
4731          } elsif ($self->{nc} == 0x0058) { # X
4732            
4733            if ($self->{is_xml}) {
4734              $self->{parse_error}->(level => $self->{level}->{must}, type => 'uppercase hcro'); ## TODO: type
4735            }
4736            $self->{state} = HEXREF_X_STATE;
4737            $self->{kwd} .= chr $self->{nc};
4738                    
4739      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4740        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3846  sub _get_next_token ($) { Line 4751  sub _get_next_token ($) {
4751                 $self->{nc} <= 0x0039) { # 0..9                 $self->{nc} <= 0x0039) { # 0..9
4752                    
4753          $self->{state} = NCR_NUM_STATE;          $self->{state} = NCR_NUM_STATE;
4754          $self->{s_kwd} = $self->{nc} - 0x0030;          $self->{kwd} = $self->{nc} - 0x0030;
4755                    
4756      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4757        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3892  sub _get_next_token ($) { Line 4797  sub _get_next_token ($) {
4797        if (0x0030 <= $self->{nc} and        if (0x0030 <= $self->{nc} and
4798            $self->{nc} <= 0x0039) { # 0..9            $self->{nc} <= 0x0039) { # 0..9
4799                    
4800          $self->{s_kwd} *= 10;          $self->{kwd} *= 10;
4801          $self->{s_kwd} += $self->{nc} - 0x0030;          $self->{kwd} += $self->{nc} - 0x0030;
4802                    
4803          ## Stay in the state.          ## Stay in the state.
4804                    
# Line 3929  sub _get_next_token ($) { Line 4834  sub _get_next_token ($) {
4834          #          #
4835        }        }
4836    
4837        my $code = $self->{s_kwd};        my $code = $self->{kwd};
4838        my $l = $self->{line_prev};        my $l = $self->{line_prev};
4839        my $c = $self->{column_prev};        my $c = $self->{column_prev};
4840        if ($charref_map->{$code}) {        if ($charref_map->{$code}) {
# Line 3952  sub _get_next_token ($) { Line 4857  sub _get_next_token ($) {
4857          $self->{s_kwd} = '';          $self->{s_kwd} = '';
4858          ## Reconsume.          ## Reconsume.
4859          return  ({type => CHARACTER_TOKEN, data => chr $code,          return  ({type => CHARACTER_TOKEN, data => chr $code,
4860                      has_reference => 1,
4861                    line => $l, column => $c,                    line => $l, column => $c,
4862                   });                   });
4863          redo A;          redo A;
# Line 3971  sub _get_next_token ($) { Line 4877  sub _get_next_token ($) {
4877          # 0..9, A..F, a..f          # 0..9, A..F, a..f
4878                    
4879          $self->{state} = HEXREF_HEX_STATE;          $self->{state} = HEXREF_HEX_STATE;
4880          $self->{s_kwd} = 0;          $self->{kwd} = 0;
4881          ## Reconsume.          ## Reconsume.
4882          redo A;          redo A;
4883        } else {        } else {
# Line 3989  sub _get_next_token ($) { Line 4895  sub _get_next_token ($) {
4895            $self->{s_kwd} = '';            $self->{s_kwd} = '';
4896            ## Reconsume.            ## Reconsume.
4897            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
4898                      data => '&' . $self->{s_kwd},                      data => '&' . $self->{kwd},
4899                      line => $self->{line_prev},                      line => $self->{line_prev},
4900                      column => $self->{column_prev} - length $self->{s_kwd},                      column => $self->{column_prev} - length $self->{kwd},
4901                     });                     });
4902            redo A;            redo A;
4903          } else {          } else {
4904                        
4905            $self->{ca}->{value} .= '&' . $self->{s_kwd};            $self->{ca}->{value} .= '&' . $self->{kwd};
4906            $self->{state} = $self->{prev_state};            $self->{state} = $self->{prev_state};
4907            $self->{s_kwd} = '';            $self->{s_kwd} = '';
4908            ## Reconsume.            ## Reconsume.
# Line 4007  sub _get_next_token ($) { Line 4913  sub _get_next_token ($) {
4913        if (0x0030 <= $self->{nc} and $self->{nc} <= 0x0039) {        if (0x0030 <= $self->{nc} and $self->{nc} <= 0x0039) {
4914          # 0..9          # 0..9
4915                    
4916          $self->{s_kwd} *= 0x10;          $self->{kwd} *= 0x10;
4917          $self->{s_kwd} += $self->{nc} - 0x0030;          $self->{kwd} += $self->{nc} - 0x0030;
4918          ## Stay in the state.          ## Stay in the state.
4919                    
4920      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 4025  sub _get_next_token ($) { Line 4931  sub _get_next_token ($) {
4931        } elsif (0x0061 <= $self->{nc} and        } elsif (0x0061 <= $self->{nc} and
4932                 $self->{nc} <= 0x0066) { # a..f                 $self->{nc} <= 0x0066) { # a..f
4933                    
4934          $self->{s_kwd} *= 0x10;          $self->{kwd} *= 0x10;
4935          $self->{s_kwd} += $self->{nc} - 0x0060 + 9;          $self->{kwd} += $self->{nc} - 0x0060 + 9;
4936          ## Stay in the state.          ## Stay in the state.
4937                    
4938      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 4043  sub _get_next_token ($) { Line 4949  sub _get_next_token ($) {
4949        } elsif (0x0041 <= $self->{nc} and        } elsif (0x0041 <= $self->{nc} and
4950                 $self->{nc} <= 0x0046) { # A..F                 $self->{nc} <= 0x0046) { # A..F
4951                    
4952          $self->{s_kwd} *= 0x10;          $self->{kwd} *= 0x10;
4953          $self->{s_kwd} += $self->{nc} - 0x0040 + 9;          $self->{kwd} += $self->{nc} - 0x0040 + 9;
4954          ## Stay in the state.          ## Stay in the state.
4955                    
4956      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 4081  sub _get_next_token ($) { Line 4987  sub _get_next_token ($) {
4987          #          #
4988        }        }
4989    
4990        my $code = $self->{s_kwd};        my $code = $self->{kwd};
4991        my $l = $self->{line_prev};        my $l = $self->{line_prev};
4992        my $c = $self->{column_prev};        my $c = $self->{column_prev};
4993        if ($charref_map->{$code}) {        if ($charref_map->{$code}) {
# Line 4104  sub _get_next_token ($) { Line 5010  sub _get_next_token ($) {
5010          $self->{s_kwd} = '';          $self->{s_kwd} = '';
5011          ## Reconsume.          ## Reconsume.
5012          return  ({type => CHARACTER_TOKEN, data => chr $code,          return  ({type => CHARACTER_TOKEN, data => chr $code,
5013                      has_reference => 1,
5014                    line => $l, column => $c,                    line => $l, column => $c,
5015                   });                   });
5016          redo A;          redo A;
# Line 4117  sub _get_next_token ($) { Line 5024  sub _get_next_token ($) {
5024          redo A;          redo A;
5025        }        }
5026      } elsif ($self->{state} == ENTITY_NAME_STATE) {      } elsif ($self->{state} == ENTITY_NAME_STATE) {
5027        if (length $self->{s_kwd} < 30 and        if ((0x0041 <= $self->{nc} and # a
5028            ## NOTE: Some number greater than the maximum length of entity name             $self->{nc} <= 0x005A) or # x
5029            ((0x0041 <= $self->{nc} and # a            (0x0061 <= $self->{nc} and # a
5030              $self->{nc} <= 0x005A) or # x             $self->{nc} <= 0x007A) or # z
5031             (0x0061 <= $self->{nc} and # a            (0x0030 <= $self->{nc} and # 0
5032              $self->{nc} <= 0x007A) or # z             $self->{nc} <= 0x0039) or # 9
5033             (0x0030 <= $self->{nc} and # 0            $self->{nc} == 0x003B or # ;
5034              $self->{nc} <= 0x0039) or # 9            ($self->{is_xml} and
5035             $self->{nc} == 0x003B)) { # ;             not ($is_space->{$self->{nc}} or
5036                    {
5037                      0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
5038                      $self->{entity_add} => 1,
5039                    }->{$self->{nc}}))) {
5040          our $EntityChar;          our $EntityChar;
5041          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
5042          if (defined $EntityChar->{$self->{s_kwd}}) {          if (defined $EntityChar->{$self->{kwd}} or
5043                $self->{ge}->{$self->{kwd}}) {
5044            if ($self->{nc} == 0x003B) { # ;            if ($self->{nc} == 0x003B) { # ;
5045                            if (defined $self->{ge}->{$self->{kwd}}) {
5046              $self->{entity__value} = $EntityChar->{$self->{s_kwd}};                if ($self->{ge}->{$self->{kwd}}->{only_text}) {
5047                    
5048                    $self->{entity__value} = $self->{ge}->{$self->{kwd}}->{value};
5049                  } else {
5050                    if (defined $self->{ge}->{$self->{kwd}}->{notation}) {
5051                      
5052                      $self->{parse_error}->(level => $self->{level}->{must}, type => 'unparsed entity', ## TODO: type
5053                                      value => $self->{kwd});
5054                    } else {
5055                      
5056                    }
5057                    $self->{entity__value} = '&' . $self->{kwd}; ## TODO: expand
5058                  }
5059                } else {
5060                  if ($self->{is_xml}) {
5061                    
5062                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'entity not declared', ## TODO: type
5063                                    value => $self->{kwd},
5064                                    level => {
5065                                              'amp;' => $self->{level}->{warn},
5066                                              'quot;' => $self->{level}->{warn},
5067                                              'lt;' => $self->{level}->{warn},
5068                                              'gt;' => $self->{level}->{warn},
5069                                              'apos;' => $self->{level}->{warn},
5070                                             }->{$self->{kwd}} ||
5071                                             $self->{level}->{must});
5072                  } else {
5073                    
5074                  }
5075                  $self->{entity__value} = $EntityChar->{$self->{kwd}};
5076                }
5077              $self->{entity__match} = 1;              $self->{entity__match} = 1;
5078                            
5079      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 4147  sub _get_next_token ($) { Line 5089  sub _get_next_token ($) {
5089              #              #
5090            } else {            } else {
5091                            
5092              $self->{entity__value} = $EntityChar->{$self->{s_kwd}};              $self->{entity__value} = $EntityChar->{$self->{kwd}};
5093              $self->{entity__match} = -1;              $self->{entity__match} = -1;
5094              ## Stay in the state.              ## Stay in the state.
5095                            
# Line 4195  sub _get_next_token ($) { Line 5137  sub _get_next_token ($) {
5137          if ($self->{prev_state} != DATA_STATE and # in attribute          if ($self->{prev_state} != DATA_STATE and # in attribute
5138              $self->{entity__match} < -1) {              $self->{entity__match} < -1) {
5139                        
5140            $data = '&' . $self->{s_kwd};            $data = '&' . $self->{kwd};
5141            #            #
5142          } else {          } else {
5143                        
# Line 4207  sub _get_next_token ($) { Line 5149  sub _get_next_token ($) {
5149                    
5150          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero',
5151                          line => $self->{line_prev},                          line => $self->{line_prev},
5152                          column => $self->{column_prev} - length $self->{s_kwd});                          column => $self->{column_prev} - length $self->{kwd});
5153          $data = '&' . $self->{s_kwd};          $data = '&' . $self->{kwd};
5154          #          #
5155        }        }
5156        
# Line 4229  sub _get_next_token ($) { Line 5171  sub _get_next_token ($) {
5171          ## Reconsume.          ## Reconsume.
5172          return  ({type => CHARACTER_TOKEN,          return  ({type => CHARACTER_TOKEN,
5173                    data => $data,                    data => $data,
5174                      has_reference => $has_ref,
5175                    line => $self->{line_prev},                    line => $self->{line_prev},
5176                    column => $self->{column_prev} + 1 - length $self->{s_kwd},                    column => $self->{column_prev} + 1 - length $self->{kwd},
5177                   });                   });
5178          redo A;          redo A;
5179        } else {        } else {
# Line 4242  sub _get_next_token ($) { Line 5185  sub _get_next_token ($) {
5185          ## Reconsume.          ## Reconsume.
5186          redo A;          redo A;
5187        }        }
5188    
5189        ## XML-only states
5190    
5191        } elsif ($self->{state} == PI_STATE) {
5192          ## XML5: "Pi state" and "DOCTYPE pi state".
5193    
5194          if ($is_space->{$self->{nc}} or
5195              $self->{nc} == 0x003F or # ?
5196              $self->{nc} == -1) {
5197            ## XML5: U+003F: "pi state": Same as "Anything else"; "DOCTYPE
5198            ## pi state": Switch to the "DOCTYPE pi after state".  EOF:
5199            ## "DOCTYPE pi state": Parse error, switch to the "data
5200            ## state".
5201            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare pio', ## TODO: type
5202                            line => $self->{line_prev},
5203                            column => $self->{column_prev}
5204                                - 1 * ($self->{nc} != -1));
5205            $self->{state} = BOGUS_COMMENT_STATE;
5206            ## Reconsume.
5207            $self->{ct} = {type => COMMENT_TOKEN,
5208                           data => '?',
5209                           line => $self->{line_prev},
5210                           column => $self->{column_prev}
5211                               - 1 * ($self->{nc} != -1),
5212                          };
5213            redo A;
5214          } else {
5215            ## XML5: "DOCTYPE pi state": Stay in the state.
5216            $self->{ct} = {type => PI_TOKEN,
5217                           target => chr $self->{nc},
5218                           data => '',
5219                           line => $self->{line_prev},
5220                           column => $self->{column_prev} - 1,
5221                          };
5222            $self->{state} = PI_TARGET_STATE;
5223            
5224        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5225          $self->{line_prev} = $self->{line};
5226          $self->{column_prev} = $self->{column};
5227          $self->{column}++;
5228          $self->{nc}
5229              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5230        } else {
5231          $self->{set_nc}->($self);
5232        }
5233      
5234            redo A;
5235          }
5236        } elsif ($self->{state} == PI_TARGET_STATE) {
5237          if ($is_space->{$self->{nc}}) {
5238            $self->{state} = PI_TARGET_AFTER_STATE;
5239            
5240        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5241          $self->{line_prev} = $self->{line};
5242          $self->{column_prev} = $self->{column};
5243          $self->{column}++;
5244          $self->{nc}
5245              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5246        } else {
5247          $self->{set_nc}->($self);
5248        }
5249      
5250            redo A;
5251          } elsif ($self->{nc} == -1) {
5252            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type
5253            if ($self->{in_subset}) {
5254              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5255            } else {
5256              $self->{state} = DATA_STATE;
5257              $self->{s_kwd} = '';
5258            }
5259            ## Reconsume.
5260            return  ($self->{ct}); # pi
5261            redo A;
5262          } elsif ($self->{nc} == 0x003F) { # ?
5263            $self->{state} = PI_AFTER_STATE;
5264            
5265        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5266          $self->{line_prev} = $self->{line};
5267          $self->{column_prev} = $self->{column};
5268          $self->{column}++;
5269          $self->{nc}
5270              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5271        } else {
5272          $self->{set_nc}->($self);
5273        }
5274      
5275            redo A;
5276          } else {
5277            ## XML5: typo ("tag name" -> "target")
5278            $self->{ct}->{target} .= chr $self->{nc}; # pi
5279            
5280        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5281          $self->{line_prev} = $self->{line};
5282          $self->{column_prev} = $self->{column};
5283          $self->{column}++;
5284          $self->{nc}
5285              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5286        } else {
5287          $self->{set_nc}->($self);
5288        }
5289      
5290            redo A;
5291          }
5292        } elsif ($self->{state} == PI_TARGET_AFTER_STATE) {
5293          if ($is_space->{$self->{nc}}) {
5294            ## Stay in the state.
5295            
5296        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5297          $self->{line_prev} = $self->{line};
5298          $self->{column_prev} = $self->{column};
5299          $self->{column}++;
5300          $self->{nc}
5301              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5302        } else {
5303          $self->{set_nc}->($self);
5304        }
5305      
5306            redo A;
5307          } else {
5308            $self->{state} = PI_DATA_STATE;
5309            ## Reprocess.
5310            redo A;
5311          }
5312        } elsif ($self->{state} == PI_DATA_STATE) {
5313          if ($self->{nc} == 0x003F) { # ?
5314            $self->{state} = PI_DATA_AFTER_STATE;
5315            
5316        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5317          $self->{line_prev} = $self->{line};
5318          $self->{column_prev} = $self->{column};
5319          $self->{column}++;
5320          $self->{nc}
5321              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5322        } else {
5323          $self->{set_nc}->($self);
5324        }
5325      
5326            redo A;
5327          } elsif ($self->{nc} == -1) {
5328            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type
5329            if ($self->{in_subset}) {
5330              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state"
5331            } else {
5332              $self->{state} = DATA_STATE;
5333              $self->{s_kwd} = '';
5334            }
5335            ## Reprocess.
5336            return  ($self->{ct}); # pi
5337            redo A;
5338          } else {
5339            $self->{ct}->{data} .= chr $self->{nc}; # pi
5340            $self->{read_until}->($self->{ct}->{data}, q[?],
5341                                  length $self->{ct}->{data});
5342            ## Stay in the state.
5343            
5344        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5345          $self->{line_prev} = $self->{line};
5346          $self->{column_prev} = $self->{column};
5347          $self->{column}++;
5348          $self->{nc}
5349              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5350        } else {
5351          $self->{set_nc}->($self);
5352        }
5353      
5354            ## Reprocess.
5355            redo A;
5356          }
5357        } elsif ($self->{state} == PI_AFTER_STATE) {
5358          ## XML5: Part of "Pi after state".
5359    
5360          if ($self->{nc} == 0x003E) { # >
5361            if ($self->{in_subset}) {
5362              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5363            } else {
5364              $self->{state} = DATA_STATE;
5365              $self->{s_kwd} = '';
5366            }
5367            
5368        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5369          $self->{line_prev} = $self->{line};
5370          $self->{column_prev} = $self->{column};
5371          $self->{column}++;
5372          $self->{nc}
5373              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5374        } else {
5375          $self->{set_nc}->($self);
5376        }
5377      
5378            return  ($self->{ct}); # pi
5379            redo A;
5380          } elsif ($self->{nc} == 0x003F) { # ?
5381            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no s after target', ## TODO: type
5382                            line => $self->{line_prev},
5383                            column => $self->{column_prev}); ## XML5: no error
5384            $self->{ct}->{data} .= '?';
5385            $self->{state} = PI_DATA_AFTER_STATE;
5386            
5387        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5388          $self->{line_prev} = $self->{line};
5389          $self->{column_prev} = $self->{column};
5390          $self->{column}++;
5391          $self->{nc}
5392              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5393        } else {
5394          $self->{set_nc}->($self);
5395        }
5396      
5397            redo A;
5398          } else {
5399            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no s after target', ## TODO: type
5400                            line => $self->{line_prev},
5401                            column => $self->{column_prev}
5402                                + 1 * ($self->{nc} == -1)); ## XML5: no error
5403            $self->{ct}->{data} .= '?'; ## XML5: not appended
5404            $self->{state} = PI_DATA_STATE;
5405            ## Reprocess.
5406            redo A;
5407          }
5408        } elsif ($self->{state} == PI_DATA_AFTER_STATE) {
5409          ## XML5: Same as "pi after state" and "DOCTYPE pi after state".
5410    
5411          if ($self->{nc} == 0x003E) { # >
5412            if ($self->{in_subset}) {
5413              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5414            } else {
5415              $self->{state} = DATA_STATE;
5416              $self->{s_kwd} = '';
5417            }
5418            
5419        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5420          $self->{line_prev} = $self->{line};
5421          $self->{column_prev} = $self->{column};
5422          $self->{column}++;
5423          $self->{nc}
5424              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5425        } else {
5426          $self->{set_nc}->($self);
5427        }
5428      
5429            return  ($self->{ct}); # pi
5430            redo A;
5431          } elsif ($self->{nc} == 0x003F) { # ?
5432            $self->{ct}->{data} .= '?';
5433            ## Stay in the state.
5434            
5435        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5436          $self->{line_prev} = $self->{line};
5437          $self->{column_prev} = $self->{column};
5438          $self->{column}++;
5439          $self->{nc}
5440              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5441        } else {
5442          $self->{set_nc}->($self);
5443        }
5444      
5445            redo A;
5446          } else {
5447            $self->{ct}->{data} .= '?'; ## XML5: not appended
5448            $self->{state} = PI_DATA_STATE;
5449            ## Reprocess.
5450            redo A;
5451          }
5452    
5453        } elsif ($self->{state} == DOCTYPE_INTERNAL_SUBSET_STATE) {
5454          if ($self->{nc} == 0x003C) { # <
5455            $self->{state} = DOCTYPE_TAG_STATE;
5456            
5457        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5458          $self->{line_prev} = $self->{line};
5459          $self->{column_prev} = $self->{column};
5460          $self->{column}++;
5461          $self->{nc}
5462              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5463        } else {
5464          $self->{set_nc}->($self);
5465        }
5466      
5467            redo A;
5468          } elsif ($self->{nc} == 0x0025) { # %
5469            ## XML5: Not defined yet.
5470    
5471            ## TODO:
5472    
5473            if (not $self->{stop_processing} and
5474                not $self->{document}->xml_standalone) {
5475              $self->{parse_error}->(level => $self->{level}->{must}, type => 'stop processing', ## TODO: type
5476                              level => $self->{level}->{info});
5477              $self->{stop_processing} = 1;
5478            }
5479    
5480            
5481        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5482          $self->{line_prev} = $self->{line};
5483          $self->{column_prev} = $self->{column};
5484          $self->{column}++;
5485          $self->{nc}
5486              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5487        } else {
5488          $self->{set_nc}->($self);
5489        }
5490      
5491            redo A;
5492          } elsif ($self->{nc} == 0x005D) { # ]
5493            delete $self->{in_subset};
5494            $self->{state} = DOCTYPE_INTERNAL_SUBSET_AFTER_STATE;
5495            
5496        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5497          $self->{line_prev} = $self->{line};
5498          $self->{column_prev} = $self->{column};
5499          $self->{column}++;
5500          $self->{nc}
5501              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5502        } else {
5503          $self->{set_nc}->($self);
5504        }
5505      
5506            redo A;
5507          } elsif ($is_space->{$self->{nc}}) {
5508            ## Stay in the state.
5509            
5510        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5511          $self->{line_prev} = $self->{line};
5512          $self->{column_prev} = $self->{column};
5513          $self->{column}++;
5514          $self->{nc}
5515              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5516        } else {
5517          $self->{set_nc}->($self);
5518        }
5519      
5520            redo A;
5521          } elsif ($self->{nc} == -1) {
5522            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed internal subset'); ## TODO: type
5523            delete $self->{in_subset};
5524            $self->{state} = DATA_STATE;
5525            $self->{s_kwd} = '';
5526            ## Reconsume.
5527            return  ({type => END_OF_DOCTYPE_TOKEN});
5528            redo A;
5529          } else {
5530            unless ($self->{internal_subset_tainted}) {
5531              ## XML5: No parse error.
5532              $self->{parse_error}->(level => $self->{level}->{must}, type => 'string in internal subset');
5533              $self->{internal_subset_tainted} = 1;
5534            }
5535            ## Stay in the state.
5536            
5537        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5538          $self->{line_prev} = $self->{line};
5539          $self->{column_prev} = $self->{column};
5540          $self->{column}++;
5541          $self->{nc}
5542              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5543        } else {
5544          $self->{set_nc}->($self);
5545        }
5546      
5547            redo A;
5548          }
5549        } elsif ($self->{state} == DOCTYPE_INTERNAL_SUBSET_AFTER_STATE) {
5550          if ($self->{nc} == 0x003E) { # >
5551            $self->{state} = DATA_STATE;
5552            $self->{s_kwd} = '';
5553            
5554        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5555          $self->{line_prev} = $self->{line};
5556          $self->{column_prev} = $self->{column};
5557          $self->{column}++;
5558          $self->{nc}
5559              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5560        } else {
5561          $self->{set_nc}->($self);
5562        }
5563      
5564            return  ({type => END_OF_DOCTYPE_TOKEN});
5565            redo A;
5566          } elsif ($self->{nc} == -1) {
5567            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
5568            $self->{state} = DATA_STATE;
5569            $self->{s_kwd} = '';
5570            ## Reconsume.
5571            return  ({type => END_OF_DOCTYPE_TOKEN});
5572            redo A;
5573          } else {
5574            ## XML5: No parse error and stay in the state.
5575            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after internal subset'); ## TODO: type
5576    
5577            $self->{state} = BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE;
5578            
5579        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5580          $self->{line_prev} = $self->{line};
5581          $self->{column_prev} = $self->{column};
5582          $self->{column}++;
5583          $self->{nc}
5584              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5585        } else {
5586          $self->{set_nc}->($self);
5587        }
5588      
5589            redo A;
5590          }
5591        } elsif ($self->{state} == BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE) {
5592          if ($self->{nc} == 0x003E) { # >
5593            $self->{state} = DATA_STATE;
5594            $self->{s_kwd} = '';
5595            
5596        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5597          $self->{line_prev} = $self->{line};
5598          $self->{column_prev} = $self->{column};
5599          $self->{column}++;
5600          $self->{nc}
5601              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5602        } else {
5603          $self->{set_nc}->($self);
5604        }
5605      
5606            return  ({type => END_OF_DOCTYPE_TOKEN});
5607            redo A;
5608          } elsif ($self->{nc} == -1) {
5609            $self->{state} = DATA_STATE;
5610            $self->{s_kwd} = '';
5611            ## Reconsume.
5612            return  ({type => END_OF_DOCTYPE_TOKEN});
5613            redo A;
5614          } else {
5615            ## Stay in the state.
5616            
5617        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5618          $self->{line_prev} = $self->{line};
5619          $self->{column_prev} = $self->{column};
5620          $self->{column}++;
5621          $self->{nc}
5622              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5623        } else {
5624          $self->{set_nc}->($self);
5625        }
5626      
5627            redo A;
5628          }
5629        } elsif ($self->{state} == DOCTYPE_TAG_STATE) {
5630          if ($self->{nc} == 0x0021) { # !
5631            $self->{state} = DOCTYPE_MARKUP_DECLARATION_OPEN_STATE;
5632            
5633        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5634          $self->{line_prev} = $self->{line};
5635          $self->{column_prev} = $self->{column};
5636          $self->{column}++;
5637          $self->{nc}
5638              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5639        } else {
5640          $self->{set_nc}->($self);
5641        }
5642      
5643            redo A;
5644          } elsif ($self->{nc} == 0x003F) { # ?
5645            $self->{state} = PI_STATE;
5646            
5647        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5648          $self->{line_prev} = $self->{line};
5649          $self->{column_prev} = $self->{column};
5650          $self->{column}++;
5651          $self->{nc}
5652              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5653        } else {
5654          $self->{set_nc}->($self);
5655        }
5656      
5657            redo A;
5658          } elsif ($self->{nc} == -1) {
5659            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago');
5660            $self->{state} = DATA_STATE;
5661            $self->{s_kwd} = '';
5662            ## Reconsume.
5663            redo A;
5664          } else {
5665            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago', ## XML5: Not a parse error.
5666                            line => $self->{line_prev},
5667                            column => $self->{column_prev});
5668            $self->{state} = BOGUS_COMMENT_STATE;
5669            $self->{ct} = {type => COMMENT_TOKEN,
5670                           data => '',
5671                          }; ## NOTE: Will be discarded.
5672            
5673        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5674          $self->{line_prev} = $self->{line};
5675          $self->{column_prev} = $self->{column};
5676          $self->{column}++;
5677          $self->{nc}
5678              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5679        } else {
5680          $self->{set_nc}->($self);
5681        }
5682      
5683            redo A;
5684          }
5685        } elsif ($self->{state} == DOCTYPE_MARKUP_DECLARATION_OPEN_STATE) {
5686          ## XML5: "DOCTYPE markup declaration state".
5687          
5688          if ($self->{nc} == 0x002D) { # -
5689            $self->{state} = MD_HYPHEN_STATE;
5690            
5691        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5692          $self->{line_prev} = $self->{line};
5693          $self->{column_prev} = $self->{column};
5694          $self->{column}++;
5695          $self->{nc}
5696              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5697        } else {
5698          $self->{set_nc}->($self);
5699        }
5700      
5701            redo A;
5702          } elsif ($self->{nc} == 0x0045 or # E
5703                   $self->{nc} == 0x0065) { # e
5704            $self->{state} = MD_E_STATE;
5705            $self->{kwd} = chr $self->{nc};
5706            
5707        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5708          $self->{line_prev} = $self->{line};
5709          $self->{column_prev} = $self->{column};
5710          $self->{column}++;
5711          $self->{nc}
5712              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5713        } else {
5714          $self->{set_nc}->($self);
5715        }
5716      
5717            redo A;
5718          } elsif ($self->{nc} == 0x0041 or # A
5719                   $self->{nc} == 0x0061) { # a
5720            $self->{state} = MD_ATTLIST_STATE;
5721            $self->{kwd} = chr $self->{nc};
5722            
5723        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5724          $self->{line_prev} = $self->{line};
5725          $self->{column_prev} = $self->{column};
5726          $self->{column}++;
5727          $self->{nc}
5728              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5729        } else {
5730          $self->{set_nc}->($self);
5731        }
5732      
5733            redo A;
5734          } elsif ($self->{nc} == 0x004E or # N
5735                   $self->{nc} == 0x006E) { # n
5736            $self->{state} = MD_NOTATION_STATE;
5737            $self->{kwd} = chr $self->{nc};
5738            
5739        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5740          $self->{line_prev} = $self->{line};
5741          $self->{column_prev} = $self->{column};
5742          $self->{column}++;
5743          $self->{nc}
5744              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5745        } else {
5746          $self->{set_nc}->($self);
5747        }
5748      
5749            redo A;
5750          } else {
5751            #
5752          }
5753          
5754          ## XML5: No parse error.
5755          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5756                          line => $self->{line_prev},
5757                          column => $self->{column_prev} - 1);
5758          ## Reconsume.
5759          $self->{state} = BOGUS_COMMENT_STATE;
5760          $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded.
5761          redo A;
5762        } elsif ($self->{state} == MD_E_STATE) {
5763          if ($self->{nc} == 0x004E or # N
5764              $self->{nc} == 0x006E) { # n
5765            $self->{state} = MD_ENTITY_STATE;
5766            $self->{kwd} .= chr $self->{nc};
5767            
5768        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5769          $self->{line_prev} = $self->{line};
5770          $self->{column_prev} = $self->{column};
5771          $self->{column}++;
5772          $self->{nc}
5773              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5774        } else {
5775          $self->{set_nc}->($self);
5776        }
5777      
5778            redo A;
5779          } elsif ($self->{nc} == 0x004C or # L
5780                   $self->{nc} == 0x006C) { # l
5781            ## XML5: <!ELEMENT> not supported.
5782            $self->{state} = MD_ELEMENT_STATE;
5783            $self->{kwd} .= chr $self->{nc};
5784            
5785        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5786          $self->{line_prev} = $self->{line};
5787          $self->{column_prev} = $self->{column};
5788          $self->{column}++;
5789          $self->{nc}
5790              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5791        } else {
5792          $self->{set_nc}->($self);
5793        }
5794      
5795            redo A;
5796          } else {
5797            ## XML5: No parse error.
5798            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5799                            line => $self->{line_prev},
5800                            column => $self->{column_prev} - 2
5801                                + 1 * ($self->{nc} == -1));
5802            ## Reconsume.
5803            $self->{state} = BOGUS_COMMENT_STATE;
5804            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5805            redo A;
5806          }
5807        } elsif ($self->{state} == MD_ENTITY_STATE) {
5808          if ($self->{nc} == [
5809                undef,
5810                undef,
5811                0x0054, # T
5812                0x0049, # I
5813                0x0054, # T
5814              ]->[length $self->{kwd}] or
5815              $self->{nc} == [
5816                undef,
5817                undef,
5818                0x0074, # t
5819                0x0069, # i
5820                0x0074, # t
5821              ]->[length $self->{kwd}]) {
5822            ## Stay in the state.
5823            $self->{kwd} .= chr $self->{nc};
5824            
5825        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5826          $self->{line_prev} = $self->{line};
5827          $self->{column_prev} = $self->{column};
5828          $self->{column}++;
5829          $self->{nc}
5830              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5831        } else {
5832          $self->{set_nc}->($self);
5833        }
5834      
5835            redo A;
5836          } elsif ((length $self->{kwd}) == 5 and
5837                   ($self->{nc} == 0x0059 or # Y
5838                    $self->{nc} == 0x0079)) { # y
5839            if ($self->{kwd} ne 'ENTIT' or $self->{nc} == 0x0079) {
5840              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
5841                              text => 'ENTITY',
5842                              line => $self->{line_prev},
5843                              column => $self->{column_prev} - 4);
5844            }
5845            $self->{ct} = {type => GENERAL_ENTITY_TOKEN, name => '',
5846                           line => $self->{line_prev},
5847                           column => $self->{column_prev} - 6};
5848            $self->{state} = DOCTYPE_MD_STATE;
5849            
5850        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5851          $self->{line_prev} = $self->{line};
5852          $self->{column_prev} = $self->{column};
5853          $self->{column}++;
5854          $self->{nc}
5855              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5856        } else {
5857          $self->{set_nc}->($self);
5858        }
5859      
5860            redo A;
5861          } else {
5862            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5863                            line => $self->{line_prev},
5864                            column => $self->{column_prev} - 1
5865                                - (length $self->{kwd})
5866                                + 1 * ($self->{nc} == -1));
5867            $self->{state} = BOGUS_COMMENT_STATE;
5868            ## Reconsume.
5869            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5870            redo A;
5871          }
5872        } elsif ($self->{state} == MD_ELEMENT_STATE) {
5873          if ($self->{nc} == [
5874               undef,
5875               undef,
5876               0x0045, # E
5877               0x004D, # M
5878               0x0045, # E
5879               0x004E, # N
5880              ]->[length $self->{kwd}] or
5881              $self->{nc} == [
5882               undef,
5883               undef,
5884               0x0065, # e
5885               0x006D, # m
5886               0x0065, # e
5887               0x006E, # n
5888              ]->[length $self->{kwd}]) {
5889            ## Stay in the state.
5890            $self->{kwd} .= chr $self->{nc};
5891            
5892        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5893          $self->{line_prev} = $self->{line};
5894          $self->{column_prev} = $self->{column};
5895          $self->{column}++;
5896          $self->{nc}
5897              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5898        } else {
5899          $self->{set_nc}->($self);
5900        }
5901      
5902            redo A;
5903          } elsif ((length $self->{kwd}) == 6 and
5904                   ($self->{nc} == 0x0054 or # T
5905                    $self->{nc} == 0x0074)) { # t
5906            if ($self->{kwd} ne 'ELEMEN' or $self->{nc} == 0x0074) {
5907              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
5908                              text => 'ELEMENT',
5909                              line => $self->{line_prev},
5910                              column => $self->{column_prev} - 5);
5911            }
5912            $self->{ct} = {type => ELEMENT_TOKEN, name => '',
5913                           line => $self->{line_prev},
5914                           column => $self->{column_prev} - 7};
5915            $self->{state} = DOCTYPE_MD_STATE;
5916            
5917        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5918          $self->{line_prev} = $self->{line};
5919          $self->{column_prev} = $self->{column};
5920          $self->{column}++;
5921          $self->{nc}
5922              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5923        } else {
5924          $self->{set_nc}->($self);
5925        }
5926      
5927            redo A;
5928          } else {
5929            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5930                            line => $self->{line_prev},
5931                            column => $self->{column_prev} - 1
5932                                - (length $self->{kwd})
5933                                + 1 * ($self->{nc} == -1));
5934            $self->{state} = BOGUS_COMMENT_STATE;
5935            ## Reconsume.
5936            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5937            redo A;
5938          }
5939        } elsif ($self->{state} == MD_ATTLIST_STATE) {
5940          if ($self->{nc} == [
5941               undef,
5942               0x0054, # T
5943               0x0054, # T
5944               0x004C, # L
5945               0x0049, # I
5946               0x0053, # S
5947              ]->[length $self->{kwd}] or
5948              $self->{nc} == [
5949               undef,
5950               0x0074, # t
5951               0x0074, # t
5952               0x006C, # l
5953               0x0069, # i
5954               0x0073, # s
5955              ]->[length $self->{kwd}]) {
5956            ## Stay in the state.
5957            $self->{kwd} .= chr $self->{nc};
5958            
5959        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5960          $self->{line_prev} = $self->{line};
5961          $self->{column_prev} = $self->{column};
5962          $self->{column}++;
5963          $self->{nc}
5964              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5965        } else {
5966          $self->{set_nc}->($self);
5967        }
5968      
5969            redo A;
5970          } elsif ((length $self->{kwd}) == 6 and
5971                   ($self->{nc} == 0x0054 or # T
5972                    $self->{nc} == 0x0074)) { # t
5973            if ($self->{kwd} ne 'ATTLIS' or $self->{nc} == 0x0074) {
5974              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
5975                              text => 'ATTLIST',
5976                              line => $self->{line_prev},
5977                              column => $self->{column_prev} - 5);
5978            }
5979            $self->{ct} = {type => ATTLIST_TOKEN, name => '',
5980                           attrdefs => [],
5981                           line => $self->{line_prev},
5982                           column => $self->{column_prev} - 7};
5983            $self->{state} = DOCTYPE_MD_STATE;
5984            
5985        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5986          $self->{line_prev} = $self->{line};
5987          $self->{column_prev} = $self->{column};
5988          $self->{column}++;
5989          $self->{nc}
5990              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5991        } else {
5992          $self->{set_nc}->($self);
5993        }
5994      
5995            redo A;
5996          } else {
5997            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5998                            line => $self->{line_prev},
5999                            column => $self->{column_prev} - 1
6000                                 - (length $self->{kwd})
6001                                 + 1 * ($self->{nc} == -1));
6002            $self->{state} = BOGUS_COMMENT_STATE;
6003            ## Reconsume.
6004            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
6005            redo A;
6006          }
6007        } elsif ($self->{state} == MD_NOTATION_STATE) {
6008          if ($self->{nc} == [
6009               undef,
6010               0x004F, # O
6011               0x0054, # T
6012               0x0041, # A
6013               0x0054, # T
6014               0x0049, # I
6015               0x004F, # O
6016              ]->[length $self->{kwd}] or
6017              $self->{nc} == [
6018               undef,
6019               0x006F, # o
6020               0x0074, # t
6021               0x0061, # a
6022               0x0074, # t
6023               0x0069, # i
6024               0x006F, # o
6025              ]->[length $self->{kwd}]) {
6026            ## Stay in the state.
6027            $self->{kwd} .= chr $self->{nc};
6028            
6029        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6030          $self->{line_prev} = $self->{line};
6031          $self->{column_prev} = $self->{column};
6032          $self->{column}++;
6033          $self->{nc}
6034              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6035        } else {
6036          $self->{set_nc}->($self);
6037        }
6038      
6039            redo A;
6040          } elsif ((length $self->{kwd}) == 7 and
6041                   ($self->{nc} == 0x004E or # N
6042                    $self->{nc} == 0x006E)) { # n
6043            if ($self->{kwd} ne 'NOTATIO' or $self->{nc} == 0x006E) {
6044              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
6045                              text => 'NOTATION',
6046                              line => $self->{line_prev},
6047                              column => $self->{column_prev} - 6);
6048            }
6049            $self->{ct} = {type => NOTATION_TOKEN, name => '',
6050                           line => $self->{line_prev},
6051                           column => $self->{column_prev} - 8};
6052            $self->{state} = DOCTYPE_MD_STATE;
6053            
6054        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6055          $self->{line_prev} = $self->{line};
6056          $self->{column_prev} = $self->{column};
6057          $self->{column}++;
6058          $self->{nc}
6059              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6060        } else {
6061          $self->{set_nc}->($self);
6062        }
6063      
6064            redo A;
6065          } else {
6066            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
6067                            line => $self->{line_prev},
6068                            column => $self->{column_prev} - 1
6069                                - (length $self->{kwd})
6070                                + 1 * ($self->{nc} == -1));
6071            $self->{state} = BOGUS_COMMENT_STATE;
6072            ## Reconsume.
6073            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
6074            redo A;
6075          }
6076        } elsif ($self->{state} == DOCTYPE_MD_STATE) {
6077          ## XML5: "DOCTYPE ENTITY state", "DOCTYPE ATTLIST state", and
6078          ## "DOCTYPE NOTATION state".
6079    
6080          if ($is_space->{$self->{nc}}) {
6081            ## XML5: [NOTATION] Switch to the "DOCTYPE NOTATION identifier state".
6082            $self->{state} = BEFORE_MD_NAME_STATE;
6083            
6084        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6085          $self->{line_prev} = $self->{line};
6086          $self->{column_prev} = $self->{column};
6087          $self->{column}++;
6088          $self->{nc}
6089              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6090        } else {
6091          $self->{set_nc}->($self);
6092        }
6093      
6094            redo A;
6095          } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
6096                   $self->{nc} == 0x0025) { # %
6097            ## XML5: Switch to the "DOCTYPE bogus comment state".
6098            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before md name'); ## TODO: type
6099            $self->{state} = DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE;
6100            
6101        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6102          $self->{line_prev} = $self->{line};
6103          $self->{column_prev} = $self->{column};
6104          $self->{column}++;
6105          $self->{nc}
6106              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6107        } else {
6108          $self->{set_nc}->($self);
6109        }
6110      
6111            redo A;
6112          } elsif ($self->{nc} == -1) {
6113            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6114            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6115            ## Reconsume.
6116            redo A;
6117          } elsif ($self->{nc} == 0x003E) { # >
6118            ## XML5: Switch to the "DOCTYPE bogus comment state".
6119            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md name'); ## TODO: type
6120            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6121            
6122        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6123          $self->{line_prev} = $self->{line};
6124          $self->{column_prev} = $self->{column};
6125          $self->{column}++;
6126          $self->{nc}
6127              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6128        } else {
6129          $self->{set_nc}->($self);
6130        }
6131      
6132            redo A;
6133          } else {
6134            ## XML5: Switch to the "DOCTYPE bogus comment state".
6135            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before md name'); ## TODO: type
6136            $self->{state} = BEFORE_MD_NAME_STATE;
6137            redo A;
6138          }
6139        } elsif ($self->{state} == BEFORE_MD_NAME_STATE) {
              ## Handler for the position just before the name of a markup
              ## declaration (the token being built is $self->{ct}).
              ##   - white space: skipped; the state does not change.
              ##   - "%" while ct is a GENERAL_ENTITY_TOKEN: go to
              ##     DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE.
              ##   - ">": report 'no md name' and go back to
              ##     DOCTYPE_INTERNAL_SUBSET_STATE; nothing is returned here.
              ##   - nc == -1 (presumably end of input, given the 'unclosed md'
              ##     error): go to DOCTYPE_INTERNAL_SUBSET_STATE and let that
              ##     state look at the same character again.
              ##   - anything else: first character of the name; appended to
              ##     ct->{name} and the state becomes MD_NAME_STATE.
6140          ## XML5: "DOCTYPE ENTITY parameter state", "DOCTYPE ENTITY type
6141          ## before state", "DOCTYPE ATTLIST name before state".
6142    
6143          if ($is_space->{$self->{nc}}) {
6144            ## Stay in the state.
6145            
                ## Consume the next input character: taken from char_buffer
                ## while unread characters remain there, otherwise the set_nc
                ## callback is invoked (presumably it refills nc -- its body
                ## is not visible here).
6146        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6147          $self->{line_prev} = $self->{line};
6148          $self->{column_prev} = $self->{column};
6149          $self->{column}++;
6150          $self->{nc}
6151              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6152        } else {
6153          $self->{set_nc}->($self);
6154        }
6155      
6156            redo A;
6157          } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
6158                   $self->{nc} == 0x0025) { # %
                ## The token type is not changed yet; that happens in the
                ## next state, once white space is seen after the "%".
6159            $self->{state} = DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE;
6160            
6161        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6162          $self->{line_prev} = $self->{line};
6163          $self->{column_prev} = $self->{column};
6164          $self->{column}++;
6165          $self->{nc}
6166              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6167        } else {
6168          $self->{set_nc}->($self);
6169        }
6170      
6171            redo A;
6172          } elsif ($self->{nc} == 0x003E) { # >
6173            ## XML5: Same as "Anything else".
6174            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md name'); ## TODO: type
6175            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6176            
6177        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6178          $self->{line_prev} = $self->{line};
6179          $self->{column_prev} = $self->{column};
6180          $self->{column}++;
6181          $self->{nc}
6182              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6183        } else {
6184          $self->{set_nc}->($self);
6185        }
6186      
6187            redo A;
6188          } elsif ($self->{nc} == -1) {
6189            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6190            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6191            ## Reconsume.
6192            redo A;
6193          } else {
6194            ## XML5: [ATTLIST] Not defined yet.
6195            $self->{ct}->{name} .= chr $self->{nc};
6196            $self->{state} = MD_NAME_STATE;
6197            
6198        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6199          $self->{line_prev} = $self->{line};
6200          $self->{column_prev} = $self->{column};
6201          $self->{column}++;
6202          $self->{nc}
6203              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6204        } else {
6205          $self->{set_nc}->($self);
6206        }
6207      
6208            redo A;
6209          }
6210        } elsif ($self->{state} == DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE) {
              ## Handler for the character that follows "%" in an entity
              ## declaration (BEFORE_MD_NAME_STATE switches here on "%").
              ##   - white space: the declaration is a parameter entity; ct's
              ##     type becomes PARAMETER_ENTITY_TOKEN and the tokenizer
              ##     returns to BEFORE_MD_NAME_STATE to read the name.
              ##   - ">": report 'no md name', back to
              ##     DOCTYPE_INTERNAL_SUBSET_STATE.
              ##   - nc == -1 (presumably end of input): report 'unclosed md'
              ##     and reprocess the character in
              ##     DOCTYPE_INTERNAL_SUBSET_STATE.
              ##   - anything else: report 'no space after ENTITY percent',
              ##     replace ct with an empty COMMENT_TOKEN and reprocess the
              ##     character in BOGUS_COMMENT_STATE.
6211          if ($is_space->{$self->{nc}}) {
6212            ## XML5: Switch to the "DOCTYPE ENTITY parameter state".
6213            $self->{ct}->{type} = PARAMETER_ENTITY_TOKEN;
6214            $self->{state} = BEFORE_MD_NAME_STATE;
6215            
                ## Consume the next input character: taken from char_buffer
                ## while unread characters remain there, otherwise the set_nc
                ## callback is invoked (presumably it refills nc -- its body
                ## is not visible here).
6216        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6217          $self->{line_prev} = $self->{line};
6218          $self->{column_prev} = $self->{column};
6219          $self->{column}++;
6220          $self->{nc}
6221              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6222        } else {
6223          $self->{set_nc}->($self);
6224        }
6225      
6226            redo A;
6227          } elsif ($self->{nc} == 0x003E) { # >
6228            ## XML5: Same as "Anything else".
6229            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md name'); ## TODO: type
6230            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6231            
6232        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6233          $self->{line_prev} = $self->{line};
6234          $self->{column_prev} = $self->{column};
6235          $self->{column}++;
6236          $self->{nc}
6237              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6238        } else {
6239          $self->{set_nc}->($self);
6240        }
6241      
6242            redo A;
6243          } elsif ($self->{nc} == -1) {
6244            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md');
6245            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6246            ## Reconsume.
6247            redo A;
6248          } else {
6249            ## XML5: No parse error.
6250            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space after ENTITY percent'); ## TODO: type
6251            $self->{state} = BOGUS_COMMENT_STATE;
                ## The entity token built so far is dropped here: ct is
                ## overwritten with a fresh comment token.
6252            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
6253            ## Reconsume.
6254            redo A;
6255          }
6256        } elsif ($self->{state} == MD_NAME_STATE) {
              ## Handler for the characters of a markup declaration name,
              ## accumulated in ct->{name}.
              ##   - white space: the name is complete; the next state depends
              ##     on ct's type (ATTLIST, ELEMENT, or anything else).
              ##   - ">": the declaration ends right after its name.  That is
              ##     reported as 'no md def' unless ct is an ATTLIST_TOKEN;
              ##     ct is returned either way.
              ##   - nc == -1 (presumably end of input): report 'unclosed md'
              ##     and return ct without consuming, so that
              ##     DOCTYPE_INTERNAL_SUBSET_STATE sees the same character.
              ##   - anything else: appended to ct->{name}.
6257          ## XML5: "DOCTYPE ENTITY name state" and "DOCTYPE ATTLIST name state".
6258          
6259          if ($is_space->{$self->{nc}}) {
6260            if ($self->{ct}->{type} == ATTLIST_TOKEN) {
6261              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
6262            } elsif ($self->{ct}->{type} == ELEMENT_TOKEN) {
6263              $self->{state} = AFTER_ELEMENT_NAME_STATE;
6264            } else { # ENTITY/NOTATION
6265              $self->{state} = AFTER_DOCTYPE_NAME_STATE;
6266            }
6267            
                ## Consume the next input character: taken from char_buffer
                ## while unread characters remain there, otherwise the set_nc
                ## callback is invoked (presumably it refills nc -- its body
                ## is not visible here).
6268        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6269          $self->{line_prev} = $self->{line};
6270          $self->{column_prev} = $self->{column};
6271          $self->{column}++;
6272          $self->{nc}
6273              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6274        } else {
6275          $self->{set_nc}->($self);
6276        }
6277      
6278            redo A;
6279          } elsif ($self->{nc} == 0x003E) { # >
6280            if ($self->{ct}->{type} == ATTLIST_TOKEN) {
                  ## No error: an ATTLIST declaration with a name only is
                  ## accepted by this branch.
6281              #
6282            } else {
6283              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md def'); ## TODO: type
6284            }
6285            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6286            
6287        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6288          $self->{line_prev} = $self->{line};
6289          $self->{column_prev} = $self->{column};
6290          $self->{column}++;
6291          $self->{nc}
6292              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6293        } else {
6294          $self->{set_nc}->($self);
6295        }
6296      
6297            return  ($self->{ct}); # ELEMENT/ENTITY/ATTLIST/NOTATION
                ## Not reached: follows an unconditional return.
6298            redo A;
6299          } elsif ($self->{nc} == -1) {
6300            ## XML5: [ATTLIST] No parse error.
6301            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md');
6302            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6303            ## Reconsume.
6304            return  ($self->{ct}); # ELEMENT/ENTITY/ATTLIST/NOTATION
                ## Not reached: follows an unconditional return.
6305            redo A;
6306          } else {
6307            ## XML5: [ATTLIST] Not defined yet.
6308            $self->{ct}->{name} .= chr $self->{nc};
6309            ## Stay in the state.
6310            
6311        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6312          $self->{line_prev} = $self->{line};
6313          $self->{column_prev} = $self->{column};
6314          $self->{column}++;
6315          $self->{nc}
6316              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6317        } else {
6318          $self->{set_nc}->($self);
6319        }
6320      
6321            redo A;
6322          }
6323        } elsif ($self->{state} == DOCTYPE_ATTLIST_NAME_AFTER_STATE) {
              ## Handler for the position after the name in an ATTLIST
              ## declaration (MD_NAME_STATE switches here for ATTLIST tokens).
              ##   - white space: skipped.
              ##   - ">": the declaration is finished; ct is returned.
              ##   - nc == -1 (presumably end of input): report 'unclosed md'
              ##     and return ct without consuming a character.
              ##   - anything else: first character of an attribute
              ##     definition name; a new record is started in $self->{ca}.
6324          if ($is_space->{$self->{nc}}) {
6325            ## Stay in the state.
6326            
                ## Consume the next input character: taken from char_buffer
                ## while unread characters remain there, otherwise the set_nc
                ## callback is invoked (presumably it refills nc -- its body
                ## is not visible here).
6327        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6328          $self->{line_prev} = $self->{line};
6329          $self->{column_prev} = $self->{column};
6330          $self->{column}++;
6331          $self->{nc}
6332              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6333        } else {
6334          $self->{set_nc}->($self);
6335        }
6336      
6337            redo A;
6338          } elsif ($self->{nc} == 0x003E) { # >
6339            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6340            
6341        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6342          $self->{line_prev} = $self->{line};
6343          $self->{column_prev} = $self->{column};
6344          $self->{column}++;
6345          $self->{nc}
6346              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6347        } else {
6348          $self->{set_nc}->($self);
6349        }
6350      
6351            return  ($self->{ct}); # ATTLIST
                ## Not reached: follows an unconditional return.
6352            redo A;
6353          } elsif ($self->{nc} == -1) {
6354            ## XML5: No parse error.
6355            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6356            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6357            return  ($self->{ct});
                ## Not reached: follows an unconditional return.
6358            redo A;
6359          } else {
6360            ## XML5: Not defined yet.
                ## New attribute definition: the name starts with the current
                ## character, the list of allowed tokens starts empty, and the
                ## current line/column are recorded with it.
6361            $self->{ca} = {name => chr ($self->{nc}), # attrdef
6362                           tokens => [],
6363                           line => $self->{line}, column => $self->{column}};
6364            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE;
6365            
6366        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6367          $self->{line_prev} = $self->{line};
6368          $self->{column_prev} = $self->{column};
6369          $self->{column}++;
6370          $self->{nc}
6371              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6372        } else {
6373          $self->{set_nc}->($self);
6374        }
6375      
6376            redo A;
6377          }
6378        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE) {
              ## Handler for the characters of an attribute definition name,
              ## accumulated in ca->{name}.
              ##   - white space: the name is complete; go to
              ##     DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE.
              ##   - ">": report 'no attr type' and return ct.
              ##   - "(": report 'no space before paren' and start reading
              ##     allowed tokens (BEFORE_ALLOWED_TOKEN_STATE).
              ##   - nc == -1 (presumably end of input): report 'unclosed md'
              ##     and return ct.
              ##   - anything else: appended to ca->{name}.
              ## NOTE(review): in the ">" and -1 branches the unfinished
              ## definition in ca is not attached to ct within this handler --
              ## confirm whether dropping it is intended.
6379          if ($is_space->{$self->{nc}}) {
6380            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE;
6381            
                ## Consume the next input character: taken from char_buffer
                ## while unread characters remain there, otherwise the set_nc
                ## callback is invoked (presumably it refills nc -- its body
                ## is not visible here).
6382        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6383          $self->{line_prev} = $self->{line};
6384          $self->{column_prev} = $self->{column};
6385          $self->{column}++;
6386          $self->{nc}
6387              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6388        } else {
6389          $self->{set_nc}->($self);
6390        }
6391      
6392            redo A;
6393          } elsif ($self->{nc} == 0x003E) { # >
6394            ## XML5: Same as "anything else".
6395            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr type'); ## TODO: type
6396            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6397            
6398        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6399          $self->{line_prev} = $self->{line};
6400          $self->{column_prev} = $self->{column};
6401          $self->{column}++;
6402          $self->{nc}
6403              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6404        } else {
6405          $self->{set_nc}->($self);
6406        }
6407      
6408            return  ($self->{ct}); # ATTLIST
                ## Not reached: follows an unconditional return.
6409            redo A;
6410          } elsif ($self->{nc} == 0x0028) { # (
6411            ## XML5: Same as "anything else".
6412            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before paren'); ## TODO: type
6413            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6414            
6415        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6416          $self->{line_prev} = $self->{line};
6417          $self->{column_prev} = $self->{column};
6418          $self->{column}++;
6419          $self->{nc}
6420              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6421        } else {
6422          $self->{set_nc}->($self);
6423        }
6424      
6425            redo A;
6426          } elsif ($self->{nc} == -1) {
6427            ## XML5: No parse error.
6428            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6429            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6430            
                ## NOTE(review): a character is consumed here although nc is
                ## already -1, whereas the -1 branches of MD_NAME_STATE and
                ## DOCTYPE_ATTLIST_NAME_AFTER_STATE return without consuming.
                ## Confirm that advancing past -1 is harmless.
6431        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6432          $self->{line_prev} = $self->{line};
6433          $self->{column_prev} = $self->{column};
6434          $self->{column}++;
6435          $self->{nc}
6436              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6437        } else {
6438          $self->{set_nc}->($self);
6439        }
6440      
6441            return  ($self->{ct}); # ATTLIST
                ## Not reached: follows an unconditional return.
6442            redo A;
6443          } else {
6444            ## XML5: Not defined yet.
6445            $self->{ca}->{name} .= chr $self->{nc};
6446            ## Stay in the state.
6447            
6448        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6449          $self->{line_prev} = $self->{line};
6450          $self->{column_prev} = $self->{column};
6451          $self->{column}++;
6452          $self->{nc}
6453              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6454        } else {
6455          $self->{set_nc}->($self);
6456        }
6457      
6458            redo A;
6459          }
6460        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE) {
              ## Handler for the position after an attribute definition name,
              ## where the attribute type is expected.
              ##   - white space: skipped.
              ##   - ">": report 'no attr type' and return ct.
              ##   - "(": start reading allowed tokens
              ##     (BEFORE_ALLOWED_TOKEN_STATE); no error in this state.
              ##   - nc == -1 (presumably end of input): report 'unclosed md'
              ##     and return ct.
              ##   - anything else: first character of the type keyword;
              ##     stored as ca->{type} and the state becomes
              ##     DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE.
6461          if ($is_space->{$self->{nc}}) {
6462            ## Stay in the state.
6463            
                ## Consume the next input character: taken from char_buffer
                ## while unread characters remain there, otherwise the set_nc
                ## callback is invoked (presumably it refills nc -- its body
                ## is not visible here).
6464        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6465          $self->{line_prev} = $self->{line};
6466          $self->{column_prev} = $self->{column};
6467          $self->{column}++;
6468          $self->{nc}
6469              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6470        } else {
6471          $self->{set_nc}->($self);
6472        }
6473      
6474            redo A;
6475          } elsif ($self->{nc} == 0x003E) { # >
6476            ## XML5: Same as "anything else".
6477            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr type'); ## TODO: type
6478            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6479            
6480        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6481          $self->{line_prev} = $self->{line};
6482          $self->{column_prev} = $self->{column};
6483          $self->{column}++;
6484          $self->{nc}
6485              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6486        } else {
6487          $self->{set_nc}->($self);
6488        }
6489      
6490            return  ($self->{ct}); # ATTLIST
                ## Not reached: follows an unconditional return.
6491            redo A;
6492          } elsif ($self->{nc} == 0x0028) { # (
6493            ## XML5: Same as "anything else".
6494            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6495            
6496        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6497          $self->{line_prev} = $self->{line};
6498          $self->{column_prev} = $self->{column};
6499          $self->{column}++;
6500          $self->{nc}
6501              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6502        } else {
6503          $self->{set_nc}->($self);
6504        }
6505      
6506            redo A;
6507          } elsif ($self->{nc} == -1) {
6508            ## XML5: No parse error.
6509            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6510            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6511            
                ## NOTE(review): a character is consumed here although nc is
                ## already -1, whereas the -1 branches of MD_NAME_STATE and
                ## DOCTYPE_ATTLIST_NAME_AFTER_STATE return without consuming.
                ## Confirm that advancing past -1 is harmless.
6512        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6513          $self->{line_prev} = $self->{line};
6514          $self->{column_prev} = $self->{column};
6515          $self->{column}++;
6516          $self->{nc}
6517              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6518        } else {
6519          $self->{set_nc}->($self);
6520        }
6521      
6522            return  ($self->{ct});
                ## Not reached: follows an unconditional return.
6523            redo A;
6524          } else {
6525            ## XML5: Not defined yet.
                ## Plain assignment (not .=): this is the first character of
                ## the type, so any earlier ca->{type} value is replaced.
6526            $self->{ca}->{type} = chr $self->{nc};
6527            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE;
6528            
6529        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6530          $self->{line_prev} = $self->{line};
6531          $self->{column_prev} = $self->{column};
6532          $self->{column}++;
6533          $self->{nc}
6534              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6535        } else {
6536          $self->{set_nc}->($self);
6537        }
6538      
6539            redo A;
6540          }
6541        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE) {
              ## Handler for the characters of an attribute type keyword,
              ## accumulated in ca->{type}.
              ##   - white space: the type is complete; go to
              ##     DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE.
              ##   - "#", double quote, single quote: a default declaration or
              ##     default value starts with no white space before it;
              ##     report 'no space before default value' and continue in
              ##     the matching state.
              ##   - ">": report 'no attr default' and return ct.
              ##   - "(": report 'no space before paren' and start reading
              ##     allowed tokens.
              ##   - nc == -1 (presumably end of input): report 'unclosed md'
              ##     and return ct.
              ##   - anything else: appended to ca->{type}.
6542          if ($is_space->{$self->{nc}}) {
6543            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE;
6544            
                ## Consume the next input character: taken from char_buffer
                ## while unread characters remain there, otherwise the set_nc
                ## callback is invoked (presumably it refills nc -- its body
                ## is not visible here).
6545        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6546          $self->{line_prev} = $self->{line};
6547          $self->{column_prev} = $self->{column};
6548          $self->{column}++;
6549          $self->{nc}
6550              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6551        } else {
6552          $self->{set_nc}->($self);
6553        }
6554      
6555            redo A;
6556          } elsif ($self->{nc} == 0x0023) { # #
6557            ## XML5: Same as "anything else".
6558            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6559            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6560            
6561        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6562          $self->{line_prev} = $self->{line};
6563          $self->{column_prev} = $self->{column};
6564          $self->{column}++;
6565          $self->{nc}
6566              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6567        } else {
6568          $self->{set_nc}->($self);
6569        }
6570      
6571            redo A;
6572          } elsif ($self->{nc} == 0x0022) { # "
6573            ## XML5: Same as "anything else".
6574            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
                ## Start an empty default value before entering the quoted
                ## attribute value state.
6575            $self->{ca}->{value} = '';
6576            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6577            
6578        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6579          $self->{line_prev} = $self->{line};
6580          $self->{column_prev} = $self->{column};
6581          $self->{column}++;
6582          $self->{nc}
6583              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6584        } else {
6585          $self->{set_nc}->($self);
6586        }
6587      
6588            redo A;
6589          } elsif ($self->{nc} == 0x0027) { # '
6590            ## XML5: Same as "anything else".
6591            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6592            $self->{ca}->{value} = '';
6593            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6594            
6595        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6596          $self->{line_prev} = $self->{line};
6597          $self->{column_prev} = $self->{column};
6598          $self->{column}++;
6599          $self->{nc}
6600              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6601        } else {
6602          $self->{set_nc}->($self);
6603        }
6604      
6605            redo A;
6606          } elsif ($self->{nc} == 0x003E) { # >
6607            ## XML5: Same as "anything else".
6608            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6609            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6610            
6611        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6612          $self->{line_prev} = $self->{line};
6613          $self->{column_prev} = $self->{column};
6614          $self->{column}++;
6615          $self->{nc}
6616              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6617        } else {
6618          $self->{set_nc}->($self);
6619        }
6620      
6621            return  ($self->{ct}); # ATTLIST
                ## Not reached: follows an unconditional return.
6622            redo A;
6623          } elsif ($self->{nc} == 0x0028) { # (
6624            ## XML5: Same as "anything else".
6625            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before paren'); ## TODO: type
6626            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6627            
6628        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6629          $self->{line_prev} = $self->{line};
6630          $self->{column_prev} = $self->{column};
6631          $self->{column}++;
6632          $self->{nc}
6633              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6634        } else {
6635          $self->{set_nc}->($self);
6636        }
6637      
6638            redo A;
6639          } elsif ($self->{nc} == -1) {
6640            ## XML5: No parse error.
6641            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6642            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6643            
                ## NOTE(review): a character is consumed here although nc is
                ## already -1, whereas the -1 branches of MD_NAME_STATE and
                ## DOCTYPE_ATTLIST_NAME_AFTER_STATE return without consuming.
                ## Confirm that advancing past -1 is harmless.
6644        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6645          $self->{line_prev} = $self->{line};
6646          $self->{column_prev} = $self->{column};
6647          $self->{column}++;
6648          $self->{nc}
6649              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6650        } else {
6651          $self->{set_nc}->($self);
6652        }
6653      
6654            return  ($self->{ct});
                ## Not reached: follows an unconditional return.
6655            redo A;
6656          } else {
6657            ## XML5: Not defined yet.
6658            $self->{ca}->{type} .= chr $self->{nc};
6659            ## Stay in the state.
6660            
6661        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6662          $self->{line_prev} = $self->{line};
6663          $self->{column_prev} = $self->{column};
6664          $self->{column}++;
6665          $self->{nc}
6666              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6667        } else {
6668          $self->{set_nc}->($self);
6669        }
6670      
6671            redo A;
6672          }
6673        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE) {
              ## Handler for the position after an attribute type keyword and
              ## the white space that ended it.
              ##   - white space: skipped.
              ##   - "(": start reading allowed tokens
              ##     (BEFORE_ALLOWED_TOKEN_STATE).
              ##   - "#": go to
              ##     DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE.
              ##   - double or single quote: start an empty ca->{value} and
              ##     read it in the matching quoted attribute value state.
              ##   - ">": report 'no attr default' and return ct.
              ##   - nc == -1 (presumably end of input): report 'unclosed md'
              ##     and return ct.
              ##   - anything else: report 'unquoted attr value', start an
              ##     empty ca->{value} and reprocess the character in
              ##     ATTRIBUTE_VALUE_UNQUOTED_STATE.
6674          if ($is_space->{$self->{nc}}) {
6675            ## Stay in the state.
6676            
                ## Consume the next input character: taken from char_buffer
                ## while unread characters remain there, otherwise the set_nc
                ## callback is invoked (presumably it refills nc -- its body
                ## is not visible here).
6677        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6678          $self->{line_prev} = $self->{line};
6679          $self->{column_prev} = $self->{column};
6680          $self->{column}++;
6681          $self->{nc}
6682              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6683        } else {
6684          $self->{set_nc}->($self);
6685        }
6686      
6687            redo A;
6688          } elsif ($self->{nc} == 0x0028) { # (
6689            ## XML5: Same as "anything else".
6690            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6691            
6692        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6693          $self->{line_prev} = $self->{line};
6694          $self->{column_prev} = $self->{column};
6695          $self->{column}++;
6696          $self->{nc}
6697              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6698        } else {
6699          $self->{set_nc}->($self);
6700        }
6701      
6702            redo A;
6703          } elsif ($self->{nc} == 0x0023) { # #
6704            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6705            
6706        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6707          $self->{line_prev} = $self->{line};
6708          $self->{column_prev} = $self->{column};
6709          $self->{column}++;
6710          $self->{nc}
6711              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6712        } else {
6713          $self->{set_nc}->($self);
6714        }
6715      
6716            redo A;
6717          } elsif ($self->{nc} == 0x0022) { # "
6718            ## XML5: Same as "anything else".
6719            $self->{ca}->{value} = '';
6720            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6721            
6722        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6723          $self->{line_prev} = $self->{line};
6724          $self->{column_prev} = $self->{column};
6725          $self->{column}++;
6726          $self->{nc}
6727              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6728        } else {
6729          $self->{set_nc}->($self);
6730        }
6731      
6732            redo A;
6733          } elsif ($self->{nc} == 0x0027) { # '
6734            ## XML5: Same as "anything else".
6735            $self->{ca}->{value} = '';
6736            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6737            
6738        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6739          $self->{line_prev} = $self->{line};
6740          $self->{column_prev} = $self->{column};
6741          $self->{column}++;
6742          $self->{nc}
6743              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6744        } else {
6745          $self->{set_nc}->($self);
6746        }
6747      
6748            redo A;
6749          } elsif ($self->{nc} == 0x003E) { # >
6750            ## XML5: Same as "anything else".
6751            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6752            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6753            
6754        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6755          $self->{line_prev} = $self->{line};
6756          $self->{column_prev} = $self->{column};
6757          $self->{column}++;
6758          $self->{nc}
6759              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6760        } else {
6761          $self->{set_nc}->($self);
6762        }
6763      
6764            return  ($self->{ct}); # ATTLIST
                ## Not reached: follows an unconditional return.
6765            redo A;
6766          } elsif ($self->{nc} == -1) {
6767            ## XML5: No parse error.
6768            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6769            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6770            
                ## NOTE(review): a character is consumed here although nc is
                ## already -1, whereas the -1 branches of MD_NAME_STATE and
                ## DOCTYPE_ATTLIST_NAME_AFTER_STATE return without consuming.
                ## Confirm that advancing past -1 is harmless.
6771        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6772          $self->{line_prev} = $self->{line};
6773          $self->{column_prev} = $self->{column};
6774          $self->{column}++;
6775          $self->{nc}
6776              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6777        } else {
6778          $self->{set_nc}->($self);
6779        }
6780      
6781            return  ($self->{ct});
                ## Not reached: follows an unconditional return.
6782            redo A;
6783          } else {
6784            ## XML5: Switch to the "DOCTYPE bogus comment state".
6785            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
6786            $self->{ca}->{value} = '';
6787            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
                ## The current character is not consumed, so the unquoted
                ## value state processes it as the first value character.
6788            ## Reconsume.
6789            redo A;
6790          }
6791        } elsif ($self->{state} == BEFORE_ALLOWED_TOKEN_STATE) {
              ## Handler for the position before one entry of a parenthesized
              ## list of allowed tokens (entered after "(" and after "|").
              ##   - white space: skipped.
              ##   - "|": an entry is missing; report 'empty allowed token'
              ##     and keep waiting for an entry.
              ##   - ")": an entry is missing; report 'empty allowed token'
              ##     and go to AFTER_ALLOWED_TOKENS_STATE.
              ##   - ">": report 'unclosed allowed tokens' and return ct.
              ##   - nc == -1 (presumably end of input): report 'unclosed md'
              ##     and return ct.
              ##   - anything else: first character of an entry; pushed as a
              ##     new element of ca->{tokens}, then ALLOWED_TOKEN_STATE.
6792          if ($is_space->{$self->{nc}}) {
6793            ## Stay in the state.
6794            
                ## Consume the next input character: taken from char_buffer
                ## while unread characters remain there, otherwise the set_nc
                ## callback is invoked (presumably it refills nc -- its body
                ## is not visible here).
6795        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6796          $self->{line_prev} = $self->{line};
6797          $self->{column_prev} = $self->{column};
6798          $self->{column}++;
6799          $self->{nc}
6800              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6801        } else {
6802          $self->{set_nc}->($self);
6803        }
6804      
6805            redo A;
6806          } elsif ($self->{nc} == 0x007C) { # |
6807            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty allowed token'); ## TODO: type
6808            ## Stay in the state.
6809            
6810        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6811          $self->{line_prev} = $self->{line};
6812          $self->{column_prev} = $self->{column};
6813          $self->{column}++;
6814          $self->{nc}
6815              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6816        } else {
6817          $self->{set_nc}->($self);
6818        }
6819      
6820            redo A;
6821          } elsif ($self->{nc} == 0x0029) { # )
6822            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty allowed token'); ## TODO: type
6823            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
6824            
6825        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6826          $self->{line_prev} = $self->{line};
6827          $self->{column_prev} = $self->{column};
6828          $self->{column}++;
6829          $self->{nc}
6830              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6831        } else {
6832          $self->{set_nc}->($self);
6833        }
6834      
6835            redo A;
6836          } elsif ($self->{nc} == 0x003E) { # >
6837            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
6838            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6839            
6840        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6841          $self->{line_prev} = $self->{line};
6842          $self->{column_prev} = $self->{column};
6843          $self->{column}++;
6844          $self->{nc}
6845              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6846        } else {
6847          $self->{set_nc}->($self);
6848        }
6849      
6850            return  ($self->{ct}); # ATTLIST
                ## Not reached: follows an unconditional return.
6851            redo A;
6852          } elsif ($self->{nc} == -1) {
6853            ## XML5: No parse error.
6854            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6855            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6856            
                ## NOTE(review): a character is consumed here although nc is
                ## already -1, whereas the -1 branches of MD_NAME_STATE and
                ## DOCTYPE_ATTLIST_NAME_AFTER_STATE return without consuming.
                ## Confirm that advancing past -1 is harmless.
6857        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6858          $self->{line_prev} = $self->{line};
6859          $self->{column_prev} = $self->{column};
6860          $self->{column}++;
6861          $self->{nc}
6862              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6863        } else {
6864          $self->{set_nc}->($self);
6865        }
6866      
6867            return  ($self->{ct});
                ## Not reached: follows an unconditional return.
6868            redo A;
6869          } else {
                ## Start a new entry in the allowed-token list with the
                ## current character as its first character.
6870            push @{$self->{ca}->{tokens}}, chr $self->{nc};
6871            $self->{state} = ALLOWED_TOKEN_STATE;
6872            
6873        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6874          $self->{line_prev} = $self->{line};
6875          $self->{column_prev} = $self->{column};
6876          $self->{column}++;
6877          $self->{nc}
6878              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6879        } else {
6880          $self->{set_nc}->($self);
6881        }
6882      
6883            redo A;
6884          }
6885        } elsif ($self->{state} == ALLOWED_TOKEN_STATE) {
6886          if ($is_space->{$self->{nc}}) {
6887            $self->{state} = AFTER_ALLOWED_TOKEN_STATE;
6888            
6889        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6890          $self->{line_prev} = $self->{line};
6891          $self->{column_prev} = $self->{column};
6892          $self->{column}++;
6893          $self->{nc}
6894              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6895        } else {
6896          $self->{set_nc}->($self);
6897        }
6898      
6899            redo A;
6900          } elsif ($self->{nc} == 0x007C) { # |
6901            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6902            
6903        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6904          $self->{line_prev} = $self->{line};
6905          $self->{column_prev} = $self->{column};
6906          $self->{column}++;
6907          $self->{nc}
6908              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6909        } else {
6910          $self->{set_nc}->($self);
6911        }
6912      
6913            redo A;
6914          } elsif ($self->{nc} == 0x0029) { # )
6915            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
6916            
6917        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6918          $self->{line_prev} = $self->{line};
6919          $self->{column_prev} = $self->{column};
6920          $self->{column}++;
6921          $self->{nc}
6922              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6923        } else {
6924          $self->{set_nc}->($self);
6925        }
6926      
6927            redo A;
6928          } elsif ($self->{nc} == 0x003E) { # >
6929            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
6930            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6931            
6932        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6933          $self->{line_prev} = $self->{line};
6934          $self->{column_prev} = $self->{column};
6935          $self->{column}++;
6936          $self->{nc}
6937              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6938        } else {
6939          $self->{set_nc}->($self);
6940        }
6941      
6942            return  ($self->{ct}); # ATTLIST
6943            redo A;
6944          } elsif ($self->{nc} == -1) {
6945            ## XML5: No parse error.
6946            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6947            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6948            
6949        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6950          $self->{line_prev} = $self->{line};
6951          $self->{column_prev} = $self->{column};
6952          $self->{column}++;
6953          $self->{nc}
6954              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6955        } else {
6956          $self->{set_nc}->($self);
6957        }
6958      
6959            return  ($self->{ct});
6960            redo A;
6961          } else {
6962            $self->{ca}->{tokens}->[-1] .= chr $self->{nc};
6963            ## Stay in the state.
6964            
6965        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6966          $self->{line_prev} = $self->{line};
6967          $self->{column_prev} = $self->{column};
6968          $self->{column}++;
6969          $self->{nc}
6970              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6971        } else {
6972          $self->{set_nc}->($self);
6973        }
6974      
6975            redo A;
6976          }
6977        } elsif ($self->{state} == AFTER_ALLOWED_TOKEN_STATE) {
6978          if ($is_space->{$self->{nc}}) {
6979            ## Stay in the state.
6980            
6981        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6982          $self->{line_prev} = $self->{line};
6983          $self->{column_prev} = $self->{column};
6984          $self->{column}++;
6985          $self->{nc}
6986              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6987        } else {
6988          $self->{set_nc}->($self);
6989        }
6990      
6991            redo A;
6992          } elsif ($self->{nc} == 0x007C) { # |
6993            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6994            
6995        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6996          $self->{line_prev} = $self->{line};
6997          $self->{column_prev} = $self->{column};
6998          $self->{column}++;
6999          $self->{nc}
7000              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7001        } else {
7002          $self->{set_nc}->($self);
7003        }
7004      
7005            redo A;
7006          } elsif ($self->{nc} == 0x0029) { # )
7007            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
7008            
7009        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7010          $self->{line_prev} = $self->{line};
7011          $self->{column_prev} = $self->{column};
7012          $self->{column}++;
7013          $self->{nc}
7014              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7015        } else {
7016          $self->{set_nc}->($self);
7017        }
7018      
7019            redo A;
7020          } elsif ($self->{nc} == 0x003E) { # >
7021            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
7022            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7023            
7024        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7025          $self->{line_prev} = $self->{line};
7026          $self->{column_prev} = $self->{column};
7027          $self->{column}++;
7028          $self->{nc}
7029              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7030        } else {
7031          $self->{set_nc}->($self);
7032        }
7033      
7034            return  ($self->{ct}); # ATTLIST
7035            redo A;
7036          } elsif ($self->{nc} == -1) {
7037            ## XML5: No parse error.
7038            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7039            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7040            
7041        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7042          $self->{line_prev} = $self->{line};
7043          $self->{column_prev} = $self->{column};
7044          $self->{column}++;
7045          $self->{nc}
7046              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7047        } else {
7048          $self->{set_nc}->($self);
7049        }
7050      
7051            return  ($self->{ct});
7052            redo A;
7053          } else {
7054            $self->{parse_error}->(level => $self->{level}->{must}, type => 'space in allowed token', ## TODO: type
7055                            line => $self->{line_prev},
7056                            column => $self->{column_prev});
7057            $self->{ca}->{tokens}->[-1] .= ' ' . chr $self->{nc};
7058            $self->{state} = ALLOWED_TOKEN_STATE;
7059            
7060        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7061          $self->{line_prev} = $self->{line};
7062          $self->{column_prev} = $self->{column};
7063          $self->{column}++;
7064          $self->{nc}
7065              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7066        } else {
7067          $self->{set_nc}->($self);
7068        }
7069      
7070            redo A;
7071          }
7072        } elsif ($self->{state} == AFTER_ALLOWED_TOKENS_STATE) {
7073          if ($is_space->{$self->{nc}}) {
7074            $self->{state} = BEFORE_ATTR_DEFAULT_STATE;
7075            
7076        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7077          $self->{line_prev} = $self->{line};
7078          $self->{column_prev} = $self->{column};
7079          $self->{column}++;
7080          $self->{nc}
7081              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7082        } else {
7083          $self->{set_nc}->($self);
7084        }
7085      
7086            redo A;
7087          } elsif ($self->{nc} == 0x0023) { # #
7088            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7089            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
7090            
7091        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7092          $self->{line_prev} = $self->{line};
7093          $self->{column_prev} = $self->{column};
7094          $self->{column}++;
7095          $self->{nc}
7096              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7097        } else {
7098          $self->{set_nc}->($self);
7099        }
7100      
7101            redo A;
7102          } elsif ($self->{nc} == 0x0022) { # "
7103            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7104            $self->{ca}->{value} = '';
7105            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7106            
7107        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7108          $self->{line_prev} = $self->{line};
7109          $self->{column_prev} = $self->{column};
7110          $self->{column}++;
7111          $self->{nc}
7112              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7113        } else {
7114          $self->{set_nc}->($self);
7115        }
7116      
7117            redo A;
7118          } elsif ($self->{nc} == 0x0027) { # '
7119            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7120            $self->{ca}->{value} = '';
7121            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7122            
7123        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7124          $self->{line_prev} = $self->{line};
7125          $self->{column_prev} = $self->{column};
7126          $self->{column}++;
7127          $self->{nc}
7128              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7129        } else {
7130          $self->{set_nc}->($self);
7131        }
7132      
7133            redo A;
7134          } elsif ($self->{nc} == 0x003E) { # >
7135            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
7136            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7137            
7138        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7139          $self->{line_prev} = $self->{line};
7140          $self->{column_prev} = $self->{column};
7141          $self->{column}++;
7142          $self->{nc}
7143              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7144        } else {
7145          $self->{set_nc}->($self);
7146        }
7147      
7148            return  ($self->{ct}); # ATTLIST
7149            redo A;
7150          } elsif ($self->{nc} == -1) {
7151            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7152            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7153            
7154        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7155          $self->{line_prev} = $self->{line};
7156          $self->{column_prev} = $self->{column};
7157          $self->{column}++;
7158          $self->{nc}
7159              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7160        } else {
7161          $self->{set_nc}->($self);
7162        }
7163      
7164            return  ($self->{ct});
7165            redo A;
7166          } else {
7167            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
7168            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
7169            ## Reconsume.
7170            redo A;
7171          }
7172        } elsif ($self->{state} == BEFORE_ATTR_DEFAULT_STATE) {
7173          if ($is_space->{$self->{nc}}) {
7174            ## Stay in the state.
7175            
7176        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7177          $self->{line_prev} = $self->{line};
7178          $self->{column_prev} = $self->{column};
7179          $self->{column}++;
7180          $self->{nc}
7181              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7182        } else {
7183          $self->{set_nc}->($self);
7184        }
7185      
7186            redo A;
7187          } elsif ($self->{nc} == 0x0023) { # #
7188            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
7189            
7190        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7191          $self->{line_prev} = $self->{line};
7192          $self->{column_prev} = $self->{column};
7193          $self->{column}++;
7194          $self->{nc}
7195              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7196        } else {
7197          $self->{set_nc}->($self);
7198        }
7199      
7200            redo A;
7201          } elsif ($self->{nc} == 0x0022) { # "
7202            $self->{ca}->{value} = '';
7203            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7204            
7205        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7206          $self->{line_prev} = $self->{line};
7207          $self->{column_prev} = $self->{column};
7208          $self->{column}++;
7209          $self->{nc}
7210              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7211        } else {
7212          $self->{set_nc}->($self);
7213        }
7214      
7215            redo A;
7216          } elsif ($self->{nc} == 0x0027) { # '
7217            $self->{ca}->{value} = '';
7218            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7219            
7220        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7221          $self->{line_prev} = $self->{line};
7222          $self->{column_prev} = $self->{column};
7223          $self->{column}++;
7224          $self->{nc}
7225              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7226        } else {
7227          $self->{set_nc}->($self);
7228        }
7229      
7230            redo A;
7231          } elsif ($self->{nc} == 0x003E) { # >
7232            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
7233            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7234            
7235        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7236          $self->{line_prev} = $self->{line};
7237          $self->{column_prev} = $self->{column};
7238          $self->{column}++;
7239          $self->{nc}
7240              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7241        } else {
7242          $self->{set_nc}->($self);
7243        }
7244      
7245            return  ($self->{ct}); # ATTLIST
7246            redo A;
7247          } elsif ($self->{nc} == -1) {
7248            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7249            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7250            
7251        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7252          $self->{line_prev} = $self->{line};
7253          $self->{column_prev} = $self->{column};
7254          $self->{column}++;
7255          $self->{nc}
7256              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7257        } else {
7258          $self->{set_nc}->($self);
7259        }
7260      
7261            return  ($self->{ct});
7262            redo A;
7263          } else {
7264            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
7265            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
7266            ## Reconsume.
7267            redo A;
7268          }
7269        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE) {
7270          if ($is_space->{$self->{nc}}) {
7271            ## XML5: No parse error.
7272            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no default type'); ## TODO: type
7273            $self->{state} = BOGUS_MD_STATE;
7274            ## Reconsume.
7275            redo A;
7276          } elsif ($self->{nc} == 0x0022) { # "
7277            ## XML5: Same as "anything else".
7278            $self->{ca}->{value} = '';
7279            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7280            
7281        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7282          $self->{line_prev} = $self->{line};
7283          $self->{column_prev} = $self->{column};
7284          $self->{column}++;
7285          $self->{nc}
7286              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7287        } else {
7288          $self->{set_nc}->($self);
7289        }
7290      
7291            redo A;
7292          } elsif ($self->{nc} == 0x0027) { # '
7293            ## XML5: Same as "anything else".
7294            $self->{ca}->{value} = '';
7295            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7296            
7297        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7298          $self->{line_prev} = $self->{line};
7299          $self->{column_prev} = $self->{column};
7300          $self->{column}++;
7301          $self->{nc}
7302              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7303        } else {
7304          $self->{set_nc}->($self);
7305        }
7306      
7307            redo A;
7308          } elsif ($self->{nc} == 0x003E) { # >
7309            ## XML5: Same as "anything else".
7310            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
7311            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7312            
7313        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7314          $self->{line_prev} = $self->{line};
7315          $self->{column_prev} = $self->{column};
7316          $self->{column}++;
7317          $self->{nc}
7318              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7319        } else {
7320          $self->{set_nc}->($self);
7321        }
7322      
7323            return  ($self->{ct}); # ATTLIST
7324            redo A;
7325          } elsif ($self->{nc} == -1) {
7326            ## XML5: No parse error.
7327            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7328            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7329            
7330        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7331          $self->{line_prev} = $self->{line};
7332          $self->{column_prev} = $self->{column};
7333          $self->{column}++;
7334          $self->{nc}
7335              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7336        } else {
7337          $self->{set_nc}->($self);
7338        }
7339      
7340            return  ($self->{ct});
7341            redo A;
7342          } else {
7343            $self->{ca}->{default} = chr $self->{nc};
7344            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE;
7345            
7346        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7347          $self->{line_prev} = $self->{line};
7348          $self->{column_prev} = $self->{column};
7349          $self->{column}++;
7350          $self->{nc}
7351              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7352        } else {
7353          $self->{set_nc}->($self);
7354        }
7355      
7356            redo A;
7357          }
7358        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE) {
7359          if ($is_space->{$self->{nc}}) {
7360            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE;
7361            
7362        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7363          $self->{line_prev} = $self->{line};
7364          $self->{column_prev} = $self->{column};
7365          $self->{column}++;
7366          $self->{nc}
7367              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7368        } else {
7369          $self->{set_nc}->($self);
7370        }
7371      
7372            redo A;
7373          } elsif ($self->{nc} == 0x0022) { # "
7374            ## XML5: Same as "anything else".
7375            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7376            $self->{ca}->{value} = '';
7377            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7378            
7379        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7380          $self->{line_prev} = $self->{line};
7381          $self->{column_prev} = $self->{column};
7382          $self->{column}++;
7383          $self->{nc}
7384              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7385        } else {
7386          $self->{set_nc}->($self);
7387        }
7388      
7389            redo A;
7390          } elsif ($self->{nc} == 0x0027) { # '
7391            ## XML5: Same as "anything else".
7392            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7393            $self->{ca}->{value} = '';
7394            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7395            
7396        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7397          $self->{line_prev} = $self->{line};
7398          $self->{column_prev} = $self->{column};
7399          $self->{column}++;
7400          $self->{nc}
7401              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7402        } else {
7403          $self->{set_nc}->($self);
7404        }
7405      
7406            redo A;
7407          } elsif ($self->{nc} == 0x003E) { # >
7408            ## XML5: Same as "anything else".
7409            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7410            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7411            
7412        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7413          $self->{line_prev} = $self->{line};
7414          $self->{column_prev} = $self->{column};
7415          $self->{column}++;
7416          $self->{nc}
7417              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7418        } else {
7419          $self->{set_nc}->($self);
7420        }
7421      
7422            return  ($self->{ct}); # ATTLIST
7423            redo A;
7424          } elsif ($self->{nc} == -1) {
7425            ## XML5: No parse error.
7426            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7427            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7428            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7429            
7430        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7431          $self->{line_prev} = $self->{line};
7432          $self->{column_prev} = $self->{column};
7433          $self->{column}++;
7434          $self->{nc}
7435              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7436        } else {
7437          $self->{set_nc}->($self);
7438        }
7439      
7440            return  ($self->{ct});
7441            redo A;
7442          } else {
7443            $self->{ca}->{default} .= chr $self->{nc};
7444            ## Stay in the state.
7445            
7446        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7447          $self->{line_prev} = $self->{line};
7448          $self->{column_prev} = $self->{column};
7449          $self->{column}++;
7450          $self->{nc}
7451              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7452        } else {
7453          $self->{set_nc}->($self);
7454        }
7455      
7456            redo A;
7457          }
7458        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE) {
7459          if ($is_space->{$self->{nc}}) {
7460            ## Stay in the state.
7461            
7462        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7463          $self->{line_prev} = $self->{line};
7464          $self->{column_prev} = $self->{column};
7465          $self->{column}++;
7466          $self->{nc}
7467              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7468        } else {
7469          $self->{set_nc}->($self);
7470        }
7471      
7472            redo A;
7473          } elsif ($self->{nc} == 0x0022) { # "
7474            $self->{ca}->{value} = '';
7475            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7476            
7477        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7478          $self->{line_prev} = $self->{line};
7479          $self->{column_prev} = $self->{column};
7480          $self->{column}++;
7481          $self->{nc}
7482              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7483        } else {
7484          $self->{set_nc}->($self);
7485        }
7486      
7487            redo A;
7488          } elsif ($self->{nc} == 0x0027) { # '
7489            $self->{ca}->{value} = '';
7490            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7491            
7492        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7493          $self->{line_prev} = $self->{line};
7494          $self->{column_prev} = $self->{column};
7495          $self->{column}++;
7496          $self->{nc}
7497              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7498        } else {
7499          $self->{set_nc}->($self);
7500        }
7501      
7502            redo A;
7503          } elsif ($self->{nc} == 0x003E) { # >
7504            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7505            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7506            
7507        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7508          $self->{line_prev} = $self->{line};
7509          $self->{column_prev} = $self->{column};
7510          $self->{column}++;
7511          $self->{nc}
7512              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7513        } else {
7514          $self->{set_nc}->($self);
7515        }
7516      
7517            return  ($self->{ct}); # ATTLIST
7518            redo A;
7519          } elsif ($self->{nc} == -1) {
7520            ## XML5: No parse error.
7521            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7522            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7523            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7524            
7525        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7526          $self->{line_prev} = $self->{line};
7527          $self->{column_prev} = $self->{column};
7528          $self->{column}++;
7529          $self->{nc}
7530              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7531        } else {
7532          $self->{set_nc}->($self);
7533        }
7534      
7535            return  ($self->{ct});
7536            redo A;
7537          } else {
7538            ## XML5: Not defined yet.
7539            if ($self->{ca}->{default} eq 'FIXED') {
7540              $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
7541            } else {
7542              push @{$self->{ct}->{attrdefs}}, $self->{ca};
7543              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7544            }
7545            ## Reconsume.
7546            redo A;
7547          }
7548        } elsif ($self->{state} == AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE) {
7549          if ($is_space->{$self->{nc}} or
7550              $self->{nc} == -1 or
7551              $self->{nc} == 0x003E) { # >
7552            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7553            ## Reconsume.
7554            redo A;
7555          } else {
7556            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before attr name'); ## TODO: type
7557            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7558            ## Reconsume.
7559            redo A;
7560          }
7561        } elsif ($self->{state} == NDATA_STATE) {
7562          ## ASCII case-insensitive
7563          if ($self->{nc} == [
7564                undef,
7565                0x0044, # D
7566                0x0041, # A
7567                0x0054, # T
7568              ]->[length $self->{kwd}] or
7569              $self->{nc} == [
7570                undef,
7571                0x0064, # d
7572                0x0061, # a
7573                0x0074, # t
7574              ]->[length $self->{kwd}]) {
7575            
7576            ## Stay in the state.
7577            $self->{kwd} .= chr $self->{nc};
7578            
7579        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7580          $self->{line_prev} = $self->{line};
7581          $self->{column_prev} = $self->{column};
7582          $self->{column}++;
7583          $self->{nc}
7584              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7585        } else {
7586          $self->{set_nc}->($self);
7587        }
7588      
7589            redo A;
7590          } elsif ((length $self->{kwd}) == 4 and
7591                   ($self->{nc} == 0x0041 or # A
7592                    $self->{nc} == 0x0061)) { # a
7593            if ($self->{kwd} ne 'NDAT' or $self->{nc} == 0x0061) { # a
7594              
7595              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
7596                              text => 'NDATA',
7597                              line => $self->{line_prev},
7598                              column => $self->{column_prev} - 4);
7599            } else {
7600              
7601            }
7602            $self->{state} = AFTER_NDATA_STATE;
7603            
7604        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7605          $self->{line_prev} = $self->{line};
7606          $self->{column_prev} = $self->{column};
7607          $self->{column}++;
7608          $self->{nc}
7609              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7610        } else {
7611          $self->{set_nc}->($self);
7612        }
7613      
7614            redo A;
7615          } else {
7616            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after literal', ## TODO: type
7617                            line => $self->{line_prev},
7618                            column => $self->{column_prev} + 1
7619                                - length $self->{kwd});
7620            
7621            $self->{state} = BOGUS_MD_STATE;
7622            ## Reconsume.
7623            redo A;
7624          }
7625        } elsif ($self->{state} == AFTER_NDATA_STATE) {
7626          if ($is_space->{$self->{nc}}) {
7627            $self->{state} = BEFORE_NOTATION_NAME_STATE;
7628            
7629        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7630          $self->{line_prev} = $self->{line};
7631          $self->{column_prev} = $self->{column};
7632          $self->{column}++;
7633          $self->{nc}
7634              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7635        } else {
7636          $self->{set_nc}->($self);
7637        }
7638      
7639            redo A;
7640          } elsif ($self->{nc} == 0x003E) { # >
7641            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no notation name'); ## TODO: type
7642            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7643            
7644        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7645          $self->{line_prev} = $self->{line};
7646          $self->{column_prev} = $self->{column};
7647          $self->{column}++;
7648          $self->{nc}
7649              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7650        } else {
7651          $self->{set_nc}->($self);
7652        }
7653      
7654            return  ($self->{ct}); # ENTITY
7655            redo A;
7656          } elsif ($self->{nc} == -1) {
7657            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7658            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7659            
7660        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7661          $self->{line_prev} = $self->{line};
7662          $self->{column_prev} = $self->{column};
7663          $self->{column}++;
7664          $self->{nc}
7665              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7666        } else {
7667          $self->{set_nc}->($self);
7668        }
7669      
7670            return  ($self->{ct}); # ENTITY
7671            redo A;
7672          } else {
7673            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after literal', ## TODO: type
7674                            line => $self->{line_prev},
7675                            column => $self->{column_prev} + 1
7676                                - length $self->{kwd});
7677            $self->{state} = BOGUS_MD_STATE;
7678            ## Reconsume.
7679            redo A;
7680          }
7681        } elsif ($self->{state} == BEFORE_NOTATION_NAME_STATE) {
7682          if ($is_space->{$self->{nc}}) {
7683            ## Stay in the state.
7684            
7685        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7686          $self->{line_prev} = $self->{line};
7687          $self->{column_prev} = $self->{column};
7688          $self->{column}++;
7689          $self->{nc}
7690              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7691        } else {
7692          $self->{set_nc}->($self);
7693        }
7694      
7695            redo A;
7696          } elsif ($self->{nc} == 0x003E) { # >
7697            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no notation name'); ## TODO: type
7698            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7699            
7700        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7701          $self->{line_prev} = $self->{line};
7702          $self->{column_prev} = $self->{column};
7703          $self->{column}++;
7704          $self->{nc}
7705              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7706        } else {
7707          $self->{set_nc}->($self);
7708        }
7709      
7710            return  ($self->{ct}); # ENTITY
7711            redo A;
7712          } elsif ($self->{nc} == -1) {
7713            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7714            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7715            
7716        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7717          $self->{line_prev} = $self->{line};
7718          $self->{column_prev} = $self->{column};
7719          $self->{column}++;
7720          $self->{nc}
7721              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7722        } else {
7723          $self->{set_nc}->($self);
7724        }
7725      
7726            return  ($self->{ct}); # ENTITY
7727            redo A;
7728          } else {
7729            $self->{ct}->{notation} = chr $self->{nc}; # ENTITY
7730            $self->{state} = NOTATION_NAME_STATE;
7731            
7732        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7733          $self->{line_prev} = $self->{line};
7734          $self->{column_prev} = $self->{column};
7735          $self->{column}++;
7736          $self->{nc}
7737              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7738        } else {
7739          $self->{set_nc}->($self);
7740        }
7741      
7742            redo A;
7743          }
7744        } elsif ($self->{state} == NOTATION_NAME_STATE) {
7745          if ($is_space->{$self->{nc}}) {
7746            $self->{state} = AFTER_MD_DEF_STATE;
7747            
7748        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7749          $self->{line_prev} = $self->{line};
7750          $self->{column_prev} = $self->{column};
7751          $self->{column}++;
7752          $self->{nc}
7753              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7754        } else {
7755          $self->{set_nc}->($self);
7756        }
7757      
7758            redo A;
7759          } elsif ($self->{nc} == 0x003E) { # >
7760            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7761            
7762        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7763          $self->{line_prev} = $self->{line};
7764          $self->{column_prev} = $self->{column};
7765          $self->{column}++;
7766          $self->{nc}
7767              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7768        } else {
7769          $self->{set_nc}->($self);
7770        }
7771      
7772            return  ($self->{ct}); # ENTITY
7773            redo A;
7774          } elsif ($self->{nc} == -1) {
7775            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7776            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7777            
7778        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7779          $self->{line_prev} = $self->{line};
7780          $self->{column_prev} = $self->{column};
7781          $self->{column}++;
7782          $self->{nc}
7783              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7784        } else {
7785          $self->{set_nc}->($self);
7786        }
7787      
7788            return  ($self->{ct}); # ENTITY
7789            redo A;
7790          } else {
7791            $self->{ct}->{notation} .= chr $self->{nc}; # ENTITY
7792            ## Stay in the state.
7793            
7794        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7795          $self->{line_prev} = $self->{line};
7796          $self->{column_prev} = $self->{column};
7797          $self->{column}++;
7798          $self->{nc}
7799              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7800        } else {
7801          $self->{set_nc}->($self);
7802        }
7803      
7804            redo A;
7805          }
7806        } elsif ($self->{state} == DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE) {
7807          if ($self->{nc} == 0x0022) { # "
7808            $self->{state} = AFTER_MD_DEF_STATE;
7809            
7810        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7811          $self->{line_prev} = $self->{line};
7812          $self->{column_prev} = $self->{column};
7813          $self->{column}++;
7814          $self->{nc}
7815              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7816        } else {
7817          $self->{set_nc}->($self);
7818        }
7819      
7820            redo A;
7821          } elsif ($self->{nc} == 0x0026) { # &
7822            $self->{prev_state} = $self->{state};
7823            $self->{state} = ENTITY_VALUE_ENTITY_STATE;
7824            $self->{entity_add} = 0x0022; # "
7825            
7826        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7827          $self->{line_prev} = $self->{line};
7828          $self->{column_prev} = $self->{column};
7829          $self->{column}++;
7830          $self->{nc}
7831              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7832        } else {
7833          $self->{set_nc}->($self);
7834        }
7835      
7836            redo A;
7837    ## TODO: %
7838          } elsif ($self->{nc} == -1) {
7839            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed entity value'); ## TODO: type
7840            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7841            ## Reconsume.
7842            return  ($self->{ct}); # ENTITY
7843            redo A;
7844          } else {
7845            $self->{ct}->{value} .= chr $self->{nc}; # ENTITY
7846            
7847        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7848          $self->{line_prev} = $self->{line};
7849          $self->{column_prev} = $self->{column};
7850          $self->{column}++;
7851          $self->{nc}
7852              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7853        } else {
7854          $self->{set_nc}->($self);
7855        }
7856      
7857            redo A;
7858          }
7859        } elsif ($self->{state} == DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE) {
7860          if ($self->{nc} == 0x0027) { # '
7861            $self->{state} = AFTER_MD_DEF_STATE;
7862            
7863        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7864          $self->{line_prev} = $self->{line};
7865          $self->{column_prev} = $self->{column};
7866          $self->{column}++;
7867          $self->{nc}
7868              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7869        } else {
7870          $self->{set_nc}->($self);
7871        }
7872      
7873            redo A;
7874          } elsif ($self->{nc} == 0x0026) { # &
7875            $self->{prev_state} = $self->{state};
7876            $self->{state} = ENTITY_VALUE_ENTITY_STATE;
7877            $self->{entity_add} = 0x0027; # '
7878            
7879        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7880          $self->{line_prev} = $self->{line};
7881          $self->{column_prev} = $self->{column};
7882          $self->{column}++;
7883          $self->{nc}
7884              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7885        } else {
7886          $self->{set_nc}->($self);
7887        }
7888      
7889            redo A;
7890    ## TODO: %
7891          } elsif ($self->{nc} == -1) {
7892            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed entity value'); ## TODO: type
7893            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7894            ## Reconsume.
7895            return  ($self->{ct}); # ENTITY
7896            redo A;
7897          } else {
7898            $self->{ct}->{value} .= chr $self->{nc}; # ENTITY
7899            
7900        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7901          $self->{line_prev} = $self->{line};
7902          $self->{column_prev} = $self->{column};
7903          $self->{column}++;
7904          $self->{nc}
7905              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7906        } else {
7907          $self->{set_nc}->($self);
7908        }
7909      
7910            redo A;
7911          }
7912        } elsif ($self->{state} == ENTITY_VALUE_ENTITY_STATE) {
7913          if ($is_space->{$self->{nc}} or
7914              {
7915                0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
7916                $self->{entity_add} => 1,
7917              }->{$self->{nc}}) {
7918            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero',
7919                            line => $self->{line_prev},
7920                            column => $self->{column_prev}
7921                                + ($self->{nc} == -1 ? 1 : 0));
7922            ## Don't consume
7923            ## Return nothing.
7924            #
7925          } elsif ($self->{nc} == 0x0023) { # #
7926            $self->{ca} = $self->{ct};
7927            $self->{state} = ENTITY_HASH_STATE;
7928            $self->{kwd} = '#';
7929            
7930        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7931          $self->{line_prev} = $self->{line};
7932          $self->{column_prev} = $self->{column};
7933          $self->{column}++;
7934          $self->{nc}
7935              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7936        } else {
7937          $self->{set_nc}->($self);
7938        }
7939      
7940            redo A;
7941          } else {
7942            #
7943          }
7944    
7945          $self->{ct}->{value} .= '&';
7946          $self->{state} = $self->{prev_state};
7947          ## Reconsume.
7948          redo A;
7949        } elsif ($self->{state} == AFTER_ELEMENT_NAME_STATE) {
7950          if ($is_space->{$self->{nc}}) {
7951            $self->{state} = BEFORE_ELEMENT_CONTENT_STATE;
7952            
7953        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7954          $self->{line_prev} = $self->{line};
7955          $self->{column_prev} = $self->{column};
7956          $self->{column}++;
7957          $self->{nc}
7958              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7959        } else {
7960          $self->{set_nc}->($self);
7961        }
7962      
7963            redo A;
7964          } elsif ($self->{nc} == 0x0028) { # (
7965            $self->{state} = AFTER_CM_GROUP_OPEN_STATE;
7966            $self->{ct}->{content} = ['('];
7967            $self->{group_depth} = 1;
7968            
7969        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7970          $self->{line_prev} = $self->{line};
7971          $self->{column_prev} = $self->{column};
7972          $self->{column}++;
7973          $self->{nc}
7974              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7975        } else {
7976          $self->{set_nc}->($self);
7977        }
7978      
7979            redo A;
7980          } elsif ($self->{nc} == 0x003E) { # >
7981            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md def'); ## TODO: type
7982            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7983            
7984        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7985          $self->{line_prev} = $self->{line};
7986          $self->{column_prev} = $self->{column};
7987          $self->{column}++;
7988          $self->{nc}
7989              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7990        } else {
7991          $self->{set_nc}->($self);
7992        }
7993      
7994            return  ($self->{ct}); # ELEMENT
7995            redo A;
7996          } elsif ($self->{nc} == -1) {
7997            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7998            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7999            
8000        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8001          $self->{line_prev} = $self->{line};
8002          $self->{column_prev} = $self->{column};
8003          $self->{column}++;
8004          $self->{nc}
8005              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8006        } else {
8007          $self->{set_nc}->($self);
8008        }
8009      
8010            return  ($self->{ct}); # ELEMENT
8011            redo A;
8012          } else {
8013            $self->{ct}->{content} = [chr $self->{nc}];
8014            $self->{state} = CONTENT_KEYWORD_STATE;
8015            
8016        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8017          $self->{line_prev} = $self->{line};
8018          $self->{column_prev} = $self->{column};
8019          $self->{column}++;
8020          $self->{nc}
8021              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8022        } else {
8023          $self->{set_nc}->($self);
8024        }
8025      
8026            redo A;
8027          }
8028        } elsif ($self->{state} == CONTENT_KEYWORD_STATE) {
8029          if ($is_space->{$self->{nc}}) {
8030            $self->{state} = AFTER_MD_DEF_STATE;
8031            
8032        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8033          $self->{line_prev} = $self->{line};
8034          $self->{column_prev} = $self->{column};
8035          $self->{column}++;
8036          $self->{nc}
8037              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8038        } else {
8039          $self->{set_nc}->($self);
8040        }
8041      
8042            redo A;
8043          } elsif ($self->{nc} == 0x003E) { # >
8044            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8045            
8046        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8047          $self->{line_prev} = $self->{line};
8048          $self->{column_prev} = $self->{column};
8049          $self->{column}++;
8050          $self->{nc}
8051              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8052        } else {
8053          $self->{set_nc}->($self);
8054        }
8055      
8056            return  ($self->{ct}); # ELEMENT
8057            redo A;
8058          } elsif ($self->{nc} == -1) {
8059            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8060            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8061            
8062        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8063          $self->{line_prev} = $self->{line};
8064          $self->{column_prev} = $self->{column};
8065          $self->{column}++;
8066          $self->{nc}
8067              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8068        } else {
8069          $self->{set_nc}->($self);
8070        }
8071      
8072            return  ($self->{ct}); # ELEMENT
8073            redo A;
8074          } else {
8075            $self->{ct}->{content}->[-1] .= chr $self->{nc}; # ELEMENT
8076            ## Stay in the state.
8077            
8078        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8079          $self->{line_prev} = $self->{line};
8080          $self->{column_prev} = $self->{column};
8081          $self->{column}++;
8082          $self->{nc}
8083              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8084        } else {
8085          $self->{set_nc}->($self);
8086        }
8087      
8088            redo A;
8089          }
8090        } elsif ($self->{state} == AFTER_CM_GROUP_OPEN_STATE) {
8091          if ($is_space->{$self->{nc}}) {
8092            ## Stay in the state.
8093            
8094        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8095          $self->{line_prev} = $self->{line};
8096          $self->{column_prev} = $self->{column};
8097          $self->{column}++;
8098          $self->{nc}
8099              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8100        } else {
8101          $self->{set_nc}->($self);
8102        }
8103      
8104            redo A;
8105          } elsif ($self->{nc} == 0x0028) { # (
8106            $self->{group_depth}++;
8107            push @{$self->{ct}->{content}}, chr $self->{nc};
8108            ## Stay in the state.
8109            
8110        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8111          $self->{line_prev} = $self->{line};
8112          $self->{column_prev} = $self->{column};
8113          $self->{column}++;
8114          $self->{nc}
8115              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8116        } else {
8117          $self->{set_nc}->($self);
8118        }
8119      
8120            redo A;
8121          } elsif ($self->{nc} == 0x007C or # |
8122                   $self->{nc} == 0x002C) { # ,
8123            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty element name'); ## TODO: type
8124            ## Stay in the state.
8125            
8126        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8127          $self->{line_prev} = $self->{line};
8128          $self->{column_prev} = $self->{column};
8129          $self->{column}++;
8130          $self->{nc}
8131              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8132        } else {
8133          $self->{set_nc}->($self);
8134        }
8135      
8136            redo A;
8137          } elsif ($self->{nc} == 0x0029) { # )
8138            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty element name'); ## TODO: type
8139            push @{$self->{ct}->{content}}, chr $self->{nc};
8140            $self->{group_depth}--;
8141            $self->{state} = AFTER_CM_GROUP_CLOSE_STATE;
8142            
8143        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8144          $self->{line_prev} = $self->{line};
8145          $self->{column_prev} = $self->{column};
8146          $self->{column}++;
8147          $self->{nc}
8148              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8149        } else {
8150          $self->{set_nc}->($self);
8151        }
8152      
8153            redo A;
8154          } elsif ($self->{nc} == 0x003E) { # >
8155            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8156            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8157            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8158            
8159        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8160          $self->{line_prev} = $self->{line};
8161          $self->{column_prev} = $self->{column};
8162          $self->{column}++;
8163          $self->{nc}
8164              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8165        } else {
8166          $self->{set_nc}->($self);
8167        }
8168      
8169            return  ($self->{ct}); # ELEMENT
8170            redo A;
8171          } elsif ($self->{nc} == -1) {
8172            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8173            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8174            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8175            
8176        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8177          $self->{line_prev} = $self->{line};
8178          $self->{column_prev} = $self->{column};
8179          $self->{column}++;
8180          $self->{nc}
8181              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8182        } else {
8183          $self->{set_nc}->($self);
8184        }
8185      
8186            return  ($self->{ct}); # ELEMENT
8187            redo A;
8188          } else {
8189            push @{$self->{ct}->{content}}, chr $self->{nc};
8190            $self->{state} = CM_ELEMENT_NAME_STATE;
8191            
8192        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8193          $self->{line_prev} = $self->{line};
8194          $self->{column_prev} = $self->{column};
8195          $self->{column}++;
8196          $self->{nc}
8197              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8198        } else {
8199          $self->{set_nc}->($self);
8200        }
8201      
8202            redo A;
8203          }
8204        } elsif ($self->{state} == CM_ELEMENT_NAME_STATE) {
8205          if ($is_space->{$self->{nc}}) {
8206            $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8207            
8208        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8209          $self->{line_prev} = $self->{line};
8210          $self->{column_prev} = $self->{column};
8211          $self->{column}++;
8212          $self->{nc}
8213              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8214        } else {
8215          $self->{set_nc}->($self);
8216        }
8217      
8218            redo A;
8219          } elsif ($self->{nc} == 0x002A or # *
8220                   $self->{nc} == 0x002B or # +
8221                   $self->{nc} == 0x003F) { # ?
8222            push @{$self->{ct}->{content}}, chr $self->{nc};
8223            $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8224            
8225        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8226          $self->{line_prev} = $self->{line};
8227          $self->{column_prev} = $self->{column};
8228          $self->{column}++;
8229          $self->{nc}
8230              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8231        } else {
8232          $self->{set_nc}->($self);
8233        }
8234      
8235            redo A;
8236          } elsif ($self->{nc} == 0x007C or # |
8237                   $self->{nc} == 0x002C) { # ,
8238            push @{$self->{ct}->{content}}, $self->{nc} == 0x007C ? ' | ' : ', ';
8239            $self->{state} = AFTER_CM_GROUP_OPEN_STATE;
8240            
8241        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8242          $self->{line_prev} = $self->{line};
8243          $self->{column_prev} = $self->{column};
8244          $self->{column}++;
8245          $self->{nc}
8246              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8247        } else {
8248          $self->{set_nc}->($self);
8249        }
8250      
8251            redo A;
8252          } elsif ($self->{nc} == 0x0029) { # )
8253            $self->{group_depth}--;
8254            push @{$self->{ct}->{content}}, chr $self->{nc};
8255            $self->{state} = AFTER_CM_GROUP_CLOSE_STATE;
8256            
8257        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8258          $self->{line_prev} = $self->{line};
8259          $self->{column_prev} = $self->{column};
8260          $self->{column}++;
8261          $self->{nc}
8262              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8263        } else {
8264          $self->{set_nc}->($self);
8265        }
8266      
8267            redo A;
8268          } elsif ($self->{nc} == 0x003E) { # >
8269            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8270            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8271            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8272            
8273        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8274          $self->{line_prev} = $self->{line};
8275          $self->{column_prev} = $self->{column};
8276          $self->{column}++;
8277          $self->{nc}
8278              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8279        } else {
8280          $self->{set_nc}->($self);
8281        }
8282      
8283            return  ($self->{ct}); # ELEMENT
8284            redo A;
8285          } elsif ($self->{nc} == -1) {
8286            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8287            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8288            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8289            
8290        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8291          $self->{line_prev} = $self->{line};
8292          $self->{column_prev} = $self->{column};
8293          $self->{column}++;
8294          $self->{nc}
8295              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8296        } else {
8297          $self->{set_nc}->($self);
8298        }
8299      
8300            return  ($self->{ct}); # ELEMENT
8301            redo A;
8302          } else {
8303            $self->{ct}->{content}->[-1] .= chr $self->{nc};
8304            ## Stay in the state.
8305            
8306        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8307          $self->{line_prev} = $self->{line};
8308          $self->{column_prev} = $self->{column};
8309          $self->{column}++;
8310          $self->{nc}
8311              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8312        } else {
8313          $self->{set_nc}->($self);
8314        }
8315      
8316            redo A;
8317          }
8318        } elsif ($self->{state} == AFTER_CM_ELEMENT_NAME_STATE) {
8319          if ($is_space->{$self->{nc}}) {
8320            ## Stay in the state.
8321            
8322        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8323          $self->{line_prev} = $self->{line};
8324          $self->{column_prev} = $self->{column};
8325          $self->{column}++;
8326          $self->{nc}
8327              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8328        } else {
8329          $self->{set_nc}->($self);
8330        }
8331      
8332            redo A;
8333          } elsif ($self->{nc} == 0x007C or # |
8334                   $self->{nc} == 0x002C) { # ,
8335            push @{$self->{ct}->{content}}, $self->{nc} == 0x007C ? ' | ' : ', ';
8336            $self->{state} = AFTER_CM_GROUP_OPEN_STATE;
8337            
8338        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8339          $self->{line_prev} = $self->{line};
8340          $self->{column_prev} = $self->{column};
8341          $self->{column}++;
8342          $self->{nc}
8343              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8344        } else {
8345          $self->{set_nc}->($self);
8346        }
8347      
8348            redo A;
8349          } elsif ($self->{nc} == 0x0029) { # )
8350            $self->{group_depth}--;
8351            push @{$self->{ct}->{content}}, chr $self->{nc};
8352            $self->{state} = AFTER_CM_GROUP_CLOSE_STATE;
8353            
8354        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8355          $self->{line_prev} = $self->{line};
8356          $self->{column_prev} = $self->{column};
8357          $self->{column}++;
8358          $self->{nc}
8359              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8360        } else {
8361          $self->{set_nc}->($self);
8362        }
8363      
8364            redo A;
8365          } elsif ($self->{nc} == 0x003E) { # >
8366            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8367            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8368            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8369            
8370        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8371          $self->{line_prev} = $self->{line};
8372          $self->{column_prev} = $self->{column};
8373          $self->{column}++;
8374          $self->{nc}
8375              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8376        } else {
8377          $self->{set_nc}->($self);
8378        }
8379      
8380            return  ($self->{ct}); # ELEMENT
8381            redo A;
8382          } elsif ($self->{nc} == -1) {
8383            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8384            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8385            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8386            
8387        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8388          $self->{line_prev} = $self->{line};
8389          $self->{column_prev} = $self->{column};
8390          $self->{column}++;
8391          $self->{nc}
8392              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8393        } else {
8394          $self->{set_nc}->($self);
8395        }
8396      
8397            return  ($self->{ct}); # ELEMENT
8398            redo A;
8399          } else {
8400            $self->{parse_error}->(level => $self->{level}->{must}, type => 'after element name'); ## TODO: type
8401            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8402            $self->{state} = BOGUS_MD_STATE;
8403            
8404        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8405          $self->{line_prev} = $self->{line};
8406          $self->{column_prev} = $self->{column};
8407          $self->{column}++;
8408          $self->{nc}
8409              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8410        } else {
8411          $self->{set_nc}->($self);
8412        }
8413      
8414            redo A;
8415          }
8416        } elsif ($self->{state} == AFTER_CM_GROUP_CLOSE_STATE) {
8417          if ($is_space->{$self->{nc}}) {
8418            if ($self->{group_depth}) {
8419              $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8420            } else {
8421              $self->{state} = AFTER_MD_DEF_STATE;
8422            }
8423            
8424        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8425          $self->{line_prev} = $self->{line};
8426          $self->{column_prev} = $self->{column};
8427          $self->{column}++;
8428          $self->{nc}
8429              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8430        } else {
8431          $self->{set_nc}->($self);
8432        }
8433      
8434            redo A;
8435          } elsif ($self->{nc} == 0x002A or # *
8436                   $self->{nc} == 0x002B or # +
8437                   $self->{nc} == 0x003F) { # ?
8438            push @{$self->{ct}->{content}}, chr $self->{nc};
8439            if ($self->{group_depth}) {
8440              $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8441            } else {
8442              $self->{state} = AFTER_MD_DEF_STATE;
8443            }
8444            
8445        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8446          $self->{line_prev} = $self->{line};
8447          $self->{column_prev} = $self->{column};
8448          $self->{column}++;
8449          $self->{nc}
8450              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8451        } else {
8452          $self->{set_nc}->($self);
8453        }
8454      
8455            redo A;
8456          } elsif ($self->{nc} == 0x0029) { # )
8457            if ($self->{group_depth}) {
8458              $self->{group_depth}--;
8459              push @{$self->{ct}->{content}}, chr $self->{nc};
8460              ## Stay in the state.
8461              
8462        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8463          $self->{line_prev} = $self->{line};
8464          $self->{column_prev} = $self->{column};
8465          $self->{column}++;
8466          $self->{nc}
8467              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8468        } else {
8469          $self->{set_nc}->($self);
8470        }
8471      
8472              redo A;
8473            } else {
8474              $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after md def'); ## TODO: type
8475              $self->{state} = BOGUS_MD_STATE;
8476              ## Reconsume.
8477              redo A;
8478            }
8479          } elsif ($self->{nc} == 0x003E) { # >
8480            if ($self->{group_depth}) {
8481              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8482              push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8483            }
8484            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8485            
8486        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8487          $self->{line_prev} = $self->{line};
8488          $self->{column_prev} = $self->{column};
8489          $self->{column}++;
8490          $self->{nc}
8491              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8492        } else {
8493          $self->{set_nc}->($self);
8494        }
8495      
8496            return  ($self->{ct}); # ELEMENT
8497            redo A;
8498          } elsif ($self->{nc} == -1) {
8499            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8500            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8501            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8502            
8503        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8504          $self->{line_prev} = $self->{line};
8505          $self->{column_prev} = $self->{column};
8506          $self->{column}++;
8507          $self->{nc}
8508              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8509        } else {
8510          $self->{set_nc}->($self);
8511        }
8512      
8513            return  ($self->{ct}); # ELEMENT
8514            redo A;
8515          } else {
8516            if ($self->{group_depth}) {
8517              $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8518            } else {
8519              $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after md def'); ## TODO: type
8520              $self->{state} = BOGUS_MD_STATE;
8521            }
8522            ## Reconsume.
8523            redo A;
8524          }
8525        } elsif ($self->{state} == AFTER_MD_DEF_STATE) {
8526          if ($is_space->{$self->{nc}}) {
8527            ## Stay in the state.
8528            
8529        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8530          $self->{line_prev} = $self->{line};
8531          $self->{column_prev} = $self->{column};
8532          $self->{column}++;
8533          $self->{nc}
8534              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8535        } else {
8536          $self->{set_nc}->($self);
8537        }
8538      
8539            redo A;
8540          } elsif ($self->{nc} == 0x003E) { # >
8541            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8542            
8543        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8544          $self->{line_prev} = $self->{line};
8545          $self->{column_prev} = $self->{column};
8546          $self->{column}++;
8547          $self->{nc}
8548              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8549        } else {
8550          $self->{set_nc}->($self);
8551        }
8552      
8553            return  ($self->{ct}); # ENTITY/ELEMENT
8554            redo A;
8555          } elsif ($self->{nc} == -1) {
8556            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8557            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8558            
8559        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8560          $self->{line_prev} = $self->{line};
8561          $self->{column_prev} = $self->{column};
8562          $self->{column}++;
8563          $self->{nc}
8564              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8565        } else {
8566          $self->{set_nc}->($self);
8567        }
8568      
8569            return  ($self->{ct}); # ENTITY/ELEMENT
8570            redo A;
8571          } else {
8572            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after md def'); ## TODO: type
8573            $self->{state} = BOGUS_MD_STATE;
8574            ## Reconsume.
8575            redo A;
8576          }
8577        } elsif ($self->{state} == BOGUS_MD_STATE) {
8578          if ($self->{nc} == 0x003E) { # >
8579            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8580            
8581        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8582          $self->{line_prev} = $self->{line};
8583          $self->{column_prev} = $self->{column};
8584          $self->{column}++;
8585          $self->{nc}
8586              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8587        } else {
8588          $self->{set_nc}->($self);
8589        }
8590      
8591            return  ($self->{ct}); # ATTLIST/ENTITY/NOTATION
8592            redo A;
8593          } elsif ($self->{nc} == -1) {
8594            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8595            ## Reconsume.
8596            return  ($self->{ct}); # ATTLIST/ENTITY/NOTATION
8597            redo A;
8598          } else {
8599            ## Stay in the state.
8600            
8601        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8602          $self->{line_prev} = $self->{line};
8603          $self->{column_prev} = $self->{column};
8604          $self->{column}++;
8605          $self->{nc}
8606              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8607        } else {
8608          $self->{set_nc}->($self);
8609        }
8610      
8611            redo A;
8612          }
8613      } else {      } else {
8614        die "$0: $self->{state}: Unknown state";        die "$0: $self->{state}: Unknown state";
8615      }      }
# Line 4252  sub _get_next_token ($) { Line 8620  sub _get_next_token ($) {
8620    
8621  1;  1;
8622  ## $Date$  ## $Date$
8623                                    

Legend:
Removed from v.1.5  
changed lines
  Added in v.1.24

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24