/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm

Parent Directory | Revision Log | View Patch

revision 1.11 by wakaba, Wed Oct 15 10:50:38 2008 UTC | revision 1.27 by wakaba, Thu Jul 2 22:24:28 2009 UTC
# Line 15  BEGIN { Line 15  BEGIN {
15      CHARACTER_TOKEN      CHARACTER_TOKEN
16      PI_TOKEN      PI_TOKEN
17      ABORT_TOKEN      ABORT_TOKEN
18        END_OF_DOCTYPE_TOKEN
19        ATTLIST_TOKEN
20        ELEMENT_TOKEN
21        GENERAL_ENTITY_TOKEN
22        PARAMETER_ENTITY_TOKEN
23        NOTATION_TOKEN
24    );    );
25        
26    our %EXPORT_TAGS = (    our %EXPORT_TAGS = (
# Line 27  BEGIN { Line 33  BEGIN {
33        CHARACTER_TOKEN        CHARACTER_TOKEN
34        PI_TOKEN        PI_TOKEN
35        ABORT_TOKEN        ABORT_TOKEN
36          END_OF_DOCTYPE_TOKEN
37          ATTLIST_TOKEN
38          ELEMENT_TOKEN
39          GENERAL_ENTITY_TOKEN
40          PARAMETER_ENTITY_TOKEN
41          NOTATION_TOKEN
42      )],      )],
43    );    );
44  }  }
45    
46    ## NOTE: Differences from the XML5 draft are marked as "XML5:".
47    
48  ## Token types  ## Token types
49    
50  sub DOCTYPE_TOKEN () { 1 }  sub DOCTYPE_TOKEN () { 1 } ## XML5: No DOCTYPE token.
51  sub COMMENT_TOKEN () { 2 }  sub COMMENT_TOKEN () { 2 }
52  sub START_TAG_TOKEN () { 3 }  sub START_TAG_TOKEN () { 3 }
53  sub END_TAG_TOKEN () { 4 }  sub END_TAG_TOKEN () { 4 }
54  sub END_OF_FILE_TOKEN () { 5 }  sub END_OF_FILE_TOKEN () { 5 }
55  sub CHARACTER_TOKEN () { 6 }  sub CHARACTER_TOKEN () { 6 }
56  sub PI_TOKEN () { 7 } # XML5  sub PI_TOKEN () { 7 } ## NOTE: XML only.
57  sub ABORT_TOKEN () { 8 } # Not a token actually  sub ABORT_TOKEN () { 8 } ## NOTE: For internal processing.
58    sub END_OF_DOCTYPE_TOKEN () { 9 } ## NOTE: XML only.
59    sub ATTLIST_TOKEN () { 10 } ## NOTE: XML only.
60    sub ELEMENT_TOKEN () { 11 } ## NOTE: XML only.
61    sub GENERAL_ENTITY_TOKEN () { 12 } ## NOTE: XML only.
62    sub PARAMETER_ENTITY_TOKEN () { 13 } ## NOTE: XML only.
63    sub NOTATION_TOKEN () { 14 } ## NOTE: XML only.
64    
65    ## XML5: XML5 has "empty tag token".  In this implementation, it is
66    ## represented as a start tag token with $self->{self_closing} flag
67    ## set to true.
68    
69    ## XML5: XML5 has "short end tag token".  In this implementation, it
70    ## is represented as an end tag token with $token->{tag_name} flag set
71    ## to an empty string.
72    
73  package Whatpm::HTML;  package Whatpm::HTML;
74    
# Line 114  sub HEXREF_HEX_STATE () { 48 } Line 142  sub HEXREF_HEX_STATE () { 48 }
142  sub ENTITY_NAME_STATE () { 49 }  sub ENTITY_NAME_STATE () { 49 }
143  sub PCDATA_STATE () { 50 } # "data state" in the spec  sub PCDATA_STATE () { 50 } # "data state" in the spec
144    
145  ## XML states  ## XML-only states
146  sub PI_STATE () { 51 }  sub PI_STATE () { 51 }
147  sub PI_TARGET_STATE () { 52 }  sub PI_TARGET_STATE () { 52 }
148  sub PI_TARGET_AFTER_STATE () { 53 }  sub PI_TARGET_AFTER_STATE () { 53 }
149  sub PI_DATA_STATE () { 54 }  sub PI_DATA_STATE () { 54 }
150  sub PI_AFTER_STATE () { 55 }  sub PI_AFTER_STATE () { 55 }
151  sub PI_DATA_AFTER_STATE () { 56 }  sub PI_DATA_AFTER_STATE () { 56 }
152    sub DOCTYPE_INTERNAL_SUBSET_STATE () { 57 }
153    sub DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 58 }
154    sub BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 59 }
155    sub DOCTYPE_TAG_STATE () { 60 }
156    sub DOCTYPE_MARKUP_DECLARATION_OPEN_STATE () { 61 }
157    sub MD_ATTLIST_STATE () { 62 }
158    sub MD_E_STATE () { 63 }
159    sub MD_ELEMENT_STATE () { 64 }
160    sub MD_ENTITY_STATE () { 65 }
161    sub MD_NOTATION_STATE () { 66 }
162    sub DOCTYPE_MD_STATE () { 67 }
163    sub BEFORE_MD_NAME_STATE () { 68 }
164    sub MD_NAME_STATE () { 69 }
165    sub DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE () { 70 }
166    sub DOCTYPE_ATTLIST_NAME_AFTER_STATE () { 71 }
167    sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE () { 72 }
168    sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE () { 73 }
169    sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE () { 74 }
170    sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE () { 75 }
171    sub BEFORE_ALLOWED_TOKEN_STATE () { 76 }
172    sub ALLOWED_TOKEN_STATE () { 77 }
173    sub AFTER_ALLOWED_TOKEN_STATE () { 78 }
174    sub AFTER_ALLOWED_TOKENS_STATE () { 79 }
175    sub BEFORE_ATTR_DEFAULT_STATE () { 80 }
176    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE () { 81 }
177    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE () { 82 }
178    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE () { 83 }
179    sub AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE () { 84 }
180    sub BEFORE_NDATA_STATE () { 85 }
181    sub NDATA_STATE () { 86 }
182    sub AFTER_NDATA_STATE () { 87 }
183    sub BEFORE_NOTATION_NAME_STATE () { 88 }
184    sub NOTATION_NAME_STATE () { 89 }
185    sub DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE () { 90 }
186    sub DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE () { 91 }
187    sub ENTITY_VALUE_ENTITY_STATE () { 92 }
188    sub AFTER_ELEMENT_NAME_STATE () { 93 }
189    sub BEFORE_ELEMENT_CONTENT_STATE () { 94 }
190    sub CONTENT_KEYWORD_STATE () { 95 }
191    sub AFTER_CM_GROUP_OPEN_STATE () { 96 }
192    sub CM_ELEMENT_NAME_STATE () { 97 }
193    sub AFTER_CM_ELEMENT_NAME_STATE () { 98 }
194    sub AFTER_CM_GROUP_CLOSE_STATE () { 99 }
195    sub AFTER_MD_DEF_STATE () { 100 }
196    sub BOGUS_MD_STATE () { 101 }
197    
198  ## Tree constructor state constants (see Whatpm::HTML for the full  ## Tree constructor state constants (see Whatpm::HTML for the full
199  ## list and descriptions)  ## list and descriptions)
# Line 186  sub _initialize_tokenizer ($) { Line 259  sub _initialize_tokenizer ($) {
259    #$self->{is_xml} (if XML)    #$self->{is_xml} (if XML)
260    
261    $self->{state} = DATA_STATE; # MUST    $self->{state} = DATA_STATE; # MUST
262    $self->{s_kwd} = ''; # state keyword    $self->{s_kwd} = ''; # Data state keyword
263      #$self->{kwd} = ''; # State-dependent keyword; initialized when used
264    #$self->{entity__value}; # initialized when used    #$self->{entity__value}; # initialized when used
265    #$self->{entity__match}; # initialized when used    #$self->{entity__match}; # initialized when used
266    $self->{content_model} = PCDATA_CONTENT_MODEL; # be    $self->{content_model} = PCDATA_CONTENT_MODEL; # be
# Line 231  sub _initialize_tokenizer ($) { Line 305  sub _initialize_tokenizer ($) {
305  ##   ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN, PI_TOKEN)  ##   ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN, PI_TOKEN)
306  ##   ->{has_reference} == 1 or 0 (CHARACTER_TOKEN)  ##   ->{has_reference} == 1 or 0 (CHARACTER_TOKEN)
307  ##   ->{last_index} (ELEMENT_TOKEN): Next attribute's index - 1.  ##   ->{last_index} (ELEMENT_TOKEN): Next attribute's index - 1.
308    ##   ->{has_internal_subset} = 1 or 0 (DOCTYPE_TOKEN)
309    
310  ## NOTE: The "self-closing flag" is hold as |$self->{self_closing}|.  ## NOTE: The "self-closing flag" is hold as |$self->{self_closing}|.
311  ##     |->{self_closing}| is used to save the value of |$self->{self_closing}|  ##     |->{self_closing}| is used to save the value of |$self->{self_closing}|
312  ##     while the token is pushed back to the stack.  ##     while the token is pushed back to the stack.
# Line 250  my $is_space = { Line 326  my $is_space = {
326    0x0009 => 1, # CHARACTER TABULATION (HT)    0x0009 => 1, # CHARACTER TABULATION (HT)
327    0x000A => 1, # LINE FEED (LF)    0x000A => 1, # LINE FEED (LF)
328    #0x000B => 0, # LINE TABULATION (VT)    #0x000B => 0, # LINE TABULATION (VT)
329    0x000C => 1, # FORM FEED (FF)    0x000C => 1, # FORM FEED (FF) ## XML5: Not a space character.
330    #0x000D => 1, # CARRIAGE RETURN (CR)    #0x000D => 1, # CARRIAGE RETURN (CR)
331    0x0020 => 1, # SPACE (SP)    0x0020 => 1, # SPACE (SP)
332  };  };
# Line 530  sub _get_next_token ($) { Line 606  sub _get_next_token ($) {
606            redo A;            redo A;
607          } elsif ($self->{nc} == 0x0021) { # !          } elsif ($self->{nc} == 0x0021) { # !
608                        
609            $self->{s_kwd} = '<' unless $self->{escape};            $self->{s_kwd} = $self->{escaped} ? '' : '<';
610            #            #
611          } else {          } else {
612                        
613              $self->{s_kwd} = '';
614            #            #
615          }          }
616    
617          ## reconsume          ## reconsume
618          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
         $self->{s_kwd} = '';  
619          return  ({type => CHARACTER_TOKEN, data => '<',          return  ({type => CHARACTER_TOKEN, data => '<',
620                    line => $self->{line_prev},                    line => $self->{line_prev},
621                    column => $self->{column_prev},                    column => $self->{column_prev},
# Line 720  sub _get_next_token ($) { Line 796  sub _get_next_token ($) {
796        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
797          if (defined $self->{last_stag_name}) {          if (defined $self->{last_stag_name}) {
798            $self->{state} = CDATA_RCDATA_CLOSE_TAG_STATE;            $self->{state} = CDATA_RCDATA_CLOSE_TAG_STATE;
799            $self->{s_kwd} = '';            $self->{kwd} = '';
800            ## Reconsume.            ## Reconsume.
801            redo A;            redo A;
802          } else {          } else {
# Line 873  sub _get_next_token ($) { Line 949  sub _get_next_token ($) {
949          redo A;          redo A;
950        }        }
951      } elsif ($self->{state} == CDATA_RCDATA_CLOSE_TAG_STATE) {      } elsif ($self->{state} == CDATA_RCDATA_CLOSE_TAG_STATE) {
952        my $ch = substr $self->{last_stag_name}, length $self->{s_kwd}, 1;        my $ch = substr $self->{last_stag_name}, length $self->{kwd}, 1;
953        if (length $ch) {        if (length $ch) {
954          my $CH = $ch;          my $CH = $ch;
955          $ch =~ tr/a-z/A-Z/;          $ch =~ tr/a-z/A-Z/;
# Line 881  sub _get_next_token ($) { Line 957  sub _get_next_token ($) {
957          if ($nch eq $ch or $nch eq $CH) {          if ($nch eq $ch or $nch eq $CH) {
958                        
959            ## Stay in the state.            ## Stay in the state.
960            $self->{s_kwd} .= $nch;            $self->{kwd} .= $nch;
961                        
962      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
963        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 900  sub _get_next_token ($) { Line 976  sub _get_next_token ($) {
976            $self->{s_kwd} = '';            $self->{s_kwd} = '';
977            ## Reconsume.            ## Reconsume.
978            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
979                      data => '</' . $self->{s_kwd},                      data => '</' . $self->{kwd},
980                      line => $self->{line_prev},                      line => $self->{line_prev},
981                      column => $self->{column_prev} - 1 - length $self->{s_kwd},                      column => $self->{column_prev} - 1 - length $self->{kwd},
982                     });                     });
983            redo A;            redo A;
984          }          }
# Line 918  sub _get_next_token ($) { Line 994  sub _get_next_token ($) {
994            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
995            $self->{s_kwd} = '';            $self->{s_kwd} = '';
996            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
997                      data => '</' . $self->{s_kwd},                      data => '</' . $self->{kwd},
998                      line => $self->{line_prev},                      line => $self->{line_prev},
999                      column => $self->{column_prev} - 1 - length $self->{s_kwd},                      column => $self->{column_prev} - 1 - length $self->{kwd},
1000                     });                     });
1001            redo A;            redo A;
1002          } else {          } else {
# Line 929  sub _get_next_token ($) { Line 1005  sub _get_next_token ($) {
1005                = {type => END_TAG_TOKEN,                = {type => END_TAG_TOKEN,
1006                   tag_name => $self->{last_stag_name},                   tag_name => $self->{last_stag_name},
1007                   line => $self->{line_prev},                   line => $self->{line_prev},
1008                   column => $self->{column_prev} - 1 - length $self->{s_kwd}};                   column => $self->{column_prev} - 1 - length $self->{kwd}};
1009            $self->{state} = TAG_NAME_STATE;            $self->{state} = TAG_NAME_STATE;
1010            ## Reconsume.            ## Reconsume.
1011            redo A;            redo A;
# Line 1664  sub _get_next_token ($) { Line 1740  sub _get_next_token ($) {
1740    
1741          redo A;          redo A;
1742        } else {        } else {
1743          if ($self->{nc} == 0x003D) { # =          if ($self->{nc} == 0x003D or $self->{nc} == 0x003C) { # =, <
1744                        
1745            ## XML5: Not a parse error.            ## XML5: Not a parse error.
1746            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');
# Line 1691  sub _get_next_token ($) { Line 1767  sub _get_next_token ($) {
1767          redo A;          redo A;
1768        }        }
1769      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1770        ## XML5: "Tag attribute value double quoted state".        ## XML5: "Tag attribute value double quoted state" and "DOCTYPE
1771          ## ATTLIST attribute value double quoted state".
1772                
1773        if ($self->{nc} == 0x0022) { # "        if ($self->{nc} == 0x0022) { # "
1774                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1775          ## XML5: "Tag attribute name before state".            
1776          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;            ## XML5: "DOCTYPE ATTLIST name after state".
1777              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1778              $self->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE;
1779            } else {
1780              
1781              ## XML5: "Tag attribute name before state".
1782              $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1783            }
1784                    
1785      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1786        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1732  sub _get_next_token ($) { Line 1816  sub _get_next_token ($) {
1816      }      }
1817        
1818          redo A;          redo A;
1819          } elsif ($self->{is_xml} and
1820                   $is_space->{$self->{nc}}) {
1821            
1822            $self->{ca}->{value} .= ' ';
1823            ## Stay in the state.
1824            
1825        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1826          $self->{line_prev} = $self->{line};
1827          $self->{column_prev} = $self->{column};
1828          $self->{column}++;
1829          $self->{nc}
1830              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
1831        } else {
1832          $self->{set_nc}->($self);
1833        }
1834      
1835            redo A;
1836        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
1837          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed attribute value');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed attribute value');
1838          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1839                        
1840            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
1841    
1842              $self->{state} = DATA_STATE;
1843              $self->{s_kwd} = '';
1844              ## reconsume
1845              return  ($self->{ct}); # start tag
1846              redo A;
1847          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1848            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1849            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1746  sub _get_next_token ($) { Line 1853  sub _get_next_token ($) {
1853              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
1854                            
1855            }            }
1856    
1857              $self->{state} = DATA_STATE;
1858              $self->{s_kwd} = '';
1859              ## reconsume
1860              return  ($self->{ct}); # end tag
1861              redo A;
1862            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1863              ## XML5: No parse error above; not defined yet.
1864              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1865              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1866              ## Reconsume.
1867              return  ($self->{ct}); # ATTLIST
1868              redo A;
1869          } else {          } else {
1870            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1871          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
1872        } else {        } else {
1873            ## XML5 [ATTLIST]: Not defined yet.
1874          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
1875                        
1876            ## XML5: Not a parse error.            ## XML5: Not a parse error.
# Line 1766  sub _get_next_token ($) { Line 1880  sub _get_next_token ($) {
1880          }          }
1881          $self->{ca}->{value} .= chr ($self->{nc});          $self->{ca}->{value} .= chr ($self->{nc});
1882          $self->{read_until}->($self->{ca}->{value},          $self->{read_until}->($self->{ca}->{value},
1883                                q["&<],                                qq["&<\x09\x0C\x20],
1884                                length $self->{ca}->{value});                                length $self->{ca}->{value});
1885    
1886          ## Stay in the state          ## Stay in the state
# Line 1784  sub _get_next_token ($) { Line 1898  sub _get_next_token ($) {
1898          redo A;          redo A;
1899        }        }
1900      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1901        ## XML5: "Tag attribute value single quoted state".        ## XML5: "Tag attribute value single quoted state" and "DOCTYPE
1902          ## ATTLIST attribute value single quoted state".
1903    
1904        if ($self->{nc} == 0x0027) { # '        if ($self->{nc} == 0x0027) { # '
1905                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1906          ## XML5: "Before attribute name state" (sic).            
1907          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;            ## XML5: "DOCTYPE ATTLIST name after state".
1908              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1909              $self->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE;
1910            } else {
1911              
1912              ## XML5: "Before attribute name state" (sic).
1913              $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1914            }
1915                    
1916      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1917        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1825  sub _get_next_token ($) { Line 1947  sub _get_next_token ($) {
1947      }      }
1948        
1949          redo A;          redo A;
1950          } elsif ($self->{is_xml} and
1951                   $is_space->{$self->{nc}}) {
1952            
1953            $self->{ca}->{value} .= ' ';
1954            ## Stay in the state.
1955            
1956        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1957          $self->{line_prev} = $self->{line};
1958          $self->{column_prev} = $self->{column};
1959          $self->{column}++;
1960          $self->{nc}
1961              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
1962        } else {
1963          $self->{set_nc}->($self);
1964        }
1965      
1966            redo A;
1967        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
1968          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed attribute value');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed attribute value');
1969          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1970                        
1971            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
1972    
1973              $self->{state} = DATA_STATE;
1974              $self->{s_kwd} = '';
1975              ## reconsume
1976              return  ($self->{ct}); # start tag
1977              redo A;
1978          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1979            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1980            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1839  sub _get_next_token ($) { Line 1984  sub _get_next_token ($) {
1984              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
1985                            
1986            }            }
1987    
1988              $self->{state} = DATA_STATE;
1989              $self->{s_kwd} = '';
1990              ## reconsume
1991              return  ($self->{ct}); # end tag
1992              redo A;
1993            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1994              ## XML5: No parse error above; not defined yet.
1995              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1996              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1997              ## Reconsume.
1998              return  ($self->{ct}); # ATTLIST
1999              redo A;
2000          } else {          } else {
2001            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
2002          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
2003        } else {        } else {
2004            ## XML5 [ATTLIST]: Not defined yet.
2005          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
2006                        
2007            ## XML5: Not a parse error.            ## XML5: Not a parse error.
# Line 1859  sub _get_next_token ($) { Line 2011  sub _get_next_token ($) {
2011          }          }
2012          $self->{ca}->{value} .= chr ($self->{nc});          $self->{ca}->{value} .= chr ($self->{nc});
2013          $self->{read_until}->($self->{ca}->{value},          $self->{read_until}->($self->{ca}->{value},
2014                                q['&<],                                qq['&<\x09\x0C\x20],
2015                                length $self->{ca}->{value});                                length $self->{ca}->{value});
2016    
2017          ## Stay in the state          ## Stay in the state
# Line 1880  sub _get_next_token ($) { Line 2032  sub _get_next_token ($) {
2032        ## XML5: "Tag attribute value unquoted state".        ## XML5: "Tag attribute value unquoted state".
2033    
2034        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
2035                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
2036          ## XML5: "Tag attribute name before state".            
2037          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;            push @{$self->{ct}->{attrdefs}}, $self->{ca};
2038              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
2039            } else {
2040              
2041              ## XML5: "Tag attribute name before state".
2042              $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
2043            }
2044                    
2045      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2046        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1923  sub _get_next_token ($) { Line 2081  sub _get_next_token ($) {
2081          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
2082                        
2083            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
2084    
2085              $self->{state} = DATA_STATE;
2086              $self->{s_kwd} = '';
2087              
2088        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2089          $self->{line_prev} = $self->{line};
2090          $self->{column_prev} = $self->{column};
2091          $self->{column}++;
2092          $self->{nc}
2093              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
2094        } else {
2095          $self->{set_nc}->($self);
2096        }
2097      
2098              return  ($self->{ct}); # start tag
2099              redo A;
2100          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
2101            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
2102            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1932  sub _get_next_token ($) { Line 2106  sub _get_next_token ($) {
2106              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
2107                            
2108            }            }
2109          } else {  
2110            die "$0: $self->{ct}->{type}: Unknown token type";            $self->{state} = DATA_STATE;
2111          }            $self->{s_kwd} = '';
2112          $self->{state} = DATA_STATE;            
         $self->{s_kwd} = '';  
           
2113      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2114        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
2115        $self->{column_prev} = $self->{column};        $self->{column_prev} = $self->{column};
# Line 1948  sub _get_next_token ($) { Line 2120  sub _get_next_token ($) {
2120        $self->{set_nc}->($self);        $self->{set_nc}->($self);
2121      }      }
2122        
2123              return  ($self->{ct}); # end tag
2124          return  ($self->{ct}); # start tag or end tag            redo A;
2125            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
2126          redo A;            push @{$self->{ct}->{attrdefs}}, $self->{ca};
2127              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2128              
2129        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2130          $self->{line_prev} = $self->{line};
2131          $self->{column_prev} = $self->{column};
2132          $self->{column}++;
2133          $self->{nc}
2134              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
2135        } else {
2136          $self->{set_nc}->($self);
2137        }
2138      
2139              return  ($self->{ct}); # ATTLIST
2140              redo A;
2141            } else {
2142              die "$0: $self->{ct}->{type}: Unknown token type";
2143            }
2144        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');  
2145          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
2146                        
2147              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');
2148            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
2149    
2150              $self->{state} = DATA_STATE;
2151              $self->{s_kwd} = '';
2152              ## reconsume
2153              return  ($self->{ct}); # start tag
2154              redo A;
2155          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
2156              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');
2157            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
2158            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
2159                            
# Line 1966  sub _get_next_token ($) { Line 2162  sub _get_next_token ($) {
2162              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
2163                            
2164            }            }
2165    
2166              $self->{state} = DATA_STATE;
2167              $self->{s_kwd} = '';
2168              ## reconsume
2169              return  ($self->{ct}); # end tag
2170              redo A;
2171            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
2172              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
2173              push @{$self->{ct}->{attrdefs}}, $self->{ca};
2174              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2175              ## Reconsume.
2176              return  ($self->{ct}); # ATTLIST
2177              redo A;
2178          } else {          } else {
2179            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
2180          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
2181        } else {        } else {
2182          if ({          if ({
2183               0x0022 => 1, # "               0x0022 => 1, # "
2184               0x0027 => 1, # '               0x0027 => 1, # '
2185               0x003D => 1, # =               0x003D => 1, # =
2186                 0x003C => 1, # <
2187              }->{$self->{nc}}) {              }->{$self->{nc}}) {
2188                        
2189            ## XML5: Not a parse error.            ## XML5: Not a parse error.
# Line 1990  sub _get_next_token ($) { Line 2193  sub _get_next_token ($) {
2193          }          }
2194          $self->{ca}->{value} .= chr ($self->{nc});          $self->{ca}->{value} .= chr ($self->{nc});
2195          $self->{read_until}->($self->{ca}->{value},          $self->{read_until}->($self->{ca}->{value},
2196                                q["'=& >],                                qq["'=& \x09\x0C>],
2197                                length $self->{ca}->{value});                                length $self->{ca}->{value});
2198    
2199          ## Stay in the state          ## Stay in the state
# Line 2168  sub _get_next_token ($) { Line 2371  sub _get_next_token ($) {
2371          redo A;          redo A;
2372        }        }
2373      } elsif ($self->{state} == BOGUS_COMMENT_STATE) {      } elsif ($self->{state} == BOGUS_COMMENT_STATE) {
2374        ## (only happen if PCDATA state)        ## XML5: "Bogus comment state" and "DOCTYPE bogus comment state".
2375    
2376        ## NOTE: Unlike spec's "bogus comment state", this implementation        ## NOTE: Unlike spec's "bogus comment state", this implementation
2377        ## consumes characters one-by-one basis.        ## consumes characters one-by-one basis.
2378                
2379        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2380                    if ($self->{in_subset}) {
2381          $self->{state} = DATA_STATE;            
2382          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2383            } else {
2384              
2385              $self->{state} = DATA_STATE;
2386              $self->{s_kwd} = '';
2387            }
2388                    
2389      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2390        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2192  sub _get_next_token ($) { Line 2400  sub _get_next_token ($) {
2400          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
2401          redo A;          redo A;
2402        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
2403                    if ($self->{in_subset}) {
2404          $self->{state} = DATA_STATE;            
2405          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2406            } else {
2407              
2408              $self->{state} = DATA_STATE;
2409              $self->{s_kwd} = '';
2410            }
2411          ## reconsume          ## reconsume
2412    
2413          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2221  sub _get_next_token ($) { Line 2434  sub _get_next_token ($) {
2434          redo A;          redo A;
2435        }        }
2436      } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {      } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {
2437        ## (only happen if PCDATA state)        ## XML5: "Markup declaration state".
2438                
2439        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2440                    
# Line 2243  sub _get_next_token ($) { Line 2456  sub _get_next_token ($) {
2456          ## ASCII case-insensitive.          ## ASCII case-insensitive.
2457                    
2458          $self->{state} = MD_DOCTYPE_STATE;          $self->{state} = MD_DOCTYPE_STATE;
2459          $self->{s_kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
2460                    
2461      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2462        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2262  sub _get_next_token ($) { Line 2475  sub _get_next_token ($) {
2475                 $self->{nc} == 0x005B) { # [                 $self->{nc} == 0x005B) { # [
2476                                                    
2477          $self->{state} = MD_CDATA_STATE;          $self->{state} = MD_CDATA_STATE;
2478          $self->{s_kwd} = '[';          $self->{kwd} = '[';
2479                    
2480      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2481        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2332  sub _get_next_token ($) { Line 2545  sub _get_next_token ($) {
2545              0x0054, # T              0x0054, # T
2546              0x0059, # Y              0x0059, # Y
2547              0x0050, # P              0x0050, # P
2548            ]->[length $self->{s_kwd}] or            ]->[length $self->{kwd}] or
2549            $self->{nc} == [            $self->{nc} == [
2550              undef,              undef,
2551              0x006F, # o              0x006F, # o
# Line 2340  sub _get_next_token ($) { Line 2553  sub _get_next_token ($) {
2553              0x0074, # t              0x0074, # t
2554              0x0079, # y              0x0079, # y
2555              0x0070, # p              0x0070, # p
2556            ]->[length $self->{s_kwd}]) {            ]->[length $self->{kwd}]) {
2557                    
2558          ## Stay in the state.          ## Stay in the state.
2559          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
2560                    
2561      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2562        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2356  sub _get_next_token ($) { Line 2569  sub _get_next_token ($) {
2569      }      }
2570        
2571          redo A;          redo A;
2572        } elsif ((length $self->{s_kwd}) == 6 and        } elsif ((length $self->{kwd}) == 6 and
2573                 ($self->{nc} == 0x0045 or # E                 ($self->{nc} == 0x0045 or # E
2574                  $self->{nc} == 0x0065)) { # e                  $self->{nc} == 0x0065)) { # e
2575          if ($self->{s_kwd} ne 'DOCTYP') {          if ($self->{is_xml} and
2576                ($self->{kwd} ne 'DOCTYP' or $self->{nc} == 0x0065)) {
2577                        
2578            ## XML5: case-sensitive.            ## XML5: case-sensitive.
2579            $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO            $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO
# Line 2391  sub _get_next_token ($) { Line 2605  sub _get_next_token ($) {
2605                                    
2606          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
2607                          line => $self->{line_prev},                          line => $self->{line_prev},
2608                          column => $self->{column_prev} - 1 - length $self->{s_kwd});                          column => $self->{column_prev} - 1 - length $self->{kwd});
2609          $self->{state} = BOGUS_COMMENT_STATE;          $self->{state} = BOGUS_COMMENT_STATE;
2610          ## Reconsume.          ## Reconsume.
2611          $self->{ct} = {type => COMMENT_TOKEN,          $self->{ct} = {type => COMMENT_TOKEN,
2612                                    data => $self->{s_kwd},                                    data => $self->{kwd},
2613                                    line => $self->{line_prev},                                    line => $self->{line_prev},
2614                                    column => $self->{column_prev} - 1 - length $self->{s_kwd},                                    column => $self->{column_prev} - 1 - length $self->{kwd},
2615                                   };                                   };
2616          redo A;          redo A;
2617        }        }
# Line 2408  sub _get_next_token ($) { Line 2622  sub _get_next_token ($) {
2622              '[CD' => 0x0041, # A              '[CD' => 0x0041, # A
2623              '[CDA' => 0x0054, # T              '[CDA' => 0x0054, # T
2624              '[CDAT' => 0x0041, # A              '[CDAT' => 0x0041, # A
2625            }->{$self->{s_kwd}}) {            }->{$self->{kwd}}) {
2626                    
2627          ## Stay in the state.          ## Stay in the state.
2628          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
2629                    
2630      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2631        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2424  sub _get_next_token ($) { Line 2638  sub _get_next_token ($) {
2638      }      }
2639        
2640          redo A;          redo A;
2641        } elsif ($self->{s_kwd} eq '[CDATA' and        } elsif ($self->{kwd} eq '[CDATA' and
2642                 $self->{nc} == 0x005B) { # [                 $self->{nc} == 0x005B) { # [
2643          if ($self->{is_xml} and          if ($self->{is_xml} and
2644              not $self->{tainted} and              not $self->{tainted} and
# Line 2459  sub _get_next_token ($) { Line 2673  sub _get_next_token ($) {
2673                    
2674          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
2675                          line => $self->{line_prev},                          line => $self->{line_prev},
2676                          column => $self->{column_prev} - 1 - length $self->{s_kwd});                          column => $self->{column_prev} - 1 - length $self->{kwd});
2677          $self->{state} = BOGUS_COMMENT_STATE;          $self->{state} = BOGUS_COMMENT_STATE;
2678          ## Reconsume.          ## Reconsume.
2679          $self->{ct} = {type => COMMENT_TOKEN,          $self->{ct} = {type => COMMENT_TOKEN,
2680                                    data => $self->{s_kwd},                                    data => $self->{kwd},
2681                                    line => $self->{line_prev},                                    line => $self->{line_prev},
2682                                    column => $self->{column_prev} - 1 - length $self->{s_kwd},                                    column => $self->{column_prev} - 1 - length $self->{kwd},
2683                                   };                                   };
2684          redo A;          redo A;
2685        }        }
# Line 2486  sub _get_next_token ($) { Line 2700  sub _get_next_token ($) {
2700        
2701          redo A;          redo A;
2702        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
2703          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');
2704          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2705          $self->{s_kwd} = '';            
2706              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2707            } else {
2708              
2709              $self->{state} = DATA_STATE;
2710              $self->{s_kwd} = '';
2711            }
2712                    
2713      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2714        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2506  sub _get_next_token ($) { Line 2725  sub _get_next_token ($) {
2725    
2726          redo A;          redo A;
2727        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2728          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2729          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2730          $self->{s_kwd} = '';            
2731              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2732            } else {
2733              
2734              $self->{state} = DATA_STATE;
2735              $self->{s_kwd} = '';
2736            }
2737          ## reconsume          ## reconsume
2738    
2739          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2550  sub _get_next_token ($) { Line 2774  sub _get_next_token ($) {
2774        
2775          redo A;          redo A;
2776        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
2777          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');
2778          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2779          $self->{s_kwd} = '';            
2780              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2781            } else {
2782              
2783              $self->{state} = DATA_STATE;
2784              $self->{s_kwd} = '';
2785            }
2786                    
2787      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2788        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2570  sub _get_next_token ($) { Line 2799  sub _get_next_token ($) {
2799    
2800          redo A;          redo A;
2801        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2802          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2803          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2804          $self->{s_kwd} = '';            
2805              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2806            } else {
2807              
2808              $self->{state} = DATA_STATE;
2809              $self->{s_kwd} = '';
2810            }
2811          ## reconsume          ## reconsume
2812    
2813          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2598  sub _get_next_token ($) { Line 2832  sub _get_next_token ($) {
2832          redo A;          redo A;
2833        }        }
2834      } elsif ($self->{state} == COMMENT_STATE) {      } elsif ($self->{state} == COMMENT_STATE) {
2835          ## XML5: "Comment state" and "DOCTYPE comment state".
2836    
2837        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2838                    
2839          $self->{state} = COMMENT_END_DASH_STATE;          $self->{state} = COMMENT_END_DASH_STATE;
# Line 2614  sub _get_next_token ($) { Line 2850  sub _get_next_token ($) {
2850        
2851          redo A;          redo A;
2852        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2853          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2854          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2855          $self->{s_kwd} = '';            
2856              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2857            } else {
2858              
2859              $self->{state} = DATA_STATE;
2860              $self->{s_kwd} = '';
2861            }
2862          ## reconsume          ## reconsume
2863    
2864          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2645  sub _get_next_token ($) { Line 2886  sub _get_next_token ($) {
2886          redo A;          redo A;
2887        }        }
2888      } elsif ($self->{state} == COMMENT_END_DASH_STATE) {      } elsif ($self->{state} == COMMENT_END_DASH_STATE) {
2889        ## XML5: "comment dash state".        ## XML5: "Comment dash state" and "DOCTYPE comment dash state".
2890    
2891        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2892                    
# Line 2663  sub _get_next_token ($) { Line 2904  sub _get_next_token ($) {
2904        
2905          redo A;          redo A;
2906        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2907          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2908          $self->{s_kwd} = '';          if ($self->{in_subset}) {
2909          $self->{state} = DATA_STATE;            
2910          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2911            } else {
2912              
2913              $self->{state} = DATA_STATE;
2914              $self->{s_kwd} = '';
2915            }
2916          ## reconsume          ## reconsume
2917    
2918          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2691  sub _get_next_token ($) { Line 2936  sub _get_next_token ($) {
2936          redo A;          redo A;
2937        }        }
2938      } elsif ($self->{state} == COMMENT_END_STATE) {      } elsif ($self->{state} == COMMENT_END_STATE) {
2939          ## XML5: "Comment end state" and "DOCTYPE comment end state".
2940    
2941        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2942                    if ($self->{in_subset}) {
2943          $self->{state} = DATA_STATE;            
2944          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2945            } else {
2946              
2947              $self->{state} = DATA_STATE;
2948              $self->{s_kwd} = '';
2949            }
2950                    
2951      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2952        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2731  sub _get_next_token ($) { Line 2983  sub _get_next_token ($) {
2983        
2984          redo A;          redo A;
2985        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2986          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2987          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2988          $self->{s_kwd} = '';            
2989              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2990            } else {
2991              
2992              $self->{state} = DATA_STATE;
2993              $self->{s_kwd} = '';
2994            }
2995          ## reconsume          ## reconsume
2996    
2997          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2742  sub _get_next_token ($) { Line 2999  sub _get_next_token ($) {
2999          redo A;          redo A;
3000        } else {        } else {
3001                    
         ## XML5: Not a parse error.  
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',  
                         line => $self->{line_prev},  
                         column => $self->{column_prev});  
3002          $self->{ct}->{data} .= '--' . chr ($self->{nc}); # comment          $self->{ct}->{data} .= '--' . chr ($self->{nc}); # comment
3003          $self->{state} = COMMENT_STATE;          $self->{state} = COMMENT_STATE;
3004                    
# Line 2779  sub _get_next_token ($) { Line 3032  sub _get_next_token ($) {
3032          redo A;          redo A;
3033        } else {        } else {
3034                    
3035            ## XML5: Unless EOF, switch to the bogus comment state.
3036          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before DOCTYPE name');
3037          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
3038          ## reconsume          ## reconsume
3039          redo A;          redo A;
3040        }        }
3041      } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {
3042          ## XML5: "DOCTYPE root name before state".
3043    
3044        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
3045                    
3046          ## Stay in the state          ## Stay in the state
# Line 2802  sub _get_next_token ($) { Line 3058  sub _get_next_token ($) {
3058          redo A;          redo A;
3059        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3060                    
3061            ## XML5: No parse error.
3062          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');
3063          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3064          $self->{s_kwd} = '';          $self->{s_kwd} = '';
# Line 2830  sub _get_next_token ($) { Line 3087  sub _get_next_token ($) {
3087          return  ($self->{ct}); # DOCTYPE (quirks)          return  ($self->{ct}); # DOCTYPE (quirks)
3088    
3089          redo A;          redo A;
3090          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
3091            
3092            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');
3093            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3094            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3095            $self->{in_subset} = 1;
3096            
3097        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3098          $self->{line_prev} = $self->{line};
3099          $self->{column_prev} = $self->{column};
3100          $self->{column}++;
3101          $self->{nc}
3102              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3103        } else {
3104          $self->{set_nc}->($self);
3105        }
3106      
3107            return  ($self->{ct}); # DOCTYPE
3108            redo A;
3109        } else {        } else {
3110                    
3111          $self->{ct}->{name} = chr $self->{nc};          $self->{ct}->{name} = chr $self->{nc};
# Line 2849  sub _get_next_token ($) { Line 3125  sub _get_next_token ($) {
3125          redo A;          redo A;
3126        }        }
3127      } elsif ($self->{state} == DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == DOCTYPE_NAME_STATE) {
3128  ## ISSUE: Redundant "First," in the spec.        ## XML5: "DOCTYPE root name state".
3129    
3130          ## ISSUE: Redundant "First," in the spec.
3131    
3132        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
3133                    
3134          $self->{state} = AFTER_DOCTYPE_NAME_STATE;          $self->{state} = AFTER_DOCTYPE_NAME_STATE;
# Line 2895  sub _get_next_token ($) { Line 3174  sub _get_next_token ($) {
3174          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
3175    
3176          redo A;          redo A;
3177          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
3178            
3179            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3180            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3181            $self->{in_subset} = 1;
3182            
3183        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3184          $self->{line_prev} = $self->{line};
3185          $self->{column_prev} = $self->{column};
3186          $self->{column}++;
3187          $self->{nc}
3188              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3189        } else {
3190          $self->{set_nc}->($self);
3191        }
3192      
3193            return  ($self->{ct}); # DOCTYPE
3194            redo A;
3195        } else {        } else {
3196                    
3197          $self->{ct}->{name}          $self->{ct}->{name}
# Line 2914  sub _get_next_token ($) { Line 3211  sub _get_next_token ($) {
3211          redo A;          redo A;
3212        }        }
3213      } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {
3214          ## XML5: Corresponding to XML5's "DOCTYPE root name after
3215          ## state", but implemented differently.
3216    
3217        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
3218                    
3219          ## Stay in the state          ## Stay in the state
# Line 2930  sub _get_next_token ($) { Line 3230  sub _get_next_token ($) {
3230        
3231          redo A;          redo A;
3232        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3233            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3234              
3235              $self->{state} = DATA_STATE;
3236              $self->{s_kwd} = '';
3237            } else {
3238              
3239              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md def'); ## TODO: type
3240              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3241            }
3242                    
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3243                    
3244      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3245        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2944  sub _get_next_token ($) { Line 3251  sub _get_next_token ($) {
3251        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3252      }      }
3253        
3254            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         return  ($self->{ct}); # DOCTYPE  
   
3255          redo A;          redo A;
3256        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3257            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3258              
3259              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3260              $self->{state} = DATA_STATE;
3261              $self->{s_kwd} = '';
3262              $self->{ct}->{quirks} = 1;
3263            } else {
3264              
3265              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3266              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3267            }
3268                    
3269          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          ## Reconsume.
3270          $self->{state} = DATA_STATE;          return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{s_kwd} = '';  
         ## reconsume  
   
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3271          redo A;          redo A;
3272        } elsif ($self->{nc} == 0x0050 or # P        } elsif ($self->{nc} == 0x0050 or # P
3273                 $self->{nc} == 0x0070) { # p                 $self->{nc} == 0x0070) { # p
3274            
3275          $self->{state} = PUBLIC_STATE;          $self->{state} = PUBLIC_STATE;
3276          $self->{s_kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
3277                    
3278      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3279        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2977  sub _get_next_token ($) { Line 3288  sub _get_next_token ($) {
3288          redo A;          redo A;
3289        } elsif ($self->{nc} == 0x0053 or # S        } elsif ($self->{nc} == 0x0053 or # S
3290                 $self->{nc} == 0x0073) { # s                 $self->{nc} == 0x0073) { # s
3291            
3292          $self->{state} = SYSTEM_STATE;          $self->{state} = SYSTEM_STATE;
3293          $self->{s_kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
3294                    
3295      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3296        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2991  sub _get_next_token ($) { Line 3303  sub _get_next_token ($) {
3303      }      }
3304        
3305          redo A;          redo A;
3306        } else {        } elsif ($self->{nc} == 0x0022 and # "
3307                   ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN or
3308                    $self->{ct}->{type} == PARAMETER_ENTITY_TOKEN)) {
3309                    
3310          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name');          $self->{state} = DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE;
3311          $self->{ct}->{quirks} = 1;          $self->{ct}->{value} = ''; # ENTITY
3312            
3313        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3314          $self->{line_prev} = $self->{line};
3315          $self->{column_prev} = $self->{column};
3316          $self->{column}++;
3317          $self->{nc}
3318              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3319        } else {
3320          $self->{set_nc}->($self);
3321        }
3322      
3323            redo A;
3324          } elsif ($self->{nc} == 0x0027 and # '
3325                   ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN or
3326                    $self->{ct}->{type} == PARAMETER_ENTITY_TOKEN)) {
3327            
3328            $self->{state} = DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE;
3329            $self->{ct}->{value} = ''; # ENTITY
3330            
3331        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3332          $self->{line_prev} = $self->{line};
3333          $self->{column_prev} = $self->{column};
3334          $self->{column}++;
3335          $self->{nc}
3336              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3337        } else {
3338          $self->{set_nc}->($self);
3339        }
3340      
3341            redo A;
3342          } elsif ($self->{is_xml} and
3343                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3344                   $self->{nc} == 0x005B) { # [
3345            
3346            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3347            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3348            $self->{in_subset} = 1;
3349            
3350        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3351          $self->{line_prev} = $self->{line};
3352          $self->{column_prev} = $self->{column};
3353          $self->{column}++;
3354          $self->{nc}
3355              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3356        } else {
3357          $self->{set_nc}->($self);
3358        }
3359      
3360            return  ($self->{ct}); # DOCTYPE
3361            redo A;
3362          } else {
3363            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name'); ## TODO: type
3364    
3365            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3366              
3367              $self->{ct}->{quirks} = 1;
3368              $self->{state} = BOGUS_DOCTYPE_STATE;
3369            } else {
3370              
3371              $self->{state} = BOGUS_MD_STATE;
3372            }
3373    
         $self->{state} = BOGUS_DOCTYPE_STATE;  
3374                    
3375      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3376        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3018  sub _get_next_token ($) { Line 3392  sub _get_next_token ($) {
3392              0x0042, # B              0x0042, # B
3393              0x004C, # L              0x004C, # L
3394              0x0049, # I              0x0049, # I
3395            ]->[length $self->{s_kwd}] or            ]->[length $self->{kwd}] or
3396            $self->{nc} == [            $self->{nc} == [
3397              undef,              undef,
3398              0x0075, # u              0x0075, # u
3399              0x0062, # b              0x0062, # b
3400              0x006C, # l              0x006C, # l
3401              0x0069, # i              0x0069, # i
3402            ]->[length $self->{s_kwd}]) {            ]->[length $self->{kwd}]) {
3403                    
3404          ## Stay in the state.          ## Stay in the state.
3405          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
3406                    
3407      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3408        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3041  sub _get_next_token ($) { Line 3415  sub _get_next_token ($) {
3415      }      }
3416        
3417          redo A;          redo A;
3418        } elsif ((length $self->{s_kwd}) == 5 and        } elsif ((length $self->{kwd}) == 5 and
3419                 ($self->{nc} == 0x0043 or # C                 ($self->{nc} == 0x0043 or # C
3420                  $self->{nc} == 0x0063)) { # c                  $self->{nc} == 0x0063)) { # c
3421                    if ($self->{is_xml} and
3422                ($self->{kwd} ne 'PUBLI' or $self->{nc} == 0x0063)) { # c
3423              
3424              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
3425                              text => 'PUBLIC',
3426                              line => $self->{line_prev},
3427                              column => $self->{column_prev} - 4);
3428            } else {
3429              
3430            }
3431          $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;          $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
3432                    
3433      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3059  sub _get_next_token ($) { Line 3442  sub _get_next_token ($) {
3442        
3443          redo A;          redo A;
3444        } else {        } else {
3445                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name', ## TODO: type
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name',  
3446                          line => $self->{line_prev},                          line => $self->{line_prev},
3447                          column => $self->{column_prev} + 1 - length $self->{s_kwd});                          column => $self->{column_prev} + 1 - length $self->{kwd});
3448          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3449              
3450          $self->{state} = BOGUS_DOCTYPE_STATE;            $self->{ct}->{quirks} = 1;
3451              $self->{state} = BOGUS_DOCTYPE_STATE;
3452            } else {
3453              
3454              $self->{state} = BOGUS_MD_STATE;
3455            }
3456          ## Reconsume.          ## Reconsume.
3457          redo A;          redo A;
3458        }        }
# Line 3077  sub _get_next_token ($) { Line 3464  sub _get_next_token ($) {
3464              0x0053, # S              0x0053, # S
3465              0x0054, # T              0x0054, # T
3466              0x0045, # E              0x0045, # E
3467            ]->[length $self->{s_kwd}] or            ]->[length $self->{kwd}] or
3468            $self->{nc} == [            $self->{nc} == [
3469              undef,              undef,
3470              0x0079, # y              0x0079, # y
3471              0x0073, # s              0x0073, # s
3472              0x0074, # t              0x0074, # t
3473              0x0065, # e              0x0065, # e
3474            ]->[length $self->{s_kwd}]) {            ]->[length $self->{kwd}]) {
3475                    
3476          ## Stay in the state.          ## Stay in the state.
3477          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
3478                    
3479      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3480        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3100  sub _get_next_token ($) { Line 3487  sub _get_next_token ($) {
3487      }      }
3488        
3489          redo A;          redo A;
3490        } elsif ((length $self->{s_kwd}) == 5 and        } elsif ((length $self->{kwd}) == 5 and
3491                 ($self->{nc} == 0x004D or # M                 ($self->{nc} == 0x004D or # M
3492                  $self->{nc} == 0x006D)) { # m                  $self->{nc} == 0x006D)) { # m
3493                    if ($self->{is_xml} and
3494                ($self->{kwd} ne 'SYSTE' or $self->{nc} == 0x006D)) { # m
3495              
3496              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
3497                              text => 'SYSTEM',
3498                              line => $self->{line_prev},
3499                              column => $self->{column_prev} - 4);
3500            } else {
3501              
3502            }
3503          $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;          $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
3504                    
3505      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3118  sub _get_next_token ($) { Line 3514  sub _get_next_token ($) {
3514        
3515          redo A;          redo A;
3516        } else {        } else {
3517                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name', ## TODO: type
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name',  
3518                          line => $self->{line_prev},                          line => $self->{line_prev},
3519                          column => $self->{column_prev} + 1 - length $self->{s_kwd});                          column => $self->{column_prev} + 1 - length $self->{kwd});
3520          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3521              
3522          $self->{state} = BOGUS_DOCTYPE_STATE;            $self->{ct}->{quirks} = 1;
3523              $self->{state} = BOGUS_DOCTYPE_STATE;
3524            } else {
3525              
3526              $self->{state} = BOGUS_MD_STATE;
3527            }
3528          ## Reconsume.          ## Reconsume.
3529          redo A;          redo A;
3530        }        }
# Line 3177  sub _get_next_token ($) { Line 3577  sub _get_next_token ($) {
3577        
3578          redo A;          redo A;
3579        } elsif ($self->{nc} eq 0x003E) { # >        } elsif ($self->{nc} eq 0x003E) { # >
           
3580          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');
3581            
3582          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3583          $self->{s_kwd} = '';            
3584              $self->{state} = DATA_STATE;
3585              $self->{s_kwd} = '';
3586              $self->{ct}->{quirks} = 1;
3587            } else {
3588              
3589              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3590            }
3591            
3592                    
3593      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3594        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3193  sub _get_next_token ($) { Line 3600  sub _get_next_token ($) {
3600        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3601      }      }
3602        
3603            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3604          redo A;          redo A;
3605        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3606            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3607              
3608              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3609              $self->{state} = DATA_STATE;
3610              $self->{s_kwd} = '';
3611              $self->{ct}->{quirks} = 1;
3612            } else {
3613              
3614              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3615              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3616            }
3617                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');  
   
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3618          ## reconsume          ## reconsume
   
         $self->{ct}->{quirks} = 1;  
3619          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
3620          redo A;          redo A;
3621        } else {        } elsif ($self->{is_xml} and
3622                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3623                   $self->{nc} == 0x005B) { # [
3624            
3625            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');
3626            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3627            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3628            $self->{in_subset} = 1;
3629                    
3630        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3631          $self->{line_prev} = $self->{line};
3632          $self->{column_prev} = $self->{column};
3633          $self->{column}++;
3634          $self->{nc}
3635              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3636        } else {
3637          $self->{set_nc}->($self);
3638        }
3639      
3640            return  ($self->{ct}); # DOCTYPE
3641            redo A;
3642          } else {
3643          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC');
         $self->{ct}->{quirks} = 1;  
3644    
3645          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3646              
3647              $self->{ct}->{quirks} = 1;
3648              $self->{state} = BOGUS_DOCTYPE_STATE;
3649            } else {
3650              
3651              $self->{state} = BOGUS_MD_STATE;
3652            }
3653    
3654                    
3655      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3656        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3246  sub _get_next_token ($) { Line 3681  sub _get_next_token ($) {
3681        
3682          redo A;          redo A;
3683        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
3684          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3685    
3686          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3687          $self->{s_kwd} = '';            
3688              $self->{state} = DATA_STATE;
3689              $self->{s_kwd} = '';
3690              $self->{ct}->{quirks} = 1;
3691            } else {
3692              
3693              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3694            }
3695    
3696                    
3697      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3698        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3262  sub _get_next_token ($) { Line 3704  sub _get_next_token ($) {
3704        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3705      }      }
3706        
3707            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3708          redo A;          redo A;
3709        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
3710          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3711    
3712          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3713          $self->{s_kwd} = '';            
3714          ## reconsume            $self->{state} = DATA_STATE;
3715              $self->{s_kwd} = '';
3716          $self->{ct}->{quirks} = 1;            $self->{ct}->{quirks} = 1;
3717            } else {
3718              
3719              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3720            }
3721            
3722            ## Reconsume.
3723          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
3724          redo A;          redo A;
3725        } else {        } else {
3726                    
3727          $self->{ct}->{pubid} # DOCTYPE          $self->{ct}->{pubid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
3728          $self->{read_until}->($self->{ct}->{pubid}, q[">],          $self->{read_until}->($self->{ct}->{pubid}, q[">],
3729                                length $self->{ct}->{pubid});                                length $self->{ct}->{pubid});
3730    
# Line 3317  sub _get_next_token ($) { Line 3759  sub _get_next_token ($) {
3759        
3760          redo A;          redo A;
3761        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
3762          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3763    
3764          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3765          $self->{s_kwd} = '';            
3766              $self->{state} = DATA_STATE;
3767              $self->{s_kwd} = '';
3768              $self->{ct}->{quirks} = 1;
3769            } else {
3770              
3771              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3772            }
3773    
3774                    
3775      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3776        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3333  sub _get_next_token ($) { Line 3782  sub _get_next_token ($) {
3782        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3783      }      }
3784        
3785            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3786          redo A;          redo A;
3787        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
3788          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3789    
3790          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3791          $self->{s_kwd} = '';            
3792              $self->{state} = DATA_STATE;
3793              $self->{s_kwd} = '';
3794              $self->{ct}->{quirks} = 1;
3795            } else {
3796              
3797              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3798            }
3799          
3800          ## reconsume          ## reconsume
3801            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3802          redo A;          redo A;
3803        } else {        } else {
3804                    
3805          $self->{ct}->{pubid} # DOCTYPE          $self->{ct}->{pubid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
3806          $self->{read_until}->($self->{ct}->{pubid}, q['>],          $self->{read_until}->($self->{ct}->{pubid}, q['>],
3807                                length $self->{ct}->{pubid});                                length $self->{ct}->{pubid});
3808    
# Line 3389  sub _get_next_token ($) { Line 3838  sub _get_next_token ($) {
3838          redo A;          redo A;
3839        } elsif ($self->{nc} == 0x0022) { # "        } elsif ($self->{nc} == 0x0022) { # "
3840                    
3841          $self->{ct}->{sysid} = ''; # DOCTYPE          $self->{ct}->{sysid} = ''; # DOCTYPE/ENTITY/NOTATION
3842          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
3843                    
3844      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3405  sub _get_next_token ($) { Line 3854  sub _get_next_token ($) {
3854          redo A;          redo A;
3855        } elsif ($self->{nc} == 0x0027) { # '        } elsif ($self->{nc} == 0x0027) { # '
3856                    
3857          $self->{ct}->{sysid} = ''; # DOCTYPE          $self->{ct}->{sysid} = ''; # DOCTYPE/ENTITY/NOTATION
3858          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
3859                    
3860      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3420  sub _get_next_token ($) { Line 3869  sub _get_next_token ($) {
3869        
3870          redo A;          redo A;
3871        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3872            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3873              if ($self->{is_xml}) {
3874                
3875                $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
3876              } else {
3877                
3878              }
3879              $self->{state} = DATA_STATE;
3880              $self->{s_kwd} = '';
3881            } else {
3882              if ($self->{ct}->{type} == NOTATION_TOKEN) {
3883                
3884              } else {
3885                
3886                $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');            
3887              }
3888              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3889            }
3890                    
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3891                    
3892      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3893        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3434  sub _get_next_token ($) { Line 3899  sub _get_next_token ($) {
3899        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3900      }      }
3901        
3902            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         return  ($self->{ct}); # DOCTYPE  
   
3903          redo A;          redo A;
3904        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3905            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3906              
3907              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3908              
3909              $self->{state} = DATA_STATE;
3910              $self->{s_kwd} = '';
3911              $self->{ct}->{quirks} = 1;
3912            } else {
3913              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3914              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3915            }
3916                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');  
   
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3917          ## reconsume          ## reconsume
3918            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
3919          $self->{ct}->{quirks} = 1;          redo A;
3920          } elsif ($self->{is_xml} and
3921                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3922                   $self->{nc} == 0x005B) { # [
3923            
3924            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
3925            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3926            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3927            $self->{in_subset} = 1;
3928            
3929        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3930          $self->{line_prev} = $self->{line};
3931          $self->{column_prev} = $self->{column};
3932          $self->{column}++;
3933          $self->{nc}
3934              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3935        } else {
3936          $self->{set_nc}->($self);
3937        }
3938      
3939          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
3940          redo A;          redo A;
3941        } else {        } else {
           
3942          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC literal');
         $self->{ct}->{quirks} = 1;  
3943    
3944          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3945              
3946              $self->{ct}->{quirks} = 1;
3947              $self->{state} = BOGUS_DOCTYPE_STATE;
3948            } else {
3949              
3950              $self->{state} = BOGUS_MD_STATE;
3951            }
3952    
3953                    
3954      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3955        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3518  sub _get_next_token ($) { Line 4012  sub _get_next_token ($) {
4012        
4013          redo A;          redo A;
4014        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
4015          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
4016                    
4017      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4018        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3534  sub _get_next_token ($) { Line 4025  sub _get_next_token ($) {
4025      }      }
4026        
4027    
4028          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4029          return  ($self->{ct}); # DOCTYPE            
4030              $self->{state} = DATA_STATE;
4031              $self->{s_kwd} = '';
4032              $self->{ct}->{quirks} = 1;
4033            } else {
4034              
4035              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4036            }
4037    
4038            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4039          redo A;          redo A;
4040        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4041            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4042              
4043              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
4044              $self->{state} = DATA_STATE;
4045              $self->{s_kwd} = '';
4046              $self->{ct}->{quirks} = 1;
4047            } else {
4048              
4049              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
4050              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4051            }
4052                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');  
   
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
4053          ## reconsume          ## reconsume
4054            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4055            redo A;
4056          } elsif ($self->{is_xml} and
4057                   $self->{ct}->{type} == DOCTYPE_TOKEN and
4058                   $self->{nc} == 0x005B) { # [
4059            
4060            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
4061    
4062          $self->{ct}->{quirks} = 1;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4063            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
4064            $self->{in_subset} = 1;
4065            
4066        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4067          $self->{line_prev} = $self->{line};
4068          $self->{column_prev} = $self->{column};
4069          $self->{column}++;
4070          $self->{nc}
4071              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4072        } else {
4073          $self->{set_nc}->($self);
4074        }
4075      
4076          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
4077          redo A;          redo A;
4078        } else {        } else {
           
4079          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM');
         $self->{ct}->{quirks} = 1;  
4080    
4081          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4082                        
4083              $self->{ct}->{quirks} = 1;
4084              $self->{state} = BOGUS_DOCTYPE_STATE;
4085            } else {
4086              
4087              $self->{state} = BOGUS_MD_STATE;
4088            }
4089    
4090                    
4091      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4092        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3585  sub _get_next_token ($) { Line 4116  sub _get_next_token ($) {
4116      }      }
4117        
4118          redo A;          redo A;
4119        } elsif ($self->{nc} == 0x003E) { # >        } elsif (not $self->{is_xml} and $self->{nc} == 0x003E) { # >
           
4120          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4121    
4122          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4123          $self->{s_kwd} = '';            
4124              $self->{state} = DATA_STATE;
4125              $self->{s_kwd} = '';
4126              $self->{ct}->{quirks} = 1;
4127            } else {
4128              
4129              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4130            }
4131            
4132                    
4133      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4134        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3602  sub _get_next_token ($) { Line 4140  sub _get_next_token ($) {
4140        $self->{set_nc}->($self);        $self->{set_nc}->($self);
4141      }      }
4142        
4143            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
4144          redo A;          redo A;
4145        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
4146          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4147    
4148          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4149          $self->{s_kwd} = '';            
4150              $self->{state} = DATA_STATE;
4151              $self->{s_kwd} = '';
4152              $self->{ct}->{quirks} = 1;
4153            } else {
4154              
4155              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4156            }
4157            
4158          ## reconsume          ## reconsume
4159            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
4160          redo A;          redo A;
4161        } else {        } else {
4162                    
4163          $self->{ct}->{sysid} # DOCTYPE          $self->{ct}->{sysid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
4164          $self->{read_until}->($self->{ct}->{sysid}, q[">],          $self->{read_until}->($self->{ct}->{sysid}, q[">],
4165                                length $self->{ct}->{sysid});                                length $self->{ct}->{sysid});
4166    
# Line 3656  sub _get_next_token ($) { Line 4194  sub _get_next_token ($) {
4194      }      }
4195        
4196          redo A;          redo A;
4197        } elsif ($self->{nc} == 0x003E) { # >        } elsif (not $self->{is_xml} and $self->{nc} == 0x003E) { # >
4198                    
4199          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4200    
# Line 3679  sub _get_next_token ($) { Line 4217  sub _get_next_token ($) {
4217    
4218          redo A;          redo A;
4219        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
4220          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4221    
4222          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4223          $self->{s_kwd} = '';            
4224          ## reconsume            $self->{state} = DATA_STATE;
4225              $self->{s_kwd} = '';
4226          $self->{ct}->{quirks} = 1;            $self->{ct}->{quirks} = 1;
4227          return  ($self->{ct}); # DOCTYPE          } else {
4228              
4229              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4230            }
4231    
4232            ## reconsume
4233            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4234          redo A;          redo A;
4235        } else {        } else {
4236                    
4237          $self->{ct}->{sysid} # DOCTYPE          $self->{ct}->{sysid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
4238          $self->{read_until}->($self->{ct}->{sysid}, q['>],          $self->{read_until}->($self->{ct}->{sysid}, q['>],
4239                                length $self->{ct}->{sysid});                                length $self->{ct}->{sysid});
4240    
# Line 3713  sub _get_next_token ($) { Line 4254  sub _get_next_token ($) {
4254        }        }
4255      } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {      } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
4256        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
4257                    if ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN) {
4258          ## Stay in the state            
4259              $self->{state} = BEFORE_NDATA_STATE;
4260            } else {
4261              
4262              ## Stay in the state
4263            }
4264                    
4265      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4266        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3728  sub _get_next_token ($) { Line 4274  sub _get_next_token ($) {
4274        
4275          redo A;          redo A;
4276        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
4277            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4278              
4279              $self->{state} = DATA_STATE;
4280              $self->{s_kwd} = '';
4281            } else {
4282              
4283              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4284            }
4285    
4286                    
4287          $self->{state} = DATA_STATE;      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4288          $self->{s_kwd} = '';        $self->{line_prev} = $self->{line};
4289          $self->{column_prev} = $self->{column};
4290          $self->{column}++;
4291          $self->{nc}
4292              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4293        } else {
4294          $self->{set_nc}->($self);
4295        }
4296      
4297            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4298            redo A;
4299          } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
4300                   ($self->{nc} == 0x004E or # N
4301                    $self->{nc} == 0x006E)) { # n
4302            
4303            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before NDATA'); ## TODO: type
4304            $self->{state} = NDATA_STATE;
4305            $self->{kwd} = chr $self->{nc};
4306                    
4307      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4308        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3742  sub _get_next_token ($) { Line 4314  sub _get_next_token ($) {
4314        $self->{set_nc}->($self);        $self->{set_nc}->($self);
4315      }      }
4316        
4317            redo A;
4318          } elsif ($self->{nc} == -1) {
4319            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4320              
4321              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
4322              $self->{state} = DATA_STATE;
4323              $self->{s_kwd} = '';
4324              $self->{ct}->{quirks} = 1;
4325            } else {
4326              
4327              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
4328              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4329            }
4330    
4331            ## reconsume
4332            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4333            redo A;
4334          } elsif ($self->{is_xml} and
4335                   $self->{ct}->{type} == DOCTYPE_TOKEN and
4336                   $self->{nc} == 0x005B) { # [
4337            
4338            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4339            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
4340            $self->{in_subset} = 1;
4341            
4342        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4343          $self->{line_prev} = $self->{line};
4344          $self->{column_prev} = $self->{column};
4345          $self->{column}++;
4346          $self->{nc}
4347              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4348        } else {
4349          $self->{set_nc}->($self);
4350        }
4351      
4352          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
4353            redo A;
4354          } else {
4355            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');
4356    
4357            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4358              
4359              #$self->{ct}->{quirks} = 1;
4360              $self->{state} = BOGUS_DOCTYPE_STATE;
4361            } else {
4362              
4363              $self->{state} = BOGUS_MD_STATE;
4364            }
4365    
4366            
4367        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4368          $self->{line_prev} = $self->{line};
4369          $self->{column_prev} = $self->{column};
4370          $self->{column}++;
4371          $self->{nc}
4372              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4373        } else {
4374          $self->{set_nc}->($self);
4375        }
4376      
4377            redo A;
4378          }
4379        } elsif ($self->{state} == BEFORE_NDATA_STATE) {
4380          if ($is_space->{$self->{nc}}) {
4381            
4382            ## Stay in the state.
4383            
4384        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4385          $self->{line_prev} = $self->{line};
4386          $self->{column_prev} = $self->{column};
4387          $self->{column}++;
4388          $self->{nc}
4389              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4390        } else {
4391          $self->{set_nc}->($self);
4392        }
4393      
4394            redo A;
4395          } elsif ($self->{nc} == 0x003E) { # >
4396            
4397            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4398            
4399        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4400          $self->{line_prev} = $self->{line};
4401          $self->{column_prev} = $self->{column};
4402          $self->{column}++;
4403          $self->{nc}
4404              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4405        } else {
4406          $self->{set_nc}->($self);
4407        }
4408      
4409            return  ($self->{ct}); # ENTITY
4410            redo A;
4411          } elsif ($self->{nc} == 0x004E or # N
4412                   $self->{nc} == 0x006E) { # n
4413            
4414            $self->{state} = NDATA_STATE;
4415            $self->{kwd} = chr $self->{nc};
4416            
4417        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4418          $self->{line_prev} = $self->{line};
4419          $self->{column_prev} = $self->{column};
4420          $self->{column}++;
4421          $self->{nc}
4422              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4423        } else {
4424          $self->{set_nc}->($self);
4425        }
4426      
4427          redo A;          redo A;
4428        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4429                    
4430          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
4431          $self->{state} = DATA_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
         $self->{s_kwd} = '';  
4432          ## reconsume          ## reconsume
4433            return  ($self->{ct}); # ENTITY
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
4434          redo A;          redo A;
4435        } else {        } else {
4436                    
4437          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');
4438          #$self->{ct}->{quirks} = 1;          $self->{state} = BOGUS_MD_STATE;
   
         $self->{state} = BOGUS_DOCTYPE_STATE;  
4439                    
4440      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4441        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3796  sub _get_next_token ($) { Line 4469  sub _get_next_token ($) {
4469          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
4470    
4471          redo A;          redo A;
4472          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
4473            
4474            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4475            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
4476            $self->{in_subset} = 1;
4477            
4478        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4479          $self->{line_prev} = $self->{line};
4480          $self->{column_prev} = $self->{column};
4481          $self->{column}++;
4482          $self->{nc}
4483              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4484        } else {
4485          $self->{set_nc}->($self);
4486        }
4487      
4488            return  ($self->{ct}); # DOCTYPE
4489            redo A;
4490        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4491                    
4492          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
# Line 3808  sub _get_next_token ($) { Line 4499  sub _get_next_token ($) {
4499        } else {        } else {
4500                    
4501          my $s = '';          my $s = '';
4502          $self->{read_until}->($s, q[>], 0);          $self->{read_until}->($s, q{>[}, 0);
4503    
4504          ## Stay in the state          ## Stay in the state
4505                    
# Line 3968  sub _get_next_token ($) { Line 4659  sub _get_next_token ($) {
4659              0x003C => 1, 0x0026 => 1, -1 => 1, # <, &              0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
4660              $self->{entity_add} => 1,              $self->{entity_add} => 1,
4661            }->{$self->{nc}}) {            }->{$self->{nc}}) {
4662                    if ($self->{is_xml}) {
4663              
4664              $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero',
4665                              line => $self->{line_prev},
4666                              column => $self->{column_prev}
4667                                  + ($self->{nc} == -1 ? 1 : 0));
4668            } else {
4669              
4670              ## No error
4671            }
4672          ## Don't consume          ## Don't consume
         ## No error  
4673          ## Return nothing.          ## Return nothing.
4674          #          #
4675        } elsif ($self->{nc} == 0x0023) { # #        } elsif ($self->{nc} == 0x0023) { # #
4676                    
4677          $self->{state} = ENTITY_HASH_STATE;          $self->{state} = ENTITY_HASH_STATE;
4678          $self->{s_kwd} = '#';          $self->{kwd} = '#';
4679                    
4680      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4681        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3989  sub _get_next_token ($) { Line 4688  sub _get_next_token ($) {
4688      }      }
4689        
4690          redo A;          redo A;
4691        } elsif ((0x0041 <= $self->{nc} and        } elsif ($self->{is_xml} or
4692                   (0x0041 <= $self->{nc} and
4693                  $self->{nc} <= 0x005A) or # A..Z                  $self->{nc} <= 0x005A) or # A..Z
4694                 (0x0061 <= $self->{nc} and                 (0x0061 <= $self->{nc} and
4695                  $self->{nc} <= 0x007A)) { # a..z                  $self->{nc} <= 0x007A)) { # a..z
4696                    
4697          require Whatpm::_NamedEntityList;          require Whatpm::_NamedEntityList;
4698          $self->{state} = ENTITY_NAME_STATE;          $self->{state} = ENTITY_NAME_STATE;
4699          $self->{s_kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
4700          $self->{entity__value} = $self->{s_kwd};          $self->{entity__value} = $self->{kwd};
4701          $self->{entity__match} = 0;          $self->{entity__match} = 0;
4702                    
4703      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 4043  sub _get_next_token ($) { Line 4743  sub _get_next_token ($) {
4743          redo A;          redo A;
4744        }        }
4745      } elsif ($self->{state} == ENTITY_HASH_STATE) {      } elsif ($self->{state} == ENTITY_HASH_STATE) {
4746        if ($self->{nc} == 0x0078 or # x        if ($self->{nc} == 0x0078) { # x
4747            $self->{nc} == 0x0058) { # X          
4748            $self->{state} = HEXREF_X_STATE;
4749            $self->{kwd} .= chr $self->{nc};
4750                    
4751        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4752          $self->{line_prev} = $self->{line};
4753          $self->{column_prev} = $self->{column};
4754          $self->{column}++;
4755          $self->{nc}
4756              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4757        } else {
4758          $self->{set_nc}->($self);
4759        }
4760      
4761            redo A;
4762          } elsif ($self->{nc} == 0x0058) { # X
4763            
4764            if ($self->{is_xml}) {
4765              $self->{parse_error}->(level => $self->{level}->{must}, type => 'uppercase hcro'); ## TODO: type
4766            }
4767          $self->{state} = HEXREF_X_STATE;          $self->{state} = HEXREF_X_STATE;
4768          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
4769                    
4770      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4771        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 4064  sub _get_next_token ($) { Line 4782  sub _get_next_token ($) {
4782                 $self->{nc} <= 0x0039) { # 0..9                 $self->{nc} <= 0x0039) { # 0..9
4783                    
4784          $self->{state} = NCR_NUM_STATE;          $self->{state} = NCR_NUM_STATE;
4785          $self->{s_kwd} = $self->{nc} - 0x0030;          $self->{kwd} = $self->{nc} - 0x0030;
4786                    
4787      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4788        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 4110  sub _get_next_token ($) { Line 4828  sub _get_next_token ($) {
4828        if (0x0030 <= $self->{nc} and        if (0x0030 <= $self->{nc} and
4829            $self->{nc} <= 0x0039) { # 0..9            $self->{nc} <= 0x0039) { # 0..9
4830                    
4831          $self->{s_kwd} *= 10;          $self->{kwd} *= 10;
4832          $self->{s_kwd} += $self->{nc} - 0x0030;          $self->{kwd} += $self->{nc} - 0x0030;
4833                    
4834          ## Stay in the state.          ## Stay in the state.
4835                    
# Line 4147  sub _get_next_token ($) { Line 4865  sub _get_next_token ($) {
4865          #          #
4866        }        }
4867    
4868        my $code = $self->{s_kwd};        my $code = $self->{kwd};
4869        my $l = $self->{line_prev};        my $l = $self->{line_prev};
4870        my $c = $self->{column_prev};        my $c = $self->{column_prev};
4871        if ($charref_map->{$code}) {        if ((not $self->{is_xml} and $charref_map->{$code}) or
4872              ($self->{is_xml} and 0xD800 <= $code and $code <= 0xDFFF) or
4873              ($self->{is_xml} and $code == 0x0000)) {
4874                    
4875          $self->{parse_error}->(level => $self->{level}->{must}, type => 'invalid character reference',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'invalid character reference',
4876                          text => (sprintf 'U+%04X', $code),                          text => (sprintf 'U+%04X', $code),
# Line 4190  sub _get_next_token ($) { Line 4910  sub _get_next_token ($) {
4910          # 0..9, A..F, a..f          # 0..9, A..F, a..f
4911                    
4912          $self->{state} = HEXREF_HEX_STATE;          $self->{state} = HEXREF_HEX_STATE;
4913          $self->{s_kwd} = 0;          $self->{kwd} = 0;
4914          ## Reconsume.          ## Reconsume.
4915          redo A;          redo A;
4916        } else {        } else {
# Line 4208  sub _get_next_token ($) { Line 4928  sub _get_next_token ($) {
4928            $self->{s_kwd} = '';            $self->{s_kwd} = '';
4929            ## Reconsume.            ## Reconsume.
4930            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
4931                      data => '&' . $self->{s_kwd},                      data => '&' . $self->{kwd},
4932                      line => $self->{line_prev},                      line => $self->{line_prev},
4933                      column => $self->{column_prev} - length $self->{s_kwd},                      column => $self->{column_prev} - length $self->{kwd},
4934                     });                     });
4935            redo A;            redo A;
4936          } else {          } else {
4937                        
4938            $self->{ca}->{value} .= '&' . $self->{s_kwd};            $self->{ca}->{value} .= '&' . $self->{kwd};
4939            $self->{state} = $self->{prev_state};            $self->{state} = $self->{prev_state};
4940            $self->{s_kwd} = '';            $self->{s_kwd} = '';
4941            ## Reconsume.            ## Reconsume.
# Line 4226  sub _get_next_token ($) { Line 4946  sub _get_next_token ($) {
4946        if (0x0030 <= $self->{nc} and $self->{nc} <= 0x0039) {        if (0x0030 <= $self->{nc} and $self->{nc} <= 0x0039) {
4947          # 0..9          # 0..9
4948                    
4949          $self->{s_kwd} *= 0x10;          $self->{kwd} *= 0x10;
4950          $self->{s_kwd} += $self->{nc} - 0x0030;          $self->{kwd} += $self->{nc} - 0x0030;
4951          ## Stay in the state.          ## Stay in the state.
4952                    
4953      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 4244  sub _get_next_token ($) { Line 4964  sub _get_next_token ($) {
4964        } elsif (0x0061 <= $self->{nc} and        } elsif (0x0061 <= $self->{nc} and
4965                 $self->{nc} <= 0x0066) { # a..f                 $self->{nc} <= 0x0066) { # a..f
4966                    
4967          $self->{s_kwd} *= 0x10;          $self->{kwd} *= 0x10;
4968          $self->{s_kwd} += $self->{nc} - 0x0060 + 9;          $self->{kwd} += $self->{nc} - 0x0060 + 9;
4969          ## Stay in the state.          ## Stay in the state.
4970                    
4971      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 4262  sub _get_next_token ($) { Line 4982  sub _get_next_token ($) {
4982        } elsif (0x0041 <= $self->{nc} and        } elsif (0x0041 <= $self->{nc} and
4983                 $self->{nc} <= 0x0046) { # A..F                 $self->{nc} <= 0x0046) { # A..F
4984                    
4985          $self->{s_kwd} *= 0x10;          $self->{kwd} *= 0x10;
4986          $self->{s_kwd} += $self->{nc} - 0x0040 + 9;          $self->{kwd} += $self->{nc} - 0x0040 + 9;
4987          ## Stay in the state.          ## Stay in the state.
4988                    
4989      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 4300  sub _get_next_token ($) { Line 5020  sub _get_next_token ($) {
5020          #          #
5021        }        }
5022    
5023        my $code = $self->{s_kwd};        my $code = $self->{kwd};
5024        my $l = $self->{line_prev};        my $l = $self->{line_prev};
5025        my $c = $self->{column_prev};        my $c = $self->{column_prev};
5026        if ($charref_map->{$code}) {        if ((not $self->{is_xml} and $charref_map->{$code}) or
5027              ($self->{is_xml} and 0xD800 <= $code and $code <= 0xDFFF) or
5028              ($self->{is_xml} and $code == 0x0000)) {
5029                    
5030          $self->{parse_error}->(level => $self->{level}->{must}, type => 'invalid character reference',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'invalid character reference',
5031                          text => (sprintf 'U+%04X', $code),                          text => (sprintf 'U+%04X', $code),
# Line 4337  sub _get_next_token ($) { Line 5059  sub _get_next_token ($) {
5059          redo A;          redo A;
5060        }        }
5061      } elsif ($self->{state} == ENTITY_NAME_STATE) {      } elsif ($self->{state} == ENTITY_NAME_STATE) {
5062        if (length $self->{s_kwd} < 30 and        if ((0x0041 <= $self->{nc} and # a
5063            ## NOTE: Some number greater than the maximum length of entity name             $self->{nc} <= 0x005A) or # x
5064            ((0x0041 <= $self->{nc} and # a            (0x0061 <= $self->{nc} and # a
5065              $self->{nc} <= 0x005A) or # x             $self->{nc} <= 0x007A) or # z
5066             (0x0061 <= $self->{nc} and # a            (0x0030 <= $self->{nc} and # 0
5067              $self->{nc} <= 0x007A) or # z             $self->{nc} <= 0x0039) or # 9
5068             (0x0030 <= $self->{nc} and # 0            $self->{nc} == 0x003B or # ;
5069              $self->{nc} <= 0x0039) or # 9            ($self->{is_xml} and
5070             $self->{nc} == 0x003B)) { # ;             not ($is_space->{$self->{nc}} or
5071                    {
5072                      0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
5073                      $self->{entity_add} => 1,
5074                    }->{$self->{nc}}))) {
5075          our $EntityChar;          our $EntityChar;
5076          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
5077          if (defined $EntityChar->{$self->{s_kwd}}) {          if (defined $EntityChar->{$self->{kwd}} or
5078                $self->{ge}->{$self->{kwd}}) {
5079            if ($self->{nc} == 0x003B) { # ;            if ($self->{nc} == 0x003B) { # ;
5080                            if (defined $self->{ge}->{$self->{kwd}}) {
5081              $self->{entity__value} = $EntityChar->{$self->{s_kwd}};                if ($self->{ge}->{$self->{kwd}}->{only_text}) {
5082                    
5083                    $self->{entity__value} = $self->{ge}->{$self->{kwd}}->{value};
5084                  } else {
5085                    if (defined $self->{ge}->{$self->{kwd}}->{notation}) {
5086                      
5087                      $self->{parse_error}->(level => $self->{level}->{must}, type => 'unparsed entity', ## TODO: type
5088                                      value => $self->{kwd});
5089                    } else {
5090                      
5091                    }
5092                    $self->{entity__value} = '&' . $self->{kwd}; ## TODO: expand
5093                  }
5094                } else {
5095                  if ($self->{is_xml}) {
5096                    
5097                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'entity not declared', ## TODO: type
5098                                    value => $self->{kwd},
5099                                    level => {
5100                                              'amp;' => $self->{level}->{warn},
5101                                              'quot;' => $self->{level}->{warn},
5102                                              'lt;' => $self->{level}->{warn},
5103                                              'gt;' => $self->{level}->{warn},
5104                                              'apos;' => $self->{level}->{warn},
5105                                             }->{$self->{kwd}} ||
5106                                             $self->{level}->{must});
5107                  } else {
5108                    
5109                  }
5110                  $self->{entity__value} = $EntityChar->{$self->{kwd}};
5111                }
5112              $self->{entity__match} = 1;              $self->{entity__match} = 1;
5113                            
5114      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 4367  sub _get_next_token ($) { Line 5124  sub _get_next_token ($) {
5124              #              #
5125            } else {            } else {
5126                            
5127              $self->{entity__value} = $EntityChar->{$self->{s_kwd}};              $self->{entity__value} = $EntityChar->{$self->{kwd}};
5128              $self->{entity__match} = -1;              $self->{entity__match} = -1;
5129              ## Stay in the state.              ## Stay in the state.
5130                            
# Line 4415  sub _get_next_token ($) { Line 5172  sub _get_next_token ($) {
5172          if ($self->{prev_state} != DATA_STATE and # in attribute          if ($self->{prev_state} != DATA_STATE and # in attribute
5173              $self->{entity__match} < -1) {              $self->{entity__match} < -1) {
5174                        
5175            $data = '&' . $self->{s_kwd};            $data = '&' . $self->{kwd};
5176            #            #
5177          } else {          } else {
5178                        
# Line 4427  sub _get_next_token ($) { Line 5184  sub _get_next_token ($) {
5184                    
5185          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero',
5186                          line => $self->{line_prev},                          line => $self->{line_prev},
5187                          column => $self->{column_prev} - length $self->{s_kwd});                          column => $self->{column_prev} - length $self->{kwd});
5188          $data = '&' . $self->{s_kwd};          $data = '&' . $self->{kwd};
5189          #          #
5190        }        }
5191        
# Line 4451  sub _get_next_token ($) { Line 5208  sub _get_next_token ($) {
5208                    data => $data,                    data => $data,
5209                    has_reference => $has_ref,                    has_reference => $has_ref,
5210                    line => $self->{line_prev},                    line => $self->{line_prev},
5211                    column => $self->{column_prev} + 1 - length $self->{s_kwd},                    column => $self->{column_prev} + 1 - length $self->{kwd},
5212                   });                   });
5213          redo A;          redo A;
5214        } else {        } else {
# Line 4467  sub _get_next_token ($) { Line 5224  sub _get_next_token ($) {
5224      ## XML-only states      ## XML-only states
5225    
5226      } elsif ($self->{state} == PI_STATE) {      } elsif ($self->{state} == PI_STATE) {
5227          ## XML5: "Pi state" and "DOCTYPE pi state".
5228    
5229        if ($is_space->{$self->{nc}} or        if ($is_space->{$self->{nc}} or
5230            $self->{nc} == 0x003F or # ? ## XML5: Same as "Anything else"            $self->{nc} == 0x003F or # ?
5231            $self->{nc} == -1) {            $self->{nc} == -1) {
5232            ## XML5: U+003F: "pi state": Same as "Anything else"; "DOCTYPE
5233            ## pi state": Switch to the "DOCTYPE pi after state".  EOF:
5234            ## "DOCTYPE pi state": Parse error, switch to the "data
5235            ## state".
5236          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare pio', ## TODO: type          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare pio', ## TODO: type
5237                          line => $self->{line_prev},                          line => $self->{line_prev},
5238                          column => $self->{column_prev}                          column => $self->{column_prev}
# Line 4484  sub _get_next_token ($) { Line 5247  sub _get_next_token ($) {
5247                        };                        };
5248          redo A;          redo A;
5249        } else {        } else {
5250            ## XML5: "DOCTYPE pi state": Stay in the state.
5251          $self->{ct} = {type => PI_TOKEN,          $self->{ct} = {type => PI_TOKEN,
5252                         target => chr $self->{nc},                         target => chr $self->{nc},
5253                         data => '',                         data => '',
# Line 4521  sub _get_next_token ($) { Line 5285  sub _get_next_token ($) {
5285          redo A;          redo A;
5286        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
5287          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type
5288          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
5289          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5290            } else {
5291              $self->{state} = DATA_STATE;
5292              $self->{s_kwd} = '';
5293            }
5294          ## Reconsume.          ## Reconsume.
5295          return  ($self->{ct}); # pi          return  ($self->{ct}); # pi
5296          redo A;          redo A;
# Line 4593  sub _get_next_token ($) { Line 5361  sub _get_next_token ($) {
5361          redo A;          redo A;
5362        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
5363          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type
5364          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
5365          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state"
5366            } else {
5367              $self->{state} = DATA_STATE;
5368              $self->{s_kwd} = '';
5369            }
5370          ## Reprocess.          ## Reprocess.
5371          return  ($self->{ct}); # pi          return  ($self->{ct}); # pi
5372          redo A;          redo A;
# Line 4618  sub _get_next_token ($) { Line 5390  sub _get_next_token ($) {
5390          redo A;          redo A;
5391        }        }
5392      } elsif ($self->{state} == PI_AFTER_STATE) {      } elsif ($self->{state} == PI_AFTER_STATE) {
5393          ## XML5: Part of "Pi after state".
5394    
5395        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
5396          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
5397          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5398            } else {
5399              $self->{state} = DATA_STATE;
5400              $self->{s_kwd} = '';
5401            }
5402                    
5403      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5404        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 4663  sub _get_next_token ($) { Line 5441  sub _get_next_token ($) {
5441          redo A;          redo A;
5442        }        }
5443      } elsif ($self->{state} == PI_DATA_AFTER_STATE) {      } elsif ($self->{state} == PI_DATA_AFTER_STATE) {
5444        ## XML5: Same as "pi after state" in XML5        ## XML5: Same as "pi after state" and "DOCTYPE pi after state".
5445    
5446        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
5447          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
5448          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5449            } else {
5450              $self->{state} = DATA_STATE;
5451              $self->{s_kwd} = '';
5452            }
5453                    
5454      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5455        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 4701  sub _get_next_token ($) { Line 5484  sub _get_next_token ($) {
5484          ## Reprocess.          ## Reprocess.
5485          redo A;          redo A;
5486        }        }
5487    
5488        } elsif ($self->{state} == DOCTYPE_INTERNAL_SUBSET_STATE) {
5489          if ($self->{nc} == 0x003C) { # <
5490            $self->{state} = DOCTYPE_TAG_STATE;
5491            
5492        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5493          $self->{line_prev} = $self->{line};
5494          $self->{column_prev} = $self->{column};
5495          $self->{column}++;
5496          $self->{nc}
5497              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5498        } else {
5499          $self->{set_nc}->($self);
5500        }
5501      
5502            redo A;
5503          } elsif ($self->{nc} == 0x0025) { # %
5504            ## XML5: Not defined yet.
5505    
5506            ## TODO:
5507    
5508            if (not $self->{stop_processing} and
5509                not $self->{document}->xml_standalone) {
5510              $self->{parse_error}->(level => $self->{level}->{must}, type => 'stop processing', ## TODO: type
5511                              level => $self->{level}->{info});
5512              $self->{stop_processing} = 1;
5513            }
5514    
5515            
5516        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5517          $self->{line_prev} = $self->{line};
5518          $self->{column_prev} = $self->{column};
5519          $self->{column}++;
5520          $self->{nc}
5521              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5522        } else {
5523          $self->{set_nc}->($self);
5524        }
5525      
5526            redo A;
5527          } elsif ($self->{nc} == 0x005D) { # ]
5528            delete $self->{in_subset};
5529            $self->{state} = DOCTYPE_INTERNAL_SUBSET_AFTER_STATE;
5530            
5531        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5532          $self->{line_prev} = $self->{line};
5533          $self->{column_prev} = $self->{column};
5534          $self->{column}++;
5535          $self->{nc}
5536              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5537        } else {
5538          $self->{set_nc}->($self);
5539        }
5540      
5541            redo A;
5542          } elsif ($is_space->{$self->{nc}}) {
5543            ## Stay in the state.
5544            
5545        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5546          $self->{line_prev} = $self->{line};
5547          $self->{column_prev} = $self->{column};
5548          $self->{column}++;
5549          $self->{nc}
5550              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5551        } else {
5552          $self->{set_nc}->($self);
5553        }
5554      
5555            redo A;
5556          } elsif ($self->{nc} == -1) {
5557            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed internal subset'); ## TODO: type
5558            delete $self->{in_subset};
5559            $self->{state} = DATA_STATE;
5560            $self->{s_kwd} = '';
5561            ## Reconsume.
5562            return  ({type => END_OF_DOCTYPE_TOKEN});
5563            redo A;
5564          } else {
5565            unless ($self->{internal_subset_tainted}) {
5566              ## XML5: No parse error.
5567              $self->{parse_error}->(level => $self->{level}->{must}, type => 'string in internal subset');
5568              $self->{internal_subset_tainted} = 1;
5569            }
5570            ## Stay in the state.
5571            
5572        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5573          $self->{line_prev} = $self->{line};
5574          $self->{column_prev} = $self->{column};
5575          $self->{column}++;
5576          $self->{nc}
5577              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5578        } else {
5579          $self->{set_nc}->($self);
5580        }
5581      
5582            redo A;
5583          }
5584        } elsif ($self->{state} == DOCTYPE_INTERNAL_SUBSET_AFTER_STATE) {
5585          if ($self->{nc} == 0x003E) { # >
5586            $self->{state} = DATA_STATE;
5587            $self->{s_kwd} = '';
5588            
5589        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5590          $self->{line_prev} = $self->{line};
5591          $self->{column_prev} = $self->{column};
5592          $self->{column}++;
5593          $self->{nc}
5594              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5595        } else {
5596          $self->{set_nc}->($self);
5597        }
5598      
5599            return  ({type => END_OF_DOCTYPE_TOKEN});
5600            redo A;
5601          } elsif ($self->{nc} == -1) {
5602            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
5603            $self->{state} = DATA_STATE;
5604            $self->{s_kwd} = '';
5605            ## Reconsume.
5606            return  ({type => END_OF_DOCTYPE_TOKEN});
5607            redo A;
5608          } else {
5609            ## XML5: No parse error and stay in the state.
5610            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after internal subset'); ## TODO: type
5611    
5612            $self->{state} = BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE;
5613            
5614        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5615          $self->{line_prev} = $self->{line};
5616          $self->{column_prev} = $self->{column};
5617          $self->{column}++;
5618          $self->{nc}
5619              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5620        } else {
5621          $self->{set_nc}->($self);
5622        }
5623      
5624            redo A;
5625          }
5626        } elsif ($self->{state} == BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE) {
5627          if ($self->{nc} == 0x003E) { # >
5628            $self->{state} = DATA_STATE;
5629            $self->{s_kwd} = '';
5630            
5631        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5632          $self->{line_prev} = $self->{line};
5633          $self->{column_prev} = $self->{column};
5634          $self->{column}++;
5635          $self->{nc}
5636              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5637        } else {
5638          $self->{set_nc}->($self);
5639        }
5640      
5641            return  ({type => END_OF_DOCTYPE_TOKEN});
5642            redo A;
5643          } elsif ($self->{nc} == -1) {
5644            $self->{state} = DATA_STATE;
5645            $self->{s_kwd} = '';
5646            ## Reconsume.
5647            return  ({type => END_OF_DOCTYPE_TOKEN});
5648            redo A;
5649          } else {
5650            ## Stay in the state.
5651            
5652        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5653          $self->{line_prev} = $self->{line};
5654          $self->{column_prev} = $self->{column};
5655          $self->{column}++;
5656          $self->{nc}
5657              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5658        } else {
5659          $self->{set_nc}->($self);
5660        }
5661      
5662            redo A;
5663          }
5664        } elsif ($self->{state} == DOCTYPE_TAG_STATE) {
5665          if ($self->{nc} == 0x0021) { # !
5666            $self->{state} = DOCTYPE_MARKUP_DECLARATION_OPEN_STATE;
5667            
5668        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5669          $self->{line_prev} = $self->{line};
5670          $self->{column_prev} = $self->{column};
5671          $self->{column}++;
5672          $self->{nc}
5673              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5674        } else {
5675          $self->{set_nc}->($self);
5676        }
5677      
5678            redo A;
5679          } elsif ($self->{nc} == 0x003F) { # ?
5680            $self->{state} = PI_STATE;
5681            
5682        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5683          $self->{line_prev} = $self->{line};
5684          $self->{column_prev} = $self->{column};
5685          $self->{column}++;
5686          $self->{nc}
5687              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5688        } else {
5689          $self->{set_nc}->($self);
5690        }
5691      
5692            redo A;
5693          } elsif ($self->{nc} == -1) {
5694            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago');
5695            $self->{state} = DATA_STATE;
5696            $self->{s_kwd} = '';
5697            ## Reconsume.
5698            redo A;
5699          } else {
5700            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago', ## XML5: Not a parse error.
5701                            line => $self->{line_prev},
5702                            column => $self->{column_prev});
5703            $self->{state} = BOGUS_COMMENT_STATE;
5704            $self->{ct} = {type => COMMENT_TOKEN,
5705                           data => '',
5706                          }; ## NOTE: Will be discarded.
5707            
5708        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5709          $self->{line_prev} = $self->{line};
5710          $self->{column_prev} = $self->{column};
5711          $self->{column}++;
5712          $self->{nc}
5713              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5714        } else {
5715          $self->{set_nc}->($self);
5716        }
5717      
5718            redo A;
5719          }
5720        } elsif ($self->{state} == DOCTYPE_MARKUP_DECLARATION_OPEN_STATE) {
5721          ## XML5: "DOCTYPE markup declaration state".
5722          
5723          if ($self->{nc} == 0x002D) { # -
5724            $self->{state} = MD_HYPHEN_STATE;
5725            
5726        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5727          $self->{line_prev} = $self->{line};
5728          $self->{column_prev} = $self->{column};
5729          $self->{column}++;
5730          $self->{nc}
5731              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5732        } else {
5733          $self->{set_nc}->($self);
5734        }
5735      
5736            redo A;
5737          } elsif ($self->{nc} == 0x0045 or # E
5738                   $self->{nc} == 0x0065) { # e
5739            $self->{state} = MD_E_STATE;
5740            $self->{kwd} = chr $self->{nc};
5741            
5742        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5743          $self->{line_prev} = $self->{line};
5744          $self->{column_prev} = $self->{column};
5745          $self->{column}++;
5746          $self->{nc}
5747              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5748        } else {
5749          $self->{set_nc}->($self);
5750        }
5751      
5752            redo A;
5753          } elsif ($self->{nc} == 0x0041 or # A
5754                   $self->{nc} == 0x0061) { # a
5755            $self->{state} = MD_ATTLIST_STATE;
5756            $self->{kwd} = chr $self->{nc};
5757            
5758        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5759          $self->{line_prev} = $self->{line};
5760          $self->{column_prev} = $self->{column};
5761          $self->{column}++;
5762          $self->{nc}
5763              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5764        } else {
5765          $self->{set_nc}->($self);
5766        }
5767      
5768            redo A;
5769          } elsif ($self->{nc} == 0x004E or # N
5770                   $self->{nc} == 0x006E) { # n
5771            $self->{state} = MD_NOTATION_STATE;
5772            $self->{kwd} = chr $self->{nc};
5773            
5774        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5775          $self->{line_prev} = $self->{line};
5776          $self->{column_prev} = $self->{column};
5777          $self->{column}++;
5778          $self->{nc}
5779              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5780        } else {
5781          $self->{set_nc}->($self);
5782        }
5783      
5784            redo A;
5785          } else {
5786            #
5787          }
5788          
5789          ## XML5: No parse error.
5790          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5791                          line => $self->{line_prev},
5792                          column => $self->{column_prev} - 1);
5793          ## Reconsume.
5794          $self->{state} = BOGUS_COMMENT_STATE;
5795          $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded.
5796          redo A;
5797        } elsif ($self->{state} == MD_E_STATE) {
5798          if ($self->{nc} == 0x004E or # N
5799              $self->{nc} == 0x006E) { # n
5800            $self->{state} = MD_ENTITY_STATE;
5801            $self->{kwd} .= chr $self->{nc};
5802                    
5803        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5804          $self->{line_prev} = $self->{line};
5805          $self->{column_prev} = $self->{column};
5806          $self->{column}++;
5807          $self->{nc}
5808              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5809        } else {
5810          $self->{set_nc}->($self);
5811        }
5812      
5813            redo A;
5814          } elsif ($self->{nc} == 0x004C or # L
5815                   $self->{nc} == 0x006C) { # l
5816            ## XML5: <!ELEMENT> not supported.
5817            $self->{state} = MD_ELEMENT_STATE;
5818            $self->{kwd} .= chr $self->{nc};
5819            
5820        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5821          $self->{line_prev} = $self->{line};
5822          $self->{column_prev} = $self->{column};
5823          $self->{column}++;
5824          $self->{nc}
5825              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5826        } else {
5827          $self->{set_nc}->($self);
5828        }
5829      
5830            redo A;
5831          } else {
5832            ## XML5: No parse error.
5833            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5834                            line => $self->{line_prev},
5835                            column => $self->{column_prev} - 2
5836                                + 1 * ($self->{nc} == -1));
5837            ## Reconsume.
5838            $self->{state} = BOGUS_COMMENT_STATE;
5839            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5840            redo A;
5841          }
5842        } elsif ($self->{state} == MD_ENTITY_STATE) {
5843          if ($self->{nc} == [
5844                undef,
5845                undef,
5846                0x0054, # T
5847                0x0049, # I
5848                0x0054, # T
5849              ]->[length $self->{kwd}] or
5850              $self->{nc} == [
5851                undef,
5852                undef,
5853                0x0074, # t
5854                0x0069, # i
5855                0x0074, # t
5856              ]->[length $self->{kwd}]) {
5857            ## Stay in the state.
5858            $self->{kwd} .= chr $self->{nc};
5859            
5860        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5861          $self->{line_prev} = $self->{line};
5862          $self->{column_prev} = $self->{column};
5863          $self->{column}++;
5864          $self->{nc}
5865              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5866        } else {
5867          $self->{set_nc}->($self);
5868        }
5869      
5870            redo A;
5871          } elsif ((length $self->{kwd}) == 5 and
5872                   ($self->{nc} == 0x0059 or # Y
5873                    $self->{nc} == 0x0079)) { # y
5874            if ($self->{kwd} ne 'ENTIT' or $self->{nc} == 0x0079) {
5875              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
5876                              text => 'ENTITY',
5877                              line => $self->{line_prev},
5878                              column => $self->{column_prev} - 4);
5879            }
5880            $self->{ct} = {type => GENERAL_ENTITY_TOKEN, name => '',
5881                           line => $self->{line_prev},
5882                           column => $self->{column_prev} - 6};
5883            $self->{state} = DOCTYPE_MD_STATE;
5884            
5885        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5886          $self->{line_prev} = $self->{line};
5887          $self->{column_prev} = $self->{column};
5888          $self->{column}++;
5889          $self->{nc}
5890              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5891        } else {
5892          $self->{set_nc}->($self);
5893        }
5894      
5895            redo A;
5896          } else {
5897            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5898                            line => $self->{line_prev},
5899                            column => $self->{column_prev} - 1
5900                                - (length $self->{kwd})
5901                                + 1 * ($self->{nc} == -1));
5902            $self->{state} = BOGUS_COMMENT_STATE;
5903            ## Reconsume.
5904            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5905            redo A;
5906          }
5907        } elsif ($self->{state} == MD_ELEMENT_STATE) {
5908          if ($self->{nc} == [
5909               undef,
5910               undef,
5911               0x0045, # E
5912               0x004D, # M
5913               0x0045, # E
5914               0x004E, # N
5915              ]->[length $self->{kwd}] or
5916              $self->{nc} == [
5917               undef,
5918               undef,
5919               0x0065, # e
5920               0x006D, # m
5921               0x0065, # e
5922               0x006E, # n
5923              ]->[length $self->{kwd}]) {
5924            ## Stay in the state.
5925            $self->{kwd} .= chr $self->{nc};
5926            
5927        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5928          $self->{line_prev} = $self->{line};
5929          $self->{column_prev} = $self->{column};
5930          $self->{column}++;
5931          $self->{nc}
5932              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5933        } else {
5934          $self->{set_nc}->($self);
5935        }
5936      
5937            redo A;
5938          } elsif ((length $self->{kwd}) == 6 and
5939                   ($self->{nc} == 0x0054 or # T
5940                    $self->{nc} == 0x0074)) { # t
5941            if ($self->{kwd} ne 'ELEMEN' or $self->{nc} == 0x0074) {
5942              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
5943                              text => 'ELEMENT',
5944                              line => $self->{line_prev},
5945                              column => $self->{column_prev} - 5);
5946            }
5947            $self->{ct} = {type => ELEMENT_TOKEN, name => '',
5948                           line => $self->{line_prev},
5949                           column => $self->{column_prev} - 7};
5950            $self->{state} = DOCTYPE_MD_STATE;
5951            
5952        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5953          $self->{line_prev} = $self->{line};
5954          $self->{column_prev} = $self->{column};
5955          $self->{column}++;
5956          $self->{nc}
5957              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5958        } else {
5959          $self->{set_nc}->($self);
5960        }
5961      
5962            redo A;
5963          } else {
5964            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5965                            line => $self->{line_prev},
5966                            column => $self->{column_prev} - 1
5967                                - (length $self->{kwd})
5968                                + 1 * ($self->{nc} == -1));
5969            $self->{state} = BOGUS_COMMENT_STATE;
5970            ## Reconsume.
5971            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5972            redo A;
5973          }
5974        } elsif ($self->{state} == MD_ATTLIST_STATE) {
5975          if ($self->{nc} == [
5976               undef,
5977               0x0054, # T
5978               0x0054, # T
5979               0x004C, # L
5980               0x0049, # I
5981               0x0053, # S
5982              ]->[length $self->{kwd}] or
5983              $self->{nc} == [
5984               undef,
5985               0x0074, # t
5986               0x0074, # t
5987               0x006C, # l
5988               0x0069, # i
5989               0x0073, # s
5990              ]->[length $self->{kwd}]) {
5991            ## Stay in the state.
5992            $self->{kwd} .= chr $self->{nc};
5993            
5994        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5995          $self->{line_prev} = $self->{line};
5996          $self->{column_prev} = $self->{column};
5997          $self->{column}++;
5998          $self->{nc}
5999              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6000        } else {
6001          $self->{set_nc}->($self);
6002        }
6003      
6004            redo A;
6005          } elsif ((length $self->{kwd}) == 6 and
6006                   ($self->{nc} == 0x0054 or # T
6007                    $self->{nc} == 0x0074)) { # t
6008            if ($self->{kwd} ne 'ATTLIS' or $self->{nc} == 0x0074) {
6009              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
6010                              text => 'ATTLIST',
6011                              line => $self->{line_prev},
6012                              column => $self->{column_prev} - 5);
6013            }
6014            $self->{ct} = {type => ATTLIST_TOKEN, name => '',
6015                           attrdefs => [],
6016                           line => $self->{line_prev},
6017                           column => $self->{column_prev} - 7};
6018            $self->{state} = DOCTYPE_MD_STATE;
6019            
6020        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6021          $self->{line_prev} = $self->{line};
6022          $self->{column_prev} = $self->{column};
6023          $self->{column}++;
6024          $self->{nc}
6025              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6026        } else {
6027          $self->{set_nc}->($self);
6028        }
6029      
6030            redo A;
6031          } else {
6032            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
6033                            line => $self->{line_prev},
6034                            column => $self->{column_prev} - 1
6035                                 - (length $self->{kwd})
6036                                 + 1 * ($self->{nc} == -1));
6037            $self->{state} = BOGUS_COMMENT_STATE;
6038            ## Reconsume.
6039            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
6040            redo A;
6041          }
6042        } elsif ($self->{state} == MD_NOTATION_STATE) {
6043          if ($self->{nc} == [
6044               undef,
6045               0x004F, # O
6046               0x0054, # T
6047               0x0041, # A
6048               0x0054, # T
6049               0x0049, # I
6050               0x004F, # O
6051              ]->[length $self->{kwd}] or
6052              $self->{nc} == [
6053               undef,
6054               0x006F, # o
6055               0x0074, # t
6056               0x0061, # a
6057               0x0074, # t
6058               0x0069, # i
6059               0x006F, # o
6060              ]->[length $self->{kwd}]) {
6061            ## Stay in the state.
6062            $self->{kwd} .= chr $self->{nc};
6063            
6064        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6065          $self->{line_prev} = $self->{line};
6066          $self->{column_prev} = $self->{column};
6067          $self->{column}++;
6068          $self->{nc}
6069              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6070        } else {
6071          $self->{set_nc}->($self);
6072        }
6073      
6074            redo A;
6075          } elsif ((length $self->{kwd}) == 7 and
6076                   ($self->{nc} == 0x004E or # N
6077                    $self->{nc} == 0x006E)) { # n
6078            if ($self->{kwd} ne 'NOTATIO' or $self->{nc} == 0x006E) {
6079              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
6080                              text => 'NOTATION',
6081                              line => $self->{line_prev},
6082                              column => $self->{column_prev} - 6);
6083            }
6084            $self->{ct} = {type => NOTATION_TOKEN, name => '',
6085                           line => $self->{line_prev},
6086                           column => $self->{column_prev} - 8};
6087            $self->{state} = DOCTYPE_MD_STATE;
6088            
6089        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6090          $self->{line_prev} = $self->{line};
6091          $self->{column_prev} = $self->{column};
6092          $self->{column}++;
6093          $self->{nc}
6094              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6095        } else {
6096          $self->{set_nc}->($self);
6097        }
6098      
6099            redo A;
6100          } else {
6101            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
6102                            line => $self->{line_prev},
6103                            column => $self->{column_prev} - 1
6104                                - (length $self->{kwd})
6105                                + 1 * ($self->{nc} == -1));
6106            $self->{state} = BOGUS_COMMENT_STATE;
6107            ## Reconsume.
6108            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
6109            redo A;
6110          }
6111        } elsif ($self->{state} == DOCTYPE_MD_STATE) {
6112          ## XML5: "DOCTYPE ENTITY state", "DOCTYPE ATTLIST state", and
6113          ## "DOCTYPE NOTATION state".
6114    
6115          if ($is_space->{$self->{nc}}) {
6116            ## XML5: [NOTATION] Switch to the "DOCTYPE NOTATION identifier state".
6117            $self->{state} = BEFORE_MD_NAME_STATE;
6118            
6119        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6120          $self->{line_prev} = $self->{line};
6121          $self->{column_prev} = $self->{column};
6122          $self->{column}++;
6123          $self->{nc}
6124              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6125        } else {
6126          $self->{set_nc}->($self);
6127        }
6128      
6129            redo A;
6130          } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
6131                   $self->{nc} == 0x0025) { # %
6132            ## XML5: Switch to the "DOCTYPE bogus comment state".
6133            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before md name'); ## TODO: type
6134            $self->{state} = DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE;
6135            
6136        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6137          $self->{line_prev} = $self->{line};
6138          $self->{column_prev} = $self->{column};
6139          $self->{column}++;
6140          $self->{nc}
6141              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6142        } else {
6143          $self->{set_nc}->($self);
6144        }
6145      
6146            redo A;
6147          } elsif ($self->{nc} == -1) {
6148            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6149            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6150            ## Reconsume.
6151            redo A;
6152          } elsif ($self->{nc} == 0x003E) { # >
6153            ## XML5: Switch to the "DOCTYPE bogus comment state".
6154            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md name'); ## TODO: type
6155            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6156            
6157        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6158          $self->{line_prev} = $self->{line};
6159          $self->{column_prev} = $self->{column};
6160          $self->{column}++;
6161          $self->{nc}
6162              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6163        } else {
6164          $self->{set_nc}->($self);
6165        }
6166      
6167            redo A;
6168          } else {
6169            ## XML5: Switch to the "DOCTYPE bogus comment state".
6170            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before md name'); ## TODO: type
6171            $self->{state} = BEFORE_MD_NAME_STATE;
6172            redo A;
6173          }
6174        } elsif ($self->{state} == BEFORE_MD_NAME_STATE) {
6175          ## XML5: "DOCTYPE ENTITY parameter state", "DOCTYPE ENTITY type
6176          ## before state", "DOCTYPE ATTLIST name before state".
6177    
6178          if ($is_space->{$self->{nc}}) {
6179            ## Stay in the state.
6180            
6181        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6182          $self->{line_prev} = $self->{line};
6183          $self->{column_prev} = $self->{column};
6184          $self->{column}++;
6185          $self->{nc}
6186              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6187        } else {
6188          $self->{set_nc}->($self);
6189        }
6190      
6191            redo A;
6192          } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
6193                   $self->{nc} == 0x0025) { # %
6194            $self->{state} = DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE;
6195            
6196        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6197          $self->{line_prev} = $self->{line};
6198          $self->{column_prev} = $self->{column};
6199          $self->{column}++;
6200          $self->{nc}
6201              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6202        } else {
6203          $self->{set_nc}->($self);
6204        }
6205      
6206            redo A;
6207          } elsif ($self->{nc} == 0x003E) { # >
6208            ## XML5: Same as "Anything else".
6209            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md name'); ## TODO: type
6210            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6211            
6212        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6213          $self->{line_prev} = $self->{line};
6214          $self->{column_prev} = $self->{column};
6215          $self->{column}++;
6216          $self->{nc}
6217              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6218        } else {
6219          $self->{set_nc}->($self);
6220        }
6221      
6222            redo A;
6223          } elsif ($self->{nc} == -1) {
6224            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6225            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6226            ## Reconsume.
6227            redo A;
6228          } else {
6229            ## XML5: [ATTLIST] Not defined yet.
6230            $self->{ct}->{name} .= chr $self->{nc};
6231            $self->{state} = MD_NAME_STATE;
6232            
6233        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6234          $self->{line_prev} = $self->{line};
6235          $self->{column_prev} = $self->{column};
6236          $self->{column}++;
6237          $self->{nc}
6238              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6239        } else {
6240          $self->{set_nc}->($self);
6241        }
6242      
6243            redo A;
6244          }
6245        } elsif ($self->{state} == DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE) {
6246          if ($is_space->{$self->{nc}}) {
6247            ## XML5: Switch to the "DOCTYPE ENTITY parameter state".
6248            $self->{ct}->{type} = PARAMETER_ENTITY_TOKEN;
6249            $self->{state} = BEFORE_MD_NAME_STATE;
6250            
6251        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6252          $self->{line_prev} = $self->{line};
6253          $self->{column_prev} = $self->{column};
6254          $self->{column}++;
6255          $self->{nc}
6256              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6257        } else {
6258          $self->{set_nc}->($self);
6259        }
6260      
6261            redo A;
6262          } elsif ($self->{nc} == 0x003E) { # >
6263            ## XML5: Same as "Anything else".
6264            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md name'); ## TODO: type
6265            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6266            
6267        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6268          $self->{line_prev} = $self->{line};
6269          $self->{column_prev} = $self->{column};
6270          $self->{column}++;
6271          $self->{nc}
6272              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6273        } else {
6274          $self->{set_nc}->($self);
6275        }
6276      
6277            redo A;
6278          } elsif ($self->{nc} == -1) {
6279            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md');
6280            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6281            ## Reconsume.
6282            redo A;
6283          } else {
6284            ## XML5: No parse error.
6285            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space after ENTITY percent'); ## TODO: type
6286            $self->{state} = BOGUS_COMMENT_STATE;
6287            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
6288            ## Reconsume.
6289            redo A;
6290          }
6291        } elsif ($self->{state} == MD_NAME_STATE) {
6292          ## XML5: "DOCTYPE ENTITY name state" and "DOCTYPE ATTLIST name state".
6293          
6294          if ($is_space->{$self->{nc}}) {
6295            if ($self->{ct}->{type} == ATTLIST_TOKEN) {
6296              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
6297            } elsif ($self->{ct}->{type} == ELEMENT_TOKEN) {
6298              $self->{state} = AFTER_ELEMENT_NAME_STATE;
6299            } else { # ENTITY/NOTATION
6300              $self->{state} = AFTER_DOCTYPE_NAME_STATE;
6301            }
6302            
6303        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6304          $self->{line_prev} = $self->{line};
6305          $self->{column_prev} = $self->{column};
6306          $self->{column}++;
6307          $self->{nc}
6308              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6309        } else {
6310          $self->{set_nc}->($self);
6311        }
6312      
6313            redo A;
6314          } elsif ($self->{nc} == 0x003E) { # >
6315            if ($self->{ct}->{type} == ATTLIST_TOKEN) {
6316              #
6317            } else {
6318              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md def'); ## TODO: type
6319            }
6320            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6321            
6322        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6323          $self->{line_prev} = $self->{line};
6324          $self->{column_prev} = $self->{column};
6325          $self->{column}++;
6326          $self->{nc}
6327              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6328        } else {
6329          $self->{set_nc}->($self);
6330        }
6331      
6332            return  ($self->{ct}); # ELEMENT/ENTITY/ATTLIST/NOTATION
6333            redo A;
6334          } elsif ($self->{nc} == -1) {
6335            ## XML5: [ATTLIST] No parse error.
6336            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md');
6337            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6338            ## Reconsume.
6339            return  ($self->{ct}); # ELEMENT/ENTITY/ATTLIST/NOTATION
6340            redo A;
6341          } else {
6342            ## XML5: [ATTLIST] Not defined yet.
6343            $self->{ct}->{name} .= chr $self->{nc};
6344            ## Stay in the state.
6345            
6346        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6347          $self->{line_prev} = $self->{line};
6348          $self->{column_prev} = $self->{column};
6349          $self->{column}++;
6350          $self->{nc}
6351              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6352        } else {
6353          $self->{set_nc}->($self);
6354        }
6355      
6356            redo A;
6357          }
6358        } elsif ($self->{state} == DOCTYPE_ATTLIST_NAME_AFTER_STATE) {
6359          if ($is_space->{$self->{nc}}) {
6360            ## Stay in the state.
6361            
6362        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6363          $self->{line_prev} = $self->{line};
6364          $self->{column_prev} = $self->{column};
6365          $self->{column}++;
6366          $self->{nc}
6367              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6368        } else {
6369          $self->{set_nc}->($self);
6370        }
6371      
6372            redo A;
6373          } elsif ($self->{nc} == 0x003E) { # >
6374            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6375            
6376        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6377          $self->{line_prev} = $self->{line};
6378          $self->{column_prev} = $self->{column};
6379          $self->{column}++;
6380          $self->{nc}
6381              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6382        } else {
6383          $self->{set_nc}->($self);
6384        }
6385      
6386            return  ($self->{ct}); # ATTLIST
6387            redo A;
6388          } elsif ($self->{nc} == -1) {
6389            ## XML5: No parse error.
6390            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6391            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6392            return  ($self->{ct});
6393            redo A;
6394          } else {
6395            ## XML5: Not defined yet.
6396            $self->{ca} = {name => chr ($self->{nc}), # attrdef
6397                           tokens => [],
6398                           line => $self->{line}, column => $self->{column}};
6399            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE;
6400            
6401        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6402          $self->{line_prev} = $self->{line};
6403          $self->{column_prev} = $self->{column};
6404          $self->{column}++;
6405          $self->{nc}
6406              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6407        } else {
6408          $self->{set_nc}->($self);
6409        }
6410      
6411            redo A;
6412          }
6413        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE) {
6414          if ($is_space->{$self->{nc}}) {
6415            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE;
6416            
6417        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6418          $self->{line_prev} = $self->{line};
6419          $self->{column_prev} = $self->{column};
6420          $self->{column}++;
6421          $self->{nc}
6422              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6423        } else {
6424          $self->{set_nc}->($self);
6425        }
6426      
6427            redo A;
6428          } elsif ($self->{nc} == 0x003E) { # >
6429            ## XML5: Same as "anything else".
6430            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr type'); ## TODO: type
6431            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6432            
6433        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6434          $self->{line_prev} = $self->{line};
6435          $self->{column_prev} = $self->{column};
6436          $self->{column}++;
6437          $self->{nc}
6438              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6439        } else {
6440          $self->{set_nc}->($self);
6441        }
6442      
6443            return  ($self->{ct}); # ATTLIST
6444            redo A;
6445          } elsif ($self->{nc} == 0x0028) { # (
6446            ## XML5: Same as "anything else".
6447            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before paren'); ## TODO: type
6448            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6449            
6450        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6451          $self->{line_prev} = $self->{line};
6452          $self->{column_prev} = $self->{column};
6453          $self->{column}++;
6454          $self->{nc}
6455              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6456        } else {
6457          $self->{set_nc}->($self);
6458        }
6459      
6460            redo A;
6461          } elsif ($self->{nc} == -1) {
6462            ## XML5: No parse error.
6463            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6464            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6465            
6466        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6467          $self->{line_prev} = $self->{line};
6468          $self->{column_prev} = $self->{column};
6469          $self->{column}++;
6470          $self->{nc}
6471              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6472        } else {
6473          $self->{set_nc}->($self);
6474        }
6475      
6476            return  ($self->{ct}); # ATTLIST
6477            redo A;
6478          } else {
6479            ## XML5: Not defined yet.
6480            $self->{ca}->{name} .= chr $self->{nc};
6481            ## Stay in the state.
6482            
6483        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6484          $self->{line_prev} = $self->{line};
6485          $self->{column_prev} = $self->{column};
6486          $self->{column}++;
6487          $self->{nc}
6488              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6489        } else {
6490          $self->{set_nc}->($self);
6491        }
6492      
6493            redo A;
6494          }
6495        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE) {
6496          if ($is_space->{$self->{nc}}) {
6497            ## Stay in the state.
6498            
6499        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6500          $self->{line_prev} = $self->{line};
6501          $self->{column_prev} = $self->{column};
6502          $self->{column}++;
6503          $self->{nc}
6504              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6505        } else {
6506          $self->{set_nc}->($self);
6507        }
6508      
6509            redo A;
6510          } elsif ($self->{nc} == 0x003E) { # >
6511            ## XML5: Same as "anything else".
6512            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr type'); ## TODO: type
6513            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6514            
6515        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6516          $self->{line_prev} = $self->{line};
6517          $self->{column_prev} = $self->{column};
6518          $self->{column}++;
6519          $self->{nc}
6520              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6521        } else {
6522          $self->{set_nc}->($self);
6523        }
6524      
6525            return  ($self->{ct}); # ATTLIST
6526            redo A;
6527          } elsif ($self->{nc} == 0x0028) { # (
6528            ## XML5: Same as "anything else".
6529            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6530            
6531        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6532          $self->{line_prev} = $self->{line};
6533          $self->{column_prev} = $self->{column};
6534          $self->{column}++;
6535          $self->{nc}
6536              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6537        } else {
6538          $self->{set_nc}->($self);
6539        }
6540      
6541            redo A;
6542          } elsif ($self->{nc} == -1) {
6543            ## XML5: No parse error.
6544            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6545            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6546            
6547        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6548          $self->{line_prev} = $self->{line};
6549          $self->{column_prev} = $self->{column};
6550          $self->{column}++;
6551          $self->{nc}
6552              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6553        } else {
6554          $self->{set_nc}->($self);
6555        }
6556      
6557            return  ($self->{ct});
6558            redo A;
6559          } else {
6560            ## XML5: Not defined yet.
6561            $self->{ca}->{type} = chr $self->{nc};
6562            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE;
6563            
6564        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6565          $self->{line_prev} = $self->{line};
6566          $self->{column_prev} = $self->{column};
6567          $self->{column}++;
6568          $self->{nc}
6569              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6570        } else {
6571          $self->{set_nc}->($self);
6572        }
6573      
6574            redo A;
6575          }
6576        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE) {
6577          if ($is_space->{$self->{nc}}) {
6578            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE;
6579            
6580        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6581          $self->{line_prev} = $self->{line};
6582          $self->{column_prev} = $self->{column};
6583          $self->{column}++;
6584          $self->{nc}
6585              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6586        } else {
6587          $self->{set_nc}->($self);
6588        }
6589      
6590            redo A;
6591          } elsif ($self->{nc} == 0x0023) { # #
6592            ## XML5: Same as "anything else".
6593            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6594            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6595            
6596        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6597          $self->{line_prev} = $self->{line};
6598          $self->{column_prev} = $self->{column};
6599          $self->{column}++;
6600          $self->{nc}
6601              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6602        } else {
6603          $self->{set_nc}->($self);
6604        }
6605      
6606            redo A;
6607          } elsif ($self->{nc} == 0x0022) { # "
6608            ## XML5: Same as "anything else".
6609            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6610            $self->{ca}->{value} = '';
6611            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6612            
6613        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6614          $self->{line_prev} = $self->{line};
6615          $self->{column_prev} = $self->{column};
6616          $self->{column}++;
6617          $self->{nc}
6618              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6619        } else {
6620          $self->{set_nc}->($self);
6621        }
6622      
6623            redo A;
6624          } elsif ($self->{nc} == 0x0027) { # '
6625            ## XML5: Same as "anything else".
6626            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6627            $self->{ca}->{value} = '';
6628            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6629            
6630        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6631          $self->{line_prev} = $self->{line};
6632          $self->{column_prev} = $self->{column};
6633          $self->{column}++;
6634          $self->{nc}
6635              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6636        } else {
6637          $self->{set_nc}->($self);
6638        }
6639      
6640            redo A;
6641          } elsif ($self->{nc} == 0x003E) { # >
6642            ## XML5: Same as "anything else".
6643            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6644            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6645            
6646        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6647          $self->{line_prev} = $self->{line};
6648          $self->{column_prev} = $self->{column};
6649          $self->{column}++;
6650          $self->{nc}
6651              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6652        } else {
6653          $self->{set_nc}->($self);
6654        }
6655      
6656            return  ($self->{ct}); # ATTLIST
6657            redo A;
6658          } elsif ($self->{nc} == 0x0028) { # (
6659            ## XML5: Same as "anything else".
6660            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before paren'); ## TODO: type
6661            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6662            
6663        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6664          $self->{line_prev} = $self->{line};
6665          $self->{column_prev} = $self->{column};
6666          $self->{column}++;
6667          $self->{nc}
6668              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6669        } else {
6670          $self->{set_nc}->($self);
6671        }
6672      
6673            redo A;
6674          } elsif ($self->{nc} == -1) {
6675            ## XML5: No parse error.
6676            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6677            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6678            
6679        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6680          $self->{line_prev} = $self->{line};
6681          $self->{column_prev} = $self->{column};
6682          $self->{column}++;
6683          $self->{nc}
6684              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6685        } else {
6686          $self->{set_nc}->($self);
6687        }
6688      
6689            return  ($self->{ct});
6690            redo A;
6691          } else {
6692            ## XML5: Not defined yet.
6693            $self->{ca}->{type} .= chr $self->{nc};
6694            ## Stay in the state.
6695            
6696        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6697          $self->{line_prev} = $self->{line};
6698          $self->{column_prev} = $self->{column};
6699          $self->{column}++;
6700          $self->{nc}
6701              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6702        } else {
6703          $self->{set_nc}->($self);
6704        }
6705      
6706            redo A;
6707          }
6708        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE) {
6709          if ($is_space->{$self->{nc}}) {
6710            ## Stay in the state.
6711            
6712        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6713          $self->{line_prev} = $self->{line};
6714          $self->{column_prev} = $self->{column};
6715          $self->{column}++;
6716          $self->{nc}
6717              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6718        } else {
6719          $self->{set_nc}->($self);
6720        }
6721      
6722            redo A;
6723          } elsif ($self->{nc} == 0x0028) { # (
6724            ## XML5: Same as "anything else".
6725            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6726            
6727        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6728          $self->{line_prev} = $self->{line};
6729          $self->{column_prev} = $self->{column};
6730          $self->{column}++;
6731          $self->{nc}
6732              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6733        } else {
6734          $self->{set_nc}->($self);
6735        }
6736      
6737            redo A;
6738          } elsif ($self->{nc} == 0x0023) { # #
6739            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6740            
6741        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6742          $self->{line_prev} = $self->{line};
6743          $self->{column_prev} = $self->{column};
6744          $self->{column}++;
6745          $self->{nc}
6746              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6747        } else {
6748          $self->{set_nc}->($self);
6749        }
6750      
6751            redo A;
6752          } elsif ($self->{nc} == 0x0022) { # "
6753            ## XML5: Same as "anything else".
6754            $self->{ca}->{value} = '';
6755            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6756            
6757        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6758          $self->{line_prev} = $self->{line};
6759          $self->{column_prev} = $self->{column};
6760          $self->{column}++;
6761          $self->{nc}
6762              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6763        } else {
6764          $self->{set_nc}->($self);
6765        }
6766      
6767            redo A;
6768          } elsif ($self->{nc} == 0x0027) { # '
6769            ## XML5: Same as "anything else".
6770            $self->{ca}->{value} = '';
6771            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6772            
6773        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6774          $self->{line_prev} = $self->{line};
6775          $self->{column_prev} = $self->{column};
6776          $self->{column}++;
6777          $self->{nc}
6778              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6779        } else {
6780          $self->{set_nc}->($self);
6781        }
6782      
6783            redo A;
6784          } elsif ($self->{nc} == 0x003E) { # >
6785            ## XML5: Same as "anything else".
6786            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6787            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6788            
6789        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6790          $self->{line_prev} = $self->{line};
6791          $self->{column_prev} = $self->{column};
6792          $self->{column}++;
6793          $self->{nc}
6794              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6795        } else {
6796          $self->{set_nc}->($self);
6797        }
6798      
6799            return  ($self->{ct}); # ATTLIST
6800            redo A;
6801          } elsif ($self->{nc} == -1) {
6802            ## XML5: No parse error.
6803            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6804            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6805            
6806        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6807          $self->{line_prev} = $self->{line};
6808          $self->{column_prev} = $self->{column};
6809          $self->{column}++;
6810          $self->{nc}
6811              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6812        } else {
6813          $self->{set_nc}->($self);
6814        }
6815      
6816            return  ($self->{ct});
6817            redo A;
6818          } else {
6819            ## XML5: Switch to the "DOCTYPE bogus comment state".
6820            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
6821            $self->{ca}->{value} = '';
6822            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
6823            ## Reconsume.
6824            redo A;
6825          }
6826        } elsif ($self->{state} == BEFORE_ALLOWED_TOKEN_STATE) {
6827          if ($is_space->{$self->{nc}}) {
6828            ## Stay in the state.
6829            
6830        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6831          $self->{line_prev} = $self->{line};
6832          $self->{column_prev} = $self->{column};
6833          $self->{column}++;
6834          $self->{nc}
6835              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6836        } else {
6837          $self->{set_nc}->($self);
6838        }
6839      
6840            redo A;
6841          } elsif ($self->{nc} == 0x007C) { # |
6842            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty allowed token'); ## TODO: type
6843            ## Stay in the state.
6844            
6845        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6846          $self->{line_prev} = $self->{line};
6847          $self->{column_prev} = $self->{column};
6848          $self->{column}++;
6849          $self->{nc}
6850              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6851        } else {
6852          $self->{set_nc}->($self);
6853        }
6854      
6855            redo A;
6856          } elsif ($self->{nc} == 0x0029) { # )
6857            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty allowed token'); ## TODO: type
6858            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
6859            
6860        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6861          $self->{line_prev} = $self->{line};
6862          $self->{column_prev} = $self->{column};
6863          $self->{column}++;
6864          $self->{nc}
6865              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6866        } else {
6867          $self->{set_nc}->($self);
6868        }
6869      
6870            redo A;
6871          } elsif ($self->{nc} == 0x003E) { # >
6872            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
6873            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6874            
6875        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6876          $self->{line_prev} = $self->{line};
6877          $self->{column_prev} = $self->{column};
6878          $self->{column}++;
6879          $self->{nc}
6880              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6881        } else {
6882          $self->{set_nc}->($self);
6883        }
6884      
6885            return  ($self->{ct}); # ATTLIST
6886            redo A;
6887          } elsif ($self->{nc} == -1) {
6888            ## XML5: No parse error.
6889            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6890            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6891            
6892        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6893          $self->{line_prev} = $self->{line};
6894          $self->{column_prev} = $self->{column};
6895          $self->{column}++;
6896          $self->{nc}
6897              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6898        } else {
6899          $self->{set_nc}->($self);
6900        }
6901      
6902            return  ($self->{ct});
6903            redo A;
6904          } else {
6905            push @{$self->{ca}->{tokens}}, chr $self->{nc};
6906            $self->{state} = ALLOWED_TOKEN_STATE;
6907            
6908        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6909          $self->{line_prev} = $self->{line};
6910          $self->{column_prev} = $self->{column};
6911          $self->{column}++;
6912          $self->{nc}
6913              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6914        } else {
6915          $self->{set_nc}->($self);
6916        }
6917      
6918            redo A;
6919          }
6920        } elsif ($self->{state} == ALLOWED_TOKEN_STATE) {
6921          if ($is_space->{$self->{nc}}) {
6922            $self->{state} = AFTER_ALLOWED_TOKEN_STATE;
6923            
6924        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6925          $self->{line_prev} = $self->{line};
6926          $self->{column_prev} = $self->{column};
6927          $self->{column}++;
6928          $self->{nc}
6929              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6930        } else {
6931          $self->{set_nc}->($self);
6932        }
6933      
6934            redo A;
6935          } elsif ($self->{nc} == 0x007C) { # |
6936            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6937            
6938        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6939          $self->{line_prev} = $self->{line};
6940          $self->{column_prev} = $self->{column};
6941          $self->{column}++;
6942          $self->{nc}
6943              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6944        } else {
6945          $self->{set_nc}->($self);
6946        }
6947      
6948            redo A;
6949          } elsif ($self->{nc} == 0x0029) { # )
6950            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
6951            
6952        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6953          $self->{line_prev} = $self->{line};
6954          $self->{column_prev} = $self->{column};
6955          $self->{column}++;
6956          $self->{nc}
6957              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6958        } else {
6959          $self->{set_nc}->($self);
6960        }
6961      
6962            redo A;
6963          } elsif ($self->{nc} == 0x003E) { # >
6964            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
6965            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6966            
6967        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6968          $self->{line_prev} = $self->{line};
6969          $self->{column_prev} = $self->{column};
6970          $self->{column}++;
6971          $self->{nc}
6972              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6973        } else {
6974          $self->{set_nc}->($self);
6975        }
6976      
6977            return  ($self->{ct}); # ATTLIST
6978            redo A;
6979          } elsif ($self->{nc} == -1) {
6980            ## XML5: No parse error.
6981            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6982            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6983            
6984        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6985          $self->{line_prev} = $self->{line};
6986          $self->{column_prev} = $self->{column};
6987          $self->{column}++;
6988          $self->{nc}
6989              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6990        } else {
6991          $self->{set_nc}->($self);
6992        }
6993      
6994            return  ($self->{ct});
6995            redo A;
6996          } else {
6997            $self->{ca}->{tokens}->[-1] .= chr $self->{nc};
6998            ## Stay in the state.
6999            
7000        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7001          $self->{line_prev} = $self->{line};
7002          $self->{column_prev} = $self->{column};
7003          $self->{column}++;
7004          $self->{nc}
7005              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7006        } else {
7007          $self->{set_nc}->($self);
7008        }
7009      
7010            redo A;
7011          }
7012        } elsif ($self->{state} == AFTER_ALLOWED_TOKEN_STATE) {
7013          if ($is_space->{$self->{nc}}) {
7014            ## Stay in the state.
7015            
7016        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7017          $self->{line_prev} = $self->{line};
7018          $self->{column_prev} = $self->{column};
7019          $self->{column}++;
7020          $self->{nc}
7021              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7022        } else {
7023          $self->{set_nc}->($self);
7024        }
7025      
7026            redo A;
7027          } elsif ($self->{nc} == 0x007C) { # |
7028            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
7029            
7030        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7031          $self->{line_prev} = $self->{line};
7032          $self->{column_prev} = $self->{column};
7033          $self->{column}++;
7034          $self->{nc}
7035              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7036        } else {
7037          $self->{set_nc}->($self);
7038        }
7039      
7040            redo A;
7041          } elsif ($self->{nc} == 0x0029) { # )
7042            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
7043            
7044        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7045          $self->{line_prev} = $self->{line};
7046          $self->{column_prev} = $self->{column};
7047          $self->{column}++;
7048          $self->{nc}
7049              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7050        } else {
7051          $self->{set_nc}->($self);
7052        }
7053      
7054            redo A;
7055          } elsif ($self->{nc} == 0x003E) { # >
7056            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
7057            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7058            
7059        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7060          $self->{line_prev} = $self->{line};
7061          $self->{column_prev} = $self->{column};
7062          $self->{column}++;
7063          $self->{nc}
7064              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7065        } else {
7066          $self->{set_nc}->($self);
7067        }
7068      
7069            return  ($self->{ct}); # ATTLIST
7070            redo A;
7071          } elsif ($self->{nc} == -1) {
7072            ## XML5: No parse error.
7073            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7074            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7075            
7076        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7077          $self->{line_prev} = $self->{line};
7078          $self->{column_prev} = $self->{column};
7079          $self->{column}++;
7080          $self->{nc}
7081              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7082        } else {
7083          $self->{set_nc}->($self);
7084        }
7085      
7086            return  ($self->{ct});
7087            redo A;
7088          } else {
7089            $self->{parse_error}->(level => $self->{level}->{must}, type => 'space in allowed token', ## TODO: type
7090                            line => $self->{line_prev},
7091                            column => $self->{column_prev});
7092            $self->{ca}->{tokens}->[-1] .= ' ' . chr $self->{nc};
7093            $self->{state} = ALLOWED_TOKEN_STATE;
7094            
7095        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7096          $self->{line_prev} = $self->{line};
7097          $self->{column_prev} = $self->{column};
7098          $self->{column}++;
7099          $self->{nc}
7100              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7101        } else {
7102          $self->{set_nc}->($self);
7103        }
7104      
7105            redo A;
7106          }
7107        } elsif ($self->{state} == AFTER_ALLOWED_TOKENS_STATE) {
7108          if ($is_space->{$self->{nc}}) {
7109            $self->{state} = BEFORE_ATTR_DEFAULT_STATE;
7110            
7111        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7112          $self->{line_prev} = $self->{line};
7113          $self->{column_prev} = $self->{column};
7114          $self->{column}++;
7115          $self->{nc}
7116              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7117        } else {
7118          $self->{set_nc}->($self);
7119        }
7120      
7121            redo A;
7122          } elsif ($self->{nc} == 0x0023) { # #
7123            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7124            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
7125            
7126        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7127          $self->{line_prev} = $self->{line};
7128          $self->{column_prev} = $self->{column};
7129          $self->{column}++;
7130          $self->{nc}
7131              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7132        } else {
7133          $self->{set_nc}->($self);
7134        }
7135      
7136            redo A;
7137          } elsif ($self->{nc} == 0x0022) { # "
7138            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7139            $self->{ca}->{value} = '';
7140            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7141            
7142        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7143          $self->{line_prev} = $self->{line};
7144          $self->{column_prev} = $self->{column};
7145          $self->{column}++;
7146          $self->{nc}
7147              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7148        } else {
7149          $self->{set_nc}->($self);
7150        }
7151      
7152            redo A;
7153          } elsif ($self->{nc} == 0x0027) { # '
7154            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7155            $self->{ca}->{value} = '';
7156            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7157            
7158        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7159          $self->{line_prev} = $self->{line};
7160          $self->{column_prev} = $self->{column};
7161          $self->{column}++;
7162          $self->{nc}
7163              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7164        } else {
7165          $self->{set_nc}->($self);
7166        }
7167      
7168            redo A;
7169          } elsif ($self->{nc} == 0x003E) { # >
7170            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
7171            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7172            
7173        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7174          $self->{line_prev} = $self->{line};
7175          $self->{column_prev} = $self->{column};
7176          $self->{column}++;
7177          $self->{nc}
7178              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7179        } else {
7180          $self->{set_nc}->($self);
7181        }
7182      
7183            return  ($self->{ct}); # ATTLIST
7184            redo A;
7185          } elsif ($self->{nc} == -1) {
7186            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7187            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7188            
7189        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7190          $self->{line_prev} = $self->{line};
7191          $self->{column_prev} = $self->{column};
7192          $self->{column}++;
7193          $self->{nc}
7194              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7195        } else {
7196          $self->{set_nc}->($self);
7197        }
7198      
7199            return  ($self->{ct});
7200            redo A;
7201          } else {
7202            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
7203            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
7204            ## Reconsume.
7205            redo A;
7206          }
7207        } elsif ($self->{state} == BEFORE_ATTR_DEFAULT_STATE) {
7208          if ($is_space->{$self->{nc}}) {
7209            ## Stay in the state.
7210            
7211        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7212          $self->{line_prev} = $self->{line};
7213          $self->{column_prev} = $self->{column};
7214          $self->{column}++;
7215          $self->{nc}
7216              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7217        } else {
7218          $self->{set_nc}->($self);
7219        }
7220      
7221            redo A;
7222          } elsif ($self->{nc} == 0x0023) { # #
7223            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
7224            
7225        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7226          $self->{line_prev} = $self->{line};
7227          $self->{column_prev} = $self->{column};
7228          $self->{column}++;
7229          $self->{nc}
7230              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7231        } else {
7232          $self->{set_nc}->($self);
7233        }
7234      
7235            redo A;
7236          } elsif ($self->{nc} == 0x0022) { # "
7237            $self->{ca}->{value} = '';
7238            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7239            
7240        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7241          $self->{line_prev} = $self->{line};
7242          $self->{column_prev} = $self->{column};
7243          $self->{column}++;
7244          $self->{nc}
7245              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7246        } else {
7247          $self->{set_nc}->($self);
7248        }
7249      
7250            redo A;
7251          } elsif ($self->{nc} == 0x0027) { # '
7252            $self->{ca}->{value} = '';
7253            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7254            
7255        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7256          $self->{line_prev} = $self->{line};
7257          $self->{column_prev} = $self->{column};
7258          $self->{column}++;
7259          $self->{nc}
7260              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7261        } else {
7262          $self->{set_nc}->($self);
7263        }
7264      
7265            redo A;
7266          } elsif ($self->{nc} == 0x003E) { # >
7267            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
7268            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7269            
7270        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7271          $self->{line_prev} = $self->{line};
7272          $self->{column_prev} = $self->{column};
7273          $self->{column}++;
7274          $self->{nc}
7275              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7276        } else {
7277          $self->{set_nc}->($self);
7278        }
7279      
7280            return  ($self->{ct}); # ATTLIST
7281            redo A;
7282          } elsif ($self->{nc} == -1) {
7283            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7284            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7285            
7286        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7287          $self->{line_prev} = $self->{line};
7288          $self->{column_prev} = $self->{column};
7289          $self->{column}++;
7290          $self->{nc}
7291              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7292        } else {
7293          $self->{set_nc}->($self);
7294        }
7295      
7296            return  ($self->{ct});
7297            redo A;
7298          } else {
7299            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
7300            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
7301            ## Reconsume.
7302            redo A;
7303          }
7304        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE) {
7305          if ($is_space->{$self->{nc}}) {
7306            ## XML5: No parse error.
7307            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no default type'); ## TODO: type
7308            $self->{state} = BOGUS_MD_STATE;
7309            ## Reconsume.
7310            redo A;
7311          } elsif ($self->{nc} == 0x0022) { # "
7312            ## XML5: Same as "anything else".
7313            $self->{ca}->{value} = '';
7314            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7315            
7316        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7317          $self->{line_prev} = $self->{line};
7318          $self->{column_prev} = $self->{column};
7319          $self->{column}++;
7320          $self->{nc}
7321              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7322        } else {
7323          $self->{set_nc}->($self);
7324        }
7325      
7326            redo A;
7327          } elsif ($self->{nc} == 0x0027) { # '
7328            ## XML5: Same as "anything else".
7329            $self->{ca}->{value} = '';
7330            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7331            
7332        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7333          $self->{line_prev} = $self->{line};
7334          $self->{column_prev} = $self->{column};
7335          $self->{column}++;
7336          $self->{nc}
7337              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7338        } else {
7339          $self->{set_nc}->($self);
7340        }
7341      
7342            redo A;
7343          } elsif ($self->{nc} == 0x003E) { # >
7344            ## XML5: Same as "anything else".
7345            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
7346            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7347            
7348        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7349          $self->{line_prev} = $self->{line};
7350          $self->{column_prev} = $self->{column};
7351          $self->{column}++;
7352          $self->{nc}
7353              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7354        } else {
7355          $self->{set_nc}->($self);
7356        }
7357      
7358            return  ($self->{ct}); # ATTLIST
7359            redo A;
7360          } elsif ($self->{nc} == -1) {
7361            ## XML5: No parse error.
7362            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7363            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7364            
7365        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7366          $self->{line_prev} = $self->{line};
7367          $self->{column_prev} = $self->{column};
7368          $self->{column}++;
7369          $self->{nc}
7370              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7371        } else {
7372          $self->{set_nc}->($self);
7373        }
7374      
7375            return  ($self->{ct});
7376            redo A;
7377          } else {
7378            $self->{ca}->{default} = chr $self->{nc};
7379            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE;
7380            
7381        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7382          $self->{line_prev} = $self->{line};
7383          $self->{column_prev} = $self->{column};
7384          $self->{column}++;
7385          $self->{nc}
7386              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7387        } else {
7388          $self->{set_nc}->($self);
7389        }
7390      
7391            redo A;
7392          }
7393        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE) {
7394          if ($is_space->{$self->{nc}}) {
7395            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE;
7396            
7397        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7398          $self->{line_prev} = $self->{line};
7399          $self->{column_prev} = $self->{column};
7400          $self->{column}++;
7401          $self->{nc}
7402              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7403        } else {
7404          $self->{set_nc}->($self);
7405        }
7406      
7407            redo A;
7408          } elsif ($self->{nc} == 0x0022) { # "
7409            ## XML5: Same as "anything else".
7410            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7411            $self->{ca}->{value} = '';
7412            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7413            
7414        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7415          $self->{line_prev} = $self->{line};
7416          $self->{column_prev} = $self->{column};
7417          $self->{column}++;
7418          $self->{nc}
7419              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7420        } else {
7421          $self->{set_nc}->($self);
7422        }
7423      
7424            redo A;
7425          } elsif ($self->{nc} == 0x0027) { # '
7426            ## XML5: Same as "anything else".
7427            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7428            $self->{ca}->{value} = '';
7429            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7430            
7431        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7432          $self->{line_prev} = $self->{line};
7433          $self->{column_prev} = $self->{column};
7434          $self->{column}++;
7435          $self->{nc}
7436              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7437        } else {
7438          $self->{set_nc}->($self);
7439        }
7440      
7441            redo A;
7442          } elsif ($self->{nc} == 0x003E) { # >
7443            ## XML5: Same as "anything else".
7444            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7445            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7446            
7447        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7448          $self->{line_prev} = $self->{line};
7449          $self->{column_prev} = $self->{column};
7450          $self->{column}++;
7451          $self->{nc}
7452              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7453        } else {
7454          $self->{set_nc}->($self);
7455        }
7456      
7457            return  ($self->{ct}); # ATTLIST
7458            redo A;
7459          } elsif ($self->{nc} == -1) {
7460            ## XML5: No parse error.
7461            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7462            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7463            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7464            
7465        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7466          $self->{line_prev} = $self->{line};
7467          $self->{column_prev} = $self->{column};
7468          $self->{column}++;
7469          $self->{nc}
7470              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7471        } else {
7472          $self->{set_nc}->($self);
7473        }
7474      
7475            return  ($self->{ct});
7476            redo A;
7477          } else {
7478            $self->{ca}->{default} .= chr $self->{nc};
7479            ## Stay in the state.
7480            
7481        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7482          $self->{line_prev} = $self->{line};
7483          $self->{column_prev} = $self->{column};
7484          $self->{column}++;
7485          $self->{nc}
7486              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7487        } else {
7488          $self->{set_nc}->($self);
7489        }
7490      
7491            redo A;
7492          }
7493        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE) {
7494          if ($is_space->{$self->{nc}}) {
7495            ## Stay in the state.
7496            
7497        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7498          $self->{line_prev} = $self->{line};
7499          $self->{column_prev} = $self->{column};
7500          $self->{column}++;
7501          $self->{nc}
7502              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7503        } else {
7504          $self->{set_nc}->($self);
7505        }
7506      
7507            redo A;
7508          } elsif ($self->{nc} == 0x0022) { # "
7509            $self->{ca}->{value} = '';
7510            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7511            
7512        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7513          $self->{line_prev} = $self->{line};
7514          $self->{column_prev} = $self->{column};
7515          $self->{column}++;
7516          $self->{nc}
7517              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7518        } else {
7519          $self->{set_nc}->($self);
7520        }
7521      
7522            redo A;
7523          } elsif ($self->{nc} == 0x0027) { # '
7524            $self->{ca}->{value} = '';
7525            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7526            
7527        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7528          $self->{line_prev} = $self->{line};
7529          $self->{column_prev} = $self->{column};
7530          $self->{column}++;
7531          $self->{nc}
7532              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7533        } else {
7534          $self->{set_nc}->($self);
7535        }
7536      
7537            redo A;
7538          } elsif ($self->{nc} == 0x003E) { # >
7539            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7540            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7541            
7542        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7543          $self->{line_prev} = $self->{line};
7544          $self->{column_prev} = $self->{column};
7545          $self->{column}++;
7546          $self->{nc}
7547              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7548        } else {
7549          $self->{set_nc}->($self);
7550        }
7551      
7552            return  ($self->{ct}); # ATTLIST
7553            redo A;
7554          } elsif ($self->{nc} == -1) {
7555            ## XML5: No parse error.
7556            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7557            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7558            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7559            
7560        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7561          $self->{line_prev} = $self->{line};
7562          $self->{column_prev} = $self->{column};
7563          $self->{column}++;
7564          $self->{nc}
7565              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7566        } else {
7567          $self->{set_nc}->($self);
7568        }
7569      
7570            return  ($self->{ct});
7571            redo A;
7572          } else {
7573            ## XML5: Not defined yet.
7574            if ($self->{ca}->{default} eq 'FIXED') {
7575              $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
7576            } else {
7577              push @{$self->{ct}->{attrdefs}}, $self->{ca};
7578              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7579            }
7580            ## Reconsume.
7581            redo A;
7582          }
7583        } elsif ($self->{state} == AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE) {
7584          if ($is_space->{$self->{nc}} or
7585              $self->{nc} == -1 or
7586              $self->{nc} == 0x003E) { # >
7587            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7588            ## Reconsume.
7589            redo A;
7590          } else {
7591            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before attr name'); ## TODO: type
7592            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7593            ## Reconsume.
7594            redo A;
7595          }
7596        } elsif ($self->{state} == NDATA_STATE) {
7597          ## ASCII case-insensitive
7598          if ($self->{nc} == [
7599                undef,
7600                0x0044, # D
7601                0x0041, # A
7602                0x0054, # T
7603              ]->[length $self->{kwd}] or
7604              $self->{nc} == [
7605                undef,
7606                0x0064, # d
7607                0x0061, # a
7608                0x0074, # t
7609              ]->[length $self->{kwd}]) {
7610            
7611            ## Stay in the state.
7612            $self->{kwd} .= chr $self->{nc};
7613            
7614        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7615          $self->{line_prev} = $self->{line};
7616          $self->{column_prev} = $self->{column};
7617          $self->{column}++;
7618          $self->{nc}
7619              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7620        } else {
7621          $self->{set_nc}->($self);
7622        }
7623      
7624            redo A;
7625          } elsif ((length $self->{kwd}) == 4 and
7626                   ($self->{nc} == 0x0041 or # A
7627                    $self->{nc} == 0x0061)) { # a
7628            if ($self->{kwd} ne 'NDAT' or $self->{nc} == 0x0061) { # a
7629              
7630              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
7631                              text => 'NDATA',
7632                              line => $self->{line_prev},
7633                              column => $self->{column_prev} - 4);
7634            } else {
7635              
7636            }
7637            $self->{state} = AFTER_NDATA_STATE;
7638            
7639        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7640          $self->{line_prev} = $self->{line};
7641          $self->{column_prev} = $self->{column};
7642          $self->{column}++;
7643          $self->{nc}
7644              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7645        } else {
7646          $self->{set_nc}->($self);
7647        }
7648      
7649            redo A;
7650          } else {
7651            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after literal', ## TODO: type
7652                            line => $self->{line_prev},
7653                            column => $self->{column_prev} + 1
7654                                - length $self->{kwd});
7655            
7656            $self->{state} = BOGUS_MD_STATE;
7657            ## Reconsume.
7658            redo A;
7659          }
7660        } elsif ($self->{state} == AFTER_NDATA_STATE) {
7661          if ($is_space->{$self->{nc}}) {
7662            $self->{state} = BEFORE_NOTATION_NAME_STATE;
7663            
7664        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7665          $self->{line_prev} = $self->{line};
7666          $self->{column_prev} = $self->{column};
7667          $self->{column}++;
7668          $self->{nc}
7669              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7670        } else {
7671          $self->{set_nc}->($self);
7672        }
7673      
7674            redo A;
7675          } elsif ($self->{nc} == 0x003E) { # >
7676            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no notation name'); ## TODO: type
7677            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7678            
7679        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7680          $self->{line_prev} = $self->{line};
7681          $self->{column_prev} = $self->{column};
7682          $self->{column}++;
7683          $self->{nc}
7684              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7685        } else {
7686          $self->{set_nc}->($self);
7687        }
7688      
7689            return  ($self->{ct}); # ENTITY
7690            redo A;
7691          } elsif ($self->{nc} == -1) {
7692            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7693            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7694            
7695        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7696          $self->{line_prev} = $self->{line};
7697          $self->{column_prev} = $self->{column};
7698          $self->{column}++;
7699          $self->{nc}
7700              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7701        } else {
7702          $self->{set_nc}->($self);
7703        }
7704      
7705            return  ($self->{ct}); # ENTITY
7706            redo A;
7707          } else {
7708            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after literal', ## TODO: type
7709                            line => $self->{line_prev},
7710                            column => $self->{column_prev} + 1
7711                                - length $self->{kwd});
7712            $self->{state} = BOGUS_MD_STATE;
7713            ## Reconsume.
7714            redo A;
7715          }
7716        } elsif ($self->{state} == BEFORE_NOTATION_NAME_STATE) {
7717          if ($is_space->{$self->{nc}}) {
7718            ## Stay in the state.
7719            
7720        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7721          $self->{line_prev} = $self->{line};
7722          $self->{column_prev} = $self->{column};
7723          $self->{column}++;
7724          $self->{nc}
7725              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7726        } else {
7727          $self->{set_nc}->($self);
7728        }
7729      
7730            redo A;
7731          } elsif ($self->{nc} == 0x003E) { # >
7732            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no notation name'); ## TODO: type
7733            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7734            
7735        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7736          $self->{line_prev} = $self->{line};
7737          $self->{column_prev} = $self->{column};
7738          $self->{column}++;
7739          $self->{nc}
7740              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7741        } else {
7742          $self->{set_nc}->($self);
7743        }
7744      
7745            return  ($self->{ct}); # ENTITY
7746            redo A;
7747          } elsif ($self->{nc} == -1) {
7748            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7749            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7750            
7751        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7752          $self->{line_prev} = $self->{line};
7753          $self->{column_prev} = $self->{column};
7754          $self->{column}++;
7755          $self->{nc}
7756              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7757        } else {
7758          $self->{set_nc}->($self);
7759        }
7760      
7761            return  ($self->{ct}); # ENTITY
7762            redo A;
7763          } else {
7764            $self->{ct}->{notation} = chr $self->{nc}; # ENTITY
7765            $self->{state} = NOTATION_NAME_STATE;
7766            
7767        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7768          $self->{line_prev} = $self->{line};
7769          $self->{column_prev} = $self->{column};
7770          $self->{column}++;
7771          $self->{nc}
7772              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7773        } else {
7774          $self->{set_nc}->($self);
7775        }
7776      
7777            redo A;
7778          }
7779        } elsif ($self->{state} == NOTATION_NAME_STATE) {
7780          if ($is_space->{$self->{nc}}) {
7781            $self->{state} = AFTER_MD_DEF_STATE;
7782            
7783        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7784          $self->{line_prev} = $self->{line};
7785          $self->{column_prev} = $self->{column};
7786          $self->{column}++;
7787          $self->{nc}
7788              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7789        } else {
7790          $self->{set_nc}->($self);
7791        }
7792      
7793            redo A;
7794          } elsif ($self->{nc} == 0x003E) { # >
7795            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7796            
7797        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7798          $self->{line_prev} = $self->{line};
7799          $self->{column_prev} = $self->{column};
7800          $self->{column}++;
7801          $self->{nc}
7802              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7803        } else {
7804          $self->{set_nc}->($self);
7805        }
7806      
7807            return  ($self->{ct}); # ENTITY
7808            redo A;
7809          } elsif ($self->{nc} == -1) {
7810            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7811            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7812            
7813        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7814          $self->{line_prev} = $self->{line};
7815          $self->{column_prev} = $self->{column};
7816          $self->{column}++;
7817          $self->{nc}
7818              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7819        } else {
7820          $self->{set_nc}->($self);
7821        }
7822      
7823            return  ($self->{ct}); # ENTITY
7824            redo A;
7825          } else {
7826            $self->{ct}->{notation} .= chr $self->{nc}; # ENTITY
7827            ## Stay in the state.
7828            
7829        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7830          $self->{line_prev} = $self->{line};
7831          $self->{column_prev} = $self->{column};
7832          $self->{column}++;
7833          $self->{nc}
7834              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7835        } else {
7836          $self->{set_nc}->($self);
7837        }
7838      
7839            redo A;
7840          }
7841        } elsif ($self->{state} == DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE) {
7842          if ($self->{nc} == 0x0022) { # "
7843            $self->{state} = AFTER_MD_DEF_STATE;
7844            
7845        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7846          $self->{line_prev} = $self->{line};
7847          $self->{column_prev} = $self->{column};
7848          $self->{column}++;
7849          $self->{nc}
7850              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7851        } else {
7852          $self->{set_nc}->($self);
7853        }
7854      
7855            redo A;
7856          } elsif ($self->{nc} == 0x0026) { # &
7857            $self->{prev_state} = $self->{state};
7858            $self->{state} = ENTITY_VALUE_ENTITY_STATE;
7859            $self->{entity_add} = 0x0022; # "
7860            
7861        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7862          $self->{line_prev} = $self->{line};
7863          $self->{column_prev} = $self->{column};
7864          $self->{column}++;
7865          $self->{nc}
7866              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7867        } else {
7868          $self->{set_nc}->($self);
7869        }
7870      
7871            redo A;
7872    ## TODO: %
7873          } elsif ($self->{nc} == -1) {
7874            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed entity value'); ## TODO: type
7875            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7876            ## Reconsume.
7877            return  ($self->{ct}); # ENTITY
7878            redo A;
7879          } else {
7880            $self->{ct}->{value} .= chr $self->{nc}; # ENTITY
7881            
7882        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7883          $self->{line_prev} = $self->{line};
7884          $self->{column_prev} = $self->{column};
7885          $self->{column}++;
7886          $self->{nc}
7887              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7888        } else {
7889          $self->{set_nc}->($self);
7890        }
7891      
7892            redo A;
7893          }
7894        } elsif ($self->{state} == DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE) {
7895          if ($self->{nc} == 0x0027) { # '
7896            $self->{state} = AFTER_MD_DEF_STATE;
7897            
7898        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7899          $self->{line_prev} = $self->{line};
7900          $self->{column_prev} = $self->{column};
7901          $self->{column}++;
7902          $self->{nc}
7903              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7904        } else {
7905          $self->{set_nc}->($self);
7906        }
7907      
7908            redo A;
7909          } elsif ($self->{nc} == 0x0026) { # &
7910            $self->{prev_state} = $self->{state};
7911            $self->{state} = ENTITY_VALUE_ENTITY_STATE;
7912            $self->{entity_add} = 0x0027; # '
7913            
7914        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7915          $self->{line_prev} = $self->{line};
7916          $self->{column_prev} = $self->{column};
7917          $self->{column}++;
7918          $self->{nc}
7919              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7920        } else {
7921          $self->{set_nc}->($self);
7922        }
7923      
7924            redo A;
7925    ## TODO: %
7926          } elsif ($self->{nc} == -1) {
7927            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed entity value'); ## TODO: type
7928            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7929            ## Reconsume.
7930            return  ($self->{ct}); # ENTITY
7931            redo A;
7932          } else {
7933            $self->{ct}->{value} .= chr $self->{nc}; # ENTITY
7934            
7935        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7936          $self->{line_prev} = $self->{line};
7937          $self->{column_prev} = $self->{column};
7938          $self->{column}++;
7939          $self->{nc}
7940              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7941        } else {
7942          $self->{set_nc}->($self);
7943        }
7944      
7945            redo A;
7946          }
7947        } elsif ($self->{state} == ENTITY_VALUE_ENTITY_STATE) {
7948          if ($is_space->{$self->{nc}} or
7949              {
7950                0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
7951                $self->{entity_add} => 1,
7952              }->{$self->{nc}}) {
7953            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero',
7954                            line => $self->{line_prev},
7955                            column => $self->{column_prev}
7956                                + ($self->{nc} == -1 ? 1 : 0));
7957            ## Don't consume
7958            ## Return nothing.
7959            #
7960          } elsif ($self->{nc} == 0x0023) { # #
7961            $self->{ca} = $self->{ct};
7962            $self->{state} = ENTITY_HASH_STATE;
7963            $self->{kwd} = '#';
7964            
7965        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7966          $self->{line_prev} = $self->{line};
7967          $self->{column_prev} = $self->{column};
7968          $self->{column}++;
7969          $self->{nc}
7970              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7971        } else {
7972          $self->{set_nc}->($self);
7973        }
7974      
7975            redo A;
7976          } else {
7977            #
7978          }
7979    
7980          $self->{ct}->{value} .= '&';
7981          $self->{state} = $self->{prev_state};
7982          ## Reconsume.
7983          redo A;
7984        } elsif ($self->{state} == AFTER_ELEMENT_NAME_STATE) {
7985          if ($is_space->{$self->{nc}}) {
7986            $self->{state} = BEFORE_ELEMENT_CONTENT_STATE;
7987            
7988        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7989          $self->{line_prev} = $self->{line};
7990          $self->{column_prev} = $self->{column};
7991          $self->{column}++;
7992          $self->{nc}
7993              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7994        } else {
7995          $self->{set_nc}->($self);
7996        }
7997      
7998            redo A;
7999          } elsif ($self->{nc} == 0x0028) { # (
8000            $self->{state} = AFTER_CM_GROUP_OPEN_STATE;
8001            $self->{ct}->{content} = ['('];
8002            $self->{group_depth} = 1;
8003            
8004        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8005          $self->{line_prev} = $self->{line};
8006          $self->{column_prev} = $self->{column};
8007          $self->{column}++;
8008          $self->{nc}
8009              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8010        } else {
8011          $self->{set_nc}->($self);
8012        }
8013      
8014            redo A;
8015          } elsif ($self->{nc} == 0x003E) { # >
8016            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md def'); ## TODO: type
8017            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8018            
8019        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8020          $self->{line_prev} = $self->{line};
8021          $self->{column_prev} = $self->{column};
8022          $self->{column}++;
8023          $self->{nc}
8024              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8025        } else {
8026          $self->{set_nc}->($self);
8027        }
8028      
8029            return  ($self->{ct}); # ELEMENT
8030            redo A;
8031          } elsif ($self->{nc} == -1) {
8032            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8033            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8034            
8035        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8036          $self->{line_prev} = $self->{line};
8037          $self->{column_prev} = $self->{column};
8038          $self->{column}++;
8039          $self->{nc}
8040              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8041        } else {
8042          $self->{set_nc}->($self);
8043        }
8044      
8045            return  ($self->{ct}); # ELEMENT
8046            redo A;
8047          } else {
8048            $self->{ct}->{content} = [chr $self->{nc}];
8049            $self->{state} = CONTENT_KEYWORD_STATE;
8050            
8051        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8052          $self->{line_prev} = $self->{line};
8053          $self->{column_prev} = $self->{column};
8054          $self->{column}++;
8055          $self->{nc}
8056              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8057        } else {
8058          $self->{set_nc}->($self);
8059        }
8060      
8061            redo A;
8062          }
8063        } elsif ($self->{state} == CONTENT_KEYWORD_STATE) {
8064          if ($is_space->{$self->{nc}}) {
8065            $self->{state} = AFTER_MD_DEF_STATE;
8066            
8067        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8068          $self->{line_prev} = $self->{line};
8069          $self->{column_prev} = $self->{column};
8070          $self->{column}++;
8071          $self->{nc}
8072              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8073        } else {
8074          $self->{set_nc}->($self);
8075        }
8076      
8077            redo A;
8078          } elsif ($self->{nc} == 0x003E) { # >
8079            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8080            
8081        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8082          $self->{line_prev} = $self->{line};
8083          $self->{column_prev} = $self->{column};
8084          $self->{column}++;
8085          $self->{nc}
8086              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8087        } else {
8088          $self->{set_nc}->($self);
8089        }
8090      
8091            return  ($self->{ct}); # ELEMENT
8092            redo A;
8093          } elsif ($self->{nc} == -1) {
8094            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8095            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8096            
8097        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8098          $self->{line_prev} = $self->{line};
8099          $self->{column_prev} = $self->{column};
8100          $self->{column}++;
8101          $self->{nc}
8102              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8103        } else {
8104          $self->{set_nc}->($self);
8105        }
8106      
8107            return  ($self->{ct}); # ELEMENT
8108            redo A;
8109          } else {
8110            $self->{ct}->{content}->[-1] .= chr $self->{nc}; # ELEMENT
8111            ## Stay in the state.
8112            
8113        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8114          $self->{line_prev} = $self->{line};
8115          $self->{column_prev} = $self->{column};
8116          $self->{column}++;
8117          $self->{nc}
8118              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8119        } else {
8120          $self->{set_nc}->($self);
8121        }
8122      
8123            redo A;
8124          }
8125        } elsif ($self->{state} == AFTER_CM_GROUP_OPEN_STATE) {
8126          if ($is_space->{$self->{nc}}) {
8127            ## Stay in the state.
8128            
8129        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8130          $self->{line_prev} = $self->{line};
8131          $self->{column_prev} = $self->{column};
8132          $self->{column}++;
8133          $self->{nc}
8134              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8135        } else {
8136          $self->{set_nc}->($self);
8137        }
8138      
8139            redo A;
8140          } elsif ($self->{nc} == 0x0028) { # (
8141            $self->{group_depth}++;
8142            push @{$self->{ct}->{content}}, chr $self->{nc};
8143            ## Stay in the state.
8144            
8145        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8146          $self->{line_prev} = $self->{line};
8147          $self->{column_prev} = $self->{column};
8148          $self->{column}++;
8149          $self->{nc}
8150              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8151        } else {
8152          $self->{set_nc}->($self);
8153        }
8154      
8155            redo A;
8156          } elsif ($self->{nc} == 0x007C or # |
8157                   $self->{nc} == 0x002C) { # ,
8158            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty element name'); ## TODO: type
8159            ## Stay in the state.
8160            
8161        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8162          $self->{line_prev} = $self->{line};
8163          $self->{column_prev} = $self->{column};
8164          $self->{column}++;
8165          $self->{nc}
8166              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8167        } else {
8168          $self->{set_nc}->($self);
8169        }
8170      
8171            redo A;
8172          } elsif ($self->{nc} == 0x0029) { # )
8173            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty element name'); ## TODO: type
8174            push @{$self->{ct}->{content}}, chr $self->{nc};
8175            $self->{group_depth}--;
8176            $self->{state} = AFTER_CM_GROUP_CLOSE_STATE;
8177            
8178        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8179          $self->{line_prev} = $self->{line};
8180          $self->{column_prev} = $self->{column};
8181          $self->{column}++;
8182          $self->{nc}
8183              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8184        } else {
8185          $self->{set_nc}->($self);
8186        }
8187      
8188            redo A;
8189          } elsif ($self->{nc} == 0x003E) { # >
8190            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8191            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8192            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8193            
8194        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8195          $self->{line_prev} = $self->{line};
8196          $self->{column_prev} = $self->{column};
8197          $self->{column}++;
8198          $self->{nc}
8199              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8200        } else {
8201          $self->{set_nc}->($self);
8202        }
8203      
8204            return  ($self->{ct}); # ELEMENT
8205            redo A;
8206          } elsif ($self->{nc} == -1) {
8207            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8208            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8209            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8210            
8211        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8212          $self->{line_prev} = $self->{line};
8213          $self->{column_prev} = $self->{column};
8214          $self->{column}++;
8215          $self->{nc}
8216              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8217        } else {
8218          $self->{set_nc}->($self);
8219        }
8220      
8221            return  ($self->{ct}); # ELEMENT
8222            redo A;
8223          } else {
8224            push @{$self->{ct}->{content}}, chr $self->{nc};
8225            $self->{state} = CM_ELEMENT_NAME_STATE;
8226            
8227        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8228          $self->{line_prev} = $self->{line};
8229          $self->{column_prev} = $self->{column};
8230          $self->{column}++;
8231          $self->{nc}
8232              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8233        } else {
8234          $self->{set_nc}->($self);
8235        }
8236      
8237            redo A;
8238          }
8239        } elsif ($self->{state} == CM_ELEMENT_NAME_STATE) {
8240          if ($is_space->{$self->{nc}}) {
8241            $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8242            
8243        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8244          $self->{line_prev} = $self->{line};
8245          $self->{column_prev} = $self->{column};
8246          $self->{column}++;
8247          $self->{nc}
8248              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8249        } else {
8250          $self->{set_nc}->($self);
8251        }
8252      
8253            redo A;
8254          } elsif ($self->{nc} == 0x002A or # *
8255                   $self->{nc} == 0x002B or # +
8256                   $self->{nc} == 0x003F) { # ?
8257            push @{$self->{ct}->{content}}, chr $self->{nc};
8258            $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8259            
8260        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8261          $self->{line_prev} = $self->{line};
8262          $self->{column_prev} = $self->{column};
8263          $self->{column}++;
8264          $self->{nc}
8265              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8266        } else {
8267          $self->{set_nc}->($self);
8268        }
8269      
8270            redo A;
8271          } elsif ($self->{nc} == 0x007C or # |
8272                   $self->{nc} == 0x002C) { # ,
8273            push @{$self->{ct}->{content}}, $self->{nc} == 0x007C ? ' | ' : ', ';
8274            $self->{state} = AFTER_CM_GROUP_OPEN_STATE;
8275            
8276        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8277          $self->{line_prev} = $self->{line};
8278          $self->{column_prev} = $self->{column};
8279          $self->{column}++;
8280          $self->{nc}
8281              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8282        } else {
8283          $self->{set_nc}->($self);
8284        }
8285      
8286            redo A;
8287          } elsif ($self->{nc} == 0x0029) { # )
8288            $self->{group_depth}--;
8289            push @{$self->{ct}->{content}}, chr $self->{nc};
8290            $self->{state} = AFTER_CM_GROUP_CLOSE_STATE;
8291            
8292        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8293          $self->{line_prev} = $self->{line};
8294          $self->{column_prev} = $self->{column};
8295          $self->{column}++;
8296          $self->{nc}
8297              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8298        } else {
8299          $self->{set_nc}->($self);
8300        }
8301      
8302            redo A;
8303          } elsif ($self->{nc} == 0x003E) { # >
8304            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8305            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8306            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8307            
8308        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8309          $self->{line_prev} = $self->{line};
8310          $self->{column_prev} = $self->{column};
8311          $self->{column}++;
8312          $self->{nc}
8313              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8314        } else {
8315          $self->{set_nc}->($self);
8316        }
8317      
8318            return  ($self->{ct}); # ELEMENT
8319            redo A;
8320          } elsif ($self->{nc} == -1) {
8321            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8322            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8323            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8324            
8325        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8326          $self->{line_prev} = $self->{line};
8327          $self->{column_prev} = $self->{column};
8328          $self->{column}++;
8329          $self->{nc}
8330              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8331        } else {
8332          $self->{set_nc}->($self);
8333        }
8334      
8335            return  ($self->{ct}); # ELEMENT
8336            redo A;
8337          } else {
8338            $self->{ct}->{content}->[-1] .= chr $self->{nc};
8339            ## Stay in the state.
8340            
8341        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8342          $self->{line_prev} = $self->{line};
8343          $self->{column_prev} = $self->{column};
8344          $self->{column}++;
8345          $self->{nc}
8346              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8347        } else {
8348          $self->{set_nc}->($self);
8349        }
8350      
8351            redo A;
8352          }
8353        } elsif ($self->{state} == AFTER_CM_ELEMENT_NAME_STATE) {
8354          if ($is_space->{$self->{nc}}) {
8355            ## Stay in the state.
8356            
8357        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8358          $self->{line_prev} = $self->{line};
8359          $self->{column_prev} = $self->{column};
8360          $self->{column}++;
8361          $self->{nc}
8362              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8363        } else {
8364          $self->{set_nc}->($self);
8365        }
8366      
8367            redo A;
8368          } elsif ($self->{nc} == 0x007C or # |
8369                   $self->{nc} == 0x002C) { # ,
8370            push @{$self->{ct}->{content}}, $self->{nc} == 0x007C ? ' | ' : ', ';
8371            $self->{state} = AFTER_CM_GROUP_OPEN_STATE;
8372            
8373        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8374          $self->{line_prev} = $self->{line};
8375          $self->{column_prev} = $self->{column};
8376          $self->{column}++;
8377          $self->{nc}
8378              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8379        } else {
8380          $self->{set_nc}->($self);
8381        }
8382      
8383            redo A;
8384          } elsif ($self->{nc} == 0x0029) { # )
8385            $self->{group_depth}--;
8386            push @{$self->{ct}->{content}}, chr $self->{nc};
8387            $self->{state} = AFTER_CM_GROUP_CLOSE_STATE;
8388            
8389        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8390          $self->{line_prev} = $self->{line};
8391          $self->{column_prev} = $self->{column};
8392          $self->{column}++;
8393          $self->{nc}
8394              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8395        } else {
8396          $self->{set_nc}->($self);
8397        }
8398      
8399            redo A;
8400          } elsif ($self->{nc} == 0x003E) { # >
8401            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8402            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8403            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8404            
8405        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8406          $self->{line_prev} = $self->{line};
8407          $self->{column_prev} = $self->{column};
8408          $self->{column}++;
8409          $self->{nc}
8410              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8411        } else {
8412          $self->{set_nc}->($self);
8413        }
8414      
8415            return  ($self->{ct}); # ELEMENT
8416            redo A;
8417          } elsif ($self->{nc} == -1) {
8418            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8419            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8420            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8421            
8422        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8423          $self->{line_prev} = $self->{line};
8424          $self->{column_prev} = $self->{column};
8425          $self->{column}++;
8426          $self->{nc}
8427              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8428        } else {
8429          $self->{set_nc}->($self);
8430        }
8431      
8432            return  ($self->{ct}); # ELEMENT
8433            redo A;
8434          } else {
8435            $self->{parse_error}->(level => $self->{level}->{must}, type => 'after element name'); ## TODO: type
8436            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8437            $self->{state} = BOGUS_MD_STATE;
8438            
8439        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8440          $self->{line_prev} = $self->{line};
8441          $self->{column_prev} = $self->{column};
8442          $self->{column}++;
8443          $self->{nc}
8444              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8445        } else {
8446          $self->{set_nc}->($self);
8447        }
8448      
8449            redo A;
8450          }
8451        } elsif ($self->{state} == AFTER_CM_GROUP_CLOSE_STATE) {
8452          if ($is_space->{$self->{nc}}) {
8453            if ($self->{group_depth}) {
8454              $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8455            } else {
8456              $self->{state} = AFTER_MD_DEF_STATE;
8457            }
8458            
8459        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8460          $self->{line_prev} = $self->{line};
8461          $self->{column_prev} = $self->{column};
8462          $self->{column}++;
8463          $self->{nc}
8464              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8465        } else {
8466          $self->{set_nc}->($self);
8467        }
8468      
8469            redo A;
8470          } elsif ($self->{nc} == 0x002A or # *
8471                   $self->{nc} == 0x002B or # +
8472                   $self->{nc} == 0x003F) { # ?
8473            push @{$self->{ct}->{content}}, chr $self->{nc};
8474            if ($self->{group_depth}) {
8475              $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8476            } else {
8477              $self->{state} = AFTER_MD_DEF_STATE;
8478            }
8479            
8480        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8481          $self->{line_prev} = $self->{line};
8482          $self->{column_prev} = $self->{column};
8483          $self->{column}++;
8484          $self->{nc}
8485              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8486        } else {
8487          $self->{set_nc}->($self);
8488        }
8489      
8490            redo A;
8491          } elsif ($self->{nc} == 0x0029) { # )
8492            if ($self->{group_depth}) {
8493              $self->{group_depth}--;
8494              push @{$self->{ct}->{content}}, chr $self->{nc};
8495              ## Stay in the state.
8496              
8497        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8498          $self->{line_prev} = $self->{line};
8499          $self->{column_prev} = $self->{column};
8500          $self->{column}++;
8501          $self->{nc}
8502              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8503        } else {
8504          $self->{set_nc}->($self);
8505        }
8506      
8507              redo A;
8508            } else {
8509              $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after md def'); ## TODO: type
8510              $self->{state} = BOGUS_MD_STATE;
8511              ## Reconsume.
8512              redo A;
8513            }
8514          } elsif ($self->{nc} == 0x003E) { # >
8515            if ($self->{group_depth}) {
8516              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8517              push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8518            }
8519            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8520            
8521        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8522          $self->{line_prev} = $self->{line};
8523          $self->{column_prev} = $self->{column};
8524          $self->{column}++;
8525          $self->{nc}
8526              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8527        } else {
8528          $self->{set_nc}->($self);
8529        }
8530      
8531            return  ($self->{ct}); # ELEMENT
8532            redo A;
8533          } elsif ($self->{nc} == -1) {
8534            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8535            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8536            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8537            
8538        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8539          $self->{line_prev} = $self->{line};
8540          $self->{column_prev} = $self->{column};
8541          $self->{column}++;
8542          $self->{nc}
8543              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8544        } else {
8545          $self->{set_nc}->($self);
8546        }
8547      
8548            return  ($self->{ct}); # ELEMENT
8549            redo A;
8550          } else {
8551            if ($self->{group_depth}) {
8552              $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8553            } else {
8554              $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after md def'); ## TODO: type
8555              $self->{state} = BOGUS_MD_STATE;
8556            }
8557            ## Reconsume.
8558            redo A;
8559          }
8560        } elsif ($self->{state} == AFTER_MD_DEF_STATE) {
8561          if ($is_space->{$self->{nc}}) {
8562            ## Stay in the state.
8563            
8564        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8565          $self->{line_prev} = $self->{line};
8566          $self->{column_prev} = $self->{column};
8567          $self->{column}++;
8568          $self->{nc}
8569              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8570        } else {
8571          $self->{set_nc}->($self);
8572        }
8573      
8574            redo A;
8575          } elsif ($self->{nc} == 0x003E) { # >
8576            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8577            
8578        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8579          $self->{line_prev} = $self->{line};
8580          $self->{column_prev} = $self->{column};
8581          $self->{column}++;
8582          $self->{nc}
8583              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8584        } else {
8585          $self->{set_nc}->($self);
8586        }
8587      
8588            return  ($self->{ct}); # ENTITY/ELEMENT
8589            redo A;
8590          } elsif ($self->{nc} == -1) {
8591            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8592            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8593            
8594        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8595          $self->{line_prev} = $self->{line};
8596          $self->{column_prev} = $self->{column};
8597          $self->{column}++;
8598          $self->{nc}
8599              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8600        } else {
8601          $self->{set_nc}->($self);
8602        }
8603      
8604            return  ($self->{ct}); # ENTITY/ELEMENT
8605            redo A;
8606          } else {
8607            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after md def'); ## TODO: type
8608            $self->{state} = BOGUS_MD_STATE;
8609            ## Reconsume.
8610            redo A;
8611          }
8612        } elsif ($self->{state} == BOGUS_MD_STATE) {
8613          if ($self->{nc} == 0x003E) { # >
8614            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8615            
8616        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8617          $self->{line_prev} = $self->{line};
8618          $self->{column_prev} = $self->{column};
8619          $self->{column}++;
8620          $self->{nc}
8621              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8622        } else {
8623          $self->{set_nc}->($self);
8624        }
8625      
8626            return  ($self->{ct}); # ATTLIST/ENTITY/NOTATION
8627            redo A;
8628          } elsif ($self->{nc} == -1) {
8629            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8630            ## Reconsume.
8631            return  ($self->{ct}); # ATTLIST/ENTITY/NOTATION
8632            redo A;
8633          } else {
8634            ## Stay in the state.
8635            
8636        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8637          $self->{line_prev} = $self->{line};
8638          $self->{column_prev} = $self->{column};
8639          $self->{column}++;
8640          $self->{nc}
8641              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8642        } else {
8643          $self->{set_nc}->($self);
8644        }
8645      
8646            redo A;
8647          }
8648      } else {      } else {
8649        die "$0: $self->{state}: Unknown state";        die "$0: $self->{state}: Unknown state";
8650      }      }
# Line 4712  sub _get_next_token ($) { Line 8655  sub _get_next_token ($) {
8655    
8656  1;  1;
8657  ## $Date$  ## $Date$
8658                                    

Legend:
Removed from v.1.11  
changed lines
  Added in v.1.27

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24