/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm

Parent Directory | Revision Log | View Patch

revision 1.12 by wakaba, Wed Oct 15 12:49:49 2008 UTC | revision 1.29 by wakaba, Sun Aug 16 04:06:34 2009 UTC
# Line 15  BEGIN { Line 15  BEGIN {
15      CHARACTER_TOKEN      CHARACTER_TOKEN
16      PI_TOKEN      PI_TOKEN
17      ABORT_TOKEN      ABORT_TOKEN
18        END_OF_DOCTYPE_TOKEN
19        ATTLIST_TOKEN
20        ELEMENT_TOKEN
21        GENERAL_ENTITY_TOKEN
22        PARAMETER_ENTITY_TOKEN
23        NOTATION_TOKEN
24    );    );
25        
26    our %EXPORT_TAGS = (    our %EXPORT_TAGS = (
# Line 27  BEGIN { Line 33  BEGIN {
33        CHARACTER_TOKEN        CHARACTER_TOKEN
34        PI_TOKEN        PI_TOKEN
35        ABORT_TOKEN        ABORT_TOKEN
36          END_OF_DOCTYPE_TOKEN
37          ATTLIST_TOKEN
38          ELEMENT_TOKEN
39          GENERAL_ENTITY_TOKEN
40          PARAMETER_ENTITY_TOKEN
41          NOTATION_TOKEN
42      )],      )],
43    );    );
44  }  }
# Line 43  sub END_OF_FILE_TOKEN () { 5 } Line 55  sub END_OF_FILE_TOKEN () { 5 }
55  sub CHARACTER_TOKEN () { 6 }  sub CHARACTER_TOKEN () { 6 }
56  sub PI_TOKEN () { 7 } ## NOTE: XML only.  sub PI_TOKEN () { 7 } ## NOTE: XML only.
57  sub ABORT_TOKEN () { 8 } ## NOTE: For internal processing.  sub ABORT_TOKEN () { 8 } ## NOTE: For internal processing.
58    sub END_OF_DOCTYPE_TOKEN () { 9 } ## NOTE: XML only.
59    sub ATTLIST_TOKEN () { 10 } ## NOTE: XML only.
60    sub ELEMENT_TOKEN () { 11 } ## NOTE: XML only.
61    sub GENERAL_ENTITY_TOKEN () { 12 } ## NOTE: XML only.
62    sub PARAMETER_ENTITY_TOKEN () { 13 } ## NOTE: XML only.
63    sub NOTATION_TOKEN () { 14 } ## NOTE: XML only.
64    
65  ## XML5: XML5 has "empty tag token".  In this implementation, it is  ## XML5: XML5 has "empty tag token".  In this implementation, it is
66  ## represented as a start tag token with $self->{self_closing} flag  ## represented as a start tag token with $self->{self_closing} flag
# Line 133  sub PI_AFTER_STATE () { 55 } Line 151  sub PI_AFTER_STATE () { 55 }
151  sub PI_DATA_AFTER_STATE () { 56 }  sub PI_DATA_AFTER_STATE () { 56 }
152  sub DOCTYPE_INTERNAL_SUBSET_STATE () { 57 }  sub DOCTYPE_INTERNAL_SUBSET_STATE () { 57 }
153  sub DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 58 }  sub DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 58 }
154    sub BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 59 }
155    sub DOCTYPE_TAG_STATE () { 60 }
156    sub DOCTYPE_MARKUP_DECLARATION_OPEN_STATE () { 61 }
157    sub MD_ATTLIST_STATE () { 62 }
158    sub MD_E_STATE () { 63 }
159    sub MD_ELEMENT_STATE () { 64 }
160    sub MD_ENTITY_STATE () { 65 }
161    sub MD_NOTATION_STATE () { 66 }
162    sub DOCTYPE_MD_STATE () { 67 }
163    sub BEFORE_MD_NAME_STATE () { 68 }
164    sub MD_NAME_STATE () { 69 }
165    sub DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE () { 70 }
166    sub DOCTYPE_ATTLIST_NAME_AFTER_STATE () { 71 }
167    sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE () { 72 }
168    sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE () { 73 }
169    sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE () { 74 }
170    sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE () { 75 }
171    sub BEFORE_ALLOWED_TOKEN_STATE () { 76 }
172    sub ALLOWED_TOKEN_STATE () { 77 }
173    sub AFTER_ALLOWED_TOKEN_STATE () { 78 }
174    sub AFTER_ALLOWED_TOKENS_STATE () { 79 }
175    sub BEFORE_ATTR_DEFAULT_STATE () { 80 }
176    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE () { 81 }
177    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE () { 82 }
178    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE () { 83 }
179    sub AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE () { 84 }
180    sub BEFORE_NDATA_STATE () { 85 }
181    sub NDATA_STATE () { 86 }
182    sub AFTER_NDATA_STATE () { 87 }
183    sub BEFORE_NOTATION_NAME_STATE () { 88 }
184    sub NOTATION_NAME_STATE () { 89 }
185    sub DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE () { 90 }
186    sub DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE () { 91 }
187    sub ENTITY_VALUE_ENTITY_STATE () { 92 }
188    sub AFTER_ELEMENT_NAME_STATE () { 93 }
189    sub BEFORE_ELEMENT_CONTENT_STATE () { 94 }
190    sub CONTENT_KEYWORD_STATE () { 95 }
191    sub AFTER_CM_GROUP_OPEN_STATE () { 96 }
192    sub CM_ELEMENT_NAME_STATE () { 97 }
193    sub AFTER_CM_ELEMENT_NAME_STATE () { 98 }
194    sub AFTER_CM_GROUP_CLOSE_STATE () { 99 }
195    sub AFTER_MD_DEF_STATE () { 100 }
196    sub BOGUS_MD_STATE () { 101 }
197    
198  ## Tree constructor state constants (see Whatpm::HTML for the full  ## Tree constructor state constants (see Whatpm::HTML for the full
199  ## list and descriptions)  ## list and descriptions)
# Line 1679  sub _get_next_token ($) { Line 1740  sub _get_next_token ($) {
1740    
1741          redo A;          redo A;
1742        } else {        } else {
1743          if ($self->{nc} == 0x003D) { # =          if ($self->{nc} == 0x003D or $self->{nc} == 0x003C) { # =, <
1744                        
1745            ## XML5: Not a parse error.            ## XML5: Not a parse error.
1746            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');
# Line 1706  sub _get_next_token ($) { Line 1767  sub _get_next_token ($) {
1767          redo A;          redo A;
1768        }        }
1769      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1770        ## XML5: "Tag attribute value double quoted state".        ## XML5: "Tag attribute value double quoted state" and "DOCTYPE
1771          ## ATTLIST attribute value double quoted state".
1772                
1773        if ($self->{nc} == 0x0022) { # "        if ($self->{nc} == 0x0022) { # "
1774                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1775          ## XML5: "Tag attribute name before state".            
1776          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;            ## XML5: "DOCTYPE ATTLIST name after state".
1777              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1778              $self->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE;
1779            } else {
1780              
1781              ## XML5: "Tag attribute name before state".
1782              $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1783            }
1784                    
1785      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1786        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1747  sub _get_next_token ($) { Line 1816  sub _get_next_token ($) {
1816      }      }
1817        
1818          redo A;          redo A;
1819          } elsif ($self->{is_xml} and
1820                   $is_space->{$self->{nc}}) {
1821            
1822            $self->{ca}->{value} .= ' ';
1823            ## Stay in the state.
1824            
1825        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1826          $self->{line_prev} = $self->{line};
1827          $self->{column_prev} = $self->{column};
1828          $self->{column}++;
1829          $self->{nc}
1830              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
1831        } else {
1832          $self->{set_nc}->($self);
1833        }
1834      
1835            redo A;
1836        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
1837          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed attribute value');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed attribute value');
1838          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1839                        
1840            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
1841    
1842              $self->{state} = DATA_STATE;
1843              $self->{s_kwd} = '';
1844              ## reconsume
1845              return  ($self->{ct}); # start tag
1846              redo A;
1847          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1848            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1849            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1761  sub _get_next_token ($) { Line 1853  sub _get_next_token ($) {
1853              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
1854                            
1855            }            }
1856    
1857              $self->{state} = DATA_STATE;
1858              $self->{s_kwd} = '';
1859              ## reconsume
1860              return  ($self->{ct}); # end tag
1861              redo A;
1862            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1863              ## XML5: No parse error above; not defined yet.
1864              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1865              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1866              ## Reconsume.
1867              return  ($self->{ct}); # ATTLIST
1868              redo A;
1869          } else {          } else {
1870            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1871          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
1872        } else {        } else {
1873            ## XML5 [ATTLIST]: Not defined yet.
1874          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
1875                        
1876            ## XML5: Not a parse error.            ## XML5: Not a parse error.
# Line 1781  sub _get_next_token ($) { Line 1880  sub _get_next_token ($) {
1880          }          }
1881          $self->{ca}->{value} .= chr ($self->{nc});          $self->{ca}->{value} .= chr ($self->{nc});
1882          $self->{read_until}->($self->{ca}->{value},          $self->{read_until}->($self->{ca}->{value},
1883                                q["&<],                                qq["&<\x09\x0C\x20],
1884                                length $self->{ca}->{value});                                length $self->{ca}->{value});
1885    
1886          ## Stay in the state          ## Stay in the state
# Line 1799  sub _get_next_token ($) { Line 1898  sub _get_next_token ($) {
1898          redo A;          redo A;
1899        }        }
1900      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1901        ## XML5: "Tag attribute value single quoted state".        ## XML5: "Tag attribute value single quoted state" and "DOCTYPE
1902          ## ATTLIST attribute value single quoted state".
1903    
1904        if ($self->{nc} == 0x0027) { # '        if ($self->{nc} == 0x0027) { # '
1905                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1906          ## XML5: "Before attribute name state" (sic).            
1907          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;            ## XML5: "DOCTYPE ATTLIST name after state".
1908              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1909              $self->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE;
1910            } else {
1911              
1912              ## XML5: "Before attribute name state" (sic).
1913              $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1914            }
1915                    
1916      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1917        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1840  sub _get_next_token ($) { Line 1947  sub _get_next_token ($) {
1947      }      }
1948        
1949          redo A;          redo A;
1950          } elsif ($self->{is_xml} and
1951                   $is_space->{$self->{nc}}) {
1952            
1953            $self->{ca}->{value} .= ' ';
1954            ## Stay in the state.
1955            
1956        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1957          $self->{line_prev} = $self->{line};
1958          $self->{column_prev} = $self->{column};
1959          $self->{column}++;
1960          $self->{nc}
1961              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
1962        } else {
1963          $self->{set_nc}->($self);
1964        }
1965      
1966            redo A;
1967        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
1968          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed attribute value');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed attribute value');
1969          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1970                        
1971            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
1972    
1973              $self->{state} = DATA_STATE;
1974              $self->{s_kwd} = '';
1975              ## reconsume
1976              return  ($self->{ct}); # start tag
1977              redo A;
1978          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1979            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1980            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1854  sub _get_next_token ($) { Line 1984  sub _get_next_token ($) {
1984              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
1985                            
1986            }            }
1987    
1988              $self->{state} = DATA_STATE;
1989              $self->{s_kwd} = '';
1990              ## reconsume
1991              return  ($self->{ct}); # end tag
1992              redo A;
1993            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1994              ## XML5: No parse error above; not defined yet.
1995              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1996              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1997              ## Reconsume.
1998              return  ($self->{ct}); # ATTLIST
1999              redo A;
2000          } else {          } else {
2001            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
2002          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
2003        } else {        } else {
2004            ## XML5 [ATTLIST]: Not defined yet.
2005          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
2006                        
2007            ## XML5: Not a parse error.            ## XML5: Not a parse error.
# Line 1874  sub _get_next_token ($) { Line 2011  sub _get_next_token ($) {
2011          }          }
2012          $self->{ca}->{value} .= chr ($self->{nc});          $self->{ca}->{value} .= chr ($self->{nc});
2013          $self->{read_until}->($self->{ca}->{value},          $self->{read_until}->($self->{ca}->{value},
2014                                q['&<],                                qq['&<\x09\x0C\x20],
2015                                length $self->{ca}->{value});                                length $self->{ca}->{value});
2016    
2017          ## Stay in the state          ## Stay in the state
# Line 1895  sub _get_next_token ($) { Line 2032  sub _get_next_token ($) {
2032        ## XML5: "Tag attribute value unquoted state".        ## XML5: "Tag attribute value unquoted state".
2033    
2034        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
2035                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
2036          ## XML5: "Tag attribute name before state".            
2037          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;            push @{$self->{ct}->{attrdefs}}, $self->{ca};
2038              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
2039            } else {
2040              
2041              ## XML5: "Tag attribute name before state".
2042              $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
2043            }
2044                    
2045      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2046        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1938  sub _get_next_token ($) { Line 2081  sub _get_next_token ($) {
2081          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
2082                        
2083            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
2084    
2085              $self->{state} = DATA_STATE;
2086              $self->{s_kwd} = '';
2087              
2088        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2089          $self->{line_prev} = $self->{line};
2090          $self->{column_prev} = $self->{column};
2091          $self->{column}++;
2092          $self->{nc}
2093              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
2094        } else {
2095          $self->{set_nc}->($self);
2096        }
2097      
2098              return  ($self->{ct}); # start tag
2099              redo A;
2100          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
2101            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
2102            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1947  sub _get_next_token ($) { Line 2106  sub _get_next_token ($) {
2106              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
2107                            
2108            }            }
2109          } else {  
2110            die "$0: $self->{ct}->{type}: Unknown token type";            $self->{state} = DATA_STATE;
2111          }            $self->{s_kwd} = '';
2112          $self->{state} = DATA_STATE;            
         $self->{s_kwd} = '';  
           
2113      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2114        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
2115        $self->{column_prev} = $self->{column};        $self->{column_prev} = $self->{column};
# Line 1963  sub _get_next_token ($) { Line 2120  sub _get_next_token ($) {
2120        $self->{set_nc}->($self);        $self->{set_nc}->($self);
2121      }      }
2122        
2123              return  ($self->{ct}); # end tag
2124          return  ($self->{ct}); # start tag or end tag            redo A;
2125            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
2126          redo A;            push @{$self->{ct}->{attrdefs}}, $self->{ca};
2127              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2128              
2129        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2130          $self->{line_prev} = $self->{line};
2131          $self->{column_prev} = $self->{column};
2132          $self->{column}++;
2133          $self->{nc}
2134              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
2135        } else {
2136          $self->{set_nc}->($self);
2137        }
2138      
2139              return  ($self->{ct}); # ATTLIST
2140              redo A;
2141            } else {
2142              die "$0: $self->{ct}->{type}: Unknown token type";
2143            }
2144        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');  
2145          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
2146                        
2147              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');
2148            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
2149    
2150              $self->{state} = DATA_STATE;
2151              $self->{s_kwd} = '';
2152              ## reconsume
2153              return  ($self->{ct}); # start tag
2154              redo A;
2155          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
2156              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');
2157            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
2158            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
2159                            
# Line 1981  sub _get_next_token ($) { Line 2162  sub _get_next_token ($) {
2162              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
2163                            
2164            }            }
2165    
2166              $self->{state} = DATA_STATE;
2167              $self->{s_kwd} = '';
2168              ## reconsume
2169              return  ($self->{ct}); # end tag
2170              redo A;
2171            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
2172              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
2173              push @{$self->{ct}->{attrdefs}}, $self->{ca};
2174              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2175              ## Reconsume.
2176              return  ($self->{ct}); # ATTLIST
2177              redo A;
2178          } else {          } else {
2179            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
2180          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
2181        } else {        } else {
2182          if ({          if ({
2183               0x0022 => 1, # "               0x0022 => 1, # "
2184               0x0027 => 1, # '               0x0027 => 1, # '
2185               0x003D => 1, # =               0x003D => 1, # =
2186                 0x003C => 1, # <
2187              }->{$self->{nc}}) {              }->{$self->{nc}}) {
2188                        
2189            ## XML5: Not a parse error.            ## XML5: Not a parse error.
# Line 2005  sub _get_next_token ($) { Line 2193  sub _get_next_token ($) {
2193          }          }
2194          $self->{ca}->{value} .= chr ($self->{nc});          $self->{ca}->{value} .= chr ($self->{nc});
2195          $self->{read_until}->($self->{ca}->{value},          $self->{read_until}->($self->{ca}->{value},
2196                                q["'=& >],                                qq["'=& \x09\x0C>],
2197                                length $self->{ca}->{value});                                length $self->{ca}->{value});
2198    
2199          ## Stay in the state          ## Stay in the state
# Line 2183  sub _get_next_token ($) { Line 2371  sub _get_next_token ($) {
2371          redo A;          redo A;
2372        }        }
2373      } elsif ($self->{state} == BOGUS_COMMENT_STATE) {      } elsif ($self->{state} == BOGUS_COMMENT_STATE) {
2374        ## (only happen if PCDATA state)        ## XML5: "Bogus comment state" and "DOCTYPE bogus comment state".
2375    
2376        ## NOTE: Unlike spec's "bogus comment state", this implementation        ## NOTE: Unlike spec's "bogus comment state", this implementation
2377        ## consumes characters one-by-one basis.        ## consumes characters one-by-one basis.
2378                
2379        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2380                    if ($self->{in_subset}) {
2381          $self->{state} = DATA_STATE;            
2382          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2383            } else {
2384              
2385              $self->{state} = DATA_STATE;
2386              $self->{s_kwd} = '';
2387            }
2388                    
2389      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2390        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2207  sub _get_next_token ($) { Line 2400  sub _get_next_token ($) {
2400          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
2401          redo A;          redo A;
2402        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
2403                    if ($self->{in_subset}) {
2404          $self->{state} = DATA_STATE;            
2405          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2406            } else {
2407              
2408              $self->{state} = DATA_STATE;
2409              $self->{s_kwd} = '';
2410            }
2411          ## reconsume          ## reconsume
2412    
2413          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2236  sub _get_next_token ($) { Line 2434  sub _get_next_token ($) {
2434          redo A;          redo A;
2435        }        }
2436      } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {      } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {
2437        ## (only happen if PCDATA state)        ## XML5: "Markup declaration state".
2438                
2439        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2440                    
# Line 2502  sub _get_next_token ($) { Line 2700  sub _get_next_token ($) {
2700        
2701          redo A;          redo A;
2702        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
2703          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');
2704          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2705          $self->{s_kwd} = '';            
2706              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2707            } else {
2708              
2709              $self->{state} = DATA_STATE;
2710              $self->{s_kwd} = '';
2711            }
2712                    
2713      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2714        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2522  sub _get_next_token ($) { Line 2725  sub _get_next_token ($) {
2725    
2726          redo A;          redo A;
2727        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2728          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2729          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2730          $self->{s_kwd} = '';            
2731              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2732            } else {
2733              
2734              $self->{state} = DATA_STATE;
2735              $self->{s_kwd} = '';
2736            }
2737          ## reconsume          ## reconsume
2738    
2739          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2566  sub _get_next_token ($) { Line 2774  sub _get_next_token ($) {
2774        
2775          redo A;          redo A;
2776        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
2777          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');
2778          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2779          $self->{s_kwd} = '';            
2780              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2781            } else {
2782              
2783              $self->{state} = DATA_STATE;
2784              $self->{s_kwd} = '';
2785            }
2786                    
2787      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2788        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2586  sub _get_next_token ($) { Line 2799  sub _get_next_token ($) {
2799    
2800          redo A;          redo A;
2801        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2802          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2803          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2804          $self->{s_kwd} = '';            
2805              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2806            } else {
2807              
2808              $self->{state} = DATA_STATE;
2809              $self->{s_kwd} = '';
2810            }
2811          ## reconsume          ## reconsume
2812    
2813          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2614  sub _get_next_token ($) { Line 2832  sub _get_next_token ($) {
2832          redo A;          redo A;
2833        }        }
2834      } elsif ($self->{state} == COMMENT_STATE) {      } elsif ($self->{state} == COMMENT_STATE) {
2835          ## XML5: "Comment state" and "DOCTYPE comment state".
2836    
2837        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2838                    
2839          $self->{state} = COMMENT_END_DASH_STATE;          $self->{state} = COMMENT_END_DASH_STATE;
# Line 2630  sub _get_next_token ($) { Line 2850  sub _get_next_token ($) {
2850        
2851          redo A;          redo A;
2852        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2853          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2854          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2855          $self->{s_kwd} = '';            
2856              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2857            } else {
2858              
2859              $self->{state} = DATA_STATE;
2860              $self->{s_kwd} = '';
2861            }
2862          ## reconsume          ## reconsume
2863    
2864          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2661  sub _get_next_token ($) { Line 2886  sub _get_next_token ($) {
2886          redo A;          redo A;
2887        }        }
2888      } elsif ($self->{state} == COMMENT_END_DASH_STATE) {      } elsif ($self->{state} == COMMENT_END_DASH_STATE) {
2889        ## XML5: "comment dash state".        ## XML5: "Comment dash state" and "DOCTYPE comment dash state".
2890    
2891        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2892                    
# Line 2679  sub _get_next_token ($) { Line 2904  sub _get_next_token ($) {
2904        
2905          redo A;          redo A;
2906        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2907          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2908          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2909          $self->{s_kwd} = '';            
2910              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2911            } else {
2912              
2913              $self->{state} = DATA_STATE;
2914              $self->{s_kwd} = '';
2915            }
2916          ## reconsume          ## reconsume
2917    
2918          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2706  sub _get_next_token ($) { Line 2936  sub _get_next_token ($) {
2936          redo A;          redo A;
2937        }        }
2938      } elsif ($self->{state} == COMMENT_END_STATE) {      } elsif ($self->{state} == COMMENT_END_STATE) {
2939          ## XML5: "Comment end state" and "DOCTYPE comment end state".
2940    
2941        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2942                    if ($self->{in_subset}) {
2943          $self->{state} = DATA_STATE;            
2944          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2945            } else {
2946              
2947              $self->{state} = DATA_STATE;
2948              $self->{s_kwd} = '';
2949            }
2950                    
2951      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2952        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2746  sub _get_next_token ($) { Line 2983  sub _get_next_token ($) {
2983        
2984          redo A;          redo A;
2985        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2986          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2987          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2988          $self->{s_kwd} = '';            
2989              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2990            } else {
2991              
2992              $self->{state} = DATA_STATE;
2993              $self->{s_kwd} = '';
2994            }
2995          ## reconsume          ## reconsume
2996    
2997          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2757  sub _get_next_token ($) { Line 2999  sub _get_next_token ($) {
2999          redo A;          redo A;
3000        } else {        } else {
3001                    
         ## XML5: Not a parse error.  
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',  
                         line => $self->{line_prev},  
                         column => $self->{column_prev});  
3002          $self->{ct}->{data} .= '--' . chr ($self->{nc}); # comment          $self->{ct}->{data} .= '--' . chr ($self->{nc}); # comment
3003          $self->{state} = COMMENT_STATE;          $self->{state} = COMMENT_STATE;
3004                    
# Line 2792  sub _get_next_token ($) { Line 3030  sub _get_next_token ($) {
3030      }      }
3031        
3032          redo A;          redo A;
3033          } elsif ($self->{nc} == -1) {
3034            
3035            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3036            $self->{ct}->{quirks} = 1;
3037    
3038            $self->{state} = DATA_STATE;
3039            ## Reconsume.
3040            return  ($self->{ct}); # DOCTYPE (quirks)
3041    
3042            redo A;
3043        } else {        } else {
3044                    
3045          ## XML5: Unless EOF, swith to the bogus comment state.          ## XML5: Swith to the bogus comment state.
3046          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before DOCTYPE name');
3047          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
3048          ## reconsume          ## reconsume
# Line 2839  sub _get_next_token ($) { Line 3087  sub _get_next_token ($) {
3087          return  ($self->{ct}); # DOCTYPE (quirks)          return  ($self->{ct}); # DOCTYPE (quirks)
3088    
3089          redo A;          redo A;
3090          } elsif (0x0041 <= $self->{nc} and $self->{nc} <= 0x005A) { # A..Z
3091            
3092            $self->{ct}->{name} # DOCTYPE
3093                = chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020));
3094            delete $self->{ct}->{quirks};
3095            $self->{state} = DOCTYPE_NAME_STATE;
3096            
3097        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3098          $self->{line_prev} = $self->{line};
3099          $self->{column_prev} = $self->{column};
3100          $self->{column}++;
3101          $self->{nc}
3102              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3103        } else {
3104          $self->{set_nc}->($self);
3105        }
3106      
3107            redo A;
3108        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3109                    
3110          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');
# Line 2853  sub _get_next_token ($) { Line 3119  sub _get_next_token ($) {
3119                    
3120          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');
3121          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3122            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3123            $self->{in_subset} = 1;
3124                    
3125      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3126        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2864  sub _get_next_token ($) { Line 3132  sub _get_next_token ($) {
3132        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3133      }      }
3134        
3135            return  ($self->{ct}); # DOCTYPE
3136          redo A;          redo A;
3137        } else {        } else {
3138                    
# Line 2922  sub _get_next_token ($) { Line 3191  sub _get_next_token ($) {
3191          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
3192    
3193          redo A;          redo A;
3194          } elsif (0x0041 <= $self->{nc} and $self->{nc} <= 0x005A) { # A..Z
3195            
3196            $self->{ct}->{name} # DOCTYPE
3197                .= chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020));
3198            delete $self->{ct}->{quirks};
3199            ## Stay in the state.
3200            
3201        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3202          $self->{line_prev} = $self->{line};
3203          $self->{column_prev} = $self->{column};
3204          $self->{column}++;
3205          $self->{nc}
3206              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3207        } else {
3208          $self->{set_nc}->($self);
3209        }
3210      
3211            redo A;
3212        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3213                    
3214          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
# Line 2936  sub _get_next_token ($) { Line 3223  sub _get_next_token ($) {
3223        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
3224                    
3225          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3226            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3227            $self->{in_subset} = 1;
3228                    
3229      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3230        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2947  sub _get_next_token ($) { Line 3236  sub _get_next_token ($) {
3236        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3237      }      }
3238        
3239            return  ($self->{ct}); # DOCTYPE
3240          redo A;          redo A;
3241        } else {        } else {
3242                    
3243          $self->{ct}->{name}          $self->{ct}->{name} .= chr ($self->{nc}); # DOCTYPE
3244            .= chr ($self->{nc}); # DOCTYPE          ## Stay in the state.
         ## Stay in the state  
3245                    
3246      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3247        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2986  sub _get_next_token ($) { Line 3275  sub _get_next_token ($) {
3275        
3276          redo A;          redo A;
3277        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3278            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3279              
3280              $self->{state} = DATA_STATE;
3281              $self->{s_kwd} = '';
3282            } else {
3283              
3284              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md def'); ## TODO: type
3285              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3286            }
3287                    
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3288                    
3289      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3290        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3000  sub _get_next_token ($) { Line 3296  sub _get_next_token ($) {
3296        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3297      }      }
3298        
3299            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         return  ($self->{ct}); # DOCTYPE  
   
3300          redo A;          redo A;
3301        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3302            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3303              
3304              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3305              $self->{state} = DATA_STATE;
3306              $self->{s_kwd} = '';
3307              $self->{ct}->{quirks} = 1;
3308            } else {
3309              
3310              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3311              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3312            }
3313                    
3314          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          ## Reconsume.
3315          $self->{state} = DATA_STATE;          return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{s_kwd} = '';  
         ## reconsume  
   
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3316          redo A;          redo A;
3317        } elsif ($self->{nc} == 0x0050 or # P        } elsif ($self->{nc} == 0x0050 or # P
3318                 $self->{nc} == 0x0070) { # p                 $self->{nc} == 0x0070) { # p
# Line 3049  sub _get_next_token ($) { Line 3348  sub _get_next_token ($) {
3348      }      }
3349        
3350          redo A;          redo A;
3351        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [        } elsif ($self->{nc} == 0x0022 and # "
3352                   ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN or
3353                    $self->{ct}->{type} == PARAMETER_ENTITY_TOKEN)) {
3354            
3355            $self->{state} = DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE;
3356            $self->{ct}->{value} = ''; # ENTITY
3357            
3358        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3359          $self->{line_prev} = $self->{line};
3360          $self->{column_prev} = $self->{column};
3361          $self->{column}++;
3362          $self->{nc}
3363              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3364        } else {
3365          $self->{set_nc}->($self);
3366        }
3367      
3368            redo A;
3369          } elsif ($self->{nc} == 0x0027 and # '
3370                   ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN or
3371                    $self->{ct}->{type} == PARAMETER_ENTITY_TOKEN)) {
3372            
3373            $self->{state} = DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE;
3374            $self->{ct}->{value} = ''; # ENTITY
3375            
3376        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3377          $self->{line_prev} = $self->{line};
3378          $self->{column_prev} = $self->{column};
3379          $self->{column}++;
3380          $self->{nc}
3381              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3382        } else {
3383          $self->{set_nc}->($self);
3384        }
3385      
3386            redo A;
3387          } elsif ($self->{is_xml} and
3388                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3389                   $self->{nc} == 0x005B) { # [
3390                    
3391          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3392          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3393            $self->{in_subset} = 1;
3394                    
3395      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3396        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3064  sub _get_next_token ($) { Line 3402  sub _get_next_token ($) {
3402        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3403      }      }
3404        
3405            return  ($self->{ct}); # DOCTYPE
3406          redo A;          redo A;
3407        } else {        } else {
3408                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name'); ## TODO: type
3409          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name');  
3410          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3411              
3412              $self->{ct}->{quirks} = 1;
3413              $self->{state} = BOGUS_DOCTYPE_STATE;
3414            } else {
3415              
3416              $self->{state} = BOGUS_MD_STATE;
3417            }
3418    
         $self->{state} = BOGUS_DOCTYPE_STATE;  
3419                    
3420      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3421        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3142  sub _get_next_token ($) { Line 3487  sub _get_next_token ($) {
3487        
3488          redo A;          redo A;
3489        } else {        } else {
3490                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name', ## TODO: type
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name',  
3491                          line => $self->{line_prev},                          line => $self->{line_prev},
3492                          column => $self->{column_prev} + 1 - length $self->{kwd});                          column => $self->{column_prev} + 1 - length $self->{kwd});
3493          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3494              
3495          $self->{state} = BOGUS_DOCTYPE_STATE;            $self->{ct}->{quirks} = 1;
3496              $self->{state} = BOGUS_DOCTYPE_STATE;
3497            } else {
3498              
3499              $self->{state} = BOGUS_MD_STATE;
3500            }
3501          ## Reconsume.          ## Reconsume.
3502          redo A;          redo A;
3503        }        }
# Line 3210  sub _get_next_token ($) { Line 3559  sub _get_next_token ($) {
3559        
3560          redo A;          redo A;
3561        } else {        } else {
3562                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name', ## TODO: type
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name',  
3563                          line => $self->{line_prev},                          line => $self->{line_prev},
3564                          column => $self->{column_prev} + 1 - length $self->{kwd});                          column => $self->{column_prev} + 1 - length $self->{kwd});
3565          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3566              
3567          $self->{state} = BOGUS_DOCTYPE_STATE;            $self->{ct}->{quirks} = 1;
3568              $self->{state} = BOGUS_DOCTYPE_STATE;
3569            } else {
3570              
3571              $self->{state} = BOGUS_MD_STATE;
3572            }
3573          ## Reconsume.          ## Reconsume.
3574          redo A;          redo A;
3575        }        }
# Line 3269  sub _get_next_token ($) { Line 3622  sub _get_next_token ($) {
3622        
3623          redo A;          redo A;
3624        } elsif ($self->{nc} eq 0x003E) { # >        } elsif ($self->{nc} eq 0x003E) { # >
           
3625          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');
3626            
3627          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3628          $self->{s_kwd} = '';            
3629              $self->{state} = DATA_STATE;
3630              $self->{s_kwd} = '';
3631              $self->{ct}->{quirks} = 1;
3632            } else {
3633              
3634              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3635            }
3636            
3637                    
3638      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3639        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3285  sub _get_next_token ($) { Line 3645  sub _get_next_token ($) {
3645        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3646      }      }
3647        
3648            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3649          redo A;          redo A;
3650        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3651            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3652              
3653              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3654              $self->{state} = DATA_STATE;
3655              $self->{s_kwd} = '';
3656              $self->{ct}->{quirks} = 1;
3657            } else {
3658              
3659              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3660              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3661            }
3662                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');  
   
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3663          ## reconsume          ## reconsume
   
         $self->{ct}->{quirks} = 1;  
3664          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
3665          redo A;          redo A;
3666        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [        } elsif ($self->{is_xml} and
3667                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3668                   $self->{nc} == 0x005B) { # [
3669                    
3670          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');
3671          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3672          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3673            $self->{in_subset} = 1;
3674                    
3675      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3676        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3318  sub _get_next_token ($) { Line 3682  sub _get_next_token ($) {
3682        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3683      }      }
3684        
3685            return  ($self->{ct}); # DOCTYPE
3686          redo A;          redo A;
3687        } else {        } else {
           
3688          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC');
         $self->{ct}->{quirks} = 1;  
3689    
3690          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3691              
3692              $self->{ct}->{quirks} = 1;
3693              $self->{state} = BOGUS_DOCTYPE_STATE;
3694            } else {
3695              
3696              $self->{state} = BOGUS_MD_STATE;
3697            }
3698    
3699                    
3700      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3701        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3355  sub _get_next_token ($) { Line 3726  sub _get_next_token ($) {
3726        
3727          redo A;          redo A;
3728        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
3729          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3730    
3731          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3732          $self->{s_kwd} = '';            
3733              $self->{state} = DATA_STATE;
3734              $self->{s_kwd} = '';
3735              $self->{ct}->{quirks} = 1;
3736            } else {
3737              
3738              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3739            }
3740    
3741                    
3742      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3743        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3371  sub _get_next_token ($) { Line 3749  sub _get_next_token ($) {
3749        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3750      }      }
3751        
3752            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3753          redo A;          redo A;
3754        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
3755          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3756    
3757          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3758          $self->{s_kwd} = '';            
3759          ## reconsume            $self->{state} = DATA_STATE;
3760              $self->{s_kwd} = '';
3761          $self->{ct}->{quirks} = 1;            $self->{ct}->{quirks} = 1;
3762            } else {
3763              
3764              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3765            }
3766            
3767            ## Reconsume.
3768          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
3769          redo A;          redo A;
3770        } else {        } else {
3771                    
3772          $self->{ct}->{pubid} # DOCTYPE          $self->{ct}->{pubid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
3773          $self->{read_until}->($self->{ct}->{pubid}, q[">],          $self->{read_until}->($self->{ct}->{pubid}, q[">],
3774                                length $self->{ct}->{pubid});                                length $self->{ct}->{pubid});
3775    
# Line 3426  sub _get_next_token ($) { Line 3804  sub _get_next_token ($) {
3804        
3805          redo A;          redo A;
3806        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
3807          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3808    
3809          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3810          $self->{s_kwd} = '';            
3811              $self->{state} = DATA_STATE;
3812              $self->{s_kwd} = '';
3813              $self->{ct}->{quirks} = 1;
3814            } else {
3815              
3816              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3817            }
3818    
3819                    
3820      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3821        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3442  sub _get_next_token ($) { Line 3827  sub _get_next_token ($) {
3827        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3828      }      }
3829        
3830            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3831          redo A;          redo A;
3832        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
3833          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3834    
3835          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3836          $self->{s_kwd} = '';            
3837              $self->{state} = DATA_STATE;
3838              $self->{s_kwd} = '';
3839              $self->{ct}->{quirks} = 1;
3840            } else {
3841              
3842              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3843            }
3844          
3845          ## reconsume          ## reconsume
3846            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3847          redo A;          redo A;
3848        } else {        } else {
3849                    
3850          $self->{ct}->{pubid} # DOCTYPE          $self->{ct}->{pubid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
3851          $self->{read_until}->($self->{ct}->{pubid}, q['>],          $self->{read_until}->($self->{ct}->{pubid}, q['>],
3852                                length $self->{ct}->{pubid});                                length $self->{ct}->{pubid});
3853    
# Line 3498  sub _get_next_token ($) { Line 3883  sub _get_next_token ($) {
3883          redo A;          redo A;
3884        } elsif ($self->{nc} == 0x0022) { # "        } elsif ($self->{nc} == 0x0022) { # "
3885                    
3886          $self->{ct}->{sysid} = ''; # DOCTYPE          $self->{ct}->{sysid} = ''; # DOCTYPE/ENTITY/NOTATION
3887          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
3888                    
3889      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3514  sub _get_next_token ($) { Line 3899  sub _get_next_token ($) {
3899          redo A;          redo A;
3900        } elsif ($self->{nc} == 0x0027) { # '        } elsif ($self->{nc} == 0x0027) { # '
3901                    
3902          $self->{ct}->{sysid} = ''; # DOCTYPE          $self->{ct}->{sysid} = ''; # DOCTYPE/ENTITY/NOTATION
3903          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
3904                    
3905      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3529  sub _get_next_token ($) { Line 3914  sub _get_next_token ($) {
3914        
3915          redo A;          redo A;
3916        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3917          if ($self->{is_xml}) {          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3918                        if ($self->{is_xml}) {
3919            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');              
3920                $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
3921              } else {
3922                
3923              }
3924              $self->{state} = DATA_STATE;
3925              $self->{s_kwd} = '';
3926          } else {          } else {
3927                        if ($self->{ct}->{type} == NOTATION_TOKEN) {
3928                
3929              } else {
3930                
3931                $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');            
3932              }
3933              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3934          }          }
3935          $self->{state} = DATA_STATE;          
         $self->{s_kwd} = '';  
3936                    
3937      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3938        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3548  sub _get_next_token ($) { Line 3944  sub _get_next_token ($) {
3944        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3945      }      }
3946        
3947            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         return  ($self->{ct}); # DOCTYPE  
   
3948          redo A;          redo A;
3949        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3950            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3951              
3952              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3953              
3954              $self->{state} = DATA_STATE;
3955              $self->{s_kwd} = '';
3956              $self->{ct}->{quirks} = 1;
3957            } else {
3958              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3959              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3960            }
3961                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');  
   
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3962          ## reconsume          ## reconsume
3963            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3964          redo A;          redo A;
3965        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [        } elsif ($self->{is_xml} and
3966                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3967                   $self->{nc} == 0x005B) { # [
3968                    
3969          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
3970          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3971          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3972            $self->{in_subset} = 1;
3973                    
3974      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3975        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3580  sub _get_next_token ($) { Line 3981  sub _get_next_token ($) {
3981        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3982      }      }
3983        
3984            return  ($self->{ct}); # DOCTYPE
3985          redo A;          redo A;
3986        } else {        } else {
           
3987          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC literal');
         $self->{ct}->{quirks} = 1;  
3988    
3989          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3990              
3991              $self->{ct}->{quirks} = 1;
3992              $self->{state} = BOGUS_DOCTYPE_STATE;
3993            } else {
3994              
3995              $self->{state} = BOGUS_MD_STATE;
3996            }
3997    
3998                    
3999      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4000        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3649  sub _get_next_token ($) { Line 4057  sub _get_next_token ($) {
4057        
4058          redo A;          redo A;
4059        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
4060          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
4061                    
4062      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4063        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3665  sub _get_next_token ($) { Line 4070  sub _get_next_token ($) {
4070      }      }
4071        
4072    
4073          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4074          return  ($self->{ct}); # DOCTYPE            
4075              $self->{state} = DATA_STATE;
4076              $self->{s_kwd} = '';
4077              $self->{ct}->{quirks} = 1;
4078            } else {
4079              
4080              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4081            }
4082    
4083            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4084          redo A;          redo A;
4085        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4086            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4087              
4088              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
4089              $self->{state} = DATA_STATE;
4090              $self->{s_kwd} = '';
4091              $self->{ct}->{quirks} = 1;
4092            } else {
4093              
4094              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
4095              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4096            }
4097                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');  
   
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
4098          ## reconsume          ## reconsume
4099            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
4100          redo A;          redo A;
4101        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [        } elsif ($self->{is_xml} and
4102                   $self->{ct}->{type} == DOCTYPE_TOKEN and
4103                   $self->{nc} == 0x005B) { # [
4104                    
4105          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
4106    
4107          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4108          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
4109            $self->{in_subset} = 1;
4110                    
4111      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4112        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3698  sub _get_next_token ($) { Line 4118  sub _get_next_token ($) {
4118        $self->{set_nc}->($self);        $self->{set_nc}->($self);
4119      }      }
4120        
4121            return  ($self->{ct}); # DOCTYPE
4122          redo A;          redo A;
4123        } else {        } else {
           
4124          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM');
         $self->{ct}->{quirks} = 1;  
4125    
4126          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4127                        
4128              $self->{ct}->{quirks} = 1;
4129              $self->{state} = BOGUS_DOCTYPE_STATE;
4130            } else {
4131              
4132              $self->{state} = BOGUS_MD_STATE;
4133            }
4134    
4135                    
4136      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4137        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3735  sub _get_next_token ($) { Line 4162  sub _get_next_token ($) {
4162        
4163          redo A;          redo A;
4164        } elsif (not $self->{is_xml} and $self->{nc} == 0x003E) { # >        } elsif (not $self->{is_xml} and $self->{nc} == 0x003E) { # >
           
4165          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4166    
4167          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4168          $self->{s_kwd} = '';            
4169              $self->{state} = DATA_STATE;
4170              $self->{s_kwd} = '';
4171              $self->{ct}->{quirks} = 1;
4172            } else {
4173              
4174              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4175            }
4176            
4177                    
4178      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4179        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3751  sub _get_next_token ($) { Line 4185  sub _get_next_token ($) {
4185        $self->{set_nc}->($self);        $self->{set_nc}->($self);
4186      }      }
4187        
4188            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
4189          redo A;          redo A;
4190        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
4191          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4192    
4193          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4194          $self->{s_kwd} = '';            
4195              $self->{state} = DATA_STATE;
4196              $self->{s_kwd} = '';
4197              $self->{ct}->{quirks} = 1;
4198            } else {
4199              
4200              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4201            }
4202            
4203          ## reconsume          ## reconsume
4204            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
4205          redo A;          redo A;
4206        } else {        } else {
4207                    
4208          $self->{ct}->{sysid} # DOCTYPE          $self->{ct}->{sysid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
4209          $self->{read_until}->($self->{ct}->{sysid}, q[">],          $self->{read_until}->($self->{ct}->{sysid}, q[">],
4210                                length $self->{ct}->{sysid});                                length $self->{ct}->{sysid});
4211    
# Line 3828  sub _get_next_token ($) { Line 4262  sub _get_next_token ($) {
4262    
4263          redo A;          redo A;
4264        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
4265          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4266    
4267          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4268          $self->{s_kwd} = '';            
4269          ## reconsume            $self->{state} = DATA_STATE;
4270              $self->{s_kwd} = '';
4271          $self->{ct}->{quirks} = 1;            $self->{ct}->{quirks} = 1;
4272          return  ($self->{ct}); # DOCTYPE          } else {
4273              
4274              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4275            }
4276    
4277            ## reconsume
4278            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4279          redo A;          redo A;
4280        } else {        } else {
4281                    
4282          $self->{ct}->{sysid} # DOCTYPE          $self->{ct}->{sysid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
4283          $self->{read_until}->($self->{ct}->{sysid}, q['>],          $self->{read_until}->($self->{ct}->{sysid}, q['>],
4284                                length $self->{ct}->{sysid});                                length $self->{ct}->{sysid});
4285    
# Line 3862  sub _get_next_token ($) { Line 4299  sub _get_next_token ($) {
4299        }        }
4300      } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {      } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
4301        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
4302                    if ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN) {
4303          ## Stay in the state            
4304              $self->{state} = BEFORE_NDATA_STATE;
4305            } else {
4306              
4307              ## Stay in the state
4308            }
4309                    
4310      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4311        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3877  sub _get_next_token ($) { Line 4319  sub _get_next_token ($) {
4319        
4320          redo A;          redo A;
4321        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
4322            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4323              
4324              $self->{state} = DATA_STATE;
4325              $self->{s_kwd} = '';
4326            } else {
4327              
4328              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4329            }
4330    
4331                    
4332          $self->{state} = DATA_STATE;      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4333          $self->{s_kwd} = '';        $self->{line_prev} = $self->{line};
4334          $self->{column_prev} = $self->{column};
4335          $self->{column}++;
4336          $self->{nc}
4337              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4338        } else {
4339          $self->{set_nc}->($self);
4340        }
4341      
4342            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4343            redo A;
4344          } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
4345                   ($self->{nc} == 0x004E or # N
4346                    $self->{nc} == 0x006E)) { # n
4347            
4348            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before NDATA'); ## TODO: type
4349            $self->{state} = NDATA_STATE;
4350            $self->{kwd} = chr $self->{nc};
4351                    
4352      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4353        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3891  sub _get_next_token ($) { Line 4359  sub _get_next_token ($) {
4359        $self->{set_nc}->($self);        $self->{set_nc}->($self);
4360      }      }
4361        
   
         return  ($self->{ct}); # DOCTYPE  
   
4362          redo A;          redo A;
4363        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4364                    if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4365          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');            
4366          $self->{state} = DATA_STATE;            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
4367          $self->{s_kwd} = '';            $self->{state} = DATA_STATE;
4368          ## reconsume            $self->{s_kwd} = '';
4369              $self->{ct}->{quirks} = 1;
4370          $self->{ct}->{quirks} = 1;          } else {
4371          return  ($self->{ct}); # DOCTYPE            
4372              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
4373              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4374            }
4375    
4376            ## reconsume
4377            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4378          redo A;          redo A;
4379        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [        } elsif ($self->{is_xml} and
4380                   $self->{ct}->{type} == DOCTYPE_TOKEN and
4381                   $self->{nc} == 0x005B) { # [
4382                    
4383          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4384          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
4385            $self->{in_subset} = 1;
4386                    
4387      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4388        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3921  sub _get_next_token ($) { Line 4394  sub _get_next_token ($) {
4394        $self->{set_nc}->($self);        $self->{set_nc}->($self);
4395      }      }
4396        
4397            return  ($self->{ct}); # DOCTYPE
4398          redo A;          redo A;
4399        } else {        } else {
           
4400          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');
         #$self->{ct}->{quirks} = 1;  
4401    
4402          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4403              
4404              #$self->{ct}->{quirks} = 1;
4405              $self->{state} = BOGUS_DOCTYPE_STATE;
4406            } else {
4407              
4408              $self->{state} = BOGUS_MD_STATE;
4409            }
4410    
4411                    
4412      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4413        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3941  sub _get_next_token ($) { Line 4421  sub _get_next_token ($) {
4421        
4422          redo A;          redo A;
4423        }        }
4424      } elsif ($self->{state} == BOGUS_DOCTYPE_STATE) {      } elsif ($self->{state} == BEFORE_NDATA_STATE) {
4425        if ($self->{nc} == 0x003E) { # >        if ($is_space->{$self->{nc}}) {
4426                    
4427          $self->{state} = DATA_STATE;          ## Stay in the state.
         $self->{s_kwd} = '';  
4428                    
4429      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4430        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3957  sub _get_next_token ($) { Line 4436  sub _get_next_token ($) {
4436        $self->{set_nc}->($self);        $self->{set_nc}->($self);
4437      }      }
4438        
   
         return  ($self->{ct}); # DOCTYPE  
   
4439          redo A;          redo A;
4440        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [        } elsif ($self->{nc} == 0x003E) { # >
4441          if ($self->{ct}->{has_internal_subset}) { # DOCTYPE          
4442                      $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4443            ## Stay in the state.          
             
4444      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4445        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
4446        $self->{column_prev} = $self->{column};        $self->{column_prev} = $self->{column};
# Line 3976  sub _get_next_token ($) { Line 4451  sub _get_next_token ($) {
4451        $self->{set_nc}->($self);        $self->{set_nc}->($self);
4452      }      }
4453        
4454            redo A;          return  ($self->{ct}); # ENTITY
4455          } else {          redo A;
4456                    } elsif ($self->{nc} == 0x004E or # N
4457            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;                 $self->{nc} == 0x006E) { # n
4458            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE          
4459                      $self->{state} = NDATA_STATE;
4460            $self->{kwd} = chr $self->{nc};
4461            
4462      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4463        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
4464        $self->{column_prev} = $self->{column};        $self->{column_prev} = $self->{column};
# Line 3992  sub _get_next_token ($) { Line 4469  sub _get_next_token ($) {
4469        $self->{set_nc}->($self);        $self->{set_nc}->($self);
4470      }      }
4471        
4472            redo A;          redo A;
4473          }        } elsif ($self->{nc} == -1) {
4474            
4475            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
4476            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4477            ## reconsume
4478            return  ($self->{ct}); # ENTITY
4479            redo A;
4480          } else {
4481            
4482            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');
4483            $self->{state} = BOGUS_MD_STATE;
4484            
4485        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4486          $self->{line_prev} = $self->{line};
4487          $self->{column_prev} = $self->{column};
4488          $self->{column}++;
4489          $self->{nc}
4490              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4491        } else {
4492          $self->{set_nc}->($self);
4493        }
4494      
4495            redo A;
4496          }
4497        } elsif ($self->{state} == BOGUS_DOCTYPE_STATE) {
4498          if ($self->{nc} == 0x003E) { # >
4499            
4500            $self->{state} = DATA_STATE;
4501            $self->{s_kwd} = '';
4502            
4503        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4504          $self->{line_prev} = $self->{line};
4505          $self->{column_prev} = $self->{column};
4506          $self->{column}++;
4507          $self->{nc}
4508              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4509        } else {
4510          $self->{set_nc}->($self);
4511        }
4512      
4513    
4514            return  ($self->{ct}); # DOCTYPE
4515    
4516            redo A;
4517          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
4518            
4519            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4520            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
4521            $self->{in_subset} = 1;
4522            
4523        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4524          $self->{line_prev} = $self->{line};
4525          $self->{column_prev} = $self->{column};
4526          $self->{column}++;
4527          $self->{nc}
4528              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4529        } else {
4530          $self->{set_nc}->($self);
4531        }
4532      
4533            return  ($self->{ct}); # DOCTYPE
4534            redo A;
4535        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4536                    
4537          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
# Line 4166  sub _get_next_token ($) { Line 4704  sub _get_next_token ($) {
4704              0x003C => 1, 0x0026 => 1, -1 => 1, # <, &              0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
4705              $self->{entity_add} => 1,              $self->{entity_add} => 1,
4706            }->{$self->{nc}}) {            }->{$self->{nc}}) {
4707                    if ($self->{is_xml}) {
4708              
4709              $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero',
4710                              line => $self->{line_prev},
4711                              column => $self->{column_prev}
4712                                  + ($self->{nc} == -1 ? 1 : 0));
4713            } else {
4714              
4715              ## No error
4716            }
4717          ## Don't consume          ## Don't consume
         ## No error  
4718          ## Return nothing.          ## Return nothing.
4719          #          #
4720        } elsif ($self->{nc} == 0x0023) { # #        } elsif ($self->{nc} == 0x0023) { # #
# Line 4187  sub _get_next_token ($) { Line 4733  sub _get_next_token ($) {
4733      }      }
4734        
4735          redo A;          redo A;
4736        } elsif ((0x0041 <= $self->{nc} and        } elsif ($self->{is_xml} or
4737                   (0x0041 <= $self->{nc} and
4738                  $self->{nc} <= 0x005A) or # A..Z                  $self->{nc} <= 0x005A) or # A..Z
4739                 (0x0061 <= $self->{nc} and                 (0x0061 <= $self->{nc} and
4740                  $self->{nc} <= 0x007A)) { # a..z                  $self->{nc} <= 0x007A)) { # a..z
# Line 4241  sub _get_next_token ($) { Line 4788  sub _get_next_token ($) {
4788          redo A;          redo A;
4789        }        }
4790      } elsif ($self->{state} == ENTITY_HASH_STATE) {      } elsif ($self->{state} == ENTITY_HASH_STATE) {
4791        if ($self->{nc} == 0x0078 or # x        if ($self->{nc} == 0x0078) { # x
           $self->{nc} == 0x0058) { # X  
4792                    
4793          $self->{state} = HEXREF_X_STATE;          $self->{state} = HEXREF_X_STATE;
4794          $self->{kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
# Line 4258  sub _get_next_token ($) { Line 4804  sub _get_next_token ($) {
4804      }      }
4805        
4806          redo A;          redo A;
4807          } elsif ($self->{nc} == 0x0058) { # X
4808            
4809            if ($self->{is_xml}) {
4810              $self->{parse_error}->(level => $self->{level}->{must}, type => 'uppercase hcro'); ## TODO: type
4811            }
4812            $self->{state} = HEXREF_X_STATE;
4813            $self->{kwd} .= chr $self->{nc};
4814            
4815        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4816          $self->{line_prev} = $self->{line};
4817          $self->{column_prev} = $self->{column};
4818          $self->{column}++;
4819          $self->{nc}
4820              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4821        } else {
4822          $self->{set_nc}->($self);
4823        }
4824      
4825            redo A;
4826        } elsif (0x0030 <= $self->{nc} and        } elsif (0x0030 <= $self->{nc} and
4827                 $self->{nc} <= 0x0039) { # 0..9                 $self->{nc} <= 0x0039) { # 0..9
4828                    
# Line 4348  sub _get_next_token ($) { Line 4913  sub _get_next_token ($) {
4913        my $code = $self->{kwd};        my $code = $self->{kwd};
4914        my $l = $self->{line_prev};        my $l = $self->{line_prev};
4915        my $c = $self->{column_prev};        my $c = $self->{column_prev};
4916        if ($charref_map->{$code}) {        if ((not $self->{is_xml} and $charref_map->{$code}) or
4917              ($self->{is_xml} and 0xD800 <= $code and $code <= 0xDFFF) or
4918              ($self->{is_xml} and $code == 0x0000)) {
4919                    
4920          $self->{parse_error}->(level => $self->{level}->{must}, type => 'invalid character reference',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'invalid character reference',
4921                          text => (sprintf 'U+%04X', $code),                          text => (sprintf 'U+%04X', $code),
# Line 4501  sub _get_next_token ($) { Line 5068  sub _get_next_token ($) {
5068        my $code = $self->{kwd};        my $code = $self->{kwd};
5069        my $l = $self->{line_prev};        my $l = $self->{line_prev};
5070        my $c = $self->{column_prev};        my $c = $self->{column_prev};
5071        if ($charref_map->{$code}) {        if ((not $self->{is_xml} and $charref_map->{$code}) or
5072              ($self->{is_xml} and 0xD800 <= $code and $code <= 0xDFFF) or
5073              ($self->{is_xml} and $code == 0x0000)) {
5074                    
5075          $self->{parse_error}->(level => $self->{level}->{must}, type => 'invalid character reference',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'invalid character reference',
5076                          text => (sprintf 'U+%04X', $code),                          text => (sprintf 'U+%04X', $code),
# Line 4535  sub _get_next_token ($) { Line 5104  sub _get_next_token ($) {
5104          redo A;          redo A;
5105        }        }
5106      } elsif ($self->{state} == ENTITY_NAME_STATE) {      } elsif ($self->{state} == ENTITY_NAME_STATE) {
5107        if (length $self->{kwd} < 30 and        if ((0x0041 <= $self->{nc} and # a
5108            ## NOTE: Some number greater than the maximum length of entity name             $self->{nc} <= 0x005A) or # x
5109            ((0x0041 <= $self->{nc} and # a            (0x0061 <= $self->{nc} and # a
5110              $self->{nc} <= 0x005A) or # x             $self->{nc} <= 0x007A) or # z
5111             (0x0061 <= $self->{nc} and # a            (0x0030 <= $self->{nc} and # 0
5112              $self->{nc} <= 0x007A) or # z             $self->{nc} <= 0x0039) or # 9
5113             (0x0030 <= $self->{nc} and # 0            $self->{nc} == 0x003B or # ;
5114              $self->{nc} <= 0x0039) or # 9            ($self->{is_xml} and
5115             $self->{nc} == 0x003B)) { # ;             not ($is_space->{$self->{nc}} or
5116                    {
5117                      0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
5118                      $self->{entity_add} => 1,
5119                    }->{$self->{nc}}))) {
5120          our $EntityChar;          our $EntityChar;
5121          $self->{kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
5122          if (defined $EntityChar->{$self->{kwd}}) {          if (defined $EntityChar->{$self->{kwd}} or
5123                $self->{ge}->{$self->{kwd}}) {
5124            if ($self->{nc} == 0x003B) { # ;            if ($self->{nc} == 0x003B) { # ;
5125                            if (defined $self->{ge}->{$self->{kwd}}) {
5126              $self->{entity__value} = $EntityChar->{$self->{kwd}};                if ($self->{ge}->{$self->{kwd}}->{only_text}) {
5127                    
5128                    $self->{entity__value} = $self->{ge}->{$self->{kwd}}->{value};
5129                  } else {
5130                    if (defined $self->{ge}->{$self->{kwd}}->{notation}) {
5131                      
5132                      $self->{parse_error}->(level => $self->{level}->{must}, type => 'unparsed entity', ## TODO: type
5133                                      value => $self->{kwd});
5134                    } else {
5135                      
5136                    }
5137                    $self->{entity__value} = '&' . $self->{kwd}; ## TODO: expand
5138                  }
5139                } else {
5140                  if ($self->{is_xml}) {
5141                    
5142                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'entity not declared', ## TODO: type
5143                                    value => $self->{kwd},
5144                                    level => {
5145                                              'amp;' => $self->{level}->{warn},
5146                                              'quot;' => $self->{level}->{warn},
5147                                              'lt;' => $self->{level}->{warn},
5148                                              'gt;' => $self->{level}->{warn},
5149                                              'apos;' => $self->{level}->{warn},
5150                                             }->{$self->{kwd}} ||
5151                                             $self->{level}->{must});
5152                  } else {
5153                    
5154                  }
5155                  $self->{entity__value} = $EntityChar->{$self->{kwd}};
5156                }
5157              $self->{entity__match} = 1;              $self->{entity__match} = 1;
5158                            
5159      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 4665  sub _get_next_token ($) { Line 5269  sub _get_next_token ($) {
5269      ## XML-only states      ## XML-only states
5270    
5271      } elsif ($self->{state} == PI_STATE) {      } elsif ($self->{state} == PI_STATE) {
5272          ## XML5: "Pi state" and "DOCTYPE pi state".
5273    
5274        if ($is_space->{$self->{nc}} or        if ($is_space->{$self->{nc}} or
5275            $self->{nc} == 0x003F or # ? ## XML5: Same as "Anything else"            $self->{nc} == 0x003F or # ?
5276            $self->{nc} == -1) {            $self->{nc} == -1) {
5277            ## XML5: U+003F: "pi state": Same as "Anything else"; "DOCTYPE
5278            ## pi state": Switch to the "DOCTYPE pi after state".  EOF:
5279            ## "DOCTYPE pi state": Parse error, switch to the "data
5280            ## state".
5281          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare pio', ## TODO: type          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare pio', ## TODO: type
5282                          line => $self->{line_prev},                          line => $self->{line_prev},
5283                          column => $self->{column_prev}                          column => $self->{column_prev}
# Line 4682  sub _get_next_token ($) { Line 5292  sub _get_next_token ($) {
5292                        };                        };
5293          redo A;          redo A;
5294        } else {        } else {
5295            ## XML5: "DOCTYPE pi state": Stay in the state.
5296          $self->{ct} = {type => PI_TOKEN,          $self->{ct} = {type => PI_TOKEN,
5297                         target => chr $self->{nc},                         target => chr $self->{nc},
5298                         data => '',                         data => '',
# Line 4719  sub _get_next_token ($) { Line 5330  sub _get_next_token ($) {
5330          redo A;          redo A;
5331        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
5332          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type
5333          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
5334          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5335            } else {
5336              $self->{state} = DATA_STATE;
5337              $self->{s_kwd} = '';
5338            }
5339          ## Reconsume.          ## Reconsume.
5340          return  ($self->{ct}); # pi          return  ($self->{ct}); # pi
5341          redo A;          redo A;
# Line 4791  sub _get_next_token ($) { Line 5406  sub _get_next_token ($) {
5406          redo A;          redo A;
5407        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
5408          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type
5409          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
5410          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state"
5411            } else {
5412              $self->{state} = DATA_STATE;
5413              $self->{s_kwd} = '';
5414            }
5415          ## Reprocess.          ## Reprocess.
5416          return  ($self->{ct}); # pi          return  ($self->{ct}); # pi
5417          redo A;          redo A;
# Line 4816  sub _get_next_token ($) { Line 5435  sub _get_next_token ($) {
5435          redo A;          redo A;
5436        }        }
5437      } elsif ($self->{state} == PI_AFTER_STATE) {      } elsif ($self->{state} == PI_AFTER_STATE) {
5438          ## XML5: Part of "Pi after state".
5439    
5440        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
5441          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
5442          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5443            } else {
5444              $self->{state} = DATA_STATE;
5445              $self->{s_kwd} = '';
5446            }
5447                    
5448      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5449        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 4861  sub _get_next_token ($) { Line 5486  sub _get_next_token ($) {
5486          redo A;          redo A;
5487        }        }
5488      } elsif ($self->{state} == PI_DATA_AFTER_STATE) {      } elsif ($self->{state} == PI_DATA_AFTER_STATE) {
5489        ## XML5: Same as "pi after state" in XML5        ## XML5: Same as "pi after state" and "DOCTYPE pi after state".
5490    
5491        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
5492          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
5493          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5494            } else {
5495              $self->{state} = DATA_STATE;
5496              $self->{s_kwd} = '';
5497            }
5498                    
5499      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5500        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 4902  sub _get_next_token ($) { Line 5532  sub _get_next_token ($) {
5532    
5533      } elsif ($self->{state} == DOCTYPE_INTERNAL_SUBSET_STATE) {      } elsif ($self->{state} == DOCTYPE_INTERNAL_SUBSET_STATE) {
5534        if ($self->{nc} == 0x003C) { # <        if ($self->{nc} == 0x003C) { # <
5535          ## TODO:          $self->{state} = DOCTYPE_TAG_STATE;
5536                    
5537      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5538        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 4919  sub _get_next_token ($) { Line 5549  sub _get_next_token ($) {
5549          ## XML5: Not defined yet.          ## XML5: Not defined yet.
5550    
5551          ## TODO:          ## TODO:
5552    
5553            if (not $self->{stop_processing} and
5554                not $self->{document}->xml_standalone) {
5555              $self->{parse_error}->(level => $self->{level}->{must}, type => 'stop processing', ## TODO: type
5556                              level => $self->{level}->{info});
5557              $self->{stop_processing} = 1;
5558            }
5559    
5560                    
5561      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5562        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 4932  sub _get_next_token ($) { Line 5570  sub _get_next_token ($) {
5570        
5571          redo A;          redo A;
5572        } elsif ($self->{nc} == 0x005D) { # ]        } elsif ($self->{nc} == 0x005D) { # ]
5573            delete $self->{in_subset};
5574          $self->{state} = DOCTYPE_INTERNAL_SUBSET_AFTER_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_AFTER_STATE;
5575                    
5576      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 4961  sub _get_next_token ($) { Line 5600  sub _get_next_token ($) {
5600          redo A;          redo A;
5601        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
5602          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed internal subset'); ## TODO: type          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed internal subset'); ## TODO: type
5603            delete $self->{in_subset};
5604          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
5605          $self->{s_kwd} = '';          $self->{s_kwd} = '';
5606          ## Reconsume.          ## Reconsume.
5607          return  ($self->{ct}); # DOCTYPE          return  ({type => END_OF_DOCTYPE_TOKEN});
5608          redo A;          redo A;
5609        } else {        } else {
5610          unless ($self->{internal_subset_tainted}) {          unless ($self->{internal_subset_tainted}) {
# Line 5001  sub _get_next_token ($) { Line 5641  sub _get_next_token ($) {
5641        $self->{set_nc}->($self);        $self->{set_nc}->($self);
5642      }      }
5643        
5644          return  ($self->{ct}); # DOCTYPE          return  ({type => END_OF_DOCTYPE_TOKEN});
5645          redo A;          redo A;
5646        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
5647          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
5648          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
5649          $self->{s_kwd} = '';          $self->{s_kwd} = '';
5650          ## Reconsume.          ## Reconsume.
5651          return  ($self->{ct}); # DOCTYPE          return  ({type => END_OF_DOCTYPE_TOKEN});
5652          redo A;          redo A;
5653        } else {        } else {
5654          ## XML5: No parse error and stay in the state.          ## XML5: No parse error and stay in the state.
5655          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after internal subset'); ## TODO: type          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after internal subset'); ## TODO: type
5656    
5657          $self->{state} = BOGUS_DOCTYPE_STATE;          $self->{state} = BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE;
5658            
5659        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5660          $self->{line_prev} = $self->{line};
5661          $self->{column_prev} = $self->{column};
5662          $self->{column}++;
5663          $self->{nc}
5664              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5665        } else {
5666          $self->{set_nc}->($self);
5667        }
5668      
5669            redo A;
5670          }
5671        } elsif ($self->{state} == BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE) {
5672          if ($self->{nc} == 0x003E) { # >
5673            $self->{state} = DATA_STATE;
5674            $self->{s_kwd} = '';
5675            
5676        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5677          $self->{line_prev} = $self->{line};
5678          $self->{column_prev} = $self->{column};
5679          $self->{column}++;
5680          $self->{nc}
5681              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5682        } else {
5683          $self->{set_nc}->($self);
5684        }
5685      
5686            return  ({type => END_OF_DOCTYPE_TOKEN});
5687            redo A;
5688          } elsif ($self->{nc} == -1) {
5689            $self->{state} = DATA_STATE;
5690            $self->{s_kwd} = '';
5691            ## Reconsume.
5692            return  ({type => END_OF_DOCTYPE_TOKEN});
5693            redo A;
5694          } else {
5695            ## Stay in the state.
5696            
5697        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5698          $self->{line_prev} = $self->{line};
5699          $self->{column_prev} = $self->{column};
5700          $self->{column}++;
5701          $self->{nc}
5702              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5703        } else {
5704          $self->{set_nc}->($self);
5705        }
5706      
5707            redo A;
5708          }
5709        } elsif ($self->{state} == DOCTYPE_TAG_STATE) {
5710          if ($self->{nc} == 0x0021) { # !
5711            $self->{state} = DOCTYPE_MARKUP_DECLARATION_OPEN_STATE;
5712            
5713        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5714          $self->{line_prev} = $self->{line};
5715          $self->{column_prev} = $self->{column};
5716          $self->{column}++;
5717          $self->{nc}
5718              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5719        } else {
5720          $self->{set_nc}->($self);
5721        }
5722      
5723            redo A;
5724          } elsif ($self->{nc} == 0x003F) { # ?
5725            $self->{state} = PI_STATE;
5726            
5727        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5728          $self->{line_prev} = $self->{line};
5729          $self->{column_prev} = $self->{column};
5730          $self->{column}++;
5731          $self->{nc}
5732              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5733        } else {
5734          $self->{set_nc}->($self);
5735        }
5736      
5737            redo A;
5738          } elsif ($self->{nc} == -1) {
5739            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago');
5740            $self->{state} = DATA_STATE;
5741            $self->{s_kwd} = '';
5742            ## Reconsume.
5743            redo A;
5744          } else {
5745            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago', ## XML5: Not a parse error.
5746                            line => $self->{line_prev},
5747                            column => $self->{column_prev});
5748            $self->{state} = BOGUS_COMMENT_STATE;
5749            $self->{ct} = {type => COMMENT_TOKEN,
5750                           data => '',
5751                          }; ## NOTE: Will be discarded.
5752            
5753        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5754          $self->{line_prev} = $self->{line};
5755          $self->{column_prev} = $self->{column};
5756          $self->{column}++;
5757          $self->{nc}
5758              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5759        } else {
5760          $self->{set_nc}->($self);
5761        }
5762      
5763            redo A;
5764          }
5765        } elsif ($self->{state} == DOCTYPE_MARKUP_DECLARATION_OPEN_STATE) {
5766          ## XML5: "DOCTYPE markup declaration state".
5767          
5768          if ($self->{nc} == 0x002D) { # -
5769            $self->{state} = MD_HYPHEN_STATE;
5770            
5771        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5772          $self->{line_prev} = $self->{line};
5773          $self->{column_prev} = $self->{column};
5774          $self->{column}++;
5775          $self->{nc}
5776              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5777        } else {
5778          $self->{set_nc}->($self);
5779        }
5780      
5781            redo A;
5782          } elsif ($self->{nc} == 0x0045 or # E
5783                   $self->{nc} == 0x0065) { # e
5784            $self->{state} = MD_E_STATE;
5785            $self->{kwd} = chr $self->{nc};
5786            
5787        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5788          $self->{line_prev} = $self->{line};
5789          $self->{column_prev} = $self->{column};
5790          $self->{column}++;
5791          $self->{nc}
5792              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5793        } else {
5794          $self->{set_nc}->($self);
5795        }
5796      
5797            redo A;
5798          } elsif ($self->{nc} == 0x0041 or # A
5799                   $self->{nc} == 0x0061) { # a
5800            $self->{state} = MD_ATTLIST_STATE;
5801            $self->{kwd} = chr $self->{nc};
5802            
5803        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5804          $self->{line_prev} = $self->{line};
5805          $self->{column_prev} = $self->{column};
5806          $self->{column}++;
5807          $self->{nc}
5808              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5809        } else {
5810          $self->{set_nc}->($self);
5811        }
5812      
5813            redo A;
5814          } elsif ($self->{nc} == 0x004E or # N
5815                   $self->{nc} == 0x006E) { # n
5816            $self->{state} = MD_NOTATION_STATE;
5817            $self->{kwd} = chr $self->{nc};
5818            
5819        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5820          $self->{line_prev} = $self->{line};
5821          $self->{column_prev} = $self->{column};
5822          $self->{column}++;
5823          $self->{nc}
5824              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5825        } else {
5826          $self->{set_nc}->($self);
5827        }
5828      
5829            redo A;
5830          } else {
5831            #
5832          }
5833          
5834          ## XML5: No parse error.
5835          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5836                          line => $self->{line_prev},
5837                          column => $self->{column_prev} - 1);
5838          ## Reconsume.
5839          $self->{state} = BOGUS_COMMENT_STATE;
5840          $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded.
5841          redo A;
5842        } elsif ($self->{state} == MD_E_STATE) {
5843          if ($self->{nc} == 0x004E or # N
5844              $self->{nc} == 0x006E) { # n
5845            $self->{state} = MD_ENTITY_STATE;
5846            $self->{kwd} .= chr $self->{nc};
5847            
5848        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5849          $self->{line_prev} = $self->{line};
5850          $self->{column_prev} = $self->{column};
5851          $self->{column}++;
5852          $self->{nc}
5853              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5854        } else {
5855          $self->{set_nc}->($self);
5856        }
5857      
5858            redo A;
5859          } elsif ($self->{nc} == 0x004C or # L
5860                   $self->{nc} == 0x006C) { # l
5861            ## XML5: <!ELEMENT> not supported.
5862            $self->{state} = MD_ELEMENT_STATE;
5863            $self->{kwd} .= chr $self->{nc};
5864            
5865        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5866          $self->{line_prev} = $self->{line};
5867          $self->{column_prev} = $self->{column};
5868          $self->{column}++;
5869          $self->{nc}
5870              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5871        } else {
5872          $self->{set_nc}->($self);
5873        }
5874      
5875            redo A;
5876          } else {
5877            ## XML5: No parse error.
5878            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5879                            line => $self->{line_prev},
5880                            column => $self->{column_prev} - 2
5881                                + 1 * ($self->{nc} == -1));
5882            ## Reconsume.
5883            $self->{state} = BOGUS_COMMENT_STATE;
5884            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5885            redo A;
5886          }
5887        } elsif ($self->{state} == MD_ENTITY_STATE) {
5888          if ($self->{nc} == [
5889                undef,
5890                undef,
5891                0x0054, # T
5892                0x0049, # I
5893                0x0054, # T
5894              ]->[length $self->{kwd}] or
5895              $self->{nc} == [
5896                undef,
5897                undef,
5898                0x0074, # t
5899                0x0069, # i
5900                0x0074, # t
5901              ]->[length $self->{kwd}]) {
5902            ## Stay in the state.
5903            $self->{kwd} .= chr $self->{nc};
5904            
5905        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5906          $self->{line_prev} = $self->{line};
5907          $self->{column_prev} = $self->{column};
5908          $self->{column}++;
5909          $self->{nc}
5910              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5911        } else {
5912          $self->{set_nc}->($self);
5913        }
5914      
5915            redo A;
5916          } elsif ((length $self->{kwd}) == 5 and
5917                   ($self->{nc} == 0x0059 or # Y
5918                    $self->{nc} == 0x0079)) { # y
5919            if ($self->{kwd} ne 'ENTIT' or $self->{nc} == 0x0079) {
5920              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
5921                              text => 'ENTITY',
5922                              line => $self->{line_prev},
5923                              column => $self->{column_prev} - 4);
5924            }
5925            $self->{ct} = {type => GENERAL_ENTITY_TOKEN, name => '',
5926                           line => $self->{line_prev},
5927                           column => $self->{column_prev} - 6};
5928            $self->{state} = DOCTYPE_MD_STATE;
5929            
5930        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5931          $self->{line_prev} = $self->{line};
5932          $self->{column_prev} = $self->{column};
5933          $self->{column}++;
5934          $self->{nc}
5935              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5936        } else {
5937          $self->{set_nc}->($self);
5938        }
5939      
5940            redo A;
5941          } else {
5942            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5943                            line => $self->{line_prev},
5944                            column => $self->{column_prev} - 1
5945                                - (length $self->{kwd})
5946                                + 1 * ($self->{nc} == -1));
5947            $self->{state} = BOGUS_COMMENT_STATE;
5948            ## Reconsume.
5949            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5950            redo A;
5951          }
5952        } elsif ($self->{state} == MD_ELEMENT_STATE) {
5953          if ($self->{nc} == [
5954               undef,
5955               undef,
5956               0x0045, # E
5957               0x004D, # M
5958               0x0045, # E
5959               0x004E, # N
5960              ]->[length $self->{kwd}] or
5961              $self->{nc} == [
5962               undef,
5963               undef,
5964               0x0065, # e
5965               0x006D, # m
5966               0x0065, # e
5967               0x006E, # n
5968              ]->[length $self->{kwd}]) {
5969            ## Stay in the state.
5970            $self->{kwd} .= chr $self->{nc};
5971            
5972        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5973          $self->{line_prev} = $self->{line};
5974          $self->{column_prev} = $self->{column};
5975          $self->{column}++;
5976          $self->{nc}
5977              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5978        } else {
5979          $self->{set_nc}->($self);
5980        }
5981      
5982            redo A;
5983          } elsif ((length $self->{kwd}) == 6 and
5984                   ($self->{nc} == 0x0054 or # T
5985                    $self->{nc} == 0x0074)) { # t
5986            if ($self->{kwd} ne 'ELEMEN' or $self->{nc} == 0x0074) {
5987              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
5988                              text => 'ELEMENT',
5989                              line => $self->{line_prev},
5990                              column => $self->{column_prev} - 5);
5991            }
5992            $self->{ct} = {type => ELEMENT_TOKEN, name => '',
5993                           line => $self->{line_prev},
5994                           column => $self->{column_prev} - 7};
5995            $self->{state} = DOCTYPE_MD_STATE;
5996            
5997        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5998          $self->{line_prev} = $self->{line};
5999          $self->{column_prev} = $self->{column};
6000          $self->{column}++;
6001          $self->{nc}
6002              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6003        } else {
6004          $self->{set_nc}->($self);
6005        }
6006      
6007            redo A;
6008          } else {
6009            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
6010                            line => $self->{line_prev},
6011                            column => $self->{column_prev} - 1
6012                                - (length $self->{kwd})
6013                                + 1 * ($self->{nc} == -1));
6014            $self->{state} = BOGUS_COMMENT_STATE;
6015            ## Reconsume.
6016            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
6017            redo A;
6018          }
6019        } elsif ($self->{state} == MD_ATTLIST_STATE) {
6020          if ($self->{nc} == [
6021               undef,
6022               0x0054, # T
6023               0x0054, # T
6024               0x004C, # L
6025               0x0049, # I
6026               0x0053, # S
6027              ]->[length $self->{kwd}] or
6028              $self->{nc} == [
6029               undef,
6030               0x0074, # t
6031               0x0074, # t
6032               0x006C, # l
6033               0x0069, # i
6034               0x0073, # s
6035              ]->[length $self->{kwd}]) {
6036            ## Stay in the state.
6037            $self->{kwd} .= chr $self->{nc};
6038            
6039        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6040          $self->{line_prev} = $self->{line};
6041          $self->{column_prev} = $self->{column};
6042          $self->{column}++;
6043          $self->{nc}
6044              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6045        } else {
6046          $self->{set_nc}->($self);
6047        }
6048      
6049            redo A;
6050          } elsif ((length $self->{kwd}) == 6 and
6051                   ($self->{nc} == 0x0054 or # T
6052                    $self->{nc} == 0x0074)) { # t
6053            if ($self->{kwd} ne 'ATTLIS' or $self->{nc} == 0x0074) {
6054              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
6055                              text => 'ATTLIST',
6056                              line => $self->{line_prev},
6057                              column => $self->{column_prev} - 5);
6058            }
6059            $self->{ct} = {type => ATTLIST_TOKEN, name => '',
6060                           attrdefs => [],
6061                           line => $self->{line_prev},
6062                           column => $self->{column_prev} - 7};
6063            $self->{state} = DOCTYPE_MD_STATE;
6064            
6065        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6066          $self->{line_prev} = $self->{line};
6067          $self->{column_prev} = $self->{column};
6068          $self->{column}++;
6069          $self->{nc}
6070              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6071        } else {
6072          $self->{set_nc}->($self);
6073        }
6074      
6075            redo A;
6076          } else {
6077            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
6078                            line => $self->{line_prev},
6079                            column => $self->{column_prev} - 1
6080                                 - (length $self->{kwd})
6081                                 + 1 * ($self->{nc} == -1));
6082            $self->{state} = BOGUS_COMMENT_STATE;
6083            ## Reconsume.
6084            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
6085            redo A;
6086          }
6087        } elsif ($self->{state} == MD_NOTATION_STATE) {
6088          if ($self->{nc} == [
6089               undef,
6090               0x004F, # O
6091               0x0054, # T
6092               0x0041, # A
6093               0x0054, # T
6094               0x0049, # I
6095               0x004F, # O
6096              ]->[length $self->{kwd}] or
6097              $self->{nc} == [
6098               undef,
6099               0x006F, # o
6100               0x0074, # t
6101               0x0061, # a
6102               0x0074, # t
6103               0x0069, # i
6104               0x006F, # o
6105              ]->[length $self->{kwd}]) {
6106            ## Stay in the state.
6107            $self->{kwd} .= chr $self->{nc};
6108            
6109        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6110          $self->{line_prev} = $self->{line};
6111          $self->{column_prev} = $self->{column};
6112          $self->{column}++;
6113          $self->{nc}
6114              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6115        } else {
6116          $self->{set_nc}->($self);
6117        }
6118      
6119            redo A;
6120          } elsif ((length $self->{kwd}) == 7 and
6121                   ($self->{nc} == 0x004E or # N
6122                    $self->{nc} == 0x006E)) { # n
6123            if ($self->{kwd} ne 'NOTATIO' or $self->{nc} == 0x006E) {
6124              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
6125                              text => 'NOTATION',
6126                              line => $self->{line_prev},
6127                              column => $self->{column_prev} - 6);
6128            }
6129            $self->{ct} = {type => NOTATION_TOKEN, name => '',
6130                           line => $self->{line_prev},
6131                           column => $self->{column_prev} - 8};
6132            $self->{state} = DOCTYPE_MD_STATE;
6133            
6134        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6135          $self->{line_prev} = $self->{line};
6136          $self->{column_prev} = $self->{column};
6137          $self->{column}++;
6138          $self->{nc}
6139              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6140        } else {
6141          $self->{set_nc}->($self);
6142        }
6143      
6144            redo A;
6145          } else {
6146            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
6147                            line => $self->{line_prev},
6148                            column => $self->{column_prev} - 1
6149                                - (length $self->{kwd})
6150                                + 1 * ($self->{nc} == -1));
6151            $self->{state} = BOGUS_COMMENT_STATE;
6152            ## Reconsume.
6153            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
6154            redo A;
6155          }
6156        } elsif ($self->{state} == DOCTYPE_MD_STATE) {
6157          ## XML5: "DOCTYPE ENTITY state", "DOCTYPE ATTLIST state", and
6158          ## "DOCTYPE NOTATION state".
6159    
6160          if ($is_space->{$self->{nc}}) {
6161            ## XML5: [NOTATION] Switch to the "DOCTYPE NOTATION identifier state".
6162            $self->{state} = BEFORE_MD_NAME_STATE;
6163            
6164        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6165          $self->{line_prev} = $self->{line};
6166          $self->{column_prev} = $self->{column};
6167          $self->{column}++;
6168          $self->{nc}
6169              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6170        } else {
6171          $self->{set_nc}->($self);
6172        }
6173      
6174            redo A;
6175          } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
6176                   $self->{nc} == 0x0025) { # %
6177            ## XML5: Switch to the "DOCTYPE bogus comment state".
6178            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before md name'); ## TODO: type
6179            $self->{state} = DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE;
6180            
6181        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6182          $self->{line_prev} = $self->{line};
6183          $self->{column_prev} = $self->{column};
6184          $self->{column}++;
6185          $self->{nc}
6186              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6187        } else {
6188          $self->{set_nc}->($self);
6189        }
6190      
6191            redo A;
6192          } elsif ($self->{nc} == -1) {
6193            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6194            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6195            ## Reconsume.
6196            redo A;
6197          } elsif ($self->{nc} == 0x003E) { # >
6198            ## XML5: Switch to the "DOCTYPE bogus comment state".
6199            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md name'); ## TODO: type
6200            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6201            
6202        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6203          $self->{line_prev} = $self->{line};
6204          $self->{column_prev} = $self->{column};
6205          $self->{column}++;
6206          $self->{nc}
6207              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6208        } else {
6209          $self->{set_nc}->($self);
6210        }
6211      
6212            redo A;
6213          } else {
6214            ## XML5: Switch to the "DOCTYPE bogus comment state".
6215            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before md name'); ## TODO: type
6216            $self->{state} = BEFORE_MD_NAME_STATE;
6217            redo A;
6218          }
6219        } elsif ($self->{state} == BEFORE_MD_NAME_STATE) {
6220          ## XML5: "DOCTYPE ENTITY parameter state", "DOCTYPE ENTITY type
6221          ## before state", "DOCTYPE ATTLIST name before state".
6222    
6223          if ($is_space->{$self->{nc}}) {
6224            ## Stay in the state.
6225            
6226        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6227          $self->{line_prev} = $self->{line};
6228          $self->{column_prev} = $self->{column};
6229          $self->{column}++;
6230          $self->{nc}
6231              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6232        } else {
6233          $self->{set_nc}->($self);
6234        }
6235      
6236            redo A;
6237          } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
6238                   $self->{nc} == 0x0025) { # %
6239            $self->{state} = DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE;
6240            
6241        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6242          $self->{line_prev} = $self->{line};
6243          $self->{column_prev} = $self->{column};
6244          $self->{column}++;
6245          $self->{nc}
6246              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6247        } else {
6248          $self->{set_nc}->($self);
6249        }
6250      
6251            redo A;
6252          } elsif ($self->{nc} == 0x003E) { # >
6253            ## XML5: Same as "Anything else".
6254            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md name'); ## TODO: type
6255            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6256            
6257        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6258          $self->{line_prev} = $self->{line};
6259          $self->{column_prev} = $self->{column};
6260          $self->{column}++;
6261          $self->{nc}
6262              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6263        } else {
6264          $self->{set_nc}->($self);
6265        }
6266      
6267            redo A;
6268          } elsif ($self->{nc} == -1) {
6269            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6270            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6271            ## Reconsume.
6272            redo A;
6273          } else {
6274            ## XML5: [ATTLIST] Not defined yet.
6275            $self->{ct}->{name} .= chr $self->{nc};
6276            $self->{state} = MD_NAME_STATE;
6277            
6278        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6279          $self->{line_prev} = $self->{line};
6280          $self->{column_prev} = $self->{column};
6281          $self->{column}++;
6282          $self->{nc}
6283              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6284        } else {
6285          $self->{set_nc}->($self);
6286        }
6287      
6288            redo A;
6289          }
6290        } elsif ($self->{state} == DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE) {
6291          if ($is_space->{$self->{nc}}) {
6292            ## XML5: Switch to the "DOCTYPE ENTITY parameter state".
6293            $self->{ct}->{type} = PARAMETER_ENTITY_TOKEN;
6294            $self->{state} = BEFORE_MD_NAME_STATE;
6295            
6296        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6297          $self->{line_prev} = $self->{line};
6298          $self->{column_prev} = $self->{column};
6299          $self->{column}++;
6300          $self->{nc}
6301              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6302        } else {
6303          $self->{set_nc}->($self);
6304        }
6305      
6306            redo A;
6307          } elsif ($self->{nc} == 0x003E) { # >
6308            ## XML5: Same as "Anything else".
6309            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md name'); ## TODO: type
6310            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6311            
6312        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6313          $self->{line_prev} = $self->{line};
6314          $self->{column_prev} = $self->{column};
6315          $self->{column}++;
6316          $self->{nc}
6317              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6318        } else {
6319          $self->{set_nc}->($self);
6320        }
6321      
6322            redo A;
6323          } elsif ($self->{nc} == -1) {
6324            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md');
6325            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6326            ## Reconsume.
6327            redo A;
6328          } else {
6329            ## XML5: No parse error.
6330            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space after ENTITY percent'); ## TODO: type
6331            $self->{state} = BOGUS_COMMENT_STATE;
6332            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
6333            ## Reconsume.
6334            redo A;
6335          }
6336        } elsif ($self->{state} == MD_NAME_STATE) {
6337          ## XML5: "DOCTYPE ENTITY name state" and "DOCTYPE ATTLIST name state".
6338          
6339          if ($is_space->{$self->{nc}}) {
6340            if ($self->{ct}->{type} == ATTLIST_TOKEN) {
6341              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
6342            } elsif ($self->{ct}->{type} == ELEMENT_TOKEN) {
6343              $self->{state} = AFTER_ELEMENT_NAME_STATE;
6344            } else { # ENTITY/NOTATION
6345              $self->{state} = AFTER_DOCTYPE_NAME_STATE;
6346            }
6347            
6348        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6349          $self->{line_prev} = $self->{line};
6350          $self->{column_prev} = $self->{column};
6351          $self->{column}++;
6352          $self->{nc}
6353              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6354        } else {
6355          $self->{set_nc}->($self);
6356        }
6357      
6358            redo A;
6359          } elsif ($self->{nc} == 0x003E) { # >
6360            if ($self->{ct}->{type} == ATTLIST_TOKEN) {
6361              #
6362            } else {
6363              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md def'); ## TODO: type
6364            }
6365            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6366            
6367        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6368          $self->{line_prev} = $self->{line};
6369          $self->{column_prev} = $self->{column};
6370          $self->{column}++;
6371          $self->{nc}
6372              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6373        } else {
6374          $self->{set_nc}->($self);
6375        }
6376      
6377            return  ($self->{ct}); # ELEMENT/ENTITY/ATTLIST/NOTATION
6378            redo A;
6379          } elsif ($self->{nc} == -1) {
6380            ## XML5: [ATTLIST] No parse error.
6381            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md');
6382            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6383            ## Reconsume.
6384            return  ($self->{ct}); # ELEMENT/ENTITY/ATTLIST/NOTATION
6385            redo A;
6386          } else {
6387            ## XML5: [ATTLIST] Not defined yet.
6388            $self->{ct}->{name} .= chr $self->{nc};
6389            ## Stay in the state.
6390            
6391        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6392          $self->{line_prev} = $self->{line};
6393          $self->{column_prev} = $self->{column};
6394          $self->{column}++;
6395          $self->{nc}
6396              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6397        } else {
6398          $self->{set_nc}->($self);
6399        }
6400      
6401            redo A;
6402          }
6403        } elsif ($self->{state} == DOCTYPE_ATTLIST_NAME_AFTER_STATE) {
6404          if ($is_space->{$self->{nc}}) {
6405            ## Stay in the state.
6406            
6407        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6408          $self->{line_prev} = $self->{line};
6409          $self->{column_prev} = $self->{column};
6410          $self->{column}++;
6411          $self->{nc}
6412              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6413        } else {
6414          $self->{set_nc}->($self);
6415        }
6416      
6417            redo A;
6418          } elsif ($self->{nc} == 0x003E) { # >
6419            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6420            
6421        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6422          $self->{line_prev} = $self->{line};
6423          $self->{column_prev} = $self->{column};
6424          $self->{column}++;
6425          $self->{nc}
6426              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6427        } else {
6428          $self->{set_nc}->($self);
6429        }
6430      
6431            return  ($self->{ct}); # ATTLIST
6432            redo A;
6433          } elsif ($self->{nc} == -1) {
6434            ## XML5: No parse error.
6435            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6436            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6437            return  ($self->{ct});
6438            redo A;
6439          } else {
6440            ## XML5: Not defined yet.
6441            $self->{ca} = {name => chr ($self->{nc}), # attrdef
6442                           tokens => [],
6443                           line => $self->{line}, column => $self->{column}};
6444            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE;
6445            
6446        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6447          $self->{line_prev} = $self->{line};
6448          $self->{column_prev} = $self->{column};
6449          $self->{column}++;
6450          $self->{nc}
6451              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6452        } else {
6453          $self->{set_nc}->($self);
6454        }
6455      
6456            redo A;
6457          }
6458        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE) {
6459          if ($is_space->{$self->{nc}}) {
6460            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE;
6461            
6462        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6463          $self->{line_prev} = $self->{line};
6464          $self->{column_prev} = $self->{column};
6465          $self->{column}++;
6466          $self->{nc}
6467              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6468        } else {
6469          $self->{set_nc}->($self);
6470        }
6471      
6472            redo A;
6473          } elsif ($self->{nc} == 0x003E) { # >
6474            ## XML5: Same as "anything else".
6475            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr type'); ## TODO: type
6476            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6477            
6478        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6479          $self->{line_prev} = $self->{line};
6480          $self->{column_prev} = $self->{column};
6481          $self->{column}++;
6482          $self->{nc}
6483              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6484        } else {
6485          $self->{set_nc}->($self);
6486        }
6487      
6488            return  ($self->{ct}); # ATTLIST
6489            redo A;
6490          } elsif ($self->{nc} == 0x0028) { # (
6491            ## XML5: Same as "anything else".
6492            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before paren'); ## TODO: type
6493            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6494            
6495        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6496          $self->{line_prev} = $self->{line};
6497          $self->{column_prev} = $self->{column};
6498          $self->{column}++;
6499          $self->{nc}
6500              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6501        } else {
6502          $self->{set_nc}->($self);
6503        }
6504      
6505            redo A;
6506          } elsif ($self->{nc} == -1) {
6507            ## XML5: No parse error.
6508            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6509            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6510            
6511        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6512          $self->{line_prev} = $self->{line};
6513          $self->{column_prev} = $self->{column};
6514          $self->{column}++;
6515          $self->{nc}
6516              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6517        } else {
6518          $self->{set_nc}->($self);
6519        }
6520      
6521            return  ($self->{ct}); # ATTLIST
6522            redo A;
6523          } else {
6524            ## XML5: Not defined yet.
6525            $self->{ca}->{name} .= chr $self->{nc};
6526            ## Stay in the state.
6527            
6528        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6529          $self->{line_prev} = $self->{line};
6530          $self->{column_prev} = $self->{column};
6531          $self->{column}++;
6532          $self->{nc}
6533              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6534        } else {
6535          $self->{set_nc}->($self);
6536        }
6537      
6538            redo A;
6539          }
6540        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE) {
6541          if ($is_space->{$self->{nc}}) {
6542            ## Stay in the state.
6543            
6544        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6545          $self->{line_prev} = $self->{line};
6546          $self->{column_prev} = $self->{column};
6547          $self->{column}++;
6548          $self->{nc}
6549              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6550        } else {
6551          $self->{set_nc}->($self);
6552        }
6553      
6554            redo A;
6555          } elsif ($self->{nc} == 0x003E) { # >
6556            ## XML5: Same as "anything else".
6557            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr type'); ## TODO: type
6558            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6559            
6560        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6561          $self->{line_prev} = $self->{line};
6562          $self->{column_prev} = $self->{column};
6563          $self->{column}++;
6564          $self->{nc}
6565              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6566        } else {
6567          $self->{set_nc}->($self);
6568        }
6569      
6570            return  ($self->{ct}); # ATTLIST
6571            redo A;
6572          } elsif ($self->{nc} == 0x0028) { # (
6573            ## XML5: Same as "anything else".
6574            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6575            
6576        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6577          $self->{line_prev} = $self->{line};
6578          $self->{column_prev} = $self->{column};
6579          $self->{column}++;
6580          $self->{nc}
6581              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6582        } else {
6583          $self->{set_nc}->($self);
6584        }
6585      
6586            redo A;
6587          } elsif ($self->{nc} == -1) {
6588            ## XML5: No parse error.
6589            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6590            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6591            
6592        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6593          $self->{line_prev} = $self->{line};
6594          $self->{column_prev} = $self->{column};
6595          $self->{column}++;
6596          $self->{nc}
6597              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6598        } else {
6599          $self->{set_nc}->($self);
6600        }
6601      
6602            return  ($self->{ct});
6603            redo A;
6604          } else {
6605            ## XML5: Not defined yet.
6606            $self->{ca}->{type} = chr $self->{nc};
6607            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE;
6608            
6609        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6610          $self->{line_prev} = $self->{line};
6611          $self->{column_prev} = $self->{column};
6612          $self->{column}++;
6613          $self->{nc}
6614              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6615        } else {
6616          $self->{set_nc}->($self);
6617        }
6618      
6619            redo A;
6620          }
6621        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE) {
6622          if ($is_space->{$self->{nc}}) {
6623            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE;
6624            
6625        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6626          $self->{line_prev} = $self->{line};
6627          $self->{column_prev} = $self->{column};
6628          $self->{column}++;
6629          $self->{nc}
6630              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6631        } else {
6632          $self->{set_nc}->($self);
6633        }
6634      
6635            redo A;
6636          } elsif ($self->{nc} == 0x0023) { # #
6637            ## XML5: Same as "anything else".
6638            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6639            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6640            
6641        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6642          $self->{line_prev} = $self->{line};
6643          $self->{column_prev} = $self->{column};
6644          $self->{column}++;
6645          $self->{nc}
6646              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6647        } else {
6648          $self->{set_nc}->($self);
6649        }
6650      
6651            redo A;
6652          } elsif ($self->{nc} == 0x0022) { # "
6653            ## XML5: Same as "anything else".
6654            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6655            $self->{ca}->{value} = '';
6656            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6657            
6658        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6659          $self->{line_prev} = $self->{line};
6660          $self->{column_prev} = $self->{column};
6661          $self->{column}++;
6662          $self->{nc}
6663              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6664        } else {
6665          $self->{set_nc}->($self);
6666        }
6667      
6668            redo A;
6669          } elsif ($self->{nc} == 0x0027) { # '
6670            ## XML5: Same as "anything else".
6671            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6672            $self->{ca}->{value} = '';
6673            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6674            
6675        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6676          $self->{line_prev} = $self->{line};
6677          $self->{column_prev} = $self->{column};
6678          $self->{column}++;
6679          $self->{nc}
6680              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6681        } else {
6682          $self->{set_nc}->($self);
6683        }
6684      
6685            redo A;
6686          } elsif ($self->{nc} == 0x003E) { # >
6687            ## XML5: Same as "anything else".
6688            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6689            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6690            
6691        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6692          $self->{line_prev} = $self->{line};
6693          $self->{column_prev} = $self->{column};
6694          $self->{column}++;
6695          $self->{nc}
6696              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6697        } else {
6698          $self->{set_nc}->($self);
6699        }
6700      
6701            return  ($self->{ct}); # ATTLIST
6702            redo A;
6703          } elsif ($self->{nc} == 0x0028) { # (
6704            ## XML5: Same as "anything else".
6705            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before paren'); ## TODO: type
6706            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6707            
6708        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6709          $self->{line_prev} = $self->{line};
6710          $self->{column_prev} = $self->{column};
6711          $self->{column}++;
6712          $self->{nc}
6713              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6714        } else {
6715          $self->{set_nc}->($self);
6716        }
6717      
6718            redo A;
6719          } elsif ($self->{nc} == -1) {
6720            ## XML5: No parse error.
6721            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6722            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6723            
6724        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6725          $self->{line_prev} = $self->{line};
6726          $self->{column_prev} = $self->{column};
6727          $self->{column}++;
6728          $self->{nc}
6729              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6730        } else {
6731          $self->{set_nc}->($self);
6732        }
6733      
6734            return  ($self->{ct});
6735            redo A;
6736          } else {
6737            ## XML5: Not defined yet.
6738            $self->{ca}->{type} .= chr $self->{nc};
6739            ## Stay in the state.
6740            
6741        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6742          $self->{line_prev} = $self->{line};
6743          $self->{column_prev} = $self->{column};
6744          $self->{column}++;
6745          $self->{nc}
6746              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6747        } else {
6748          $self->{set_nc}->($self);
6749        }
6750      
6751            redo A;
6752          }
6753        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE) {
6754          if ($is_space->{$self->{nc}}) {
6755            ## Stay in the state.
6756            
6757        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6758          $self->{line_prev} = $self->{line};
6759          $self->{column_prev} = $self->{column};
6760          $self->{column}++;
6761          $self->{nc}
6762              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6763        } else {
6764          $self->{set_nc}->($self);
6765        }
6766      
6767            redo A;
6768          } elsif ($self->{nc} == 0x0028) { # (
6769            ## XML5: Same as "anything else".
6770            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6771            
6772        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6773          $self->{line_prev} = $self->{line};
6774          $self->{column_prev} = $self->{column};
6775          $self->{column}++;
6776          $self->{nc}
6777              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6778        } else {
6779          $self->{set_nc}->($self);
6780        }
6781      
6782            redo A;
6783          } elsif ($self->{nc} == 0x0023) { # #
6784            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6785            
6786        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6787          $self->{line_prev} = $self->{line};
6788          $self->{column_prev} = $self->{column};
6789          $self->{column}++;
6790          $self->{nc}
6791              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6792        } else {
6793          $self->{set_nc}->($self);
6794        }
6795      
6796            redo A;
6797          } elsif ($self->{nc} == 0x0022) { # "
6798            ## XML5: Same as "anything else".
6799            $self->{ca}->{value} = '';
6800            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6801            
6802        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6803          $self->{line_prev} = $self->{line};
6804          $self->{column_prev} = $self->{column};
6805          $self->{column}++;
6806          $self->{nc}
6807              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6808        } else {
6809          $self->{set_nc}->($self);
6810        }
6811      
6812            redo A;
6813          } elsif ($self->{nc} == 0x0027) { # '
6814            ## XML5: Same as "anything else".
6815            $self->{ca}->{value} = '';
6816            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6817            
6818        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6819          $self->{line_prev} = $self->{line};
6820          $self->{column_prev} = $self->{column};
6821          $self->{column}++;
6822          $self->{nc}
6823              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6824        } else {
6825          $self->{set_nc}->($self);
6826        }
6827      
6828            redo A;
6829          } elsif ($self->{nc} == 0x003E) { # >
6830            ## XML5: Same as "anything else".
6831            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6832            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6833            
6834        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6835          $self->{line_prev} = $self->{line};
6836          $self->{column_prev} = $self->{column};
6837          $self->{column}++;
6838          $self->{nc}
6839              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6840        } else {
6841          $self->{set_nc}->($self);
6842        }
6843      
6844            return  ($self->{ct}); # ATTLIST
6845            redo A;
6846          } elsif ($self->{nc} == -1) {
6847            ## XML5: No parse error.
6848            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6849            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6850            
6851        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6852          $self->{line_prev} = $self->{line};
6853          $self->{column_prev} = $self->{column};
6854          $self->{column}++;
6855          $self->{nc}
6856              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6857        } else {
6858          $self->{set_nc}->($self);
6859        }
6860      
6861            return  ($self->{ct});
6862            redo A;
6863          } else {
6864            ## XML5: Switch to the "DOCTYPE bogus comment state".
6865            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
6866            $self->{ca}->{value} = '';
6867            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
6868            ## Reconsume.
6869            redo A;
6870          }
6871        } elsif ($self->{state} == BEFORE_ALLOWED_TOKEN_STATE) {
6872          if ($is_space->{$self->{nc}}) {
6873            ## Stay in the state.
6874            
6875        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6876          $self->{line_prev} = $self->{line};
6877          $self->{column_prev} = $self->{column};
6878          $self->{column}++;
6879          $self->{nc}
6880              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6881        } else {
6882          $self->{set_nc}->($self);
6883        }
6884      
6885            redo A;
6886          } elsif ($self->{nc} == 0x007C) { # |
6887            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty allowed token'); ## TODO: type
6888            ## Stay in the state.
6889            
6890        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6891          $self->{line_prev} = $self->{line};
6892          $self->{column_prev} = $self->{column};
6893          $self->{column}++;
6894          $self->{nc}
6895              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6896        } else {
6897          $self->{set_nc}->($self);
6898        }
6899      
6900            redo A;
6901          } elsif ($self->{nc} == 0x0029) { # )
6902            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty allowed token'); ## TODO: type
6903            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
6904            
6905        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6906          $self->{line_prev} = $self->{line};
6907          $self->{column_prev} = $self->{column};
6908          $self->{column}++;
6909          $self->{nc}
6910              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6911        } else {
6912          $self->{set_nc}->($self);
6913        }
6914      
6915            redo A;
6916          } elsif ($self->{nc} == 0x003E) { # >
6917            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
6918            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6919            
6920        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6921          $self->{line_prev} = $self->{line};
6922          $self->{column_prev} = $self->{column};
6923          $self->{column}++;
6924          $self->{nc}
6925              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6926        } else {
6927          $self->{set_nc}->($self);
6928        }
6929      
6930            return  ($self->{ct}); # ATTLIST
6931            redo A;
6932          } elsif ($self->{nc} == -1) {
6933            ## XML5: No parse error.
6934            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6935            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6936            
6937        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6938          $self->{line_prev} = $self->{line};
6939          $self->{column_prev} = $self->{column};
6940          $self->{column}++;
6941          $self->{nc}
6942              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6943        } else {
6944          $self->{set_nc}->($self);
6945        }
6946      
6947            return  ($self->{ct});
6948            redo A;
6949          } else {
6950            push @{$self->{ca}->{tokens}}, chr $self->{nc};
6951            $self->{state} = ALLOWED_TOKEN_STATE;
6952            
6953        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6954          $self->{line_prev} = $self->{line};
6955          $self->{column_prev} = $self->{column};
6956          $self->{column}++;
6957          $self->{nc}
6958              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6959        } else {
6960          $self->{set_nc}->($self);
6961        }
6962      
6963            redo A;
6964          }
6965        } elsif ($self->{state} == ALLOWED_TOKEN_STATE) {
6966          if ($is_space->{$self->{nc}}) {
6967            $self->{state} = AFTER_ALLOWED_TOKEN_STATE;
6968            
6969        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6970          $self->{line_prev} = $self->{line};
6971          $self->{column_prev} = $self->{column};
6972          $self->{column}++;
6973          $self->{nc}
6974              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6975        } else {
6976          $self->{set_nc}->($self);
6977        }
6978      
6979            redo A;
6980          } elsif ($self->{nc} == 0x007C) { # |
6981            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6982            
6983        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6984          $self->{line_prev} = $self->{line};
6985          $self->{column_prev} = $self->{column};
6986          $self->{column}++;
6987          $self->{nc}
6988              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6989        } else {
6990          $self->{set_nc}->($self);
6991        }
6992      
6993            redo A;
6994          } elsif ($self->{nc} == 0x0029) { # )
6995            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
6996            
6997        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6998          $self->{line_prev} = $self->{line};
6999          $self->{column_prev} = $self->{column};
7000          $self->{column}++;
7001          $self->{nc}
7002              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7003        } else {
7004          $self->{set_nc}->($self);
7005        }
7006      
7007            redo A;
7008          } elsif ($self->{nc} == 0x003E) { # >
7009            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
7010            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7011            
7012        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7013          $self->{line_prev} = $self->{line};
7014          $self->{column_prev} = $self->{column};
7015          $self->{column}++;
7016          $self->{nc}
7017              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7018        } else {
7019          $self->{set_nc}->($self);
7020        }
7021      
7022            return  ($self->{ct}); # ATTLIST
7023            redo A;
7024          } elsif ($self->{nc} == -1) {
7025            ## XML5: No parse error.
7026            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7027            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7028            
7029        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7030          $self->{line_prev} = $self->{line};
7031          $self->{column_prev} = $self->{column};
7032          $self->{column}++;
7033          $self->{nc}
7034              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7035        } else {
7036          $self->{set_nc}->($self);
7037        }
7038      
7039            return  ($self->{ct});
7040            redo A;
7041          } else {
7042            $self->{ca}->{tokens}->[-1] .= chr $self->{nc};
7043            ## Stay in the state.
7044            
7045        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7046          $self->{line_prev} = $self->{line};
7047          $self->{column_prev} = $self->{column};
7048          $self->{column}++;
7049          $self->{nc}
7050              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7051        } else {
7052          $self->{set_nc}->($self);
7053        }
7054      
7055            redo A;
7056          }
7057        } elsif ($self->{state} == AFTER_ALLOWED_TOKEN_STATE) {
7058          if ($is_space->{$self->{nc}}) {
7059            ## Stay in the state.
7060            
7061        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7062          $self->{line_prev} = $self->{line};
7063          $self->{column_prev} = $self->{column};
7064          $self->{column}++;
7065          $self->{nc}
7066              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7067        } else {
7068          $self->{set_nc}->($self);
7069        }
7070      
7071            redo A;
7072          } elsif ($self->{nc} == 0x007C) { # |
7073            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
7074            
7075        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7076          $self->{line_prev} = $self->{line};
7077          $self->{column_prev} = $self->{column};
7078          $self->{column}++;
7079          $self->{nc}
7080              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7081        } else {
7082          $self->{set_nc}->($self);
7083        }
7084      
7085            redo A;
7086          } elsif ($self->{nc} == 0x0029) { # )
7087            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
7088            
7089        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7090          $self->{line_prev} = $self->{line};
7091          $self->{column_prev} = $self->{column};
7092          $self->{column}++;
7093          $self->{nc}
7094              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7095        } else {
7096          $self->{set_nc}->($self);
7097        }
7098      
7099            redo A;
7100          } elsif ($self->{nc} == 0x003E) { # >
7101            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
7102            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7103            
7104        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7105          $self->{line_prev} = $self->{line};
7106          $self->{column_prev} = $self->{column};
7107          $self->{column}++;
7108          $self->{nc}
7109              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7110        } else {
7111          $self->{set_nc}->($self);
7112        }
7113      
7114            return  ($self->{ct}); # ATTLIST
7115            redo A;
7116          } elsif ($self->{nc} == -1) {
7117            ## XML5: No parse error.
7118            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7119            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7120            
7121        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7122          $self->{line_prev} = $self->{line};
7123          $self->{column_prev} = $self->{column};
7124          $self->{column}++;
7125          $self->{nc}
7126              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7127        } else {
7128          $self->{set_nc}->($self);
7129        }
7130      
7131            return  ($self->{ct});
7132            redo A;
7133          } else {
7134            $self->{parse_error}->(level => $self->{level}->{must}, type => 'space in allowed token', ## TODO: type
7135                            line => $self->{line_prev},
7136                            column => $self->{column_prev});
7137            $self->{ca}->{tokens}->[-1] .= ' ' . chr $self->{nc};
7138            $self->{state} = ALLOWED_TOKEN_STATE;
7139            
7140        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7141          $self->{line_prev} = $self->{line};
7142          $self->{column_prev} = $self->{column};
7143          $self->{column}++;
7144          $self->{nc}
7145              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7146        } else {
7147          $self->{set_nc}->($self);
7148        }
7149      
7150            redo A;
7151          }
7152        } elsif ($self->{state} == AFTER_ALLOWED_TOKENS_STATE) {
7153          if ($is_space->{$self->{nc}}) {
7154            $self->{state} = BEFORE_ATTR_DEFAULT_STATE;
7155            
7156        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7157          $self->{line_prev} = $self->{line};
7158          $self->{column_prev} = $self->{column};
7159          $self->{column}++;
7160          $self->{nc}
7161              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7162        } else {
7163          $self->{set_nc}->($self);
7164        }
7165      
7166            redo A;
7167          } elsif ($self->{nc} == 0x0023) { # #
7168            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7169            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
7170            
7171        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7172          $self->{line_prev} = $self->{line};
7173          $self->{column_prev} = $self->{column};
7174          $self->{column}++;
7175          $self->{nc}
7176              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7177        } else {
7178          $self->{set_nc}->($self);
7179        }
7180      
7181            redo A;
7182          } elsif ($self->{nc} == 0x0022) { # "
7183            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7184            $self->{ca}->{value} = '';
7185            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7186            
7187        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7188          $self->{line_prev} = $self->{line};
7189          $self->{column_prev} = $self->{column};
7190          $self->{column}++;
7191          $self->{nc}
7192              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7193        } else {
7194          $self->{set_nc}->($self);
7195        }
7196      
7197            redo A;
7198          } elsif ($self->{nc} == 0x0027) { # '
7199            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7200            $self->{ca}->{value} = '';
7201            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7202            
7203        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7204          $self->{line_prev} = $self->{line};
7205          $self->{column_prev} = $self->{column};
7206          $self->{column}++;
7207          $self->{nc}
7208              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7209        } else {
7210          $self->{set_nc}->($self);
7211        }
7212      
7213            redo A;
7214          } elsif ($self->{nc} == 0x003E) { # >
7215            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
7216            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7217            
7218        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7219          $self->{line_prev} = $self->{line};
7220          $self->{column_prev} = $self->{column};
7221          $self->{column}++;
7222          $self->{nc}
7223              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7224        } else {
7225          $self->{set_nc}->($self);
7226        }
7227      
7228            return  ($self->{ct}); # ATTLIST
7229            redo A;
7230          } elsif ($self->{nc} == -1) {
7231            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7232            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7233            
7234        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7235          $self->{line_prev} = $self->{line};
7236          $self->{column_prev} = $self->{column};
7237          $self->{column}++;
7238          $self->{nc}
7239              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7240        } else {
7241          $self->{set_nc}->($self);
7242        }
7243      
7244            return  ($self->{ct});
7245            redo A;
7246          } else {
7247            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
7248            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
7249            ## Reconsume.
7250            redo A;
7251          }
7252        } elsif ($self->{state} == BEFORE_ATTR_DEFAULT_STATE) {
7253          if ($is_space->{$self->{nc}}) {
7254            ## Stay in the state.
7255            
7256        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7257          $self->{line_prev} = $self->{line};
7258          $self->{column_prev} = $self->{column};
7259          $self->{column}++;
7260          $self->{nc}
7261              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7262        } else {
7263          $self->{set_nc}->($self);
7264        }
7265      
7266            redo A;
7267          } elsif ($self->{nc} == 0x0023) { # #
7268            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
7269            
7270        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7271          $self->{line_prev} = $self->{line};
7272          $self->{column_prev} = $self->{column};
7273          $self->{column}++;
7274          $self->{nc}
7275              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7276        } else {
7277          $self->{set_nc}->($self);
7278        }
7279      
7280            redo A;
7281          } elsif ($self->{nc} == 0x0022) { # "
7282            $self->{ca}->{value} = '';
7283            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7284            
7285        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7286          $self->{line_prev} = $self->{line};
7287          $self->{column_prev} = $self->{column};
7288          $self->{column}++;
7289          $self->{nc}
7290              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7291        } else {
7292          $self->{set_nc}->($self);
7293        }
7294      
7295            redo A;
7296          } elsif ($self->{nc} == 0x0027) { # '
7297            $self->{ca}->{value} = '';
7298            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7299            
7300        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7301          $self->{line_prev} = $self->{line};
7302          $self->{column_prev} = $self->{column};
7303          $self->{column}++;
7304          $self->{nc}
7305              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7306        } else {
7307          $self->{set_nc}->($self);
7308        }
7309      
7310            redo A;
7311          } elsif ($self->{nc} == 0x003E) { # >
7312            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
7313            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7314            
7315        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7316          $self->{line_prev} = $self->{line};
7317          $self->{column_prev} = $self->{column};
7318          $self->{column}++;
7319          $self->{nc}
7320              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7321        } else {
7322          $self->{set_nc}->($self);
7323        }
7324      
7325            return  ($self->{ct}); # ATTLIST
7326            redo A;
7327          } elsif ($self->{nc} == -1) {
7328            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7329            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7330            
7331        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7332          $self->{line_prev} = $self->{line};
7333          $self->{column_prev} = $self->{column};
7334          $self->{column}++;
7335          $self->{nc}
7336              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7337        } else {
7338          $self->{set_nc}->($self);
7339        }
7340      
7341            return  ($self->{ct});
7342            redo A;
7343          } else {
7344            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
7345            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
7346            ## Reconsume.
7347            redo A;
7348          }
7349        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE) {
7350          if ($is_space->{$self->{nc}}) {
7351            ## XML5: No parse error.
7352            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no default type'); ## TODO: type
7353            $self->{state} = BOGUS_MD_STATE;
7354            ## Reconsume.
7355            redo A;
7356          } elsif ($self->{nc} == 0x0022) { # "
7357            ## XML5: Same as "anything else".
7358            $self->{ca}->{value} = '';
7359            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7360            
7361        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7362          $self->{line_prev} = $self->{line};
7363          $self->{column_prev} = $self->{column};
7364          $self->{column}++;
7365          $self->{nc}
7366              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7367        } else {
7368          $self->{set_nc}->($self);
7369        }
7370      
7371            redo A;
7372          } elsif ($self->{nc} == 0x0027) { # '
7373            ## XML5: Same as "anything else".
7374            $self->{ca}->{value} = '';
7375            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7376            
7377        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7378          $self->{line_prev} = $self->{line};
7379          $self->{column_prev} = $self->{column};
7380          $self->{column}++;
7381          $self->{nc}
7382              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7383        } else {
7384          $self->{set_nc}->($self);
7385        }
7386      
7387            redo A;
7388          } elsif ($self->{nc} == 0x003E) { # >
7389            ## XML5: Same as "anything else".
7390            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
7391            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7392            
7393        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7394          $self->{line_prev} = $self->{line};
7395          $self->{column_prev} = $self->{column};
7396          $self->{column}++;
7397          $self->{nc}
7398              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7399        } else {
7400          $self->{set_nc}->($self);
7401        }
7402      
7403            return  ($self->{ct}); # ATTLIST
7404            redo A;
7405          } elsif ($self->{nc} == -1) {
7406            ## XML5: No parse error.
7407            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7408            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7409            
7410        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7411          $self->{line_prev} = $self->{line};
7412          $self->{column_prev} = $self->{column};
7413          $self->{column}++;
7414          $self->{nc}
7415              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7416        } else {
7417          $self->{set_nc}->($self);
7418        }
7419      
7420            return  ($self->{ct});
7421            redo A;
7422          } else {
7423            $self->{ca}->{default} = chr $self->{nc};
7424            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE;
7425            
7426        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7427          $self->{line_prev} = $self->{line};
7428          $self->{column_prev} = $self->{column};
7429          $self->{column}++;
7430          $self->{nc}
7431              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7432        } else {
7433          $self->{set_nc}->($self);
7434        }
7435      
7436            redo A;
7437          }
7438        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE) {
7439          if ($is_space->{$self->{nc}}) {
7440            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE;
7441            
7442        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7443          $self->{line_prev} = $self->{line};
7444          $self->{column_prev} = $self->{column};
7445          $self->{column}++;
7446          $self->{nc}
7447              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7448        } else {
7449          $self->{set_nc}->($self);
7450        }
7451      
7452            redo A;
7453          } elsif ($self->{nc} == 0x0022) { # "
7454            ## XML5: Same as "anything else".
7455            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7456            $self->{ca}->{value} = '';
7457            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7458            
7459        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7460          $self->{line_prev} = $self->{line};
7461          $self->{column_prev} = $self->{column};
7462          $self->{column}++;
7463          $self->{nc}
7464              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7465        } else {
7466          $self->{set_nc}->($self);
7467        }
7468      
7469            redo A;
7470          } elsif ($self->{nc} == 0x0027) { # '
7471            ## XML5: Same as "anything else".
7472            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7473            $self->{ca}->{value} = '';
7474            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7475            
7476        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7477          $self->{line_prev} = $self->{line};
7478          $self->{column_prev} = $self->{column};
7479          $self->{column}++;
7480          $self->{nc}
7481              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7482        } else {
7483          $self->{set_nc}->($self);
7484        }
7485      
7486            redo A;
7487          } elsif ($self->{nc} == 0x003E) { # >
7488            ## XML5: Same as "anything else".
7489            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7490            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7491            
7492        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7493          $self->{line_prev} = $self->{line};
7494          $self->{column_prev} = $self->{column};
7495          $self->{column}++;
7496          $self->{nc}
7497              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7498        } else {
7499          $self->{set_nc}->($self);
7500        }
7501      
7502            return  ($self->{ct}); # ATTLIST
7503            redo A;
7504          } elsif ($self->{nc} == -1) {
7505            ## XML5: No parse error.
7506            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7507            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7508            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7509            
7510        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7511          $self->{line_prev} = $self->{line};
7512          $self->{column_prev} = $self->{column};
7513          $self->{column}++;
7514          $self->{nc}
7515              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7516        } else {
7517          $self->{set_nc}->($self);
7518        }
7519      
7520            return  ($self->{ct});
7521            redo A;
7522          } else {
7523            $self->{ca}->{default} .= chr $self->{nc};
7524            ## Stay in the state.
7525            
7526        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7527          $self->{line_prev} = $self->{line};
7528          $self->{column_prev} = $self->{column};
7529          $self->{column}++;
7530          $self->{nc}
7531              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7532        } else {
7533          $self->{set_nc}->($self);
7534        }
7535      
7536            redo A;
7537          }
7538        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE) {
7539          if ($is_space->{$self->{nc}}) {
7540            ## Stay in the state.
7541            
7542        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7543          $self->{line_prev} = $self->{line};
7544          $self->{column_prev} = $self->{column};
7545          $self->{column}++;
7546          $self->{nc}
7547              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7548        } else {
7549          $self->{set_nc}->($self);
7550        }
7551      
7552            redo A;
7553          } elsif ($self->{nc} == 0x0022) { # "
7554            $self->{ca}->{value} = '';
7555            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7556            
7557        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7558          $self->{line_prev} = $self->{line};
7559          $self->{column_prev} = $self->{column};
7560          $self->{column}++;
7561          $self->{nc}
7562              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7563        } else {
7564          $self->{set_nc}->($self);
7565        }
7566      
7567            redo A;
7568          } elsif ($self->{nc} == 0x0027) { # '
7569            $self->{ca}->{value} = '';
7570            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7571            
7572        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7573          $self->{line_prev} = $self->{line};
7574          $self->{column_prev} = $self->{column};
7575          $self->{column}++;
7576          $self->{nc}
7577              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7578        } else {
7579          $self->{set_nc}->($self);
7580        }
7581      
7582            redo A;
7583          } elsif ($self->{nc} == 0x003E) { # >
7584            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7585            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7586            
7587        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7588          $self->{line_prev} = $self->{line};
7589          $self->{column_prev} = $self->{column};
7590          $self->{column}++;
7591          $self->{nc}
7592              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7593        } else {
7594          $self->{set_nc}->($self);
7595        }
7596      
7597            return  ($self->{ct}); # ATTLIST
7598            redo A;
7599          } elsif ($self->{nc} == -1) {
7600            ## XML5: No parse error.
7601            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7602            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7603            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7604            
7605        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7606          $self->{line_prev} = $self->{line};
7607          $self->{column_prev} = $self->{column};
7608          $self->{column}++;
7609          $self->{nc}
7610              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7611        } else {
7612          $self->{set_nc}->($self);
7613        }
7614      
7615            return  ($self->{ct});
7616            redo A;
7617          } else {
7618            ## XML5: Not defined yet.
7619            if ($self->{ca}->{default} eq 'FIXED') {
7620              $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
7621            } else {
7622              push @{$self->{ct}->{attrdefs}}, $self->{ca};
7623              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7624            }
7625            ## Reconsume.
7626            redo A;
7627          }
7628        } elsif ($self->{state} == AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE) {
7629          if ($is_space->{$self->{nc}} or
7630              $self->{nc} == -1 or
7631              $self->{nc} == 0x003E) { # >
7632            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7633            ## Reconsume.
7634            redo A;
7635          } else {
7636            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before attr name'); ## TODO: type
7637            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7638            ## Reconsume.
7639            redo A;
7640          }
7641        } elsif ($self->{state} == NDATA_STATE) {
7642          ## ASCII case-insensitive
7643          if ($self->{nc} == [
7644                undef,
7645                0x0044, # D
7646                0x0041, # A
7647                0x0054, # T
7648              ]->[length $self->{kwd}] or
7649              $self->{nc} == [
7650                undef,
7651                0x0064, # d
7652                0x0061, # a
7653                0x0074, # t
7654              ]->[length $self->{kwd}]) {
7655            
7656            ## Stay in the state.
7657            $self->{kwd} .= chr $self->{nc};
7658            
7659        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7660          $self->{line_prev} = $self->{line};
7661          $self->{column_prev} = $self->{column};
7662          $self->{column}++;
7663          $self->{nc}
7664              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7665        } else {
7666          $self->{set_nc}->($self);
7667        }
7668      
7669            redo A;
7670          } elsif ((length $self->{kwd}) == 4 and
7671                   ($self->{nc} == 0x0041 or # A
7672                    $self->{nc} == 0x0061)) { # a
7673            if ($self->{kwd} ne 'NDAT' or $self->{nc} == 0x0061) { # a
7674              
7675              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
7676                              text => 'NDATA',
7677                              line => $self->{line_prev},
7678                              column => $self->{column_prev} - 4);
7679            } else {
7680              
7681            }
7682            $self->{state} = AFTER_NDATA_STATE;
7683            
7684        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7685          $self->{line_prev} = $self->{line};
7686          $self->{column_prev} = $self->{column};
7687          $self->{column}++;
7688          $self->{nc}
7689              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7690        } else {
7691          $self->{set_nc}->($self);
7692        }
7693      
7694            redo A;
7695          } else {
7696            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after literal', ## TODO: type
7697                            line => $self->{line_prev},
7698                            column => $self->{column_prev} + 1
7699                                - length $self->{kwd});
7700            
7701            $self->{state} = BOGUS_MD_STATE;
7702            ## Reconsume.
7703            redo A;
7704          }
7705        } elsif ($self->{state} == AFTER_NDATA_STATE) {
7706          if ($is_space->{$self->{nc}}) {
7707            $self->{state} = BEFORE_NOTATION_NAME_STATE;
7708            
7709        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7710          $self->{line_prev} = $self->{line};
7711          $self->{column_prev} = $self->{column};
7712          $self->{column}++;
7713          $self->{nc}
7714              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7715        } else {
7716          $self->{set_nc}->($self);
7717        }
7718      
7719            redo A;
7720          } elsif ($self->{nc} == 0x003E) { # >
7721            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no notation name'); ## TODO: type
7722            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7723            
7724        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7725          $self->{line_prev} = $self->{line};
7726          $self->{column_prev} = $self->{column};
7727          $self->{column}++;
7728          $self->{nc}
7729              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7730        } else {
7731          $self->{set_nc}->($self);
7732        }
7733      
7734            return  ($self->{ct}); # ENTITY
7735            redo A;
7736          } elsif ($self->{nc} == -1) {
7737            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7738            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7739            
7740        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7741          $self->{line_prev} = $self->{line};
7742          $self->{column_prev} = $self->{column};
7743          $self->{column}++;
7744          $self->{nc}
7745              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7746        } else {
7747          $self->{set_nc}->($self);
7748        }
7749      
7750            return  ($self->{ct}); # ENTITY
7751            redo A;
7752          } else {
7753            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after literal', ## TODO: type
7754                            line => $self->{line_prev},
7755                            column => $self->{column_prev} + 1
7756                                - length $self->{kwd});
7757            $self->{state} = BOGUS_MD_STATE;
7758            ## Reconsume.
7759            redo A;
7760          }
7761        } elsif ($self->{state} == BEFORE_NOTATION_NAME_STATE) {
7762          if ($is_space->{$self->{nc}}) {
7763            ## Stay in the state.
7764            
7765        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7766          $self->{line_prev} = $self->{line};
7767          $self->{column_prev} = $self->{column};
7768          $self->{column}++;
7769          $self->{nc}
7770              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7771        } else {
7772          $self->{set_nc}->($self);
7773        }
7774      
7775            redo A;
7776          } elsif ($self->{nc} == 0x003E) { # >
7777            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no notation name'); ## TODO: type
7778            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7779            
7780        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7781          $self->{line_prev} = $self->{line};
7782          $self->{column_prev} = $self->{column};
7783          $self->{column}++;
7784          $self->{nc}
7785              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7786        } else {
7787          $self->{set_nc}->($self);
7788        }
7789      
7790            return  ($self->{ct}); # ENTITY
7791            redo A;
7792          } elsif ($self->{nc} == -1) {
7793            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7794            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7795            
7796        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7797          $self->{line_prev} = $self->{line};
7798          $self->{column_prev} = $self->{column};
7799          $self->{column}++;
7800          $self->{nc}
7801              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7802        } else {
7803          $self->{set_nc}->($self);
7804        }
7805      
7806            return  ($self->{ct}); # ENTITY
7807            redo A;
7808          } else {
7809            $self->{ct}->{notation} = chr $self->{nc}; # ENTITY
7810            $self->{state} = NOTATION_NAME_STATE;
7811            
7812        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7813          $self->{line_prev} = $self->{line};
7814          $self->{column_prev} = $self->{column};
7815          $self->{column}++;
7816          $self->{nc}
7817              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7818        } else {
7819          $self->{set_nc}->($self);
7820        }
7821      
7822            redo A;
7823          }
7824        } elsif ($self->{state} == NOTATION_NAME_STATE) {
7825          if ($is_space->{$self->{nc}}) {
7826            $self->{state} = AFTER_MD_DEF_STATE;
7827            
7828        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7829          $self->{line_prev} = $self->{line};
7830          $self->{column_prev} = $self->{column};
7831          $self->{column}++;
7832          $self->{nc}
7833              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7834        } else {
7835          $self->{set_nc}->($self);
7836        }
7837      
7838            redo A;
7839          } elsif ($self->{nc} == 0x003E) { # >
7840            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7841            
7842        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7843          $self->{line_prev} = $self->{line};
7844          $self->{column_prev} = $self->{column};
7845          $self->{column}++;
7846          $self->{nc}
7847              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7848        } else {
7849          $self->{set_nc}->($self);
7850        }
7851      
7852            return  ($self->{ct}); # ENTITY
7853            redo A;
7854          } elsif ($self->{nc} == -1) {
7855            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7856            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7857            
7858        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7859          $self->{line_prev} = $self->{line};
7860          $self->{column_prev} = $self->{column};
7861          $self->{column}++;
7862          $self->{nc}
7863              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7864        } else {
7865          $self->{set_nc}->($self);
7866        }
7867      
7868            return  ($self->{ct}); # ENTITY
7869            redo A;
7870          } else {
7871            $self->{ct}->{notation} .= chr $self->{nc}; # ENTITY
7872            ## Stay in the state.
7873            
7874        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7875          $self->{line_prev} = $self->{line};
7876          $self->{column_prev} = $self->{column};
7877          $self->{column}++;
7878          $self->{nc}
7879              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7880        } else {
7881          $self->{set_nc}->($self);
7882        }
7883      
7884            redo A;
7885          }
7886        } elsif ($self->{state} == DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE) {
7887          if ($self->{nc} == 0x0022) { # "
7888            $self->{state} = AFTER_MD_DEF_STATE;
7889            
7890        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7891          $self->{line_prev} = $self->{line};
7892          $self->{column_prev} = $self->{column};
7893          $self->{column}++;
7894          $self->{nc}
7895              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7896        } else {
7897          $self->{set_nc}->($self);
7898        }
7899      
7900            redo A;
7901          } elsif ($self->{nc} == 0x0026) { # &
7902            $self->{prev_state} = $self->{state};
7903            $self->{state} = ENTITY_VALUE_ENTITY_STATE;
7904            $self->{entity_add} = 0x0022; # "
7905            
7906        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7907          $self->{line_prev} = $self->{line};
7908          $self->{column_prev} = $self->{column};
7909          $self->{column}++;
7910          $self->{nc}
7911              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7912        } else {
7913          $self->{set_nc}->($self);
7914        }
7915      
7916            redo A;
7917    ## TODO: %
7918          } elsif ($self->{nc} == -1) {
7919            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed entity value'); ## TODO: type
7920            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7921            ## Reconsume.
7922            return  ($self->{ct}); # ENTITY
7923            redo A;
7924          } else {
7925            $self->{ct}->{value} .= chr $self->{nc}; # ENTITY
7926            
7927        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7928          $self->{line_prev} = $self->{line};
7929          $self->{column_prev} = $self->{column};
7930          $self->{column}++;
7931          $self->{nc}
7932              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7933        } else {
7934          $self->{set_nc}->($self);
7935        }
7936      
7937            redo A;
7938          }
7939        } elsif ($self->{state} == DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE) {
7940          if ($self->{nc} == 0x0027) { # '
7941            $self->{state} = AFTER_MD_DEF_STATE;
7942            
7943        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7944          $self->{line_prev} = $self->{line};
7945          $self->{column_prev} = $self->{column};
7946          $self->{column}++;
7947          $self->{nc}
7948              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7949        } else {
7950          $self->{set_nc}->($self);
7951        }
7952      
7953            redo A;
7954          } elsif ($self->{nc} == 0x0026) { # &
7955            $self->{prev_state} = $self->{state};
7956            $self->{state} = ENTITY_VALUE_ENTITY_STATE;
7957            $self->{entity_add} = 0x0027; # '
7958            
7959        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7960          $self->{line_prev} = $self->{line};
7961          $self->{column_prev} = $self->{column};
7962          $self->{column}++;
7963          $self->{nc}
7964              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7965        } else {
7966          $self->{set_nc}->($self);
7967        }
7968      
7969            redo A;
7970    ## TODO: %
7971          } elsif ($self->{nc} == -1) {
7972            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed entity value'); ## TODO: type
7973            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7974            ## Reconsume.
7975            return  ($self->{ct}); # ENTITY
7976            redo A;
7977          } else {
7978            $self->{ct}->{value} .= chr $self->{nc}; # ENTITY
7979            
7980        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7981          $self->{line_prev} = $self->{line};
7982          $self->{column_prev} = $self->{column};
7983          $self->{column}++;
7984          $self->{nc}
7985              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7986        } else {
7987          $self->{set_nc}->($self);
7988        }
7989      
7990            redo A;
7991          }
7992        } elsif ($self->{state} == ENTITY_VALUE_ENTITY_STATE) {
7993          if ($is_space->{$self->{nc}} or
7994              {
7995                0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
7996                $self->{entity_add} => 1,
7997              }->{$self->{nc}}) {
7998            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero',
7999                            line => $self->{line_prev},
8000                            column => $self->{column_prev}
8001                                + ($self->{nc} == -1 ? 1 : 0));
8002            ## Don't consume
8003            ## Return nothing.
8004            #
8005          } elsif ($self->{nc} == 0x0023) { # #
8006            $self->{ca} = $self->{ct};
8007            $self->{state} = ENTITY_HASH_STATE;
8008            $self->{kwd} = '#';
8009            
8010        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8011          $self->{line_prev} = $self->{line};
8012          $self->{column_prev} = $self->{column};
8013          $self->{column}++;
8014          $self->{nc}
8015              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8016        } else {
8017          $self->{set_nc}->($self);
8018        }
8019      
8020            redo A;
8021          } else {
8022            #
8023          }
8024    
8025          $self->{ct}->{value} .= '&';
8026          $self->{state} = $self->{prev_state};
8027          ## Reconsume.
8028          redo A;
8029        } elsif ($self->{state} == AFTER_ELEMENT_NAME_STATE) {
8030          if ($is_space->{$self->{nc}}) {
8031            $self->{state} = BEFORE_ELEMENT_CONTENT_STATE;
8032            
8033        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8034          $self->{line_prev} = $self->{line};
8035          $self->{column_prev} = $self->{column};
8036          $self->{column}++;
8037          $self->{nc}
8038              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8039        } else {
8040          $self->{set_nc}->($self);
8041        }
8042      
8043            redo A;
8044          } elsif ($self->{nc} == 0x0028) { # (
8045            $self->{state} = AFTER_CM_GROUP_OPEN_STATE;
8046            $self->{ct}->{content} = ['('];
8047            $self->{group_depth} = 1;
8048            
8049        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8050          $self->{line_prev} = $self->{line};
8051          $self->{column_prev} = $self->{column};
8052          $self->{column}++;
8053          $self->{nc}
8054              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8055        } else {
8056          $self->{set_nc}->($self);
8057        }
8058      
8059            redo A;
8060          } elsif ($self->{nc} == 0x003E) { # >
8061            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md def'); ## TODO: type
8062            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8063            
8064        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8065          $self->{line_prev} = $self->{line};
8066          $self->{column_prev} = $self->{column};
8067          $self->{column}++;
8068          $self->{nc}
8069              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8070        } else {
8071          $self->{set_nc}->($self);
8072        }
8073      
8074            return  ($self->{ct}); # ELEMENT
8075            redo A;
8076          } elsif ($self->{nc} == -1) {
8077            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8078            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8079            
8080        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8081          $self->{line_prev} = $self->{line};
8082          $self->{column_prev} = $self->{column};
8083          $self->{column}++;
8084          $self->{nc}
8085              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8086        } else {
8087          $self->{set_nc}->($self);
8088        }
8089      
8090            return  ($self->{ct}); # ELEMENT
8091            redo A;
8092          } else {
8093            $self->{ct}->{content} = [chr $self->{nc}];
8094            $self->{state} = CONTENT_KEYWORD_STATE;
8095            
8096        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8097          $self->{line_prev} = $self->{line};
8098          $self->{column_prev} = $self->{column};
8099          $self->{column}++;
8100          $self->{nc}
8101              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8102        } else {
8103          $self->{set_nc}->($self);
8104        }
8105      
8106            redo A;
8107          }
8108        } elsif ($self->{state} == CONTENT_KEYWORD_STATE) {
8109          if ($is_space->{$self->{nc}}) {
8110            $self->{state} = AFTER_MD_DEF_STATE;
8111            
8112        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8113          $self->{line_prev} = $self->{line};
8114          $self->{column_prev} = $self->{column};
8115          $self->{column}++;
8116          $self->{nc}
8117              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8118        } else {
8119          $self->{set_nc}->($self);
8120        }
8121      
8122            redo A;
8123          } elsif ($self->{nc} == 0x003E) { # >
8124            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8125            
8126        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8127          $self->{line_prev} = $self->{line};
8128          $self->{column_prev} = $self->{column};
8129          $self->{column}++;
8130          $self->{nc}
8131              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8132        } else {
8133          $self->{set_nc}->($self);
8134        }
8135      
8136            return  ($self->{ct}); # ELEMENT
8137            redo A;
8138          } elsif ($self->{nc} == -1) {
8139            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8140            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8141            
8142        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8143          $self->{line_prev} = $self->{line};
8144          $self->{column_prev} = $self->{column};
8145          $self->{column}++;
8146          $self->{nc}
8147              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8148        } else {
8149          $self->{set_nc}->($self);
8150        }
8151      
8152            return  ($self->{ct}); # ELEMENT
8153            redo A;
8154          } else {
8155            $self->{ct}->{content}->[-1] .= chr $self->{nc}; # ELEMENT
8156            ## Stay in the state.
8157            
8158        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8159          $self->{line_prev} = $self->{line};
8160          $self->{column_prev} = $self->{column};
8161          $self->{column}++;
8162          $self->{nc}
8163              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8164        } else {
8165          $self->{set_nc}->($self);
8166        }
8167      
8168            redo A;
8169          }
8170        } elsif ($self->{state} == AFTER_CM_GROUP_OPEN_STATE) {
8171          if ($is_space->{$self->{nc}}) {
8172            ## Stay in the state.
8173            
8174        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8175          $self->{line_prev} = $self->{line};
8176          $self->{column_prev} = $self->{column};
8177          $self->{column}++;
8178          $self->{nc}
8179              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8180        } else {
8181          $self->{set_nc}->($self);
8182        }
8183      
8184            redo A;
8185          } elsif ($self->{nc} == 0x0028) { # (
8186            $self->{group_depth}++;
8187            push @{$self->{ct}->{content}}, chr $self->{nc};
8188            ## Stay in the state.
8189            
8190        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8191          $self->{line_prev} = $self->{line};
8192          $self->{column_prev} = $self->{column};
8193          $self->{column}++;
8194          $self->{nc}
8195              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8196        } else {
8197          $self->{set_nc}->($self);
8198        }
8199      
8200            redo A;
8201          } elsif ($self->{nc} == 0x007C or # |
8202                   $self->{nc} == 0x002C) { # ,
8203            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty element name'); ## TODO: type
8204            ## Stay in the state.
8205            
8206        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8207          $self->{line_prev} = $self->{line};
8208          $self->{column_prev} = $self->{column};
8209          $self->{column}++;
8210          $self->{nc}
8211              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8212        } else {
8213          $self->{set_nc}->($self);
8214        }
8215      
8216            redo A;
8217          } elsif ($self->{nc} == 0x0029) { # )
8218            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty element name'); ## TODO: type
8219            push @{$self->{ct}->{content}}, chr $self->{nc};
8220            $self->{group_depth}--;
8221            $self->{state} = AFTER_CM_GROUP_CLOSE_STATE;
8222            
8223        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8224          $self->{line_prev} = $self->{line};
8225          $self->{column_prev} = $self->{column};
8226          $self->{column}++;
8227          $self->{nc}
8228              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8229        } else {
8230          $self->{set_nc}->($self);
8231        }
8232      
8233            redo A;
8234          } elsif ($self->{nc} == 0x003E) { # >
8235            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8236            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8237            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8238            
8239        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8240          $self->{line_prev} = $self->{line};
8241          $self->{column_prev} = $self->{column};
8242          $self->{column}++;
8243          $self->{nc}
8244              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8245        } else {
8246          $self->{set_nc}->($self);
8247        }
8248      
8249            return  ($self->{ct}); # ELEMENT
8250            redo A;
8251          } elsif ($self->{nc} == -1) {
8252            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8253            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8254            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8255            
8256        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8257          $self->{line_prev} = $self->{line};
8258          $self->{column_prev} = $self->{column};
8259          $self->{column}++;
8260          $self->{nc}
8261              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8262        } else {
8263          $self->{set_nc}->($self);
8264        }
8265      
8266            return  ($self->{ct}); # ELEMENT
8267            redo A;
8268          } else {
8269            push @{$self->{ct}->{content}}, chr $self->{nc};
8270            $self->{state} = CM_ELEMENT_NAME_STATE;
8271            
8272        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8273          $self->{line_prev} = $self->{line};
8274          $self->{column_prev} = $self->{column};
8275          $self->{column}++;
8276          $self->{nc}
8277              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8278        } else {
8279          $self->{set_nc}->($self);
8280        }
8281      
8282            redo A;
8283          }
8284        } elsif ($self->{state} == CM_ELEMENT_NAME_STATE) {
8285          if ($is_space->{$self->{nc}}) {
8286            $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8287            
8288        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8289          $self->{line_prev} = $self->{line};
8290          $self->{column_prev} = $self->{column};
8291          $self->{column}++;
8292          $self->{nc}
8293              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8294        } else {
8295          $self->{set_nc}->($self);
8296        }
8297      
8298            redo A;
8299          } elsif ($self->{nc} == 0x002A or # *
8300                   $self->{nc} == 0x002B or # +
8301                   $self->{nc} == 0x003F) { # ?
8302            push @{$self->{ct}->{content}}, chr $self->{nc};
8303            $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8304            
8305        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8306          $self->{line_prev} = $self->{line};
8307          $self->{column_prev} = $self->{column};
8308          $self->{column}++;
8309          $self->{nc}
8310              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8311        } else {
8312          $self->{set_nc}->($self);
8313        }
8314      
8315            redo A;
8316          } elsif ($self->{nc} == 0x007C or # |
8317                   $self->{nc} == 0x002C) { # ,
8318            push @{$self->{ct}->{content}}, $self->{nc} == 0x007C ? ' | ' : ', ';
8319            $self->{state} = AFTER_CM_GROUP_OPEN_STATE;
8320            
8321        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8322          $self->{line_prev} = $self->{line};
8323          $self->{column_prev} = $self->{column};
8324          $self->{column}++;
8325          $self->{nc}
8326              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8327        } else {
8328          $self->{set_nc}->($self);
8329        }
8330      
8331            redo A;
8332          } elsif ($self->{nc} == 0x0029) { # )
8333            $self->{group_depth}--;
8334            push @{$self->{ct}->{content}}, chr $self->{nc};
8335            $self->{state} = AFTER_CM_GROUP_CLOSE_STATE;
8336            
8337        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8338          $self->{line_prev} = $self->{line};
8339          $self->{column_prev} = $self->{column};
8340          $self->{column}++;
8341          $self->{nc}
8342              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8343        } else {
8344          $self->{set_nc}->($self);
8345        }
8346      
8347            redo A;
8348          } elsif ($self->{nc} == 0x003E) { # >
8349            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8350            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8351            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8352            
8353        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8354          $self->{line_prev} = $self->{line};
8355          $self->{column_prev} = $self->{column};
8356          $self->{column}++;
8357          $self->{nc}
8358              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8359        } else {
8360          $self->{set_nc}->($self);
8361        }
8362      
8363            return  ($self->{ct}); # ELEMENT
8364            redo A;
8365          } elsif ($self->{nc} == -1) {
8366            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8367            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8368            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8369            
8370        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8371          $self->{line_prev} = $self->{line};
8372          $self->{column_prev} = $self->{column};
8373          $self->{column}++;
8374          $self->{nc}
8375              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8376        } else {
8377          $self->{set_nc}->($self);
8378        }
8379      
8380            return  ($self->{ct}); # ELEMENT
8381            redo A;
8382          } else {
8383            $self->{ct}->{content}->[-1] .= chr $self->{nc};
8384            ## Stay in the state.
8385            
8386        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8387          $self->{line_prev} = $self->{line};
8388          $self->{column_prev} = $self->{column};
8389          $self->{column}++;
8390          $self->{nc}
8391              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8392        } else {
8393          $self->{set_nc}->($self);
8394        }
8395      
8396            redo A;
8397          }
8398        } elsif ($self->{state} == AFTER_CM_ELEMENT_NAME_STATE) {
8399          if ($is_space->{$self->{nc}}) {
8400            ## Stay in the state.
8401            
8402        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8403          $self->{line_prev} = $self->{line};
8404          $self->{column_prev} = $self->{column};
8405          $self->{column}++;
8406          $self->{nc}
8407              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8408        } else {
8409          $self->{set_nc}->($self);
8410        }
8411      
8412            redo A;
8413          } elsif ($self->{nc} == 0x007C or # |
8414                   $self->{nc} == 0x002C) { # ,
8415            push @{$self->{ct}->{content}}, $self->{nc} == 0x007C ? ' | ' : ', ';
8416            $self->{state} = AFTER_CM_GROUP_OPEN_STATE;
8417            
8418        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8419          $self->{line_prev} = $self->{line};
8420          $self->{column_prev} = $self->{column};
8421          $self->{column}++;
8422          $self->{nc}
8423              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8424        } else {
8425          $self->{set_nc}->($self);
8426        }
8427      
8428            redo A;
8429          } elsif ($self->{nc} == 0x0029) { # )
8430            $self->{group_depth}--;
8431            push @{$self->{ct}->{content}}, chr $self->{nc};
8432            $self->{state} = AFTER_CM_GROUP_CLOSE_STATE;
8433            
8434        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8435          $self->{line_prev} = $self->{line};
8436          $self->{column_prev} = $self->{column};
8437          $self->{column}++;
8438          $self->{nc}
8439              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8440        } else {
8441          $self->{set_nc}->($self);
8442        }
8443      
8444            redo A;
8445          } elsif ($self->{nc} == 0x003E) { # >
8446            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8447            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8448            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8449            
8450        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8451          $self->{line_prev} = $self->{line};
8452          $self->{column_prev} = $self->{column};
8453          $self->{column}++;
8454          $self->{nc}
8455              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8456        } else {
8457          $self->{set_nc}->($self);
8458        }
8459      
8460            return  ($self->{ct}); # ELEMENT
8461            redo A;
8462          } elsif ($self->{nc} == -1) {
8463            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8464            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8465            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8466            
8467        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8468          $self->{line_prev} = $self->{line};
8469          $self->{column_prev} = $self->{column};
8470          $self->{column}++;
8471          $self->{nc}
8472              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8473        } else {
8474          $self->{set_nc}->($self);
8475        }
8476      
8477            return  ($self->{ct}); # ELEMENT
8478            redo A;
8479          } else {
8480            $self->{parse_error}->(level => $self->{level}->{must}, type => 'after element name'); ## TODO: type
8481            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8482            $self->{state} = BOGUS_MD_STATE;
8483                    
8484      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8485        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 5028  sub _get_next_token ($) { Line 8493  sub _get_next_token ($) {
8493        
8494          redo A;          redo A;
8495        }        }
8496        } elsif ($self->{state} == AFTER_CM_GROUP_CLOSE_STATE) {
8497          if ($is_space->{$self->{nc}}) {
8498            if ($self->{group_depth}) {
8499              $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8500            } else {
8501              $self->{state} = AFTER_MD_DEF_STATE;
8502            }
8503            
8504        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8505          $self->{line_prev} = $self->{line};
8506          $self->{column_prev} = $self->{column};
8507          $self->{column}++;
8508          $self->{nc}
8509              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8510        } else {
8511          $self->{set_nc}->($self);
8512        }
8513      
8514            redo A;
8515          } elsif ($self->{nc} == 0x002A or # *
8516                   $self->{nc} == 0x002B or # +
8517                   $self->{nc} == 0x003F) { # ?
8518            push @{$self->{ct}->{content}}, chr $self->{nc};
8519            if ($self->{group_depth}) {
8520              $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8521            } else {
8522              $self->{state} = AFTER_MD_DEF_STATE;
8523            }
8524            
8525        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8526          $self->{line_prev} = $self->{line};
8527          $self->{column_prev} = $self->{column};
8528          $self->{column}++;
8529          $self->{nc}
8530              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8531        } else {
8532          $self->{set_nc}->($self);
8533        }
8534      
8535            redo A;
8536          } elsif ($self->{nc} == 0x0029) { # )
8537            if ($self->{group_depth}) {
8538              $self->{group_depth}--;
8539              push @{$self->{ct}->{content}}, chr $self->{nc};
8540              ## Stay in the state.
8541              
8542        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8543          $self->{line_prev} = $self->{line};
8544          $self->{column_prev} = $self->{column};
8545          $self->{column}++;
8546          $self->{nc}
8547              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8548        } else {
8549          $self->{set_nc}->($self);
8550        }
8551      
8552              redo A;
8553            } else {
8554              $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after md def'); ## TODO: type
8555              $self->{state} = BOGUS_MD_STATE;
8556              ## Reconsume.
8557              redo A;
8558            }
8559          } elsif ($self->{nc} == 0x003E) { # >
8560            if ($self->{group_depth}) {
8561              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8562              push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8563            }
8564            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8565                    
8566        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8567          $self->{line_prev} = $self->{line};
8568          $self->{column_prev} = $self->{column};
8569          $self->{column}++;
8570          $self->{nc}
8571              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8572        } else {
8573          $self->{set_nc}->($self);
8574        }
8575      
8576            return  ($self->{ct}); # ELEMENT
8577            redo A;
8578          } elsif ($self->{nc} == -1) {
8579            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8580            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8581            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8582            
8583        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8584          $self->{line_prev} = $self->{line};
8585          $self->{column_prev} = $self->{column};
8586          $self->{column}++;
8587          $self->{nc}
8588              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8589        } else {
8590          $self->{set_nc}->($self);
8591        }
8592      
8593            return  ($self->{ct}); # ELEMENT
8594            redo A;
8595          } else {
8596            if ($self->{group_depth}) {
8597              $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8598            } else {
8599              $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after md def'); ## TODO: type
8600              $self->{state} = BOGUS_MD_STATE;
8601            }
8602            ## Reconsume.
8603            redo A;
8604          }
8605        } elsif ($self->{state} == AFTER_MD_DEF_STATE) {
8606          if ($is_space->{$self->{nc}}) {
8607            ## Stay in the state.
8608            
8609        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8610          $self->{line_prev} = $self->{line};
8611          $self->{column_prev} = $self->{column};
8612          $self->{column}++;
8613          $self->{nc}
8614              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8615        } else {
8616          $self->{set_nc}->($self);
8617        }
8618      
8619            redo A;
8620          } elsif ($self->{nc} == 0x003E) { # >
8621            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8622            
8623        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8624          $self->{line_prev} = $self->{line};
8625          $self->{column_prev} = $self->{column};
8626          $self->{column}++;
8627          $self->{nc}
8628              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8629        } else {
8630          $self->{set_nc}->($self);
8631        }
8632      
8633            return  ($self->{ct}); # ENTITY/ELEMENT
8634            redo A;
8635          } elsif ($self->{nc} == -1) {
8636            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8637            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8638            
8639        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8640          $self->{line_prev} = $self->{line};
8641          $self->{column_prev} = $self->{column};
8642          $self->{column}++;
8643          $self->{nc}
8644              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8645        } else {
8646          $self->{set_nc}->($self);
8647        }
8648      
8649            return  ($self->{ct}); # ENTITY/ELEMENT
8650            redo A;
8651          } else {
8652            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after md def'); ## TODO: type
8653            $self->{state} = BOGUS_MD_STATE;
8654            ## Reconsume.
8655            redo A;
8656          }
8657        } elsif ($self->{state} == BOGUS_MD_STATE) {
8658          if ($self->{nc} == 0x003E) { # >
8659            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8660            
8661        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8662          $self->{line_prev} = $self->{line};
8663          $self->{column_prev} = $self->{column};
8664          $self->{column}++;
8665          $self->{nc}
8666              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8667        } else {
8668          $self->{set_nc}->($self);
8669        }
8670      
8671            return  ($self->{ct}); # ATTLIST/ENTITY/NOTATION
8672            redo A;
8673          } elsif ($self->{nc} == -1) {
8674            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8675            ## Reconsume.
8676            return  ($self->{ct}); # ATTLIST/ENTITY/NOTATION
8677            redo A;
8678          } else {
8679            ## Stay in the state.
8680            
8681        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8682          $self->{line_prev} = $self->{line};
8683          $self->{column_prev} = $self->{column};
8684          $self->{column}++;
8685          $self->{nc}
8686              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8687        } else {
8688          $self->{set_nc}->($self);
8689        }
8690      
8691            redo A;
8692          }
8693      } else {      } else {
8694        die "$0: $self->{state}: Unknown state";        die "$0: $self->{state}: Unknown state";
8695      }      }
# Line 5039  sub _get_next_token ($) { Line 8700  sub _get_next_token ($) {
8700    
8701  1;  1;
8702  ## $Date$  ## $Date$
8703                                    

Legend:
Removed from v.1.12  
changed lines
  Added in v.1.29

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24