/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.13 by wakaba, Thu Oct 16 03:39:57 2008 UTC revision 1.32 by wakaba, Sat Sep 5 09:57:55 2009 UTC
# Line 16  BEGIN { Line 16  BEGIN {
16      PI_TOKEN      PI_TOKEN
17      ABORT_TOKEN      ABORT_TOKEN
18      END_OF_DOCTYPE_TOKEN      END_OF_DOCTYPE_TOKEN
19        ATTLIST_TOKEN
20        ELEMENT_TOKEN
21        GENERAL_ENTITY_TOKEN
22        PARAMETER_ENTITY_TOKEN
23        NOTATION_TOKEN
24    );    );
25        
26    our %EXPORT_TAGS = (    our %EXPORT_TAGS = (
# Line 29  BEGIN { Line 34  BEGIN {
34        PI_TOKEN        PI_TOKEN
35        ABORT_TOKEN        ABORT_TOKEN
36        END_OF_DOCTYPE_TOKEN        END_OF_DOCTYPE_TOKEN
37          ATTLIST_TOKEN
38          ELEMENT_TOKEN
39          GENERAL_ENTITY_TOKEN
40          PARAMETER_ENTITY_TOKEN
41          NOTATION_TOKEN
42      )],      )],
43    );    );
44  }  }
# Line 45  sub END_OF_FILE_TOKEN () { 5 } Line 55  sub END_OF_FILE_TOKEN () { 5 }
55  sub CHARACTER_TOKEN () { 6 }  sub CHARACTER_TOKEN () { 6 }
56  sub PI_TOKEN () { 7 } ## NOTE: XML only.  sub PI_TOKEN () { 7 } ## NOTE: XML only.
57  sub ABORT_TOKEN () { 8 } ## NOTE: For internal processing.  sub ABORT_TOKEN () { 8 } ## NOTE: For internal processing.
58  sub END_OF_DOCTYPE_TOKEN () { 9 } ## NOTE: XML only  sub END_OF_DOCTYPE_TOKEN () { 9 } ## NOTE: XML only.
59    sub ATTLIST_TOKEN () { 10 } ## NOTE: XML only.
60    sub ELEMENT_TOKEN () { 11 } ## NOTE: XML only.
61    sub GENERAL_ENTITY_TOKEN () { 12 } ## NOTE: XML only.
62    sub PARAMETER_ENTITY_TOKEN () { 13 } ## NOTE: XML only.
63    sub NOTATION_TOKEN () { 14 } ## NOTE: XML only.
64    
65  ## XML5: XML5 has "empty tag token".  In this implementation, it is  ## XML5: XML5 has "empty tag token".  In this implementation, it is
66  ## represented as a start tag token with $self->{self_closing} flag  ## represented as a start tag token with $self->{self_closing} flag
# Line 90  sub COMMENT_START_STATE () { 14 } Line 105  sub COMMENT_START_STATE () { 14 }
105  sub COMMENT_START_DASH_STATE () { 15 }  sub COMMENT_START_DASH_STATE () { 15 }
106  sub COMMENT_STATE () { 16 }  sub COMMENT_STATE () { 16 }
107  sub COMMENT_END_STATE () { 17 }  sub COMMENT_END_STATE () { 17 }
108    sub COMMENT_END_BANG_STATE () { 102 }
109    sub COMMENT_END_SPACE_STATE () { 103 } ## LAST
110  sub COMMENT_END_DASH_STATE () { 18 }  sub COMMENT_END_DASH_STATE () { 18 }
111  sub BOGUS_COMMENT_STATE () { 19 }  sub BOGUS_COMMENT_STATE () { 19 }
112  sub DOCTYPE_STATE () { 20 }  sub DOCTYPE_STATE () { 20 }
# Line 136  sub PI_AFTER_STATE () { 55 } Line 153  sub PI_AFTER_STATE () { 55 }
153  sub PI_DATA_AFTER_STATE () { 56 }  sub PI_DATA_AFTER_STATE () { 56 }
154  sub DOCTYPE_INTERNAL_SUBSET_STATE () { 57 }  sub DOCTYPE_INTERNAL_SUBSET_STATE () { 57 }
155  sub DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 58 }  sub DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 58 }
156  sub DOCTYPE_TAG_STATE () { 59 }  sub BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 59 }
157  sub BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 60 }  sub DOCTYPE_TAG_STATE () { 60 }
158    sub DOCTYPE_MARKUP_DECLARATION_OPEN_STATE () { 61 }
159    sub MD_ATTLIST_STATE () { 62 }
160    sub MD_E_STATE () { 63 }
161    sub MD_ELEMENT_STATE () { 64 }
162    sub MD_ENTITY_STATE () { 65 }
163    sub MD_NOTATION_STATE () { 66 }
164    sub DOCTYPE_MD_STATE () { 67 }
165    sub BEFORE_MD_NAME_STATE () { 68 }
166    sub MD_NAME_STATE () { 69 }
167    sub DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE () { 70 }
168    sub DOCTYPE_ATTLIST_NAME_AFTER_STATE () { 71 }
169    sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE () { 72 }
170    sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE () { 73 }
171    sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE () { 74 }
172    sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE () { 75 }
173    sub BEFORE_ALLOWED_TOKEN_STATE () { 76 }
174    sub ALLOWED_TOKEN_STATE () { 77 }
175    sub AFTER_ALLOWED_TOKEN_STATE () { 78 }
176    sub AFTER_ALLOWED_TOKENS_STATE () { 79 }
177    sub BEFORE_ATTR_DEFAULT_STATE () { 80 }
178    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE () { 81 }
179    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE () { 82 }
180    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE () { 83 }
181    sub AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE () { 84 }
182    sub BEFORE_NDATA_STATE () { 85 }
183    sub NDATA_STATE () { 86 }
184    sub AFTER_NDATA_STATE () { 87 }
185    sub BEFORE_NOTATION_NAME_STATE () { 88 }
186    sub NOTATION_NAME_STATE () { 89 }
187    sub DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE () { 90 }
188    sub DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE () { 91 }
189    sub ENTITY_VALUE_ENTITY_STATE () { 92 }
190    sub AFTER_ELEMENT_NAME_STATE () { 93 }
191    sub BEFORE_ELEMENT_CONTENT_STATE () { 94 }
192    sub CONTENT_KEYWORD_STATE () { 95 }
193    sub AFTER_CM_GROUP_OPEN_STATE () { 96 }
194    sub CM_ELEMENT_NAME_STATE () { 97 }
195    sub AFTER_CM_ELEMENT_NAME_STATE () { 98 }
196    sub AFTER_CM_GROUP_CLOSE_STATE () { 99 }
197    sub AFTER_MD_DEF_STATE () { 100 }
198    sub BOGUS_MD_STATE () { 101 }
199    
200  ## Tree constructor state constants (see Whatpm::HTML for the full  ## Tree constructor state constants (see Whatpm::HTML for the full
201  ## list and descriptions)  ## list and descriptions)
# Line 1192  sub _get_next_token ($) { Line 1250  sub _get_next_token ($) {
1250          if ({          if ({
1251               0x0022 => 1, # "               0x0022 => 1, # "
1252               0x0027 => 1, # '               0x0027 => 1, # '
1253                 0x003C => 1, # <
1254               0x003D => 1, # =               0x003D => 1, # =
1255              }->{$self->{nc}}) {              }->{$self->{nc}}) {
1256                        
# Line 1374  sub _get_next_token ($) { Line 1433  sub _get_next_token ($) {
1433    
1434          redo A;          redo A;
1435        } else {        } else {
1436          if ($self->{nc} == 0x0022 or # "          if ({
1437              $self->{nc} == 0x0027) { # '               0x0022 => 1, # "
1438                 0x0027 => 1, # '
1439                 0x003C => 1, # <
1440                }->{$self->{nc}}) {
1441                        
1442            ## XML5: Not a parse error.            ## XML5: Not a parse error.
1443            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');
# Line 1546  sub _get_next_token ($) { Line 1608  sub _get_next_token ($) {
1608                        
1609          }          }
1610    
1611          if ($self->{nc} == 0x0022 or # "          if ({
1612              $self->{nc} == 0x0027) { # '               0x0022 => 1, # "
1613                 0x0027 => 1, # '
1614                 0x003C => 1, # <
1615                }->{$self->{nc}}) {
1616                        
1617            ## XML5: Not a parse error.            ## XML5: Not a parse error.
1618            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');
# Line 1684  sub _get_next_token ($) { Line 1749  sub _get_next_token ($) {
1749    
1750          redo A;          redo A;
1751        } else {        } else {
1752          if ($self->{nc} == 0x003D) { # =          if ($self->{nc} == 0x003D or $self->{nc} == 0x003C) { # =, <
1753                        
1754            ## XML5: Not a parse error.            ## XML5: Not a parse error.
1755            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');
# Line 1711  sub _get_next_token ($) { Line 1776  sub _get_next_token ($) {
1776          redo A;          redo A;
1777        }        }
1778      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1779        ## XML5: "Tag attribute value double quoted state".        ## XML5: "Tag attribute value double quoted state" and "DOCTYPE
1780          ## ATTLIST attribute value double quoted state".
1781                
1782        if ($self->{nc} == 0x0022) { # "        if ($self->{nc} == 0x0022) { # "
1783                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1784          ## XML5: "Tag attribute name before state".            
1785          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;            ## XML5: "DOCTYPE ATTLIST name after state".
1786              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1787              $self->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE;
1788            } else {
1789              
1790              ## XML5: "Tag attribute name before state".
1791              $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1792            }
1793                    
1794      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1795        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1752  sub _get_next_token ($) { Line 1825  sub _get_next_token ($) {
1825      }      }
1826        
1827          redo A;          redo A;
1828          } elsif ($self->{is_xml} and
1829                   $is_space->{$self->{nc}}) {
1830            
1831            $self->{ca}->{value} .= ' ';
1832            ## Stay in the state.
1833            
1834        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1835          $self->{line_prev} = $self->{line};
1836          $self->{column_prev} = $self->{column};
1837          $self->{column}++;
1838          $self->{nc}
1839              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
1840        } else {
1841          $self->{set_nc}->($self);
1842        }
1843      
1844            redo A;
1845        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
1846          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed attribute value');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed attribute value');
1847          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1848                        
1849            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
1850    
1851              $self->{state} = DATA_STATE;
1852              $self->{s_kwd} = '';
1853              ## reconsume
1854              return  ($self->{ct}); # start tag
1855              redo A;
1856          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1857            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1858            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1766  sub _get_next_token ($) { Line 1862  sub _get_next_token ($) {
1862              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
1863                            
1864            }            }
1865    
1866              $self->{state} = DATA_STATE;
1867              $self->{s_kwd} = '';
1868              ## reconsume
1869              return  ($self->{ct}); # end tag
1870              redo A;
1871            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1872              ## XML5: No parse error above; not defined yet.
1873              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1874              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1875              ## Reconsume.
1876              return  ($self->{ct}); # ATTLIST
1877              redo A;
1878          } else {          } else {
1879            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1880          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
1881        } else {        } else {
1882            ## XML5 [ATTLIST]: Not defined yet.
1883          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
1884                        
1885            ## XML5: Not a parse error.            ## XML5: Not a parse error.
# Line 1786  sub _get_next_token ($) { Line 1889  sub _get_next_token ($) {
1889          }          }
1890          $self->{ca}->{value} .= chr ($self->{nc});          $self->{ca}->{value} .= chr ($self->{nc});
1891          $self->{read_until}->($self->{ca}->{value},          $self->{read_until}->($self->{ca}->{value},
1892                                q["&<],                                qq["&<\x09\x0C\x20],
1893                                length $self->{ca}->{value});                                length $self->{ca}->{value});
1894    
1895          ## Stay in the state          ## Stay in the state
# Line 1804  sub _get_next_token ($) { Line 1907  sub _get_next_token ($) {
1907          redo A;          redo A;
1908        }        }
1909      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1910        ## XML5: "Tag attribute value single quoted state".        ## XML5: "Tag attribute value single quoted state" and "DOCTYPE
1911          ## ATTLIST attribute value single quoted state".
1912    
1913        if ($self->{nc} == 0x0027) { # '        if ($self->{nc} == 0x0027) { # '
1914                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1915          ## XML5: "Before attribute name state" (sic).            
1916          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;            ## XML5: "DOCTYPE ATTLIST name after state".
1917              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1918              $self->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE;
1919            } else {
1920              
1921              ## XML5: "Before attribute name state" (sic).
1922              $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1923            }
1924                    
1925      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1926        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1845  sub _get_next_token ($) { Line 1956  sub _get_next_token ($) {
1956      }      }
1957        
1958          redo A;          redo A;
1959          } elsif ($self->{is_xml} and
1960                   $is_space->{$self->{nc}}) {
1961            
1962            $self->{ca}->{value} .= ' ';
1963            ## Stay in the state.
1964            
1965        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1966          $self->{line_prev} = $self->{line};
1967          $self->{column_prev} = $self->{column};
1968          $self->{column}++;
1969          $self->{nc}
1970              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
1971        } else {
1972          $self->{set_nc}->($self);
1973        }
1974      
1975            redo A;
1976        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
1977          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed attribute value');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed attribute value');
1978          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1979                        
1980            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
1981    
1982              $self->{state} = DATA_STATE;
1983              $self->{s_kwd} = '';
1984              ## reconsume
1985              return  ($self->{ct}); # start tag
1986              redo A;
1987          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1988            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1989            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1859  sub _get_next_token ($) { Line 1993  sub _get_next_token ($) {
1993              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
1994                            
1995            }            }
1996    
1997              $self->{state} = DATA_STATE;
1998              $self->{s_kwd} = '';
1999              ## reconsume
2000              return  ($self->{ct}); # end tag
2001              redo A;
2002            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
2003              ## XML5: No parse error above; not defined yet.
2004              push @{$self->{ct}->{attrdefs}}, $self->{ca};
2005              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2006              ## Reconsume.
2007              return  ($self->{ct}); # ATTLIST
2008              redo A;
2009          } else {          } else {
2010            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
2011          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
2012        } else {        } else {
2013            ## XML5 [ATTLIST]: Not defined yet.
2014          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
2015                        
2016            ## XML5: Not a parse error.            ## XML5: Not a parse error.
# Line 1879  sub _get_next_token ($) { Line 2020  sub _get_next_token ($) {
2020          }          }
2021          $self->{ca}->{value} .= chr ($self->{nc});          $self->{ca}->{value} .= chr ($self->{nc});
2022          $self->{read_until}->($self->{ca}->{value},          $self->{read_until}->($self->{ca}->{value},
2023                                q['&<],                                qq['&<\x09\x0C\x20],
2024                                length $self->{ca}->{value});                                length $self->{ca}->{value});
2025    
2026          ## Stay in the state          ## Stay in the state
# Line 1900  sub _get_next_token ($) { Line 2041  sub _get_next_token ($) {
2041        ## XML5: "Tag attribute value unquoted state".        ## XML5: "Tag attribute value unquoted state".
2042    
2043        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
2044                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
2045          ## XML5: "Tag attribute name before state".            
2046          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;            push @{$self->{ct}->{attrdefs}}, $self->{ca};
2047              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
2048            } else {
2049              
2050              ## XML5: "Tag attribute name before state".
2051              $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
2052            }
2053                    
2054      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2055        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1943  sub _get_next_token ($) { Line 2090  sub _get_next_token ($) {
2090          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
2091                        
2092            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
2093    
2094              $self->{state} = DATA_STATE;
2095              $self->{s_kwd} = '';
2096              
2097        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2098          $self->{line_prev} = $self->{line};
2099          $self->{column_prev} = $self->{column};
2100          $self->{column}++;
2101          $self->{nc}
2102              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
2103        } else {
2104          $self->{set_nc}->($self);
2105        }
2106      
2107              return  ($self->{ct}); # start tag
2108              redo A;
2109          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
2110            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
2111            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1952  sub _get_next_token ($) { Line 2115  sub _get_next_token ($) {
2115              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
2116                            
2117            }            }
2118          } else {  
2119            die "$0: $self->{ct}->{type}: Unknown token type";            $self->{state} = DATA_STATE;
2120          }            $self->{s_kwd} = '';
2121          $self->{state} = DATA_STATE;            
         $self->{s_kwd} = '';  
           
2122      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2123        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
2124        $self->{column_prev} = $self->{column};        $self->{column_prev} = $self->{column};
# Line 1968  sub _get_next_token ($) { Line 2129  sub _get_next_token ($) {
2129        $self->{set_nc}->($self);        $self->{set_nc}->($self);
2130      }      }
2131        
2132              return  ($self->{ct}); # end tag
2133          return  ($self->{ct}); # start tag or end tag            redo A;
2134            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
2135          redo A;            push @{$self->{ct}->{attrdefs}}, $self->{ca};
2136              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2137              
2138        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2139          $self->{line_prev} = $self->{line};
2140          $self->{column_prev} = $self->{column};
2141          $self->{column}++;
2142          $self->{nc}
2143              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
2144        } else {
2145          $self->{set_nc}->($self);
2146        }
2147      
2148              return  ($self->{ct}); # ATTLIST
2149              redo A;
2150            } else {
2151              die "$0: $self->{ct}->{type}: Unknown token type";
2152            }
2153        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');  
2154          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
2155                        
2156              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');
2157            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
2158    
2159              $self->{state} = DATA_STATE;
2160              $self->{s_kwd} = '';
2161              ## reconsume
2162              return  ($self->{ct}); # start tag
2163              redo A;
2164          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
2165              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');
2166            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
2167            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
2168                            
# Line 1986  sub _get_next_token ($) { Line 2171  sub _get_next_token ($) {
2171              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
2172                            
2173            }            }
2174    
2175              $self->{state} = DATA_STATE;
2176              $self->{s_kwd} = '';
2177              ## reconsume
2178              return  ($self->{ct}); # end tag
2179              redo A;
2180            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
2181              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
2182              push @{$self->{ct}->{attrdefs}}, $self->{ca};
2183              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2184              ## Reconsume.
2185              return  ($self->{ct}); # ATTLIST
2186              redo A;
2187          } else {          } else {
2188            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
2189          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
2190        } else {        } else {
2191          if ({          if ({
2192               0x0022 => 1, # "               0x0022 => 1, # "
2193               0x0027 => 1, # '               0x0027 => 1, # '
2194               0x003D => 1, # =               0x003D => 1, # =
2195                 0x003C => 1, # <
2196              }->{$self->{nc}}) {              }->{$self->{nc}}) {
2197                        
2198            ## XML5: Not a parse error.            ## XML5: Not a parse error.
# Line 2010  sub _get_next_token ($) { Line 2202  sub _get_next_token ($) {
2202          }          }
2203          $self->{ca}->{value} .= chr ($self->{nc});          $self->{ca}->{value} .= chr ($self->{nc});
2204          $self->{read_until}->($self->{ca}->{value},          $self->{read_until}->($self->{ca}->{value},
2205                                q["'=& >],                                qq["'=& \x09\x0C>],
2206                                length $self->{ca}->{value});                                length $self->{ca}->{value});
2207    
2208          ## Stay in the state          ## Stay in the state
# Line 2188  sub _get_next_token ($) { Line 2380  sub _get_next_token ($) {
2380          redo A;          redo A;
2381        }        }
2382      } elsif ($self->{state} == BOGUS_COMMENT_STATE) {      } elsif ($self->{state} == BOGUS_COMMENT_STATE) {
2383          ## XML5: "Bogus comment state" and "DOCTYPE bogus comment state".
2384    
2385        ## NOTE: Unlike spec's "bogus comment state", this implementation        ## NOTE: Unlike spec's "bogus comment state", this implementation
2386        ## consumes characters one-by-one basis.        ## consumes characters one-by-one basis.
2387                
# Line 2249  sub _get_next_token ($) { Line 2443  sub _get_next_token ($) {
2443          redo A;          redo A;
2444        }        }
2445      } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {      } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {
2446        ## XML5: "Markup declaration state" and "DOCTYPE markup        ## XML5: "Markup declaration state".
       ## declaration state".  
2447                
2448        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2449                    
# Line 2648  sub _get_next_token ($) { Line 2841  sub _get_next_token ($) {
2841          redo A;          redo A;
2842        }        }
2843      } elsif ($self->{state} == COMMENT_STATE) {      } elsif ($self->{state} == COMMENT_STATE) {
2844          ## XML5: "Comment state" and "DOCTYPE comment state".
2845    
2846        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2847                    
2848          $self->{state} = COMMENT_END_DASH_STATE;          $self->{state} = COMMENT_END_DASH_STATE;
# Line 2700  sub _get_next_token ($) { Line 2895  sub _get_next_token ($) {
2895          redo A;          redo A;
2896        }        }
2897      } elsif ($self->{state} == COMMENT_END_DASH_STATE) {      } elsif ($self->{state} == COMMENT_END_DASH_STATE) {
2898        ## XML5: "comment dash state".        ## XML5: "Comment dash state" and "DOCTYPE comment dash state".
2899    
2900        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2901                    
# Line 2749  sub _get_next_token ($) { Line 2944  sub _get_next_token ($) {
2944        
2945          redo A;          redo A;
2946        }        }
2947      } elsif ($self->{state} == COMMENT_END_STATE) {      } elsif ($self->{state} == COMMENT_END_STATE or
2948                 $self->{state} == COMMENT_END_BANG_STATE) {
2949          ## XML5: "Comment end state" and "DOCTYPE comment end state".
2950          ## (No comment end bang state.)
2951    
2952        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2953          if ($self->{in_subset}) {          if ($self->{in_subset}) {
2954                        
# Line 2775  sub _get_next_token ($) { Line 2974  sub _get_next_token ($) {
2974    
2975          redo A;          redo A;
2976        } elsif ($self->{nc} == 0x002D) { # -        } elsif ($self->{nc} == 0x002D) { # -
2977            if ($self->{state} == COMMENT_END_BANG_STATE) {
2978              
2979              $self->{ct}->{data} .= '--!'; # comment
2980              $self->{state} = COMMENT_END_DASH_STATE;
2981            } else {
2982              
2983              ## XML5: Not a parse error.
2984              $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',
2985                              line => $self->{line_prev},
2986                              column => $self->{column_prev});
2987              $self->{ct}->{data} .= '-'; # comment
2988              ## Stay in the state
2989            }
2990                    
2991          ## XML5: Not a parse error.      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2992          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',        $self->{line_prev} = $self->{line};
2993                          line => $self->{line_prev},        $self->{column_prev} = $self->{column};
2994                          column => $self->{column_prev});        $self->{column}++;
2995          $self->{ct}->{data} .= '-'; # comment        $self->{nc}
2996          ## Stay in the state            = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
2997        } else {
2998          $self->{set_nc}->($self);
2999        }
3000      
3001            redo A;
3002          } elsif ($self->{state} != COMMENT_END_BANG_STATE and
3003                   $is_space->{$self->{nc}}) {
3004            
3005            $self->{parse_error}->(level => $self->{level}->{must}, type => 'comment end space'); # XXX error type
3006            $self->{ct}->{data} .= '--' . chr ($self->{nc}); # comment
3007            $self->{state} = COMMENT_END_SPACE_STATE;
3008            
3009        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3010          $self->{line_prev} = $self->{line};
3011          $self->{column_prev} = $self->{column};
3012          $self->{column}++;
3013          $self->{nc}
3014              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3015        } else {
3016          $self->{set_nc}->($self);
3017        }
3018      
3019            redo A;
3020          } elsif ($self->{state} != COMMENT_END_BANG_STATE and
3021                   $self->{nc} == 0x0021) { # !
3022            
3023            $self->{parse_error}->(level => $self->{level}->{must}, type => 'comment end bang'); # XXX error type
3024            $self->{state} = COMMENT_END_BANG_STATE;
3025                    
3026      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3027        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2804  sub _get_next_token ($) { Line 3044  sub _get_next_token ($) {
3044            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
3045            $self->{s_kwd} = '';            $self->{s_kwd} = '';
3046          }          }
3047          ## reconsume          ## Reconsume.
3048    
3049          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
3050    
3051          redo A;          redo A;
3052        } else {        } else {
3053                    
3054          ## XML5: Not a parse error.          if ($self->{state} == COMMENT_END_BANG_STATE) {
3055          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',            $self->{ct}->{data} .= '--!' . chr ($self->{nc}); # comment
3056                          line => $self->{line_prev},          } else {
3057                          column => $self->{column_prev});            $self->{ct}->{data} .= '--' . chr ($self->{nc}); # comment
3058          $self->{ct}->{data} .= '--' . chr ($self->{nc}); # comment          }
3059          $self->{state} = COMMENT_STATE;          $self->{state} = COMMENT_STATE;
3060                    
3061      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 2830  sub _get_next_token ($) { Line 3070  sub _get_next_token ($) {
3070        
3071          redo A;          redo A;
3072        }        }
3073        } elsif ($self->{state} == COMMENT_END_SPACE_STATE) {
3074          ## XML5: Not exist.
3075    
3076          if ($self->{nc} == 0x003E) { # >
3077            if ($self->{in_subset}) {
3078              
3079              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3080            } else {
3081              
3082              $self->{state} = DATA_STATE;
3083              $self->{s_kwd} = '';
3084            }
3085            
3086        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3087          $self->{line_prev} = $self->{line};
3088          $self->{column_prev} = $self->{column};
3089          $self->{column}++;
3090          $self->{nc}
3091              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3092        } else {
3093          $self->{set_nc}->($self);
3094        }
3095      
3096    
3097            return  ($self->{ct}); # comment
3098    
3099            redo A;
3100          } elsif ($is_space->{$self->{nc}}) {
3101            
3102            $self->{ct}->{data} .= chr ($self->{nc}); # comment
3103            ## Stay in the state.
3104            
3105        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3106          $self->{line_prev} = $self->{line};
3107          $self->{column_prev} = $self->{column};
3108          $self->{column}++;
3109          $self->{nc}
3110              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3111        } else {
3112          $self->{set_nc}->($self);
3113        }
3114      
3115            redo A;
3116          } elsif ($self->{nc} == -1) {
3117            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
3118            if ($self->{in_subset}) {
3119              
3120              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3121            } else {
3122              
3123              $self->{state} = DATA_STATE;
3124              $self->{s_kwd} = '';
3125            }
3126            ## Reconsume.
3127    
3128            return  ($self->{ct}); # comment
3129    
3130            redo A;
3131          } else {
3132            
3133            $self->{ct}->{data} .= chr ($self->{nc}); # comment
3134            $self->{state} = COMMENT_STATE;
3135            
3136        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3137          $self->{line_prev} = $self->{line};
3138          $self->{column_prev} = $self->{column};
3139          $self->{column}++;
3140          $self->{nc}
3141              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3142        } else {
3143          $self->{set_nc}->($self);
3144        }
3145      
3146            redo A;
3147          }
3148      } elsif ($self->{state} == DOCTYPE_STATE) {      } elsif ($self->{state} == DOCTYPE_STATE) {
3149        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
3150                    
# Line 2846  sub _get_next_token ($) { Line 3161  sub _get_next_token ($) {
3161      }      }
3162        
3163          redo A;          redo A;
3164          } elsif ($self->{nc} == -1) {
3165            
3166            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3167            $self->{ct}->{quirks} = 1;
3168    
3169            $self->{state} = DATA_STATE;
3170            ## Reconsume.
3171            return  ($self->{ct}); # DOCTYPE (quirks)
3172    
3173            redo A;
3174        } else {        } else {
3175                    
3176          ## XML5: Unless EOF, switch to the bogus comment state.          ## XML5: Switch to the bogus comment state.
3177          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before DOCTYPE name');
3178          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
3179          ## reconsume          ## reconsume
# Line 2893  sub _get_next_token ($) { Line 3218  sub _get_next_token ($) {
3218          return  ($self->{ct}); # DOCTYPE (quirks)          return  ($self->{ct}); # DOCTYPE (quirks)
3219    
3220          redo A;          redo A;
3221          } elsif (0x0041 <= $self->{nc} and $self->{nc} <= 0x005A) { # A..Z
3222            
3223            $self->{ct}->{name} # DOCTYPE
3224                = chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020));
3225            delete $self->{ct}->{quirks};
3226            $self->{state} = DOCTYPE_NAME_STATE;
3227            
3228        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3229          $self->{line_prev} = $self->{line};
3230          $self->{column_prev} = $self->{column};
3231          $self->{column}++;
3232          $self->{nc}
3233              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3234        } else {
3235          $self->{set_nc}->($self);
3236        }
3237      
3238            redo A;
3239        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3240                    
3241          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');
# Line 2979  sub _get_next_token ($) { Line 3322  sub _get_next_token ($) {
3322          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
3323    
3324          redo A;          redo A;
3325          } elsif (0x0041 <= $self->{nc} and $self->{nc} <= 0x005A) { # A..Z
3326            
3327            $self->{ct}->{name} # DOCTYPE
3328                .= chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020));
3329            delete $self->{ct}->{quirks};
3330            ## Stay in the state.
3331            
3332        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3333          $self->{line_prev} = $self->{line};
3334          $self->{column_prev} = $self->{column};
3335          $self->{column}++;
3336          $self->{nc}
3337              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3338        } else {
3339          $self->{set_nc}->($self);
3340        }
3341      
3342            redo A;
3343        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3344                    
3345          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
# Line 3010  sub _get_next_token ($) { Line 3371  sub _get_next_token ($) {
3371          redo A;          redo A;
3372        } else {        } else {
3373                    
3374          $self->{ct}->{name}          $self->{ct}->{name} .= chr ($self->{nc}); # DOCTYPE
3375            .= chr ($self->{nc}); # DOCTYPE          ## Stay in the state.
         ## Stay in the state  
3376                    
3377      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3378        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3046  sub _get_next_token ($) { Line 3406  sub _get_next_token ($) {
3406        
3407          redo A;          redo A;
3408        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3409            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3410              
3411              $self->{state} = DATA_STATE;
3412              $self->{s_kwd} = '';
3413            } else {
3414              
3415              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md def'); ## TODO: type
3416              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3417            }
3418                    
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3419                    
3420      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3421        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3060  sub _get_next_token ($) { Line 3427  sub _get_next_token ($) {
3427        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3428      }      }
3429        
3430            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         return  ($self->{ct}); # DOCTYPE  
   
3431          redo A;          redo A;
3432        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3433            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3434              
3435              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3436              $self->{state} = DATA_STATE;
3437              $self->{s_kwd} = '';
3438              $self->{ct}->{quirks} = 1;
3439            } else {
3440              
3441              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3442              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3443            }
3444                    
3445          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          ## Reconsume.
3446          $self->{state} = DATA_STATE;          return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{s_kwd} = '';  
         ## reconsume  
   
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3447          redo A;          redo A;
3448        } elsif ($self->{nc} == 0x0050 or # P        } elsif ($self->{nc} == 0x0050 or # P
3449                 $self->{nc} == 0x0070) { # p                 $self->{nc} == 0x0070) { # p
# Line 3109  sub _get_next_token ($) { Line 3479  sub _get_next_token ($) {
3479      }      }
3480        
3481          redo A;          redo A;
3482        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [        } elsif ($self->{nc} == 0x0022 and # "
3483                   ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN or
3484                    $self->{ct}->{type} == PARAMETER_ENTITY_TOKEN)) {
3485            
3486            $self->{state} = DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE;
3487            $self->{ct}->{value} = ''; # ENTITY
3488            
3489        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3490          $self->{line_prev} = $self->{line};
3491          $self->{column_prev} = $self->{column};
3492          $self->{column}++;
3493          $self->{nc}
3494              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3495        } else {
3496          $self->{set_nc}->($self);
3497        }
3498      
3499            redo A;
3500          } elsif ($self->{nc} == 0x0027 and # '
3501                   ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN or
3502                    $self->{ct}->{type} == PARAMETER_ENTITY_TOKEN)) {
3503            
3504            $self->{state} = DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE;
3505            $self->{ct}->{value} = ''; # ENTITY
3506            
3507        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3508          $self->{line_prev} = $self->{line};
3509          $self->{column_prev} = $self->{column};
3510          $self->{column}++;
3511          $self->{nc}
3512              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3513        } else {
3514          $self->{set_nc}->($self);
3515        }
3516      
3517            redo A;
3518          } elsif ($self->{is_xml} and
3519                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3520                   $self->{nc} == 0x005B) { # [
3521                    
3522          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3523          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
# Line 3128  sub _get_next_token ($) { Line 3536  sub _get_next_token ($) {
3536          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
3537          redo A;          redo A;
3538        } else {        } else {
3539                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name'); ## TODO: type
3540          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name');  
3541          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3542              
3543              $self->{ct}->{quirks} = 1;
3544              $self->{state} = BOGUS_DOCTYPE_STATE;
3545            } else {
3546              
3547              $self->{state} = BOGUS_MD_STATE;
3548            }
3549    
         $self->{state} = BOGUS_DOCTYPE_STATE;  
3550                    
3551      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3552        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3204  sub _get_next_token ($) { Line 3618  sub _get_next_token ($) {
3618        
3619          redo A;          redo A;
3620        } else {        } else {
3621                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name', ## TODO: type
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name',  
3622                          line => $self->{line_prev},                          line => $self->{line_prev},
3623                          column => $self->{column_prev} + 1 - length $self->{kwd});                          column => $self->{column_prev} + 1 - length $self->{kwd});
3624          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3625              
3626          $self->{state} = BOGUS_DOCTYPE_STATE;            $self->{ct}->{quirks} = 1;
3627              $self->{state} = BOGUS_DOCTYPE_STATE;
3628            } else {
3629              
3630              $self->{state} = BOGUS_MD_STATE;
3631            }
3632          ## Reconsume.          ## Reconsume.
3633          redo A;          redo A;
3634        }        }
# Line 3272  sub _get_next_token ($) { Line 3690  sub _get_next_token ($) {
3690        
3691          redo A;          redo A;
3692        } else {        } else {
3693                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name', ## TODO: type
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name',  
3694                          line => $self->{line_prev},                          line => $self->{line_prev},
3695                          column => $self->{column_prev} + 1 - length $self->{kwd});                          column => $self->{column_prev} + 1 - length $self->{kwd});
3696          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3697              
3698          $self->{state} = BOGUS_DOCTYPE_STATE;            $self->{ct}->{quirks} = 1;
3699              $self->{state} = BOGUS_DOCTYPE_STATE;
3700            } else {
3701              
3702              $self->{state} = BOGUS_MD_STATE;
3703            }
3704          ## Reconsume.          ## Reconsume.
3705          redo A;          redo A;
3706        }        }
# Line 3331  sub _get_next_token ($) { Line 3753  sub _get_next_token ($) {
3753        
3754          redo A;          redo A;
3755        } elsif ($self->{nc} eq 0x003E) { # >        } elsif ($self->{nc} eq 0x003E) { # >
           
3756          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');
3757            
3758          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3759          $self->{s_kwd} = '';            
3760              $self->{state} = DATA_STATE;
3761              $self->{s_kwd} = '';
3762              $self->{ct}->{quirks} = 1;
3763            } else {
3764              
3765              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3766            }
3767            
3768                    
3769      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3770        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3347  sub _get_next_token ($) { Line 3776  sub _get_next_token ($) {
3776        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3777      }      }
3778        
3779            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3780          redo A;          redo A;
3781        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3782            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3783              
3784              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3785              $self->{state} = DATA_STATE;
3786              $self->{s_kwd} = '';
3787              $self->{ct}->{quirks} = 1;
3788            } else {
3789              
3790              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3791              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3792            }
3793                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');  
   
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3794          ## reconsume          ## reconsume
   
         $self->{ct}->{quirks} = 1;  
3795          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
3796          redo A;          redo A;
3797        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [        } elsif ($self->{is_xml} and
3798                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3799                   $self->{nc} == 0x005B) { # [
3800                    
3801          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');
3802          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
# Line 3384  sub _get_next_token ($) { Line 3816  sub _get_next_token ($) {
3816          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
3817          redo A;          redo A;
3818        } else {        } else {
           
3819          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC');
         $self->{ct}->{quirks} = 1;  
3820    
3821          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3822              
3823              $self->{ct}->{quirks} = 1;
3824              $self->{state} = BOGUS_DOCTYPE_STATE;
3825            } else {
3826              
3827              $self->{state} = BOGUS_MD_STATE;
3828            }
3829    
3830                    
3831      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3832        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3419  sub _get_next_token ($) { Line 3857  sub _get_next_token ($) {
3857        
3858          redo A;          redo A;
3859        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
3860          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3861    
3862          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3863          $self->{s_kwd} = '';            
3864              $self->{state} = DATA_STATE;
3865              $self->{s_kwd} = '';
3866              $self->{ct}->{quirks} = 1;
3867            } else {
3868              
3869              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3870            }
3871    
3872                    
3873      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3874        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3435  sub _get_next_token ($) { Line 3880  sub _get_next_token ($) {
3880        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3881      }      }
3882        
3883            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3884          redo A;          redo A;
3885        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
3886          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3887    
3888          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3889          $self->{s_kwd} = '';            
3890          ## reconsume            $self->{state} = DATA_STATE;
3891              $self->{s_kwd} = '';
3892          $self->{ct}->{quirks} = 1;            $self->{ct}->{quirks} = 1;
3893            } else {
3894              
3895              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3896            }
3897            
3898            ## Reconsume.
3899          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
3900          redo A;          redo A;
3901        } else {        } else {
3902                    
3903          $self->{ct}->{pubid} # DOCTYPE          $self->{ct}->{pubid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
3904          $self->{read_until}->($self->{ct}->{pubid}, q[">],          $self->{read_until}->($self->{ct}->{pubid}, q[">],
3905                                length $self->{ct}->{pubid});                                length $self->{ct}->{pubid});
3906    
# Line 3490  sub _get_next_token ($) { Line 3935  sub _get_next_token ($) {
3935        
3936          redo A;          redo A;
3937        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
3938          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3939    
3940          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3941          $self->{s_kwd} = '';            
3942              $self->{state} = DATA_STATE;
3943              $self->{s_kwd} = '';
3944              $self->{ct}->{quirks} = 1;
3945            } else {
3946              
3947              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3948            }
3949    
3950                    
3951      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3952        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3506  sub _get_next_token ($) { Line 3958  sub _get_next_token ($) {
3958        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3959      }      }
3960        
3961            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3962          redo A;          redo A;
3963        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
3964          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3965    
3966          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3967          $self->{s_kwd} = '';            
3968              $self->{state} = DATA_STATE;
3969              $self->{s_kwd} = '';
3970              $self->{ct}->{quirks} = 1;
3971            } else {
3972              
3973              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3974            }
3975          
3976          ## reconsume          ## reconsume
3977            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3978          redo A;          redo A;
3979        } else {        } else {
3980                    
3981          $self->{ct}->{pubid} # DOCTYPE          $self->{ct}->{pubid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
3982          $self->{read_until}->($self->{ct}->{pubid}, q['>],          $self->{read_until}->($self->{ct}->{pubid}, q['>],
3983                                length $self->{ct}->{pubid});                                length $self->{ct}->{pubid});
3984    
# Line 3562  sub _get_next_token ($) { Line 4014  sub _get_next_token ($) {
4014          redo A;          redo A;
4015        } elsif ($self->{nc} == 0x0022) { # "        } elsif ($self->{nc} == 0x0022) { # "
4016                    
4017          $self->{ct}->{sysid} = ''; # DOCTYPE          $self->{ct}->{sysid} = ''; # DOCTYPE/ENTITY/NOTATION
4018          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
4019                    
4020      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3578  sub _get_next_token ($) { Line 4030  sub _get_next_token ($) {
4030          redo A;          redo A;
4031        } elsif ($self->{nc} == 0x0027) { # '        } elsif ($self->{nc} == 0x0027) { # '
4032                    
4033          $self->{ct}->{sysid} = ''; # DOCTYPE          $self->{ct}->{sysid} = ''; # DOCTYPE/ENTITY/NOTATION
4034          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
4035                    
4036      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3593  sub _get_next_token ($) { Line 4045  sub _get_next_token ($) {
4045        
4046          redo A;          redo A;
4047        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
4048          if ($self->{is_xml}) {          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4049                        if ($self->{is_xml}) {
4050            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');              
4051                $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
4052              } else {
4053                
4054              }
4055              $self->{state} = DATA_STATE;
4056              $self->{s_kwd} = '';
4057          } else {          } else {
4058                        if ($self->{ct}->{type} == NOTATION_TOKEN) {
4059                
4060              } else {
4061                
4062                $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');            
4063              }
4064              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4065          }          }
4066          $self->{state} = DATA_STATE;          
         $self->{s_kwd} = '';  
4067                    
4068      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4069        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3612  sub _get_next_token ($) { Line 4075  sub _get_next_token ($) {
4075        $self->{set_nc}->($self);        $self->{set_nc}->($self);
4076      }      }
4077        
4078            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         return  ($self->{ct}); # DOCTYPE  
   
4079          redo A;          redo A;
4080        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4081            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4082              
4083              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
4084              
4085              $self->{state} = DATA_STATE;
4086              $self->{s_kwd} = '';
4087              $self->{ct}->{quirks} = 1;
4088            } else {
4089              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
4090              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4091            }
4092                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');  
   
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
4093          ## reconsume          ## reconsume
4094            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
4095          redo A;          redo A;
4096        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [        } elsif ($self->{is_xml} and
4097                   $self->{ct}->{type} == DOCTYPE_TOKEN and
4098                   $self->{nc} == 0x005B) { # [
4099                    
4100          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
4101          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
# Line 3648  sub _get_next_token ($) { Line 4115  sub _get_next_token ($) {
4115          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
4116          redo A;          redo A;
4117        } else {        } else {
           
4118          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC literal');
         $self->{ct}->{quirks} = 1;  
4119    
4120          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4121              
4122              $self->{ct}->{quirks} = 1;
4123              $self->{state} = BOGUS_DOCTYPE_STATE;
4124            } else {
4125              
4126              $self->{state} = BOGUS_MD_STATE;
4127            }
4128    
4129                    
4130      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4131        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3715  sub _get_next_token ($) { Line 4188  sub _get_next_token ($) {
4188        
4189          redo A;          redo A;
4190        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
4191          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
4192                    
4193      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4194        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3731  sub _get_next_token ($) { Line 4201  sub _get_next_token ($) {
4201      }      }
4202        
4203    
4204          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4205          return  ($self->{ct}); # DOCTYPE            
4206              $self->{state} = DATA_STATE;
4207              $self->{s_kwd} = '';
4208              $self->{ct}->{quirks} = 1;
4209            } else {
4210              
4211              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4212            }
4213    
4214            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4215          redo A;          redo A;
4216        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4217            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4218              
4219              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
4220              $self->{state} = DATA_STATE;
4221              $self->{s_kwd} = '';
4222              $self->{ct}->{quirks} = 1;
4223            } else {
4224              
4225              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
4226              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4227            }
4228                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');  
   
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
4229          ## reconsume          ## reconsume
4230            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
4231          redo A;          redo A;
4232        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [        } elsif ($self->{is_xml} and
4233                   $self->{ct}->{type} == DOCTYPE_TOKEN and
4234                   $self->{nc} == 0x005B) { # [
4235                    
4236          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
4237    
# Line 3768  sub _get_next_token ($) { Line 4252  sub _get_next_token ($) {
4252          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
4253          redo A;          redo A;
4254        } else {        } else {
           
4255          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM');
         $self->{ct}->{quirks} = 1;  
4256    
4257          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4258                        
4259              $self->{ct}->{quirks} = 1;
4260              $self->{state} = BOGUS_DOCTYPE_STATE;
4261            } else {
4262              
4263              $self->{state} = BOGUS_MD_STATE;
4264            }
4265    
4266                    
4267      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4268        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3803  sub _get_next_token ($) { Line 4293  sub _get_next_token ($) {
4293        
4294          redo A;          redo A;
4295        } elsif (not $self->{is_xml} and $self->{nc} == 0x003E) { # >        } elsif (not $self->{is_xml} and $self->{nc} == 0x003E) { # >
           
4296          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4297    
4298          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4299          $self->{s_kwd} = '';            
4300              $self->{state} = DATA_STATE;
4301              $self->{s_kwd} = '';
4302              $self->{ct}->{quirks} = 1;
4303            } else {
4304              
4305              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4306            }
4307            
4308                    
4309      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4310        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3819  sub _get_next_token ($) { Line 4316  sub _get_next_token ($) {
4316        $self->{set_nc}->($self);        $self->{set_nc}->($self);
4317      }      }
4318        
4319            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
4320          redo A;          redo A;
4321        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
4322          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4323    
4324          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4325          $self->{s_kwd} = '';            
4326              $self->{state} = DATA_STATE;
4327              $self->{s_kwd} = '';
4328              $self->{ct}->{quirks} = 1;
4329            } else {
4330              
4331              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4332            }
4333            
4334          ## reconsume          ## reconsume
4335            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
4336          redo A;          redo A;
4337        } else {        } else {
4338                    
4339          $self->{ct}->{sysid} # DOCTYPE          $self->{ct}->{sysid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
4340          $self->{read_until}->($self->{ct}->{sysid}, q[">],          $self->{read_until}->($self->{ct}->{sysid}, q[">],
4341                                length $self->{ct}->{sysid});                                length $self->{ct}->{sysid});
4342    
# Line 3896  sub _get_next_token ($) { Line 4393  sub _get_next_token ($) {
4393    
4394          redo A;          redo A;
4395        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
4396          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4397    
4398          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4399          $self->{s_kwd} = '';            
4400          ## reconsume            $self->{state} = DATA_STATE;
4401              $self->{s_kwd} = '';
4402          $self->{ct}->{quirks} = 1;            $self->{ct}->{quirks} = 1;
4403          return  ($self->{ct}); # DOCTYPE          } else {
4404              
4405              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4406            }
4407    
4408            ## reconsume
4409            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4410          redo A;          redo A;
4411        } else {        } else {
4412                    
4413          $self->{ct}->{sysid} # DOCTYPE          $self->{ct}->{sysid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
4414          $self->{read_until}->($self->{ct}->{sysid}, q['>],          $self->{read_until}->($self->{ct}->{sysid}, q['>],
4415                                length $self->{ct}->{sysid});                                length $self->{ct}->{sysid});
4416    
# Line 3930  sub _get_next_token ($) { Line 4430  sub _get_next_token ($) {
4430        }        }
4431      } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {      } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
4432        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
4433                    if ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN) {
4434          ## Stay in the state            
4435              $self->{state} = BEFORE_NDATA_STATE;
4436            } else {
4437              
4438              ## Stay in the state
4439            }
4440                    
4441      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4442        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3945  sub _get_next_token ($) { Line 4450  sub _get_next_token ($) {
4450        
4451          redo A;          redo A;
4452        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
4453            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4454              
4455              $self->{state} = DATA_STATE;
4456              $self->{s_kwd} = '';
4457            } else {
4458              
4459              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4460            }
4461    
4462                    
4463          $self->{state} = DATA_STATE;      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4464          $self->{s_kwd} = '';        $self->{line_prev} = $self->{line};
4465          $self->{column_prev} = $self->{column};
4466          $self->{column}++;
4467          $self->{nc}
4468              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4469        } else {
4470          $self->{set_nc}->($self);
4471        }
4472      
4473            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4474            redo A;
4475          } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
4476                   ($self->{nc} == 0x004E or # N
4477                    $self->{nc} == 0x006E)) { # n
4478            
4479            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before NDATA'); ## TODO: type
4480            $self->{state} = NDATA_STATE;
4481            $self->{kwd} = chr $self->{nc};
4482                    
4483      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4484        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3959  sub _get_next_token ($) { Line 4490  sub _get_next_token ($) {
4490        $self->{set_nc}->($self);        $self->{set_nc}->($self);
4491      }      }
4492        
   
         return  ($self->{ct}); # DOCTYPE  
   
4493          redo A;          redo A;
4494        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4495                    if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4496          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');            
4497          $self->{state} = DATA_STATE;            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
4498          $self->{s_kwd} = '';            $self->{state} = DATA_STATE;
4499          ## reconsume            $self->{s_kwd} = '';
4500              $self->{ct}->{quirks} = 1;
4501          $self->{ct}->{quirks} = 1;          } else {
4502          return  ($self->{ct}); # DOCTYPE            
4503              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
4504              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4505            }
4506    
4507            ## reconsume
4508            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4509          redo A;          redo A;
4510        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [        } elsif ($self->{is_xml} and
4511                   $self->{ct}->{type} == DOCTYPE_TOKEN and
4512                   $self->{nc} == 0x005B) { # [
4513                    
4514          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4515          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
# Line 3993  sub _get_next_token ($) { Line 4528  sub _get_next_token ($) {
4528          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
4529          redo A;          redo A;
4530        } else {        } else {
           
4531          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');
         #$self->{ct}->{quirks} = 1;  
4532    
4533          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4534              
4535              #$self->{ct}->{quirks} = 1;
4536              $self->{state} = BOGUS_DOCTYPE_STATE;
4537            } else {
4538              
4539              $self->{state} = BOGUS_MD_STATE;
4540            }
4541    
4542            
4543        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4544          $self->{line_prev} = $self->{line};
4545          $self->{column_prev} = $self->{column};
4546          $self->{column}++;
4547          $self->{nc}
4548              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4549        } else {
4550          $self->{set_nc}->($self);
4551        }
4552      
4553            redo A;
4554          }
4555        } elsif ($self->{state} == BEFORE_NDATA_STATE) {
4556          if ($is_space->{$self->{nc}}) {
4557            
4558            ## Stay in the state.
4559            
4560        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4561          $self->{line_prev} = $self->{line};
4562          $self->{column_prev} = $self->{column};
4563          $self->{column}++;
4564          $self->{nc}
4565              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4566        } else {
4567          $self->{set_nc}->($self);
4568        }
4569      
4570            redo A;
4571          } elsif ($self->{nc} == 0x003E) { # >
4572            
4573            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4574            
4575        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4576          $self->{line_prev} = $self->{line};
4577          $self->{column_prev} = $self->{column};
4578          $self->{column}++;
4579          $self->{nc}
4580              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4581        } else {
4582          $self->{set_nc}->($self);
4583        }
4584      
4585            return  ($self->{ct}); # ENTITY
4586            redo A;
4587          } elsif ($self->{nc} == 0x004E or # N
4588                   $self->{nc} == 0x006E) { # n
4589            
4590            $self->{state} = NDATA_STATE;
4591            $self->{kwd} = chr $self->{nc};
4592            
4593        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4594          $self->{line_prev} = $self->{line};
4595          $self->{column_prev} = $self->{column};
4596          $self->{column}++;
4597          $self->{nc}
4598              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4599        } else {
4600          $self->{set_nc}->($self);
4601        }
4602      
4603            redo A;
4604          } elsif ($self->{nc} == -1) {
4605            
4606            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
4607            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4608            ## reconsume
4609            return  ($self->{ct}); # ENTITY
4610            redo A;
4611          } else {
4612            
4613            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');
4614            $self->{state} = BOGUS_MD_STATE;
4615                    
4616      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4617        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 4221  sub _get_next_token ($) { Line 4835  sub _get_next_token ($) {
4835              0x003C => 1, 0x0026 => 1, -1 => 1, # <, &              0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
4836              $self->{entity_add} => 1,              $self->{entity_add} => 1,
4837            }->{$self->{nc}}) {            }->{$self->{nc}}) {
4838                    if ($self->{is_xml}) {
4839              
4840              $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero',
4841                              line => $self->{line_prev},
4842                              column => $self->{column_prev}
4843                                  + ($self->{nc} == -1 ? 1 : 0));
4844            } else {
4845              
4846              ## No error
4847            }
4848          ## Don't consume          ## Don't consume
         ## No error  
4849          ## Return nothing.          ## Return nothing.
4850          #          #
4851        } elsif ($self->{nc} == 0x0023) { # #        } elsif ($self->{nc} == 0x0023) { # #
# Line 4242  sub _get_next_token ($) { Line 4864  sub _get_next_token ($) {
4864      }      }
4865        
4866          redo A;          redo A;
4867        } elsif ((0x0041 <= $self->{nc} and        } elsif ($self->{is_xml} or
4868                   (0x0041 <= $self->{nc} and
4869                  $self->{nc} <= 0x005A) or # A..Z                  $self->{nc} <= 0x005A) or # A..Z
4870                 (0x0061 <= $self->{nc} and                 (0x0061 <= $self->{nc} and
4871                  $self->{nc} <= 0x007A)) { # a..z                  $self->{nc} <= 0x007A)) { # a..z
# Line 4296  sub _get_next_token ($) { Line 4919  sub _get_next_token ($) {
4919          redo A;          redo A;
4920        }        }
4921      } elsif ($self->{state} == ENTITY_HASH_STATE) {      } elsif ($self->{state} == ENTITY_HASH_STATE) {
4922        if ($self->{nc} == 0x0078 or # x        if ($self->{nc} == 0x0078) { # x
           $self->{nc} == 0x0058) { # X  
4923                    
4924          $self->{state} = HEXREF_X_STATE;          $self->{state} = HEXREF_X_STATE;
4925          $self->{kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
# Line 4313  sub _get_next_token ($) { Line 4935  sub _get_next_token ($) {
4935      }      }
4936        
4937          redo A;          redo A;
4938          } elsif ($self->{nc} == 0x0058) { # X
4939            
4940            if ($self->{is_xml}) {
4941              $self->{parse_error}->(level => $self->{level}->{must}, type => 'uppercase hcro'); ## TODO: type
4942            }
4943            $self->{state} = HEXREF_X_STATE;
4944            $self->{kwd} .= chr $self->{nc};
4945            
4946        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4947          $self->{line_prev} = $self->{line};
4948          $self->{column_prev} = $self->{column};
4949          $self->{column}++;
4950          $self->{nc}
4951              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4952        } else {
4953          $self->{set_nc}->($self);
4954        }
4955      
4956            redo A;
4957        } elsif (0x0030 <= $self->{nc} and        } elsif (0x0030 <= $self->{nc} and
4958                 $self->{nc} <= 0x0039) { # 0..9                 $self->{nc} <= 0x0039) { # 0..9
4959                    
# Line 4403  sub _get_next_token ($) { Line 5044  sub _get_next_token ($) {
5044        my $code = $self->{kwd};        my $code = $self->{kwd};
5045        my $l = $self->{line_prev};        my $l = $self->{line_prev};
5046        my $c = $self->{column_prev};        my $c = $self->{column_prev};
5047        if ($charref_map->{$code}) {        if ((not $self->{is_xml} and $charref_map->{$code}) or
5048              ($self->{is_xml} and 0xD800 <= $code and $code <= 0xDFFF) or
5049              ($self->{is_xml} and $code == 0x0000)) {
5050                    
5051          $self->{parse_error}->(level => $self->{level}->{must}, type => 'invalid character reference',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'invalid character reference',
5052                          text => (sprintf 'U+%04X', $code),                          text => (sprintf 'U+%04X', $code),
# Line 4556  sub _get_next_token ($) { Line 5199  sub _get_next_token ($) {
5199        my $code = $self->{kwd};        my $code = $self->{kwd};
5200        my $l = $self->{line_prev};        my $l = $self->{line_prev};
5201        my $c = $self->{column_prev};        my $c = $self->{column_prev};
5202        if ($charref_map->{$code}) {        if ((not $self->{is_xml} and $charref_map->{$code}) or
5203              ($self->{is_xml} and 0xD800 <= $code and $code <= 0xDFFF) or
5204              ($self->{is_xml} and $code == 0x0000)) {
5205                    
5206          $self->{parse_error}->(level => $self->{level}->{must}, type => 'invalid character reference',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'invalid character reference',
5207                          text => (sprintf 'U+%04X', $code),                          text => (sprintf 'U+%04X', $code),
# Line 4590  sub _get_next_token ($) { Line 5235  sub _get_next_token ($) {
5235          redo A;          redo A;
5236        }        }
5237      } elsif ($self->{state} == ENTITY_NAME_STATE) {      } elsif ($self->{state} == ENTITY_NAME_STATE) {
5238        if (length $self->{kwd} < 30 and        if ((0x0041 <= $self->{nc} and # a
5239            ## NOTE: Some number greater than the maximum length of entity name             $self->{nc} <= 0x005A) or # x
5240            ((0x0041 <= $self->{nc} and # a            (0x0061 <= $self->{nc} and # a
5241              $self->{nc} <= 0x005A) or # x             $self->{nc} <= 0x007A) or # z
5242             (0x0061 <= $self->{nc} and # a            (0x0030 <= $self->{nc} and # 0
5243              $self->{nc} <= 0x007A) or # z             $self->{nc} <= 0x0039) or # 9
5244             (0x0030 <= $self->{nc} and # 0            $self->{nc} == 0x003B or # ;
5245              $self->{nc} <= 0x0039) or # 9            ($self->{is_xml} and
5246             $self->{nc} == 0x003B)) { # ;             not ($is_space->{$self->{nc}} or
5247                    {
5248                      0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
5249                      $self->{entity_add} => 1,
5250                    }->{$self->{nc}}))) {
5251          our $EntityChar;          our $EntityChar;
5252          $self->{kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
5253          if (defined $EntityChar->{$self->{kwd}}) {          if (defined $EntityChar->{$self->{kwd}} or
5254                $self->{ge}->{$self->{kwd}}) {
5255            if ($self->{nc} == 0x003B) { # ;            if ($self->{nc} == 0x003B) { # ;
5256                            if (defined $self->{ge}->{$self->{kwd}}) {
5257              $self->{entity__value} = $EntityChar->{$self->{kwd}};                if ($self->{ge}->{$self->{kwd}}->{only_text}) {
5258                    
5259                    $self->{entity__value} = $self->{ge}->{$self->{kwd}}->{value};
5260                  } else {
5261                    if (defined $self->{ge}->{$self->{kwd}}->{notation}) {
5262                      
5263                      $self->{parse_error}->(level => $self->{level}->{must}, type => 'unparsed entity', ## TODO: type
5264                                      value => $self->{kwd});
5265                    } else {
5266                      
5267                    }
5268                    $self->{entity__value} = '&' . $self->{kwd}; ## TODO: expand
5269                  }
5270                } else {
5271                  if ($self->{is_xml}) {
5272                    
5273                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'entity not declared', ## TODO: type
5274                                    value => $self->{kwd},
5275                                    level => {
5276                                              'amp;' => $self->{level}->{warn},
5277                                              'quot;' => $self->{level}->{warn},
5278                                              'lt;' => $self->{level}->{warn},
5279                                              'gt;' => $self->{level}->{warn},
5280                                              'apos;' => $self->{level}->{warn},
5281                                             }->{$self->{kwd}} ||
5282                                             $self->{level}->{must});
5283                  } else {
5284                    
5285                  }
5286                  $self->{entity__value} = $EntityChar->{$self->{kwd}};
5287                }
5288              $self->{entity__match} = 1;              $self->{entity__match} = 1;
5289                            
5290      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 4720  sub _get_next_token ($) { Line 5400  sub _get_next_token ($) {
5400      ## XML-only states      ## XML-only states
5401    
5402      } elsif ($self->{state} == PI_STATE) {      } elsif ($self->{state} == PI_STATE) {
5403          ## XML5: "Pi state" and "DOCTYPE pi state".
5404    
5405        if ($is_space->{$self->{nc}} or        if ($is_space->{$self->{nc}} or
5406            $self->{nc} == 0x003F or # ? ## XML5: Same as "Anything else"            $self->{nc} == 0x003F or # ?
5407            $self->{nc} == -1) {            $self->{nc} == -1) {
5408            ## XML5: U+003F: "pi state": Same as "Anything else"; "DOCTYPE
5409            ## pi state": Switch to the "DOCTYPE pi after state".  EOF:
5410            ## "DOCTYPE pi state": Parse error, switch to the "data
5411            ## state".
5412          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare pio', ## TODO: type          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare pio', ## TODO: type
5413                          line => $self->{line_prev},                          line => $self->{line_prev},
5414                          column => $self->{column_prev}                          column => $self->{column_prev}
# Line 4737  sub _get_next_token ($) { Line 5423  sub _get_next_token ($) {
5423                        };                        };
5424          redo A;          redo A;
5425        } else {        } else {
5426            ## XML5: "DOCTYPE pi state": Stay in the state.
5427          $self->{ct} = {type => PI_TOKEN,          $self->{ct} = {type => PI_TOKEN,
5428                         target => chr $self->{nc},                         target => chr $self->{nc},
5429                         data => '',                         data => '',
# Line 4851  sub _get_next_token ($) { Line 5538  sub _get_next_token ($) {
5538        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
5539          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type
5540          if ($self->{in_subset}) {          if ($self->{in_subset}) {
5541            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state"
5542          } else {          } else {
5543            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
5544            $self->{s_kwd} = '';            $self->{s_kwd} = '';
# Line 4879  sub _get_next_token ($) { Line 5566  sub _get_next_token ($) {
5566          redo A;          redo A;
5567        }        }
5568      } elsif ($self->{state} == PI_AFTER_STATE) {      } elsif ($self->{state} == PI_AFTER_STATE) {
5569          ## XML5: Part of "Pi after state".
5570    
5571        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
5572          if ($self->{in_subset}) {          if ($self->{in_subset}) {
5573            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
# Line 4928  sub _get_next_token ($) { Line 5617  sub _get_next_token ($) {
5617          redo A;          redo A;
5618        }        }
5619      } elsif ($self->{state} == PI_DATA_AFTER_STATE) {      } elsif ($self->{state} == PI_DATA_AFTER_STATE) {
5620        ## XML5: Same as "pi after state" in XML5        ## XML5: Same as "pi after state" and "DOCTYPE pi after state".
5621    
5622        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
5623          if ($self->{in_subset}) {          if ($self->{in_subset}) {
5624            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
# Line 4990  sub _get_next_token ($) { Line 5680  sub _get_next_token ($) {
5680          ## XML5: Not defined yet.          ## XML5: Not defined yet.
5681    
5682          ## TODO:          ## TODO:
5683    
5684            if (not $self->{stop_processing} and
5685                not $self->{document}->xml_standalone) {
5686              $self->{parse_error}->(level => $self->{level}->{must}, type => 'stop processing', ## TODO: type
5687                              level => $self->{level}->{info});
5688              $self->{stop_processing} = 1;
5689            }
5690    
5691                    
5692      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5693        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 5141  sub _get_next_token ($) { Line 5839  sub _get_next_token ($) {
5839        }        }
5840      } elsif ($self->{state} == DOCTYPE_TAG_STATE) {      } elsif ($self->{state} == DOCTYPE_TAG_STATE) {
5841        if ($self->{nc} == 0x0021) { # !        if ($self->{nc} == 0x0021) { # !
5842          $self->{state} = MARKUP_DECLARATION_OPEN_STATE;          $self->{state} = DOCTYPE_MARKUP_DECLARATION_OPEN_STATE;
5843                    
5844      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5845        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 5195  sub _get_next_token ($) { Line 5893  sub _get_next_token ($) {
5893        
5894          redo A;          redo A;
5895        }        }
5896        } elsif ($self->{state} == DOCTYPE_MARKUP_DECLARATION_OPEN_STATE) {
5897          ## XML5: "DOCTYPE markup declaration state".
5898          
5899          if ($self->{nc} == 0x002D) { # -
5900            $self->{state} = MD_HYPHEN_STATE;
5901                    
5902        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5903          $self->{line_prev} = $self->{line};
5904          $self->{column_prev} = $self->{column};
5905          $self->{column}++;
5906          $self->{nc}
5907              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5908        } else {
5909          $self->{set_nc}->($self);
5910        }
5911      
5912            redo A;
5913          } elsif ($self->{nc} == 0x0045 or # E
5914                   $self->{nc} == 0x0065) { # e
5915            $self->{state} = MD_E_STATE;
5916            $self->{kwd} = chr $self->{nc};
5917            
5918        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5919          $self->{line_prev} = $self->{line};
5920          $self->{column_prev} = $self->{column};
5921          $self->{column}++;
5922          $self->{nc}
5923              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5924        } else {
5925          $self->{set_nc}->($self);
5926        }
5927      
5928            redo A;
5929          } elsif ($self->{nc} == 0x0041 or # A
5930                   $self->{nc} == 0x0061) { # a
5931            $self->{state} = MD_ATTLIST_STATE;
5932            $self->{kwd} = chr $self->{nc};
5933            
5934        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5935          $self->{line_prev} = $self->{line};
5936          $self->{column_prev} = $self->{column};
5937          $self->{column}++;
5938          $self->{nc}
5939              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5940        } else {
5941          $self->{set_nc}->($self);
5942        }
5943      
5944            redo A;
5945          } elsif ($self->{nc} == 0x004E or # N
5946                   $self->{nc} == 0x006E) { # n
5947            $self->{state} = MD_NOTATION_STATE;
5948            $self->{kwd} = chr $self->{nc};
5949            
5950        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5951          $self->{line_prev} = $self->{line};
5952          $self->{column_prev} = $self->{column};
5953          $self->{column}++;
5954          $self->{nc}
5955              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5956        } else {
5957          $self->{set_nc}->($self);
5958        }
5959      
5960            redo A;
5961          } else {
5962            #
5963          }
5964          
5965          ## XML5: No parse error.
5966          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5967                          line => $self->{line_prev},
5968                          column => $self->{column_prev} - 1);
5969          ## Reconsume.
5970          $self->{state} = BOGUS_COMMENT_STATE;
5971          $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded.
5972          redo A;
5973        } elsif ($self->{state} == MD_E_STATE) {
5974          if ($self->{nc} == 0x004E or # N
5975              $self->{nc} == 0x006E) { # n
5976            $self->{state} = MD_ENTITY_STATE;
5977            $self->{kwd} .= chr $self->{nc};
5978            
5979        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5980          $self->{line_prev} = $self->{line};
5981          $self->{column_prev} = $self->{column};
5982          $self->{column}++;
5983          $self->{nc}
5984              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5985        } else {
5986          $self->{set_nc}->($self);
5987        }
5988      
5989            redo A;
5990          } elsif ($self->{nc} == 0x004C or # L
5991                   $self->{nc} == 0x006C) { # l
5992            ## XML5: <!ELEMENT> not supported.
5993            $self->{state} = MD_ELEMENT_STATE;
5994            $self->{kwd} .= chr $self->{nc};
5995            
5996        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5997          $self->{line_prev} = $self->{line};
5998          $self->{column_prev} = $self->{column};
5999          $self->{column}++;
6000          $self->{nc}
6001              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6002        } else {
6003          $self->{set_nc}->($self);
6004        }
6005      
6006            redo A;
6007          } else {
6008            ## XML5: No parse error.
6009            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
6010                            line => $self->{line_prev},
6011                            column => $self->{column_prev} - 2
6012                                + 1 * ($self->{nc} == -1));
6013            ## Reconsume.
6014            $self->{state} = BOGUS_COMMENT_STATE;
6015            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
6016            redo A;
6017          }
6018        } elsif ($self->{state} == MD_ENTITY_STATE) {
6019          if ($self->{nc} == [
6020                undef,
6021                undef,
6022                0x0054, # T
6023                0x0049, # I
6024                0x0054, # T
6025              ]->[length $self->{kwd}] or
6026              $self->{nc} == [
6027                undef,
6028                undef,
6029                0x0074, # t
6030                0x0069, # i
6031                0x0074, # t
6032              ]->[length $self->{kwd}]) {
6033            ## Stay in the state.
6034            $self->{kwd} .= chr $self->{nc};
6035            
6036        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6037          $self->{line_prev} = $self->{line};
6038          $self->{column_prev} = $self->{column};
6039          $self->{column}++;
6040          $self->{nc}
6041              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6042        } else {
6043          $self->{set_nc}->($self);
6044        }
6045      
6046            redo A;
6047          } elsif ((length $self->{kwd}) == 5 and
6048                   ($self->{nc} == 0x0059 or # Y
6049                    $self->{nc} == 0x0079)) { # y
6050            if ($self->{kwd} ne 'ENTIT' or $self->{nc} == 0x0079) {
6051              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
6052                              text => 'ENTITY',
6053                              line => $self->{line_prev},
6054                              column => $self->{column_prev} - 4);
6055            }
6056            $self->{ct} = {type => GENERAL_ENTITY_TOKEN, name => '',
6057                           line => $self->{line_prev},
6058                           column => $self->{column_prev} - 6};
6059            $self->{state} = DOCTYPE_MD_STATE;
6060            
6061        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6062          $self->{line_prev} = $self->{line};
6063          $self->{column_prev} = $self->{column};
6064          $self->{column}++;
6065          $self->{nc}
6066              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6067        } else {
6068          $self->{set_nc}->($self);
6069        }
6070      
6071            redo A;
6072          } else {
6073            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
6074                            line => $self->{line_prev},
6075                            column => $self->{column_prev} - 1
6076                                - (length $self->{kwd})
6077                                + 1 * ($self->{nc} == -1));
6078            $self->{state} = BOGUS_COMMENT_STATE;
6079            ## Reconsume.
6080            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
6081            redo A;
6082          }
6083        } elsif ($self->{state} == MD_ELEMENT_STATE) {
6084          if ($self->{nc} == [
6085               undef,
6086               undef,
6087               0x0045, # E
6088               0x004D, # M
6089               0x0045, # E
6090               0x004E, # N
6091              ]->[length $self->{kwd}] or
6092              $self->{nc} == [
6093               undef,
6094               undef,
6095               0x0065, # e
6096               0x006D, # m
6097               0x0065, # e
6098               0x006E, # n
6099              ]->[length $self->{kwd}]) {
6100            ## Stay in the state.
6101            $self->{kwd} .= chr $self->{nc};
6102            
6103        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6104          $self->{line_prev} = $self->{line};
6105          $self->{column_prev} = $self->{column};
6106          $self->{column}++;
6107          $self->{nc}
6108              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6109        } else {
6110          $self->{set_nc}->($self);
6111        }
6112      
6113            redo A;
6114          } elsif ((length $self->{kwd}) == 6 and
6115                   ($self->{nc} == 0x0054 or # T
6116                    $self->{nc} == 0x0074)) { # t
6117            if ($self->{kwd} ne 'ELEMEN' or $self->{nc} == 0x0074) {
6118              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
6119                              text => 'ELEMENT',
6120                              line => $self->{line_prev},
6121                              column => $self->{column_prev} - 5);
6122            }
6123            $self->{ct} = {type => ELEMENT_TOKEN, name => '',
6124                           line => $self->{line_prev},
6125                           column => $self->{column_prev} - 7};
6126            $self->{state} = DOCTYPE_MD_STATE;
6127            
6128        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6129          $self->{line_prev} = $self->{line};
6130          $self->{column_prev} = $self->{column};
6131          $self->{column}++;
6132          $self->{nc}
6133              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6134        } else {
6135          $self->{set_nc}->($self);
6136        }
6137      
6138            redo A;
6139          } else {
6140            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
6141                            line => $self->{line_prev},
6142                            column => $self->{column_prev} - 1
6143                                - (length $self->{kwd})
6144                                + 1 * ($self->{nc} == -1));
6145            $self->{state} = BOGUS_COMMENT_STATE;
6146            ## Reconsume.
6147            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
6148            redo A;
6149          }
6150        } elsif ($self->{state} == MD_ATTLIST_STATE) {
6151          if ($self->{nc} == [
6152               undef,
6153               0x0054, # T
6154               0x0054, # T
6155               0x004C, # L
6156               0x0049, # I
6157               0x0053, # S
6158              ]->[length $self->{kwd}] or
6159              $self->{nc} == [
6160               undef,
6161               0x0074, # t
6162               0x0074, # t
6163               0x006C, # l
6164               0x0069, # i
6165               0x0073, # s
6166              ]->[length $self->{kwd}]) {
6167            ## Stay in the state.
6168            $self->{kwd} .= chr $self->{nc};
6169            
6170        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6171          $self->{line_prev} = $self->{line};
6172          $self->{column_prev} = $self->{column};
6173          $self->{column}++;
6174          $self->{nc}
6175              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6176        } else {
6177          $self->{set_nc}->($self);
6178        }
6179      
6180            redo A;
6181          } elsif ((length $self->{kwd}) == 6 and
6182                   ($self->{nc} == 0x0054 or # T
6183                    $self->{nc} == 0x0074)) { # t
6184            if ($self->{kwd} ne 'ATTLIS' or $self->{nc} == 0x0074) {
6185              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
6186                              text => 'ATTLIST',
6187                              line => $self->{line_prev},
6188                              column => $self->{column_prev} - 5);
6189            }
6190            $self->{ct} = {type => ATTLIST_TOKEN, name => '',
6191                           attrdefs => [],
6192                           line => $self->{line_prev},
6193                           column => $self->{column_prev} - 7};
6194            $self->{state} = DOCTYPE_MD_STATE;
6195            
6196        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6197          $self->{line_prev} = $self->{line};
6198          $self->{column_prev} = $self->{column};
6199          $self->{column}++;
6200          $self->{nc}
6201              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6202        } else {
6203          $self->{set_nc}->($self);
6204        }
6205      
6206            redo A;
6207          } else {
6208            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
6209                            line => $self->{line_prev},
6210                            column => $self->{column_prev} - 1
6211                                 - (length $self->{kwd})
6212                                 + 1 * ($self->{nc} == -1));
6213            $self->{state} = BOGUS_COMMENT_STATE;
6214            ## Reconsume.
6215            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
6216            redo A;
6217          }
6218        } elsif ($self->{state} == MD_NOTATION_STATE) {
6219          if ($self->{nc} == [
6220               undef,
6221               0x004F, # O
6222               0x0054, # T
6223               0x0041, # A
6224               0x0054, # T
6225               0x0049, # I
6226               0x004F, # O
6227              ]->[length $self->{kwd}] or
6228              $self->{nc} == [
6229               undef,
6230               0x006F, # o
6231               0x0074, # t
6232               0x0061, # a
6233               0x0074, # t
6234               0x0069, # i
6235               0x006F, # o
6236              ]->[length $self->{kwd}]) {
6237            ## Stay in the state.
6238            $self->{kwd} .= chr $self->{nc};
6239            
6240        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6241          $self->{line_prev} = $self->{line};
6242          $self->{column_prev} = $self->{column};
6243          $self->{column}++;
6244          $self->{nc}
6245              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6246        } else {
6247          $self->{set_nc}->($self);
6248        }
6249      
6250            redo A;
6251          } elsif ((length $self->{kwd}) == 7 and
6252                   ($self->{nc} == 0x004E or # N
6253                    $self->{nc} == 0x006E)) { # n
6254            if ($self->{kwd} ne 'NOTATIO' or $self->{nc} == 0x006E) {
6255              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
6256                              text => 'NOTATION',
6257                              line => $self->{line_prev},
6258                              column => $self->{column_prev} - 6);
6259            }
6260            $self->{ct} = {type => NOTATION_TOKEN, name => '',
6261                           line => $self->{line_prev},
6262                           column => $self->{column_prev} - 8};
6263            $self->{state} = DOCTYPE_MD_STATE;
6264            
6265        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6266          $self->{line_prev} = $self->{line};
6267          $self->{column_prev} = $self->{column};
6268          $self->{column}++;
6269          $self->{nc}
6270              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6271        } else {
6272          $self->{set_nc}->($self);
6273        }
6274      
6275            redo A;
6276          } else {
6277            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
6278                            line => $self->{line_prev},
6279                            column => $self->{column_prev} - 1
6280                                - (length $self->{kwd})
6281                                + 1 * ($self->{nc} == -1));
6282            $self->{state} = BOGUS_COMMENT_STATE;
6283            ## Reconsume.
6284            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
6285            redo A;
6286          }
6287        } elsif ($self->{state} == DOCTYPE_MD_STATE) {
6288          ## XML5: "DOCTYPE ENTITY state", "DOCTYPE ATTLIST state", and
6289          ## "DOCTYPE NOTATION state".
6290    
6291          if ($is_space->{$self->{nc}}) {
6292            ## XML5: [NOTATION] Switch to the "DOCTYPE NOTATION identifier state".
6293            $self->{state} = BEFORE_MD_NAME_STATE;
6294            
6295        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6296          $self->{line_prev} = $self->{line};
6297          $self->{column_prev} = $self->{column};
6298          $self->{column}++;
6299          $self->{nc}
6300              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6301        } else {
6302          $self->{set_nc}->($self);
6303        }
6304      
6305            redo A;
6306          } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
6307                   $self->{nc} == 0x0025) { # %
6308            ## XML5: Switch to the "DOCTYPE bogus comment state".
6309            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before md name'); ## TODO: type
6310            $self->{state} = DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE;
6311            
6312        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6313          $self->{line_prev} = $self->{line};
6314          $self->{column_prev} = $self->{column};
6315          $self->{column}++;
6316          $self->{nc}
6317              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6318        } else {
6319          $self->{set_nc}->($self);
6320        }
6321      
6322            redo A;
6323          } elsif ($self->{nc} == -1) {
6324            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6325            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6326            ## Reconsume.
6327            redo A;
6328          } elsif ($self->{nc} == 0x003E) { # >
6329            ## XML5: Switch to the "DOCTYPE bogus comment state".
6330            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md name'); ## TODO: type
6331            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6332            
6333        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6334          $self->{line_prev} = $self->{line};
6335          $self->{column_prev} = $self->{column};
6336          $self->{column}++;
6337          $self->{nc}
6338              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6339        } else {
6340          $self->{set_nc}->($self);
6341        }
6342      
6343            redo A;
6344          } else {
6345            ## XML5: Switch to the "DOCTYPE bogus comment state".
6346            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before md name'); ## TODO: type
6347            $self->{state} = BEFORE_MD_NAME_STATE;
6348            redo A;
6349          }
6350        } elsif ($self->{state} == BEFORE_MD_NAME_STATE) {
6351          ## XML5: "DOCTYPE ENTITY parameter state", "DOCTYPE ENTITY type
6352          ## before state", "DOCTYPE ATTLIST name before state".
6353    
6354          if ($is_space->{$self->{nc}}) {
6355            ## Stay in the state.
6356            
6357        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6358          $self->{line_prev} = $self->{line};
6359          $self->{column_prev} = $self->{column};
6360          $self->{column}++;
6361          $self->{nc}
6362              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6363        } else {
6364          $self->{set_nc}->($self);
6365        }
6366      
6367            redo A;
6368          } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
6369                   $self->{nc} == 0x0025) { # %
6370            $self->{state} = DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE;
6371            
6372        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6373          $self->{line_prev} = $self->{line};
6374          $self->{column_prev} = $self->{column};
6375          $self->{column}++;
6376          $self->{nc}
6377              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6378        } else {
6379          $self->{set_nc}->($self);
6380        }
6381      
6382            redo A;
6383          } elsif ($self->{nc} == 0x003E) { # >
6384            ## XML5: Same as "Anything else".
6385            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md name'); ## TODO: type
6386            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6387            
6388        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6389          $self->{line_prev} = $self->{line};
6390          $self->{column_prev} = $self->{column};
6391          $self->{column}++;
6392          $self->{nc}
6393              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6394        } else {
6395          $self->{set_nc}->($self);
6396        }
6397      
6398            redo A;
6399          } elsif ($self->{nc} == -1) {
6400            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6401            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6402            ## Reconsume.
6403            redo A;
6404          } else {
6405            ## XML5: [ATTLIST] Not defined yet.
6406            $self->{ct}->{name} .= chr $self->{nc};
6407            $self->{state} = MD_NAME_STATE;
6408            
6409        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6410          $self->{line_prev} = $self->{line};
6411          $self->{column_prev} = $self->{column};
6412          $self->{column}++;
6413          $self->{nc}
6414              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6415        } else {
6416          $self->{set_nc}->($self);
6417        }
6418      
6419            redo A;
6420          }
6421        } elsif ($self->{state} == DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE) {
6422          if ($is_space->{$self->{nc}}) {
6423            ## XML5: Switch to the "DOCTYPE ENTITY parameter state".
6424            $self->{ct}->{type} = PARAMETER_ENTITY_TOKEN;
6425            $self->{state} = BEFORE_MD_NAME_STATE;
6426            
6427        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6428          $self->{line_prev} = $self->{line};
6429          $self->{column_prev} = $self->{column};
6430          $self->{column}++;
6431          $self->{nc}
6432              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6433        } else {
6434          $self->{set_nc}->($self);
6435        }
6436      
6437            redo A;
6438          } elsif ($self->{nc} == 0x003E) { # >
6439            ## XML5: Same as "Anything else".
6440            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md name'); ## TODO: type
6441            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6442            
6443        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6444          $self->{line_prev} = $self->{line};
6445          $self->{column_prev} = $self->{column};
6446          $self->{column}++;
6447          $self->{nc}
6448              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6449        } else {
6450          $self->{set_nc}->($self);
6451        }
6452      
6453            redo A;
6454          } elsif ($self->{nc} == -1) {
6455            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md');
6456            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6457            ## Reconsume.
6458            redo A;
6459          } else {
6460            ## XML5: No parse error.
6461            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space after ENTITY percent'); ## TODO: type
6462            $self->{state} = BOGUS_COMMENT_STATE;
6463            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
6464            ## Reconsume.
6465            redo A;
6466          }
6467        } elsif ($self->{state} == MD_NAME_STATE) {
6468          ## XML5: "DOCTYPE ENTITY name state" and "DOCTYPE ATTLIST name state".
6469          
6470          if ($is_space->{$self->{nc}}) {
6471            if ($self->{ct}->{type} == ATTLIST_TOKEN) {
6472              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
6473            } elsif ($self->{ct}->{type} == ELEMENT_TOKEN) {
6474              $self->{state} = AFTER_ELEMENT_NAME_STATE;
6475            } else { # ENTITY/NOTATION
6476              $self->{state} = AFTER_DOCTYPE_NAME_STATE;
6477            }
6478            
6479        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6480          $self->{line_prev} = $self->{line};
6481          $self->{column_prev} = $self->{column};
6482          $self->{column}++;
6483          $self->{nc}
6484              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6485        } else {
6486          $self->{set_nc}->($self);
6487        }
6488      
6489            redo A;
6490          } elsif ($self->{nc} == 0x003E) { # >
6491            if ($self->{ct}->{type} == ATTLIST_TOKEN) {
6492              #
6493            } else {
6494              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md def'); ## TODO: type
6495            }
6496            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6497            
6498        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6499          $self->{line_prev} = $self->{line};
6500          $self->{column_prev} = $self->{column};
6501          $self->{column}++;
6502          $self->{nc}
6503              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6504        } else {
6505          $self->{set_nc}->($self);
6506        }
6507      
6508            return  ($self->{ct}); # ELEMENT/ENTITY/ATTLIST/NOTATION
6509            redo A;
6510          } elsif ($self->{nc} == -1) {
6511            ## XML5: [ATTLIST] No parse error.
6512            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md');
6513            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6514            ## Reconsume.
6515            return  ($self->{ct}); # ELEMENT/ENTITY/ATTLIST/NOTATION
6516            redo A;
6517          } else {
6518            ## XML5: [ATTLIST] Not defined yet.
6519            $self->{ct}->{name} .= chr $self->{nc};
6520            ## Stay in the state.
6521            
6522        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6523          $self->{line_prev} = $self->{line};
6524          $self->{column_prev} = $self->{column};
6525          $self->{column}++;
6526          $self->{nc}
6527              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6528        } else {
6529          $self->{set_nc}->($self);
6530        }
6531      
6532            redo A;
6533          }
6534        } elsif ($self->{state} == DOCTYPE_ATTLIST_NAME_AFTER_STATE) {
6535          if ($is_space->{$self->{nc}}) {
6536            ## Stay in the state.
6537            
6538        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6539          $self->{line_prev} = $self->{line};
6540          $self->{column_prev} = $self->{column};
6541          $self->{column}++;
6542          $self->{nc}
6543              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6544        } else {
6545          $self->{set_nc}->($self);
6546        }
6547      
6548            redo A;
6549          } elsif ($self->{nc} == 0x003E) { # >
6550            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6551            
6552        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6553          $self->{line_prev} = $self->{line};
6554          $self->{column_prev} = $self->{column};
6555          $self->{column}++;
6556          $self->{nc}
6557              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6558        } else {
6559          $self->{set_nc}->($self);
6560        }
6561      
6562            return  ($self->{ct}); # ATTLIST
6563            redo A;
6564          } elsif ($self->{nc} == -1) {
6565            ## XML5: No parse error.
6566            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6567            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6568            return  ($self->{ct});
6569            redo A;
6570          } else {
6571            ## XML5: Not defined yet.
6572            $self->{ca} = {name => chr ($self->{nc}), # attrdef
6573                           tokens => [],
6574                           line => $self->{line}, column => $self->{column}};
6575            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE;
6576            
6577        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6578          $self->{line_prev} = $self->{line};
6579          $self->{column_prev} = $self->{column};
6580          $self->{column}++;
6581          $self->{nc}
6582              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6583        } else {
6584          $self->{set_nc}->($self);
6585        }
6586      
6587            redo A;
6588          }
6589        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE) {
6590          if ($is_space->{$self->{nc}}) {
6591            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE;
6592            
6593        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6594          $self->{line_prev} = $self->{line};
6595          $self->{column_prev} = $self->{column};
6596          $self->{column}++;
6597          $self->{nc}
6598              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6599        } else {
6600          $self->{set_nc}->($self);
6601        }
6602      
6603            redo A;
6604          } elsif ($self->{nc} == 0x003E) { # >
6605            ## XML5: Same as "anything else".
6606            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr type'); ## TODO: type
6607            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6608            
6609        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6610          $self->{line_prev} = $self->{line};
6611          $self->{column_prev} = $self->{column};
6612          $self->{column}++;
6613          $self->{nc}
6614              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6615        } else {
6616          $self->{set_nc}->($self);
6617        }
6618      
6619            return  ($self->{ct}); # ATTLIST
6620            redo A;
6621          } elsif ($self->{nc} == 0x0028) { # (
6622            ## XML5: Same as "anything else".
6623            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before paren'); ## TODO: type
6624            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6625            
6626        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6627          $self->{line_prev} = $self->{line};
6628          $self->{column_prev} = $self->{column};
6629          $self->{column}++;
6630          $self->{nc}
6631              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6632        } else {
6633          $self->{set_nc}->($self);
6634        }
6635      
6636            redo A;
6637          } elsif ($self->{nc} == -1) {
6638            ## XML5: No parse error.
6639            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6640            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6641            
6642        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6643          $self->{line_prev} = $self->{line};
6644          $self->{column_prev} = $self->{column};
6645          $self->{column}++;
6646          $self->{nc}
6647              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6648        } else {
6649          $self->{set_nc}->($self);
6650        }
6651      
6652            return  ($self->{ct}); # ATTLIST
6653            redo A;
6654          } else {
6655            ## XML5: Not defined yet.
6656            $self->{ca}->{name} .= chr $self->{nc};
6657            ## Stay in the state.
6658            
6659        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6660          $self->{line_prev} = $self->{line};
6661          $self->{column_prev} = $self->{column};
6662          $self->{column}++;
6663          $self->{nc}
6664              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6665        } else {
6666          $self->{set_nc}->($self);
6667        }
6668      
6669            redo A;
6670          }
6671        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE) {
6672          if ($is_space->{$self->{nc}}) {
6673            ## Stay in the state.
6674            
6675        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6676          $self->{line_prev} = $self->{line};
6677          $self->{column_prev} = $self->{column};
6678          $self->{column}++;
6679          $self->{nc}
6680              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6681        } else {
6682          $self->{set_nc}->($self);
6683        }
6684      
6685            redo A;
6686          } elsif ($self->{nc} == 0x003E) { # >
6687            ## XML5: Same as "anything else".
6688            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr type'); ## TODO: type
6689            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6690            
6691        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6692          $self->{line_prev} = $self->{line};
6693          $self->{column_prev} = $self->{column};
6694          $self->{column}++;
6695          $self->{nc}
6696              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6697        } else {
6698          $self->{set_nc}->($self);
6699        }
6700      
6701            return  ($self->{ct}); # ATTLIST
6702            redo A;
6703          } elsif ($self->{nc} == 0x0028) { # (
6704            ## XML5: Same as "anything else".
6705            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6706            
6707        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6708          $self->{line_prev} = $self->{line};
6709          $self->{column_prev} = $self->{column};
6710          $self->{column}++;
6711          $self->{nc}
6712              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6713        } else {
6714          $self->{set_nc}->($self);
6715        }
6716      
6717            redo A;
6718          } elsif ($self->{nc} == -1) {
6719            ## XML5: No parse error.
6720            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6721            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6722            
6723        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6724          $self->{line_prev} = $self->{line};
6725          $self->{column_prev} = $self->{column};
6726          $self->{column}++;
6727          $self->{nc}
6728              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6729        } else {
6730          $self->{set_nc}->($self);
6731        }
6732      
6733            return  ($self->{ct});
6734            redo A;
6735          } else {
6736            ## XML5: Not defined yet.
6737            $self->{ca}->{type} = chr $self->{nc};
6738            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE;
6739            
6740        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6741          $self->{line_prev} = $self->{line};
6742          $self->{column_prev} = $self->{column};
6743          $self->{column}++;
6744          $self->{nc}
6745              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6746        } else {
6747          $self->{set_nc}->($self);
6748        }
6749      
6750            redo A;
6751          }
6752        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE) {
6753          if ($is_space->{$self->{nc}}) {
6754            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE;
6755            
6756        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6757          $self->{line_prev} = $self->{line};
6758          $self->{column_prev} = $self->{column};
6759          $self->{column}++;
6760          $self->{nc}
6761              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6762        } else {
6763          $self->{set_nc}->($self);
6764        }
6765      
6766            redo A;
6767          } elsif ($self->{nc} == 0x0023) { # #
6768            ## XML5: Same as "anything else".
6769            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6770            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6771            
6772        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6773          $self->{line_prev} = $self->{line};
6774          $self->{column_prev} = $self->{column};
6775          $self->{column}++;
6776          $self->{nc}
6777              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6778        } else {
6779          $self->{set_nc}->($self);
6780        }
6781      
6782            redo A;
6783          } elsif ($self->{nc} == 0x0022) { # "
6784            ## XML5: Same as "anything else".
6785            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6786            $self->{ca}->{value} = '';
6787            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6788            
6789        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6790          $self->{line_prev} = $self->{line};
6791          $self->{column_prev} = $self->{column};
6792          $self->{column}++;
6793          $self->{nc}
6794              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6795        } else {
6796          $self->{set_nc}->($self);
6797        }
6798      
6799            redo A;
6800          } elsif ($self->{nc} == 0x0027) { # '
6801            ## XML5: Same as "anything else".
6802            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6803            $self->{ca}->{value} = '';
6804            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6805            
6806        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6807          $self->{line_prev} = $self->{line};
6808          $self->{column_prev} = $self->{column};
6809          $self->{column}++;
6810          $self->{nc}
6811              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6812        } else {
6813          $self->{set_nc}->($self);
6814        }
6815      
6816            redo A;
6817          } elsif ($self->{nc} == 0x003E) { # >
6818            ## XML5: Same as "anything else".
6819            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6820            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6821            
6822        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6823          $self->{line_prev} = $self->{line};
6824          $self->{column_prev} = $self->{column};
6825          $self->{column}++;
6826          $self->{nc}
6827              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6828        } else {
6829          $self->{set_nc}->($self);
6830        }
6831      
6832            return  ($self->{ct}); # ATTLIST
6833            redo A;
6834          } elsif ($self->{nc} == 0x0028) { # (
6835            ## XML5: Same as "anything else".
6836            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before paren'); ## TODO: type
6837            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6838            
6839        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6840          $self->{line_prev} = $self->{line};
6841          $self->{column_prev} = $self->{column};
6842          $self->{column}++;
6843          $self->{nc}
6844              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6845        } else {
6846          $self->{set_nc}->($self);
6847        }
6848      
6849            redo A;
6850          } elsif ($self->{nc} == -1) {
6851            ## XML5: No parse error.
6852            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6853            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6854            
6855        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6856          $self->{line_prev} = $self->{line};
6857          $self->{column_prev} = $self->{column};
6858          $self->{column}++;
6859          $self->{nc}
6860              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6861        } else {
6862          $self->{set_nc}->($self);
6863        }
6864      
6865            return  ($self->{ct});
6866            redo A;
6867          } else {
6868            ## XML5: Not defined yet.
6869            $self->{ca}->{type} .= chr $self->{nc};
6870            ## Stay in the state.
6871            
6872        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6873          $self->{line_prev} = $self->{line};
6874          $self->{column_prev} = $self->{column};
6875          $self->{column}++;
6876          $self->{nc}
6877              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6878        } else {
6879          $self->{set_nc}->($self);
6880        }
6881      
6882            redo A;
6883          }
6884        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE) {
6885          if ($is_space->{$self->{nc}}) {
6886            ## Stay in the state.
6887            
6888        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6889          $self->{line_prev} = $self->{line};
6890          $self->{column_prev} = $self->{column};
6891          $self->{column}++;
6892          $self->{nc}
6893              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6894        } else {
6895          $self->{set_nc}->($self);
6896        }
6897      
6898            redo A;
6899          } elsif ($self->{nc} == 0x0028) { # (
6900            ## XML5: Same as "anything else".
6901            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6902            
6903        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6904          $self->{line_prev} = $self->{line};
6905          $self->{column_prev} = $self->{column};
6906          $self->{column}++;
6907          $self->{nc}
6908              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6909        } else {
6910          $self->{set_nc}->($self);
6911        }
6912      
6913            redo A;
6914          } elsif ($self->{nc} == 0x0023) { # #
6915            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6916            
6917        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6918          $self->{line_prev} = $self->{line};
6919          $self->{column_prev} = $self->{column};
6920          $self->{column}++;
6921          $self->{nc}
6922              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6923        } else {
6924          $self->{set_nc}->($self);
6925        }
6926      
6927            redo A;
6928          } elsif ($self->{nc} == 0x0022) { # "
6929            ## XML5: Same as "anything else".
6930            $self->{ca}->{value} = '';
6931            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6932            
6933        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6934          $self->{line_prev} = $self->{line};
6935          $self->{column_prev} = $self->{column};
6936          $self->{column}++;
6937          $self->{nc}
6938              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6939        } else {
6940          $self->{set_nc}->($self);
6941        }
6942      
6943            redo A;
6944          } elsif ($self->{nc} == 0x0027) { # '
6945            ## XML5: Same as "anything else".
6946            $self->{ca}->{value} = '';
6947            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6948            
6949        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6950          $self->{line_prev} = $self->{line};
6951          $self->{column_prev} = $self->{column};
6952          $self->{column}++;
6953          $self->{nc}
6954              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6955        } else {
6956          $self->{set_nc}->($self);
6957        }
6958      
6959            redo A;
6960          } elsif ($self->{nc} == 0x003E) { # >
6961            ## XML5: Same as "anything else".
6962            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6963            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6964            
6965        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6966          $self->{line_prev} = $self->{line};
6967          $self->{column_prev} = $self->{column};
6968          $self->{column}++;
6969          $self->{nc}
6970              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6971        } else {
6972          $self->{set_nc}->($self);
6973        }
6974      
6975            return  ($self->{ct}); # ATTLIST
6976            redo A;
6977          } elsif ($self->{nc} == -1) {
6978            ## XML5: No parse error.
6979            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6980            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6981            
6982        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6983          $self->{line_prev} = $self->{line};
6984          $self->{column_prev} = $self->{column};
6985          $self->{column}++;
6986          $self->{nc}
6987              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6988        } else {
6989          $self->{set_nc}->($self);
6990        }
6991      
6992            return  ($self->{ct});
6993            redo A;
6994          } else {
6995            ## XML5: Switch to the "DOCTYPE bogus comment state".
6996            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
6997            $self->{ca}->{value} = '';
6998            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
6999            ## Reconsume.
7000            redo A;
7001          }
7002        } elsif ($self->{state} == BEFORE_ALLOWED_TOKEN_STATE) {
7003          if ($is_space->{$self->{nc}}) {
7004            ## Stay in the state.
7005            
7006        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7007          $self->{line_prev} = $self->{line};
7008          $self->{column_prev} = $self->{column};
7009          $self->{column}++;
7010          $self->{nc}
7011              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7012        } else {
7013          $self->{set_nc}->($self);
7014        }
7015      
7016            redo A;
7017          } elsif ($self->{nc} == 0x007C) { # |
7018            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty allowed token'); ## TODO: type
7019            ## Stay in the state.
7020            
7021        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7022          $self->{line_prev} = $self->{line};
7023          $self->{column_prev} = $self->{column};
7024          $self->{column}++;
7025          $self->{nc}
7026              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7027        } else {
7028          $self->{set_nc}->($self);
7029        }
7030      
7031            redo A;
7032          } elsif ($self->{nc} == 0x0029) { # )
7033            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty allowed token'); ## TODO: type
7034            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
7035            
7036        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7037          $self->{line_prev} = $self->{line};
7038          $self->{column_prev} = $self->{column};
7039          $self->{column}++;
7040          $self->{nc}
7041              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7042        } else {
7043          $self->{set_nc}->($self);
7044        }
7045      
7046            redo A;
7047          } elsif ($self->{nc} == 0x003E) { # >
7048            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
7049            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7050            
7051        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7052          $self->{line_prev} = $self->{line};
7053          $self->{column_prev} = $self->{column};
7054          $self->{column}++;
7055          $self->{nc}
7056              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7057        } else {
7058          $self->{set_nc}->($self);
7059        }
7060      
7061            return  ($self->{ct}); # ATTLIST
7062            redo A;
7063          } elsif ($self->{nc} == -1) {
7064            ## XML5: No parse error.
7065            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7066            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7067            
7068        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7069          $self->{line_prev} = $self->{line};
7070          $self->{column_prev} = $self->{column};
7071          $self->{column}++;
7072          $self->{nc}
7073              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7074        } else {
7075          $self->{set_nc}->($self);
7076        }
7077      
7078            return  ($self->{ct});
7079            redo A;
7080          } else {
7081            push @{$self->{ca}->{tokens}}, chr $self->{nc};
7082            $self->{state} = ALLOWED_TOKEN_STATE;
7083            
7084        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7085          $self->{line_prev} = $self->{line};
7086          $self->{column_prev} = $self->{column};
7087          $self->{column}++;
7088          $self->{nc}
7089              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7090        } else {
7091          $self->{set_nc}->($self);
7092        }
7093      
7094            redo A;
7095          }
7096        } elsif ($self->{state} == ALLOWED_TOKEN_STATE) {
7097          if ($is_space->{$self->{nc}}) {
7098            $self->{state} = AFTER_ALLOWED_TOKEN_STATE;
7099            
7100        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7101          $self->{line_prev} = $self->{line};
7102          $self->{column_prev} = $self->{column};
7103          $self->{column}++;
7104          $self->{nc}
7105              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7106        } else {
7107          $self->{set_nc}->($self);
7108        }
7109      
7110            redo A;
7111          } elsif ($self->{nc} == 0x007C) { # |
7112            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
7113            
7114        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7115          $self->{line_prev} = $self->{line};
7116          $self->{column_prev} = $self->{column};
7117          $self->{column}++;
7118          $self->{nc}
7119              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7120        } else {
7121          $self->{set_nc}->($self);
7122        }
7123      
7124            redo A;
7125          } elsif ($self->{nc} == 0x0029) { # )
7126            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
7127            
7128        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7129          $self->{line_prev} = $self->{line};
7130          $self->{column_prev} = $self->{column};
7131          $self->{column}++;
7132          $self->{nc}
7133              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7134        } else {
7135          $self->{set_nc}->($self);
7136        }
7137      
7138            redo A;
7139          } elsif ($self->{nc} == 0x003E) { # >
7140            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
7141            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7142            
7143        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7144          $self->{line_prev} = $self->{line};
7145          $self->{column_prev} = $self->{column};
7146          $self->{column}++;
7147          $self->{nc}
7148              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7149        } else {
7150          $self->{set_nc}->($self);
7151        }
7152      
7153            return  ($self->{ct}); # ATTLIST
7154            redo A;
7155          } elsif ($self->{nc} == -1) {
7156            ## XML5: No parse error.
7157            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7158            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7159            
7160        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7161          $self->{line_prev} = $self->{line};
7162          $self->{column_prev} = $self->{column};
7163          $self->{column}++;
7164          $self->{nc}
7165              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7166        } else {
7167          $self->{set_nc}->($self);
7168        }
7169      
7170            return  ($self->{ct});
7171            redo A;
7172          } else {
7173            $self->{ca}->{tokens}->[-1] .= chr $self->{nc};
7174            ## Stay in the state.
7175            
7176        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7177          $self->{line_prev} = $self->{line};
7178          $self->{column_prev} = $self->{column};
7179          $self->{column}++;
7180          $self->{nc}
7181              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7182        } else {
7183          $self->{set_nc}->($self);
7184        }
7185      
7186            redo A;
7187          }
7188        } elsif ($self->{state} == AFTER_ALLOWED_TOKEN_STATE) {
7189          if ($is_space->{$self->{nc}}) {
7190            ## Stay in the state.
7191            
7192        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7193          $self->{line_prev} = $self->{line};
7194          $self->{column_prev} = $self->{column};
7195          $self->{column}++;
7196          $self->{nc}
7197              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7198        } else {
7199          $self->{set_nc}->($self);
7200        }
7201      
7202            redo A;
7203          } elsif ($self->{nc} == 0x007C) { # |
7204            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
7205            
7206        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7207          $self->{line_prev} = $self->{line};
7208          $self->{column_prev} = $self->{column};
7209          $self->{column}++;
7210          $self->{nc}
7211              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7212        } else {
7213          $self->{set_nc}->($self);
7214        }
7215      
7216            redo A;
7217          } elsif ($self->{nc} == 0x0029) { # )
7218            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
7219            
7220        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7221          $self->{line_prev} = $self->{line};
7222          $self->{column_prev} = $self->{column};
7223          $self->{column}++;
7224          $self->{nc}
7225              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7226        } else {
7227          $self->{set_nc}->($self);
7228        }
7229      
7230            redo A;
7231          } elsif ($self->{nc} == 0x003E) { # >
7232            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
7233            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7234            
7235        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7236          $self->{line_prev} = $self->{line};
7237          $self->{column_prev} = $self->{column};
7238          $self->{column}++;
7239          $self->{nc}
7240              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7241        } else {
7242          $self->{set_nc}->($self);
7243        }
7244      
7245            return  ($self->{ct}); # ATTLIST
7246            redo A;
7247          } elsif ($self->{nc} == -1) {
7248            ## XML5: No parse error.
7249            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7250            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7251            
7252        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7253          $self->{line_prev} = $self->{line};
7254          $self->{column_prev} = $self->{column};
7255          $self->{column}++;
7256          $self->{nc}
7257              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7258        } else {
7259          $self->{set_nc}->($self);
7260        }
7261      
7262            return  ($self->{ct});
7263            redo A;
7264          } else {
7265            $self->{parse_error}->(level => $self->{level}->{must}, type => 'space in allowed token', ## TODO: type
7266                            line => $self->{line_prev},
7267                            column => $self->{column_prev});
7268            $self->{ca}->{tokens}->[-1] .= ' ' . chr $self->{nc};
7269            $self->{state} = ALLOWED_TOKEN_STATE;
7270            
7271        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7272          $self->{line_prev} = $self->{line};
7273          $self->{column_prev} = $self->{column};
7274          $self->{column}++;
7275          $self->{nc}
7276              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7277        } else {
7278          $self->{set_nc}->($self);
7279        }
7280      
7281            redo A;
7282          }
7283        } elsif ($self->{state} == AFTER_ALLOWED_TOKENS_STATE) {
7284          if ($is_space->{$self->{nc}}) {
7285            $self->{state} = BEFORE_ATTR_DEFAULT_STATE;
7286            
7287        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7288          $self->{line_prev} = $self->{line};
7289          $self->{column_prev} = $self->{column};
7290          $self->{column}++;
7291          $self->{nc}
7292              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7293        } else {
7294          $self->{set_nc}->($self);
7295        }
7296      
7297            redo A;
7298          } elsif ($self->{nc} == 0x0023) { # #
7299            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7300            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
7301            
7302        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7303          $self->{line_prev} = $self->{line};
7304          $self->{column_prev} = $self->{column};
7305          $self->{column}++;
7306          $self->{nc}
7307              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7308        } else {
7309          $self->{set_nc}->($self);
7310        }
7311      
7312            redo A;
7313          } elsif ($self->{nc} == 0x0022) { # "
7314            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7315            $self->{ca}->{value} = '';
7316            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7317            
7318        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7319          $self->{line_prev} = $self->{line};
7320          $self->{column_prev} = $self->{column};
7321          $self->{column}++;
7322          $self->{nc}
7323              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7324        } else {
7325          $self->{set_nc}->($self);
7326        }
7327      
7328            redo A;
7329          } elsif ($self->{nc} == 0x0027) { # '
7330            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7331            $self->{ca}->{value} = '';
7332            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7333            
7334        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7335          $self->{line_prev} = $self->{line};
7336          $self->{column_prev} = $self->{column};
7337          $self->{column}++;
7338          $self->{nc}
7339              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7340        } else {
7341          $self->{set_nc}->($self);
7342        }
7343      
7344            redo A;
7345          } elsif ($self->{nc} == 0x003E) { # >
7346            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
7347            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7348            
7349        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7350          $self->{line_prev} = $self->{line};
7351          $self->{column_prev} = $self->{column};
7352          $self->{column}++;
7353          $self->{nc}
7354              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7355        } else {
7356          $self->{set_nc}->($self);
7357        }
7358      
7359            return  ($self->{ct}); # ATTLIST
7360            redo A;
7361          } elsif ($self->{nc} == -1) {
7362            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7363            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7364            
7365        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7366          $self->{line_prev} = $self->{line};
7367          $self->{column_prev} = $self->{column};
7368          $self->{column}++;
7369          $self->{nc}
7370              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7371        } else {
7372          $self->{set_nc}->($self);
7373        }
7374      
7375            return  ($self->{ct});
7376            redo A;
7377          } else {
7378            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
7379            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
7380            ## Reconsume.
7381            redo A;
7382          }
7383        } elsif ($self->{state} == BEFORE_ATTR_DEFAULT_STATE) {
              ## Dispatches on the current input character ($self->{nc})
              ## while waiting for the default part of an attribute
              ## definition in an ATTLIST declaration.
              ##
              ## NOTE: The repeated "if ($self->{char_buffer_pos} < length
              ## $self->{char_buffer}) {...} else {...}" block moves on to
              ## the next input character: it takes it from {char_buffer}
              ## (saving the old position in {line_prev}/{column_prev} and
              ## incrementing {column}) or, once the buffer is used up,
              ## calls the {set_nc} callback.
              ## NOTE: A "redo A;" placed right after a "return" is never
              ## reached.
7384          if ($is_space->{$self->{nc}}) {
7385            ## Stay in the state.
7386            
7387        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7388          $self->{line_prev} = $self->{line};
7389          $self->{column_prev} = $self->{column};
7390          $self->{column}++;
7391          $self->{nc}
7392              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7393        } else {
7394          $self->{set_nc}->($self);
7395        }
7396      
7397            redo A;
7398          } elsif ($self->{nc} == 0x0023) { # #
                ## Switch state and move past the "#".
7399            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
7400            
7401        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7402          $self->{line_prev} = $self->{line};
7403          $self->{column_prev} = $self->{column};
7404          $self->{column}++;
7405          $self->{nc}
7406              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7407        } else {
7408          $self->{set_nc}->($self);
7409        }
7410      
7411            redo A;
7412          } elsif ($self->{nc} == 0x0022) { # "
                ## Start with an empty value and read a double-quoted value.
7413            $self->{ca}->{value} = '';
7414            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7415            
7416        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7417          $self->{line_prev} = $self->{line};
7418          $self->{column_prev} = $self->{column};
7419          $self->{column}++;
7420          $self->{nc}
7421              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7422        } else {
7423          $self->{set_nc}->($self);
7424        }
7425      
7426            redo A;
7427          } elsif ($self->{nc} == 0x0027) { # '
                ## Start with an empty value and read a single-quoted value.
7428            $self->{ca}->{value} = '';
7429            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7430            
7431        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7432          $self->{line_prev} = $self->{line};
7433          $self->{column_prev} = $self->{column};
7434          $self->{column}++;
7435          $self->{nc}
7436              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7437        } else {
7438          $self->{set_nc}->($self);
7439        }
7440      
7441            redo A;
7442          } elsif ($self->{nc} == 0x003E) { # >
                ## The declaration ends without a default: report it, go
                ## back to the internal subset state, move past the ">" and
                ## return $self->{ct}.
7443            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
7444            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7445            
7446        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7447          $self->{line_prev} = $self->{line};
7448          $self->{column_prev} = $self->{column};
7449          $self->{column}++;
7450          $self->{nc}
7451              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7452        } else {
7453          $self->{set_nc}->($self);
7454        }
7455      
7456            return  ($self->{ct}); # ATTLIST
7457            redo A;
7458          } elsif ($self->{nc} == -1) {
                ## nc == -1 is presumably the end-of-input marker (TODO:
                ## confirm).  Report the unclosed declaration and return
                ## $self->{ct} as it stands.
7459            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7460            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7461            
7462        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7463          $self->{line_prev} = $self->{line};
7464          $self->{column_prev} = $self->{column};
7465          $self->{column}++;
7466          $self->{nc}
7467              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7468        } else {
7469          $self->{set_nc}->($self);
7470        }
7471      
7472            return  ($self->{ct});
7473            redo A;
7474          } else {
                ## Any other character: report it and let the unquoted
                ## value state process this same character.
7475            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
7476            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
7477            ## Reconsume.
7478            redo A;
7479          }
7480        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE) {
              ## Dispatches on the current input character ($self->{nc})
              ## before the first character that is collected into
              ## $self->{ca}->{default}.
              ##
              ## NOTE: The repeated "if ($self->{char_buffer_pos} < length
              ## $self->{char_buffer}) {...} else {...}" block moves on to
              ## the next input character: it takes it from {char_buffer}
              ## (saving the old position in {line_prev}/{column_prev} and
              ## incrementing {column}) or, once the buffer is used up,
              ## calls the {set_nc} callback.
              ## NOTE: A "redo A;" placed right after a "return" is never
              ## reached.
7481          if ($is_space->{$self->{nc}}) {
7482            ## XML5: No parse error.
                ## White space here is an error; the bogus markup
                ## declaration state gets to process this same character.
7483            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no default type'); ## TODO: type
7484            $self->{state} = BOGUS_MD_STATE;
7485            ## Reconsume.
7486            redo A;
7487          } elsif ($self->{nc} == 0x0022) { # "
7488            ## XML5: Same as "anything else".
                ## Start with an empty value and read a double-quoted value.
                ## No parse error is reported on this path.
7489            $self->{ca}->{value} = '';
7490            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7491            
7492        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7493          $self->{line_prev} = $self->{line};
7494          $self->{column_prev} = $self->{column};
7495          $self->{column}++;
7496          $self->{nc}
7497              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7498        } else {
7499          $self->{set_nc}->($self);
7500        }
7501      
7502            redo A;
7503          } elsif ($self->{nc} == 0x0027) { # '
7504            ## XML5: Same as "anything else".
                ## Start with an empty value and read a single-quoted value.
                ## No parse error is reported on this path.
7505            $self->{ca}->{value} = '';
7506            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7507            
7508        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7509          $self->{line_prev} = $self->{line};
7510          $self->{column_prev} = $self->{column};
7511          $self->{column}++;
7512          $self->{nc}
7513              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7514        } else {
7515          $self->{set_nc}->($self);
7516        }
7517      
7518            redo A;
7519          } elsif ($self->{nc} == 0x003E) { # >
7520            ## XML5: Same as "anything else".
                ## The declaration ends here: report it, go back to the
                ## internal subset state, move past the ">" and return
                ## $self->{ct}.  $self->{ca} is not added to {attrdefs}.
7521            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
7522            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7523            
7524        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7525          $self->{line_prev} = $self->{line};
7526          $self->{column_prev} = $self->{column};
7527          $self->{column}++;
7528          $self->{nc}
7529              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7530        } else {
7531          $self->{set_nc}->($self);
7532        }
7533      
7534            return  ($self->{ct}); # ATTLIST
7535            redo A;
7536          } elsif ($self->{nc} == -1) {
7537            ## XML5: No parse error.
                ## nc == -1 is presumably the end-of-input marker (TODO:
                ## confirm).  Report the unclosed declaration and return
                ## $self->{ct} as it stands.
7538            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7539            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7540            
7541        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7542          $self->{line_prev} = $self->{line};
7543          $self->{column_prev} = $self->{column};
7544          $self->{column}++;
7545          $self->{nc}
7546              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7547        } else {
7548          $self->{set_nc}->($self);
7549        }
7550      
7551            return  ($self->{ct});
7552            redo A;
7553          } else {
                ## Any other character becomes the first character of
                ## $self->{ca}->{default}.
7554            $self->{ca}->{default} = chr $self->{nc};
7555            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE;
7556            
7557        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7558          $self->{line_prev} = $self->{line};
7559          $self->{column_prev} = $self->{column};
7560          $self->{column}++;
7561          $self->{nc}
7562              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7563        } else {
7564          $self->{set_nc}->($self);
7565        }
7566      
7567            redo A;
7568          }
7569        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE) {
              ## Dispatches on the current input character ($self->{nc})
              ## while characters are being appended to
              ## $self->{ca}->{default}.
              ##
              ## NOTE: The repeated "if ($self->{char_buffer_pos} < length
              ## $self->{char_buffer}) {...} else {...}" block moves on to
              ## the next input character: it takes it from {char_buffer}
              ## (saving the old position in {line_prev}/{column_prev} and
              ## incrementing {column}) or, once the buffer is used up,
              ## calls the {set_nc} callback.
              ## NOTE: A "redo A;" placed right after a "return" is never
              ## reached.
7570          if ($is_space->{$self->{nc}}) {
                ## White space ends the collected {default} string.
7571            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE;
7572            
7573        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7574          $self->{line_prev} = $self->{line};
7575          $self->{column_prev} = $self->{column};
7576          $self->{column}++;
7577          $self->{nc}
7578              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7579        } else {
7580          $self->{set_nc}->($self);
7581        }
7582      
7583            redo A;
7584          } elsif ($self->{nc} == 0x0022) { # "
7585            ## XML5: Same as "anything else".
                ## A quote directly after the {default} string is reported,
                ## then an empty double-quoted value is started.
7586            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7587            $self->{ca}->{value} = '';
7588            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7589            
7590        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7591          $self->{line_prev} = $self->{line};
7592          $self->{column_prev} = $self->{column};
7593          $self->{column}++;
7594          $self->{nc}
7595              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7596        } else {
7597          $self->{set_nc}->($self);
7598        }
7599      
7600            redo A;
7601          } elsif ($self->{nc} == 0x0027) { # '
7602            ## XML5: Same as "anything else".
                ## A quote directly after the {default} string is reported,
                ## then an empty single-quoted value is started.
7603            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7604            $self->{ca}->{value} = '';
7605            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7606            
7607        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7608          $self->{line_prev} = $self->{line};
7609          $self->{column_prev} = $self->{column};
7610          $self->{column}++;
7611          $self->{nc}
7612              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7613        } else {
7614          $self->{set_nc}->($self);
7615        }
7616      
7617            redo A;
7618          } elsif ($self->{nc} == 0x003E) { # >
7619            ## XML5: Same as "anything else".
                ## End of the declaration: store $self->{ca} in the token's
                ## {attrdefs} list, move past the ">" and return
                ## $self->{ct}.
7620            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7621            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7622            
7623        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7624          $self->{line_prev} = $self->{line};
7625          $self->{column_prev} = $self->{column};
7626          $self->{column}++;
7627          $self->{nc}
7628              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7629        } else {
7630          $self->{set_nc}->($self);
7631        }
7632      
7633            return  ($self->{ct}); # ATTLIST
7634            redo A;
7635          } elsif ($self->{nc} == -1) {
7636            ## XML5: No parse error.
                ## nc == -1 is presumably the end-of-input marker (TODO:
                ## confirm).  Report the unclosed declaration, still store
                ## $self->{ca} in {attrdefs}, and return $self->{ct}.
7637            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7638            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7639            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7640            
7641        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7642          $self->{line_prev} = $self->{line};
7643          $self->{column_prev} = $self->{column};
7644          $self->{column}++;
7645          $self->{nc}
7646              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7647        } else {
7648          $self->{set_nc}->($self);
7649        }
7650      
7651            return  ($self->{ct});
7652            redo A;
7653          } else {
                ## Any other character is appended to {default}.
7654            $self->{ca}->{default} .= chr $self->{nc};
7655            ## Stay in the state.
7656            
7657        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7658          $self->{line_prev} = $self->{line};
7659          $self->{column_prev} = $self->{column};
7660          $self->{column}++;
7661          $self->{nc}
7662              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7663        } else {
7664          $self->{set_nc}->($self);
7665        }
7666      
7667            redo A;
7668          }
7669        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE) {
              ## Dispatches on the current input character ($self->{nc})
              ## once the $self->{ca}->{default} string has been collected.
              ##
              ## NOTE: The repeated "if ($self->{char_buffer_pos} < length
              ## $self->{char_buffer}) {...} else {...}" block moves on to
              ## the next input character: it takes it from {char_buffer}
              ## (saving the old position in {line_prev}/{column_prev} and
              ## incrementing {column}) or, once the buffer is used up,
              ## calls the {set_nc} callback.
              ## NOTE: A "redo A;" placed right after a "return" is never
              ## reached.
7670          if ($is_space->{$self->{nc}}) {
7671            ## Stay in the state.
7672            
7673        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7674          $self->{line_prev} = $self->{line};
7675          $self->{column_prev} = $self->{column};
7676          $self->{column}++;
7677          $self->{nc}
7678              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7679        } else {
7680          $self->{set_nc}->($self);
7681        }
7682      
7683            redo A;
7684          } elsif ($self->{nc} == 0x0022) { # "
                ## Start with an empty value and read a double-quoted value.
7685            $self->{ca}->{value} = '';
7686            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7687            
7688        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7689          $self->{line_prev} = $self->{line};
7690          $self->{column_prev} = $self->{column};
7691          $self->{column}++;
7692          $self->{nc}
7693              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7694        } else {
7695          $self->{set_nc}->($self);
7696        }
7697      
7698            redo A;
7699          } elsif ($self->{nc} == 0x0027) { # '
                ## Start with an empty value and read a single-quoted value.
7700            $self->{ca}->{value} = '';
7701            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7702            
7703        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7704          $self->{line_prev} = $self->{line};
7705          $self->{column_prev} = $self->{column};
7706          $self->{column}++;
7707          $self->{nc}
7708              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7709        } else {
7710          $self->{set_nc}->($self);
7711        }
7712      
7713            redo A;
7714          } elsif ($self->{nc} == 0x003E) { # >
                ## End of the declaration: store $self->{ca} in the token's
                ## {attrdefs} list, move past the ">" and return
                ## $self->{ct}.
7715            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7716            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7717            
7718        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7719          $self->{line_prev} = $self->{line};
7720          $self->{column_prev} = $self->{column};
7721          $self->{column}++;
7722          $self->{nc}
7723              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7724        } else {
7725          $self->{set_nc}->($self);
7726        }
7727      
7728            return  ($self->{ct}); # ATTLIST
7729            redo A;
7730          } elsif ($self->{nc} == -1) {
7731            ## XML5: No parse error.
                ## nc == -1 is presumably the end-of-input marker (TODO:
                ## confirm).  Report the unclosed declaration, still store
                ## $self->{ca} in {attrdefs}, and return $self->{ct}.
7732            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7733            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7734            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7735            
7736        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7737          $self->{line_prev} = $self->{line};
7738          $self->{column_prev} = $self->{column};
7739          $self->{column}++;
7740          $self->{nc}
7741              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7742        } else {
7743          $self->{set_nc}->($self);
7744        }
7745      
7746            return  ($self->{ct});
7747            redo A;
7748          } else {
7749            ## XML5: Not defined yet.
                ## Any other character: when the collected {default} string
                ## is exactly 'FIXED', this character is handed to the
                ## unquoted value state; otherwise the definition is
                ## complete, so $self->{ca} is stored in {attrdefs} and the
                ## character is handed to DOCTYPE_ATTLIST_NAME_AFTER_STATE.
                ## NOTE(review): On the 'FIXED' path no parse error is
                ## reported and {ca}->{value} is not reset before entering
                ## the unquoted value state -- confirm this is intended.
7750            if ($self->{ca}->{default} eq 'FIXED') {
7751              $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
7752            } else {
7753              push @{$self->{ct}->{attrdefs}}, $self->{ca};
7754              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7755            }
7756            ## Reconsume.
7757            redo A;
7758          }
7759        } elsif ($self->{state} == AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE) {
              ## Both branches switch to DOCTYPE_ATTLIST_NAME_AFTER_STATE and
              ## let that state process the current character again.  The
              ## only difference is that a character other than white space,
              ## -1 or ">" is first reported as a parse error.
7760          if ($is_space->{$self->{nc}} or
7761              $self->{nc} == -1 or
7762              $self->{nc} == 0x003E) { # >
7763            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7764            ## Reconsume.
7765            redo A;
7766          } else {
7767            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before attr name'); ## TODO: type
7768            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7769            ## Reconsume.
7770            redo A;
7771          }
7772        } elsif ($self->{state} == NDATA_STATE) {
              ## Matches the keyword "NDATA" one character at a time, using
              ## the characters gathered so far in $self->{kwd}.
              ##
              ## NOTE: The repeated "if ($self->{char_buffer_pos} < length
              ## $self->{char_buffer}) {...} else {...}" block moves on to
              ## the next input character: it takes it from {char_buffer}
              ## (saving the old position in {line_prev}/{column_prev} and
              ## incrementing {column}) or, once the buffer is used up,
              ## calls the {set_nc} callback.
7773          ## ASCII case-insensitive
              ## The two anonymous arrays are lookup tables indexed by the
              ## current length of {kwd}: at lengths 1, 2 and 3 the expected
              ## letter is D, A and T (upper-case table first, lower-case
              ## table second).
              ## NOTE(review): For a {kwd} length of 0 or of 4 and more the
              ## lookup yields undef, which "==" treats as 0, so an {nc} of
              ## 0 would match -- verify that {nc} can never be 0 here.
7774          if ($self->{nc} == [
7775                undef,
7776                0x0044, # D
7777                0x0041, # A
7778                0x0054, # T
7779              ]->[length $self->{kwd}] or
7780              $self->{nc} == [
7781                undef,
7782                0x0064, # d
7783                0x0061, # a
7784                0x0074, # t
7785              ]->[length $self->{kwd}]) {
7786            
7787            ## Stay in the state.
7788            $self->{kwd} .= chr $self->{nc};
7789            
7790        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7791          $self->{line_prev} = $self->{line};
7792          $self->{column_prev} = $self->{column};
7793          $self->{column}++;
7794          $self->{nc}
7795              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7796        } else {
7797          $self->{set_nc}->($self);
7798        }
7799      
7800            redo A;
7801          } elsif ((length $self->{kwd}) == 4 and
7802                   ($self->{nc} == 0x0041 or # A
7803                    $self->{nc} == 0x0061)) { # a
                ## Fifth and last letter.  The keyword is accepted in any
                ## letter case, but anything other than the exact spelling
                ## "NDATA" (the first four letters in {kwd} plus an
                ## upper-case "A") is reported.  The final letter is not
                ## appended to {kwd}.
7804            if ($self->{kwd} ne 'NDAT' or $self->{nc} == 0x0061) { # a
7805              
7806              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
7807                              text => 'NDATA',
7808                              line => $self->{line_prev},
7809                              column => $self->{column_prev} - 4);
7810            } else {
7811              
7812            }
7813            $self->{state} = AFTER_NDATA_STATE;
7814            
7815        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7816          $self->{line_prev} = $self->{line};
7817          $self->{column_prev} = $self->{column};
7818          $self->{column}++;
7819          $self->{nc}
7820              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7821        } else {
7822          $self->{set_nc}->($self);
7823        }
7824      
7825            redo A;
7826          } else {
                ## The input does not spell the keyword.  The reported
                ## column is {column_prev} + 1 minus the number of
                ## characters already gathered in {kwd}; the bogus markup
                ## declaration state then processes this same character.
7827            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after literal', ## TODO: type
7828                            line => $self->{line_prev},
7829                            column => $self->{column_prev} + 1
7830                                - length $self->{kwd});
7831            
7832            $self->{state} = BOGUS_MD_STATE;
7833            ## Reconsume.
7834            redo A;
7835          }
7836        } elsif ($self->{state} == AFTER_NDATA_STATE) {
              ## Dispatches on the current input character ($self->{nc})
              ## right after the "NDATA" keyword has been matched.
              ##
              ## NOTE: The repeated "if ($self->{char_buffer_pos} < length
              ## $self->{char_buffer}) {...} else {...}" block moves on to
              ## the next input character: it takes it from {char_buffer}
              ## (saving the old position in {line_prev}/{column_prev} and
              ## incrementing {column}) or, once the buffer is used up,
              ## calls the {set_nc} callback.
              ## NOTE: A "redo A;" placed right after a "return" is never
              ## reached.
7837          if ($is_space->{$self->{nc}}) {
                ## White space after the keyword: a notation name may follow.
7838            $self->{state} = BEFORE_NOTATION_NAME_STATE;
7839            
7840        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7841          $self->{line_prev} = $self->{line};
7842          $self->{column_prev} = $self->{column};
7843          $self->{column}++;
7844          $self->{nc}
7845              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7846        } else {
7847          $self->{set_nc}->($self);
7848        }
7849      
7850            redo A;
7851          } elsif ($self->{nc} == 0x003E) { # >
                ## The declaration ends without a notation name: report it,
                ## move past the ">" and return $self->{ct}.
7852            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no notation name'); ## TODO: type
7853            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7854            
7855        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7856          $self->{line_prev} = $self->{line};
7857          $self->{column_prev} = $self->{column};
7858          $self->{column}++;
7859          $self->{nc}
7860              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7861        } else {
7862          $self->{set_nc}->($self);
7863        }
7864      
7865            return  ($self->{ct}); # ENTITY
7866            redo A;
7867          } elsif ($self->{nc} == -1) {
                ## nc == -1 is presumably the end-of-input marker (TODO:
                ## confirm).  Report the unclosed declaration and return
                ## $self->{ct} as it stands.
7868            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7869            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7870            
7871        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7872          $self->{line_prev} = $self->{line};
7873          $self->{column_prev} = $self->{column};
7874          $self->{column}++;
7875          $self->{nc}
7876              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7877        } else {
7878          $self->{set_nc}->($self);
7879        }
7880      
7881            return  ($self->{ct}); # ENTITY
7882            redo A;
7883          } else {
                ## Any other character directly after the keyword is an
                ## error.  The reported column is {column_prev} + 1 minus
                ## the length of {kwd}; the bogus markup declaration state
                ## then processes this same character.
7884            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after literal', ## TODO: type
7885                            line => $self->{line_prev},
7886                            column => $self->{column_prev} + 1
7887                                - length $self->{kwd});
7888            $self->{state} = BOGUS_MD_STATE;
7889            ## Reconsume.
7890            redo A;
7891          }
7892        } elsif ($self->{state} == BEFORE_NOTATION_NAME_STATE) {
              ## Dispatches on the current input character ($self->{nc})
              ## before the first character of the name that is stored in
              ## $self->{ct}->{notation}.
              ##
              ## NOTE: The repeated "if ($self->{char_buffer_pos} < length
              ## $self->{char_buffer}) {...} else {...}" block moves on to
              ## the next input character: it takes it from {char_buffer}
              ## (saving the old position in {line_prev}/{column_prev} and
              ## incrementing {column}) or, once the buffer is used up,
              ## calls the {set_nc} callback.
              ## NOTE: A "redo A;" placed right after a "return" is never
              ## reached.
7893          if ($is_space->{$self->{nc}}) {
7894            ## Stay in the state.
7895            
7896        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7897          $self->{line_prev} = $self->{line};
7898          $self->{column_prev} = $self->{column};
7899          $self->{column}++;
7900          $self->{nc}
7901              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7902        } else {
7903          $self->{set_nc}->($self);
7904        }
7905      
7906            redo A;
7907          } elsif ($self->{nc} == 0x003E) { # >
                ## The declaration ends without a notation name: report it,
                ## move past the ">" and return $self->{ct}.
7908            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no notation name'); ## TODO: type
7909            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7910            
7911        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7912          $self->{line_prev} = $self->{line};
7913          $self->{column_prev} = $self->{column};
7914          $self->{column}++;
7915          $self->{nc}
7916              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7917        } else {
7918          $self->{set_nc}->($self);
7919        }
7920      
7921            return  ($self->{ct}); # ENTITY
7922            redo A;
7923          } elsif ($self->{nc} == -1) {
                ## nc == -1 is presumably the end-of-input marker (TODO:
                ## confirm).  Report the unclosed declaration and return
                ## $self->{ct} as it stands.
7924            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7925            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7926            
7927        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7928          $self->{line_prev} = $self->{line};
7929          $self->{column_prev} = $self->{column};
7930          $self->{column}++;
7931          $self->{nc}
7932              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7933        } else {
7934          $self->{set_nc}->($self);
7935        }
7936      
7937            return  ($self->{ct}); # ENTITY
7938            redo A;
7939          } else {
                ## Any other character becomes the first character of the
                ## token's {notation} string.
7940            $self->{ct}->{notation} = chr $self->{nc}; # ENTITY
7941            $self->{state} = NOTATION_NAME_STATE;
7942            
7943        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7944          $self->{line_prev} = $self->{line};
7945          $self->{column_prev} = $self->{column};
7946          $self->{column}++;
7947          $self->{nc}
7948              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7949        } else {
7950          $self->{set_nc}->($self);
7951        }
7952      
7953            redo A;
7954          }
7955        } elsif ($self->{state} == NOTATION_NAME_STATE) {
              ## Dispatches on the current input character ($self->{nc})
              ## while characters are being appended to
              ## $self->{ct}->{notation}.
              ##
              ## NOTE: The repeated "if ($self->{char_buffer_pos} < length
              ## $self->{char_buffer}) {...} else {...}" block moves on to
              ## the next input character: it takes it from {char_buffer}
              ## (saving the old position in {line_prev}/{column_prev} and
              ## incrementing {column}) or, once the buffer is used up,
              ## calls the {set_nc} callback.
              ## NOTE: A "redo A;" placed right after a "return" is never
              ## reached.
7956          if ($is_space->{$self->{nc}}) {
                ## White space ends the name.
7957            $self->{state} = AFTER_MD_DEF_STATE;
7958            
7959        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7960          $self->{line_prev} = $self->{line};
7961          $self->{column_prev} = $self->{column};
7962          $self->{column}++;
7963          $self->{nc}
7964              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7965        } else {
7966          $self->{set_nc}->($self);
7967        }
7968      
7969            redo A;
7970          } elsif ($self->{nc} == 0x003E) { # >
                ## End of the declaration: move past the ">" and return
                ## $self->{ct}.  No parse error is reported on this path.
7971            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7972            
7973        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7974          $self->{line_prev} = $self->{line};
7975          $self->{column_prev} = $self->{column};
7976          $self->{column}++;
7977          $self->{nc}
7978              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7979        } else {
7980          $self->{set_nc}->($self);
7981        }
7982      
7983            return  ($self->{ct}); # ENTITY
7984            redo A;
7985          } elsif ($self->{nc} == -1) {
                ## nc == -1 is presumably the end-of-input marker (TODO:
                ## confirm).  Report the unclosed declaration and return
                ## $self->{ct} as it stands.
7986            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7987            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7988            
7989        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7990          $self->{line_prev} = $self->{line};
7991          $self->{column_prev} = $self->{column};
7992          $self->{column}++;
7993          $self->{nc}
7994              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7995        } else {
7996          $self->{set_nc}->($self);
7997        }
7998      
7999            return  ($self->{ct}); # ENTITY
8000            redo A;
8001          } else {
                ## Any other character is appended to the {notation} string.
8002            $self->{ct}->{notation} .= chr $self->{nc}; # ENTITY
8003            ## Stay in the state.
8004            
8005        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8006          $self->{line_prev} = $self->{line};
8007          $self->{column_prev} = $self->{column};
8008          $self->{column}++;
8009          $self->{nc}
8010              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8011        } else {
8012          $self->{set_nc}->($self);
8013        }
8014      
8015            redo A;
8016          }
8017        } elsif ($self->{state} == DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE) {
              ## Collects the characters of a double-quoted entity value
              ## into $self->{ct}->{value}.
              ##
              ## NOTE: The repeated "if ($self->{char_buffer_pos} < length
              ## $self->{char_buffer}) {...} else {...}" block moves on to
              ## the next input character: it takes it from {char_buffer}
              ## (saving the old position in {line_prev}/{column_prev} and
              ## incrementing {column}) or, once the buffer is used up,
              ## calls the {set_nc} callback.
              ## NOTE: A "redo A;" placed right after a "return" is never
              ## reached.
8018          if ($self->{nc} == 0x0022) { # "
                ## Closing quote: the value is complete.
8019            $self->{state} = AFTER_MD_DEF_STATE;
8020            
8021        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8022          $self->{line_prev} = $self->{line};
8023          $self->{column_prev} = $self->{column};
8024          $self->{column}++;
8025          $self->{nc}
8026              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8027        } else {
8028          $self->{set_nc}->($self);
8029        }
8030      
8031            redo A;
8032          } elsif ($self->{nc} == 0x0026) { # &
                ## Remember this state in {prev_state}, record the quote
                ## character of this literal in {entity_add}, and switch to
                ## the state that handles what follows the "&".
8033            $self->{prev_state} = $self->{state};
8034            $self->{state} = ENTITY_VALUE_ENTITY_STATE;
8035            $self->{entity_add} = 0x0022; # "
8036            
8037        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8038          $self->{line_prev} = $self->{line};
8039          $self->{column_prev} = $self->{column};
8040          $self->{column}++;
8041          $self->{nc}
8042              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8043        } else {
8044          $self->{set_nc}->($self);
8045        }
8046      
8047            redo A;
8048    ## TODO: %
8049          } elsif ($self->{nc} == -1) {
                ## nc == -1 is presumably the end-of-input marker (TODO:
                ## confirm).  Report the unterminated value and return
                ## $self->{ct}; the input is not advanced here, so the next
                ## state sees the same {nc}.
8050            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed entity value'); ## TODO: type
8051            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8052            ## Reconsume.
8053            return  ($self->{ct}); # ENTITY
8054            redo A;
8055          } else {
                ## Any other character is appended to the value; the state
                ## is left unchanged.
8056            $self->{ct}->{value} .= chr $self->{nc}; # ENTITY
8057            
8058        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8059          $self->{line_prev} = $self->{line};
8060          $self->{column_prev} = $self->{column};
8061          $self->{column}++;
8062          $self->{nc}
8063              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8064        } else {
8065          $self->{set_nc}->($self);
8066        }
8067      
8068            redo A;
8069          }
8070        } elsif ($self->{state} == DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE) {
              ## Collects the characters of a single-quoted entity value
              ## into $self->{ct}->{value}.
              ##
              ## NOTE: The repeated "if ($self->{char_buffer_pos} < length
              ## $self->{char_buffer}) {...} else {...}" block moves on to
              ## the next input character: it takes it from {char_buffer}
              ## (saving the old position in {line_prev}/{column_prev} and
              ## incrementing {column}) or, once the buffer is used up,
              ## calls the {set_nc} callback.
              ## NOTE: A "redo A;" placed right after a "return" is never
              ## reached.
8071          if ($self->{nc} == 0x0027) { # '
                ## Closing quote: the value is complete.
8072            $self->{state} = AFTER_MD_DEF_STATE;
8073            
8074        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8075          $self->{line_prev} = $self->{line};
8076          $self->{column_prev} = $self->{column};
8077          $self->{column}++;
8078          $self->{nc}
8079              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8080        } else {
8081          $self->{set_nc}->($self);
8082        }
8083      
8084            redo A;
8085          } elsif ($self->{nc} == 0x0026) { # &
                ## Remember this state in {prev_state}, record the quote
                ## character of this literal in {entity_add}, and switch to
                ## the state that handles what follows the "&".
8086            $self->{prev_state} = $self->{state};
8087            $self->{state} = ENTITY_VALUE_ENTITY_STATE;
8088            $self->{entity_add} = 0x0027; # '
8089            
8090        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8091          $self->{line_prev} = $self->{line};
8092          $self->{column_prev} = $self->{column};
8093          $self->{column}++;
8094          $self->{nc}
8095              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8096        } else {
8097          $self->{set_nc}->($self);
8098        }
8099      
8100            redo A;
8101    ## TODO: %
8102          } elsif ($self->{nc} == -1) {
                ## nc == -1 is presumably the end-of-input marker (TODO:
                ## confirm).  Report the unterminated value and return
                ## $self->{ct}; the input is not advanced here, so the next
                ## state sees the same {nc}.
8103            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed entity value'); ## TODO: type
8104            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8105            ## Reconsume.
8106            return  ($self->{ct}); # ENTITY
8107            redo A;
8108          } else {
                ## Any other character is appended to the value; the state
                ## is left unchanged.
8109            $self->{ct}->{value} .= chr $self->{nc}; # ENTITY
8110            
8111        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8112          $self->{line_prev} = $self->{line};
8113          $self->{column_prev} = $self->{column};
8114          $self->{column}++;
8115          $self->{nc}
8116              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8117        } else {
8118          $self->{set_nc}->($self);
8119        }
8120      
8121            redo A;
8122          }
8123        } elsif ($self->{state} == ENTITY_VALUE_ENTITY_STATE) {
8124          if ($is_space->{$self->{nc}} or
8125              {
8126                0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
8127                $self->{entity_add} => 1,
8128              }->{$self->{nc}}) {
8129            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero',
8130                            line => $self->{line_prev},
8131                            column => $self->{column_prev}
8132                                + ($self->{nc} == -1 ? 1 : 0));
8133            ## Don't consume
8134            ## Return nothing.
8135            #
8136          } elsif ($self->{nc} == 0x0023) { # #
8137            $self->{ca} = $self->{ct};
8138            $self->{state} = ENTITY_HASH_STATE;
8139            $self->{kwd} = '#';
8140            
8141        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8142          $self->{line_prev} = $self->{line};
8143          $self->{column_prev} = $self->{column};
8144          $self->{column}++;
8145          $self->{nc}
8146              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8147        } else {
8148          $self->{set_nc}->($self);
8149        }
8150      
8151            redo A;
8152          } else {
8153            #
8154          }
8155    
8156          $self->{ct}->{value} .= '&';
8157          $self->{state} = $self->{prev_state};
8158          ## Reconsume.
8159          redo A;
8160        } elsif ($self->{state} == AFTER_ELEMENT_NAME_STATE) {
8161          if ($is_space->{$self->{nc}}) {
8162            $self->{state} = BEFORE_ELEMENT_CONTENT_STATE;
8163            
8164        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8165          $self->{line_prev} = $self->{line};
8166          $self->{column_prev} = $self->{column};
8167          $self->{column}++;
8168          $self->{nc}
8169              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8170        } else {
8171          $self->{set_nc}->($self);
8172        }
8173      
8174            redo A;
8175          } elsif ($self->{nc} == 0x0028) { # (
8176            $self->{state} = AFTER_CM_GROUP_OPEN_STATE;
8177            $self->{ct}->{content} = ['('];
8178            $self->{group_depth} = 1;
8179            
8180        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8181          $self->{line_prev} = $self->{line};
8182          $self->{column_prev} = $self->{column};
8183          $self->{column}++;
8184          $self->{nc}
8185              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8186        } else {
8187          $self->{set_nc}->($self);
8188        }
8189      
8190            redo A;
8191          } elsif ($self->{nc} == 0x003E) { # >
8192            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md def'); ## TODO: type
8193            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8194            
8195        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8196          $self->{line_prev} = $self->{line};
8197          $self->{column_prev} = $self->{column};
8198          $self->{column}++;
8199          $self->{nc}
8200              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8201        } else {
8202          $self->{set_nc}->($self);
8203        }
8204      
8205            return  ($self->{ct}); # ELEMENT
8206            redo A;
8207          } elsif ($self->{nc} == -1) {
8208            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8209            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8210            
8211        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8212          $self->{line_prev} = $self->{line};
8213          $self->{column_prev} = $self->{column};
8214          $self->{column}++;
8215          $self->{nc}
8216              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8217        } else {
8218          $self->{set_nc}->($self);
8219        }
8220      
8221            return  ($self->{ct}); # ELEMENT
8222            redo A;
8223          } else {
8224            $self->{ct}->{content} = [chr $self->{nc}];
8225            $self->{state} = CONTENT_KEYWORD_STATE;
8226            
8227        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8228          $self->{line_prev} = $self->{line};
8229          $self->{column_prev} = $self->{column};
8230          $self->{column}++;
8231          $self->{nc}
8232              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8233        } else {
8234          $self->{set_nc}->($self);
8235        }
8236      
8237            redo A;
8238          }
8239        } elsif ($self->{state} == CONTENT_KEYWORD_STATE) {
8240          if ($is_space->{$self->{nc}}) {
8241            $self->{state} = AFTER_MD_DEF_STATE;
8242            
8243        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8244          $self->{line_prev} = $self->{line};
8245          $self->{column_prev} = $self->{column};
8246          $self->{column}++;
8247          $self->{nc}
8248              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8249        } else {
8250          $self->{set_nc}->($self);
8251        }
8252      
8253            redo A;
8254          } elsif ($self->{nc} == 0x003E) { # >
8255            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8256            
8257        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8258          $self->{line_prev} = $self->{line};
8259          $self->{column_prev} = $self->{column};
8260          $self->{column}++;
8261          $self->{nc}
8262              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8263        } else {
8264          $self->{set_nc}->($self);
8265        }
8266      
8267            return  ($self->{ct}); # ELEMENT
8268            redo A;
8269          } elsif ($self->{nc} == -1) {
8270            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8271            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8272            
8273        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8274          $self->{line_prev} = $self->{line};
8275          $self->{column_prev} = $self->{column};
8276          $self->{column}++;
8277          $self->{nc}
8278              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8279        } else {
8280          $self->{set_nc}->($self);
8281        }
8282      
8283            return  ($self->{ct}); # ELEMENT
8284            redo A;
8285          } else {
8286            $self->{ct}->{content}->[-1] .= chr $self->{nc}; # ELEMENT
8287            ## Stay in the state.
8288            
8289        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8290          $self->{line_prev} = $self->{line};
8291          $self->{column_prev} = $self->{column};
8292          $self->{column}++;
8293          $self->{nc}
8294              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8295        } else {
8296          $self->{set_nc}->($self);
8297        }
8298      
8299            redo A;
8300          }
8301        } elsif ($self->{state} == AFTER_CM_GROUP_OPEN_STATE) {
8302          if ($is_space->{$self->{nc}}) {
8303            ## Stay in the state.
8304            
8305        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8306          $self->{line_prev} = $self->{line};
8307          $self->{column_prev} = $self->{column};
8308          $self->{column}++;
8309          $self->{nc}
8310              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8311        } else {
8312          $self->{set_nc}->($self);
8313        }
8314      
8315            redo A;
8316          } elsif ($self->{nc} == 0x0028) { # (
8317            $self->{group_depth}++;
8318            push @{$self->{ct}->{content}}, chr $self->{nc};
8319            ## Stay in the state.
8320            
8321        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8322          $self->{line_prev} = $self->{line};
8323          $self->{column_prev} = $self->{column};
8324          $self->{column}++;
8325          $self->{nc}
8326              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8327        } else {
8328          $self->{set_nc}->($self);
8329        }
8330      
8331            redo A;
8332          } elsif ($self->{nc} == 0x007C or # |
8333                   $self->{nc} == 0x002C) { # ,
8334            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty element name'); ## TODO: type
8335            ## Stay in the state.
8336            
8337        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8338          $self->{line_prev} = $self->{line};
8339          $self->{column_prev} = $self->{column};
8340          $self->{column}++;
8341          $self->{nc}
8342              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8343        } else {
8344          $self->{set_nc}->($self);
8345        }
8346      
8347            redo A;
8348          } elsif ($self->{nc} == 0x0029) { # )
8349            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty element name'); ## TODO: type
8350            push @{$self->{ct}->{content}}, chr $self->{nc};
8351            $self->{group_depth}--;
8352            $self->{state} = AFTER_CM_GROUP_CLOSE_STATE;
8353            
8354        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8355          $self->{line_prev} = $self->{line};
8356          $self->{column_prev} = $self->{column};
8357          $self->{column}++;
8358          $self->{nc}
8359              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8360        } else {
8361          $self->{set_nc}->($self);
8362        }
8363      
8364            redo A;
8365          } elsif ($self->{nc} == 0x003E) { # >
8366            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8367            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8368            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8369            
8370        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8371          $self->{line_prev} = $self->{line};
8372          $self->{column_prev} = $self->{column};
8373          $self->{column}++;
8374          $self->{nc}
8375              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8376        } else {
8377          $self->{set_nc}->($self);
8378        }
8379      
8380            return  ($self->{ct}); # ELEMENT
8381            redo A;
8382          } elsif ($self->{nc} == -1) {
8383            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8384            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8385            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8386            
8387        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8388          $self->{line_prev} = $self->{line};
8389          $self->{column_prev} = $self->{column};
8390          $self->{column}++;
8391          $self->{nc}
8392              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8393        } else {
8394          $self->{set_nc}->($self);
8395        }
8396      
8397            return  ($self->{ct}); # ELEMENT
8398            redo A;
8399          } else {
8400            push @{$self->{ct}->{content}}, chr $self->{nc};
8401            $self->{state} = CM_ELEMENT_NAME_STATE;
8402            
8403        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8404          $self->{line_prev} = $self->{line};
8405          $self->{column_prev} = $self->{column};
8406          $self->{column}++;
8407          $self->{nc}
8408              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8409        } else {
8410          $self->{set_nc}->($self);
8411        }
8412      
8413            redo A;
8414          }
8415        } elsif ($self->{state} == CM_ELEMENT_NAME_STATE) {
8416          if ($is_space->{$self->{nc}}) {
8417            $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8418            
8419        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8420          $self->{line_prev} = $self->{line};
8421          $self->{column_prev} = $self->{column};
8422          $self->{column}++;
8423          $self->{nc}
8424              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8425        } else {
8426          $self->{set_nc}->($self);
8427        }
8428      
8429            redo A;
8430          } elsif ($self->{nc} == 0x002A or # *
8431                   $self->{nc} == 0x002B or # +
8432                   $self->{nc} == 0x003F) { # ?
8433            push @{$self->{ct}->{content}}, chr $self->{nc};
8434            $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8435            
8436        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8437          $self->{line_prev} = $self->{line};
8438          $self->{column_prev} = $self->{column};
8439          $self->{column}++;
8440          $self->{nc}
8441              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8442        } else {
8443          $self->{set_nc}->($self);
8444        }
8445      
8446            redo A;
8447          } elsif ($self->{nc} == 0x007C or # |
8448                   $self->{nc} == 0x002C) { # ,
8449            push @{$self->{ct}->{content}}, $self->{nc} == 0x007C ? ' | ' : ', ';
8450            $self->{state} = AFTER_CM_GROUP_OPEN_STATE;
8451            
8452        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8453          $self->{line_prev} = $self->{line};
8454          $self->{column_prev} = $self->{column};
8455          $self->{column}++;
8456          $self->{nc}
8457              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8458        } else {
8459          $self->{set_nc}->($self);
8460        }
8461      
8462            redo A;
8463          } elsif ($self->{nc} == 0x0029) { # )
8464            $self->{group_depth}--;
8465            push @{$self->{ct}->{content}}, chr $self->{nc};
8466            $self->{state} = AFTER_CM_GROUP_CLOSE_STATE;
8467            
8468        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8469          $self->{line_prev} = $self->{line};
8470          $self->{column_prev} = $self->{column};
8471          $self->{column}++;
8472          $self->{nc}
8473              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8474        } else {
8475          $self->{set_nc}->($self);
8476        }
8477      
8478            redo A;
8479          } elsif ($self->{nc} == 0x003E) { # >
8480            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8481            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8482            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8483            
8484        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8485          $self->{line_prev} = $self->{line};
8486          $self->{column_prev} = $self->{column};
8487          $self->{column}++;
8488          $self->{nc}
8489              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8490        } else {
8491          $self->{set_nc}->($self);
8492        }
8493      
8494            return  ($self->{ct}); # ELEMENT
8495            redo A;
8496          } elsif ($self->{nc} == -1) {
8497            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8498            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8499            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8500            
8501        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8502          $self->{line_prev} = $self->{line};
8503          $self->{column_prev} = $self->{column};
8504          $self->{column}++;
8505          $self->{nc}
8506              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8507        } else {
8508          $self->{set_nc}->($self);
8509        }
8510      
8511            return  ($self->{ct}); # ELEMENT
8512            redo A;
8513          } else {
8514            $self->{ct}->{content}->[-1] .= chr $self->{nc};
8515            ## Stay in the state.
8516            
8517        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8518          $self->{line_prev} = $self->{line};
8519          $self->{column_prev} = $self->{column};
8520          $self->{column}++;
8521          $self->{nc}
8522              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8523        } else {
8524          $self->{set_nc}->($self);
8525        }
8526      
8527            redo A;
8528          }
8529        } elsif ($self->{state} == AFTER_CM_ELEMENT_NAME_STATE) {
8530          if ($is_space->{$self->{nc}}) {
8531            ## Stay in the state.
8532            
8533        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8534          $self->{line_prev} = $self->{line};
8535          $self->{column_prev} = $self->{column};
8536          $self->{column}++;
8537          $self->{nc}
8538              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8539        } else {
8540          $self->{set_nc}->($self);
8541        }
8542      
8543            redo A;
8544          } elsif ($self->{nc} == 0x007C or # |
8545                   $self->{nc} == 0x002C) { # ,
8546            push @{$self->{ct}->{content}}, $self->{nc} == 0x007C ? ' | ' : ', ';
8547            $self->{state} = AFTER_CM_GROUP_OPEN_STATE;
8548            
8549        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8550          $self->{line_prev} = $self->{line};
8551          $self->{column_prev} = $self->{column};
8552          $self->{column}++;
8553          $self->{nc}
8554              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8555        } else {
8556          $self->{set_nc}->($self);
8557        }
8558      
8559            redo A;
8560          } elsif ($self->{nc} == 0x0029) { # )
8561            $self->{group_depth}--;
8562            push @{$self->{ct}->{content}}, chr $self->{nc};
8563            $self->{state} = AFTER_CM_GROUP_CLOSE_STATE;
8564            
8565        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8566          $self->{line_prev} = $self->{line};
8567          $self->{column_prev} = $self->{column};
8568          $self->{column}++;
8569          $self->{nc}
8570              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8571        } else {
8572          $self->{set_nc}->($self);
8573        }
8574      
8575            redo A;
8576          } elsif ($self->{nc} == 0x003E) { # >
8577            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8578            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8579            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8580            
8581        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8582          $self->{line_prev} = $self->{line};
8583          $self->{column_prev} = $self->{column};
8584          $self->{column}++;
8585          $self->{nc}
8586              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8587        } else {
8588          $self->{set_nc}->($self);
8589        }
8590      
8591            return  ($self->{ct}); # ELEMENT
8592            redo A;
8593          } elsif ($self->{nc} == -1) {
8594            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8595            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8596            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8597            
8598        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8599          $self->{line_prev} = $self->{line};
8600          $self->{column_prev} = $self->{column};
8601          $self->{column}++;
8602          $self->{nc}
8603              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8604        } else {
8605          $self->{set_nc}->($self);
8606        }
8607      
8608            return  ($self->{ct}); # ELEMENT
8609            redo A;
8610          } else {
8611            $self->{parse_error}->(level => $self->{level}->{must}, type => 'after element name'); ## TODO: type
8612            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8613            $self->{state} = BOGUS_MD_STATE;
8614            
8615        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8616          $self->{line_prev} = $self->{line};
8617          $self->{column_prev} = $self->{column};
8618          $self->{column}++;
8619          $self->{nc}
8620              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8621        } else {
8622          $self->{set_nc}->($self);
8623        }
8624      
8625            redo A;
8626          }
8627        } elsif ($self->{state} == AFTER_CM_GROUP_CLOSE_STATE) {
8628          if ($is_space->{$self->{nc}}) {
8629            if ($self->{group_depth}) {
8630              $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8631            } else {
8632              $self->{state} = AFTER_MD_DEF_STATE;
8633            }
8634            
8635        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8636          $self->{line_prev} = $self->{line};
8637          $self->{column_prev} = $self->{column};
8638          $self->{column}++;
8639          $self->{nc}
8640              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8641        } else {
8642          $self->{set_nc}->($self);
8643        }
8644      
8645            redo A;
8646          } elsif ($self->{nc} == 0x002A or # *
8647                   $self->{nc} == 0x002B or # +
8648                   $self->{nc} == 0x003F) { # ?
8649            push @{$self->{ct}->{content}}, chr $self->{nc};
8650            if ($self->{group_depth}) {
8651              $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8652            } else {
8653              $self->{state} = AFTER_MD_DEF_STATE;
8654            }
8655            
8656        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8657          $self->{line_prev} = $self->{line};
8658          $self->{column_prev} = $self->{column};
8659          $self->{column}++;
8660          $self->{nc}
8661              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8662        } else {
8663          $self->{set_nc}->($self);
8664        }
8665      
8666            redo A;
8667          } elsif ($self->{nc} == 0x0029) { # )
8668            if ($self->{group_depth}) {
8669              $self->{group_depth}--;
8670              push @{$self->{ct}->{content}}, chr $self->{nc};
8671              ## Stay in the state.
8672              
8673        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8674          $self->{line_prev} = $self->{line};
8675          $self->{column_prev} = $self->{column};
8676          $self->{column}++;
8677          $self->{nc}
8678              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8679        } else {
8680          $self->{set_nc}->($self);
8681        }
8682      
8683              redo A;
8684            } else {
8685              $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after md def'); ## TODO: type
8686              $self->{state} = BOGUS_MD_STATE;
8687              ## Reconsume.
8688              redo A;
8689            }
8690          } elsif ($self->{nc} == 0x003E) { # >
8691            if ($self->{group_depth}) {
8692              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8693              push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8694            }
8695            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8696            
8697        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8698          $self->{line_prev} = $self->{line};
8699          $self->{column_prev} = $self->{column};
8700          $self->{column}++;
8701          $self->{nc}
8702              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8703        } else {
8704          $self->{set_nc}->($self);
8705        }
8706      
8707            return  ($self->{ct}); # ELEMENT
8708            redo A;
8709          } elsif ($self->{nc} == -1) {
8710            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8711            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8712            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8713            
8714        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8715          $self->{line_prev} = $self->{line};
8716          $self->{column_prev} = $self->{column};
8717          $self->{column}++;
8718          $self->{nc}
8719              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8720        } else {
8721          $self->{set_nc}->($self);
8722        }
8723      
8724            return  ($self->{ct}); # ELEMENT
8725            redo A;
8726          } else {
8727            if ($self->{group_depth}) {
8728              $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8729            } else {
8730              $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after md def'); ## TODO: type
8731              $self->{state} = BOGUS_MD_STATE;
8732            }
8733            ## Reconsume.
8734            redo A;
8735          }
8736        } elsif ($self->{state} == AFTER_MD_DEF_STATE) {
8737          if ($is_space->{$self->{nc}}) {
8738            ## Stay in the state.
8739            
8740        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8741          $self->{line_prev} = $self->{line};
8742          $self->{column_prev} = $self->{column};
8743          $self->{column}++;
8744          $self->{nc}
8745              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8746        } else {
8747          $self->{set_nc}->($self);
8748        }
8749      
8750            redo A;
8751          } elsif ($self->{nc} == 0x003E) { # >
8752            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8753            
8754        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8755          $self->{line_prev} = $self->{line};
8756          $self->{column_prev} = $self->{column};
8757          $self->{column}++;
8758          $self->{nc}
8759              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8760        } else {
8761          $self->{set_nc}->($self);
8762        }
8763      
8764            return  ($self->{ct}); # ENTITY/ELEMENT
8765            redo A;
8766          } elsif ($self->{nc} == -1) {
8767            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8768            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8769            
8770        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8771          $self->{line_prev} = $self->{line};
8772          $self->{column_prev} = $self->{column};
8773          $self->{column}++;
8774          $self->{nc}
8775              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8776        } else {
8777          $self->{set_nc}->($self);
8778        }
8779      
8780            return  ($self->{ct}); # ENTITY/ELEMENT
8781            redo A;
8782          } else {
8783            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after md def'); ## TODO: type
8784            $self->{state} = BOGUS_MD_STATE;
8785            ## Reconsume.
8786            redo A;
8787          }
8788        } elsif ($self->{state} == BOGUS_MD_STATE) {
8789          if ($self->{nc} == 0x003E) { # >
8790            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8791            
8792        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8793          $self->{line_prev} = $self->{line};
8794          $self->{column_prev} = $self->{column};
8795          $self->{column}++;
8796          $self->{nc}
8797              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8798        } else {
8799          $self->{set_nc}->($self);
8800        }
8801      
8802            return  ($self->{ct}); # ATTLIST/ENTITY/NOTATION
8803            redo A;
8804          } elsif ($self->{nc} == -1) {
8805            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8806            ## Reconsume.
8807            return  ($self->{ct}); # ATTLIST/ENTITY/NOTATION
8808            redo A;
8809          } else {
8810            ## Stay in the state.
8811            
8812        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8813          $self->{line_prev} = $self->{line};
8814          $self->{column_prev} = $self->{column};
8815          $self->{column}++;
8816          $self->{nc}
8817              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8818        } else {
8819          $self->{set_nc}->($self);
8820        }
8821      
8822            redo A;
8823          }
8824      } else {      } else {
8825        die "$0: $self->{state}: Unknown state";        die "$0: $self->{state}: Unknown state";
8826      }      }
# Line 5206  sub _get_next_token ($) { Line 8831  sub _get_next_token ($) {
8831    
8832  1;  1;
8833  ## $Date$  ## $Date$
8834                                    

Legend:
Removed from v.1.13  
changed lines
  Added in v.1.32

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24