/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.13 by wakaba, Thu Oct 16 03:39:57 2008 UTC revision 1.15 by wakaba, Sat Oct 18 08:05:29 2008 UTC
# Line 16  BEGIN { Line 16  BEGIN {
16      PI_TOKEN      PI_TOKEN
17      ABORT_TOKEN      ABORT_TOKEN
18      END_OF_DOCTYPE_TOKEN      END_OF_DOCTYPE_TOKEN
19        ATTLIST_TOKEN
20        ELEMENT_TOKEN
21        GENERAL_ENTITY_TOKEN
22        PARAMETER_ENTITY_TOKEN
23        NOTATION_TOKEN
24    );    );
25        
26    our %EXPORT_TAGS = (    our %EXPORT_TAGS = (
# Line 29  BEGIN { Line 34  BEGIN {
34        PI_TOKEN        PI_TOKEN
35        ABORT_TOKEN        ABORT_TOKEN
36        END_OF_DOCTYPE_TOKEN        END_OF_DOCTYPE_TOKEN
37          ATTLIST_TOKEN
38          ELEMENT_TOKEN
39          GENERAL_ENTITY_TOKEN
40          PARAMETER_ENTITY_TOKEN
41          NOTATION_TOKEN
42      )],      )],
43    );    );
44  }  }
# Line 45  sub END_OF_FILE_TOKEN () { 5 } Line 55  sub END_OF_FILE_TOKEN () { 5 }
55  sub CHARACTER_TOKEN () { 6 }  sub CHARACTER_TOKEN () { 6 }
56  sub PI_TOKEN () { 7 } ## NOTE: XML only.  sub PI_TOKEN () { 7 } ## NOTE: XML only.
57  sub ABORT_TOKEN () { 8 } ## NOTE: For internal processing.  sub ABORT_TOKEN () { 8 } ## NOTE: For internal processing.
58  sub END_OF_DOCTYPE_TOKEN () { 9 } ## NOTE: XML only  sub END_OF_DOCTYPE_TOKEN () { 9 } ## NOTE: XML only.
59    sub ATTLIST_TOKEN () { 10 } ## NOTE: XML only.
60    sub ELEMENT_TOKEN () { 11 } ## NOTE: XML only.
61    sub GENERAL_ENTITY_TOKEN () { 12 } ## NOTE: XML only.
62    sub PARAMETER_ENTITY_TOKEN () { 13 } ## NOTE: XML only.
63    sub NOTATION_TOKEN () { 14 } ## NOTE: XML only.
64    
65  ## XML5: XML5 has "empty tag token".  In this implementation, it is  ## XML5: XML5 has "empty tag token".  In this implementation, it is
66  ## represented as a start tag token with $self->{self_closing} flag  ## represented as a start tag token with $self->{self_closing} flag
# Line 136  sub PI_AFTER_STATE () { 55 } Line 151  sub PI_AFTER_STATE () { 55 }
151  sub PI_DATA_AFTER_STATE () { 56 }  sub PI_DATA_AFTER_STATE () { 56 }
152  sub DOCTYPE_INTERNAL_SUBSET_STATE () { 57 }  sub DOCTYPE_INTERNAL_SUBSET_STATE () { 57 }
153  sub DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 58 }  sub DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 58 }
154  sub DOCTYPE_TAG_STATE () { 59 }  sub BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 59 }
155  sub BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 60 }  sub DOCTYPE_TAG_STATE () { 60 }
156    sub DOCTYPE_MARKUP_DECLARATION_OPEN_STATE () { 61 }
157    sub MD_ATTLIST_STATE () { 62 }
158    sub MD_E_STATE () { 63 }
159    sub MD_ELEMENT_STATE () { 64 }
160    sub MD_ENTITY_STATE () { 65 }
161    sub MD_NOTATION_STATE () { 66 }
162    sub DOCTYPE_MD_STATE () { 67 }
163    sub BEFORE_MD_NAME_STATE () { 68 }
164    sub MD_NAME_STATE () { 69 }
165    sub DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE () { 70 }
166    sub DOCTYPE_ATTLIST_NAME_AFTER_STATE () { 71 }
167    sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE () { 72 }
168    sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE () { 73 }
169    sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE () { 74 }
170    sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE () { 75 }
171    sub BEFORE_ALLOWED_TOKEN_STATE () { 76 }
172    sub ALLOWED_TOKEN_STATE () { 77 }
173    sub AFTER_ALLOWED_TOKEN_STATE () { 78 }
174    sub AFTER_ALLOWED_TOKENS_STATE () { 79 }
175    sub BEFORE_ATTR_DEFAULT_STATE () { 80 }
176    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE () { 81 }
177    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE () { 82 }
178    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE () { 83 }
179    sub AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE () { 84 }
180    
181  ## Tree constructor state constants (see Whatpm::HTML for the full  ## Tree constructor state constants (see Whatpm::HTML for the full
182  ## list and descriptions)  ## list and descriptions)
# Line 1711  sub _get_next_token ($) { Line 1750  sub _get_next_token ($) {
1750          redo A;          redo A;
1751        }        }
1752      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1753        ## XML5: "Tag attribute value double quoted state".        ## XML5: "Tag attribute value double quoted state" and "DOCTYPE
1754          ## ATTLIST attribute value double quoted state".
1755                
1756        if ($self->{nc} == 0x0022) { # "        if ($self->{nc} == 0x0022) { # "
1757                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1758          ## XML5: "Tag attribute name before state".            
1759          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;            ## XML5: "DOCTYPE ATTLIST name after state".
1760              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1761              $self->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE;
1762            } else {
1763              
1764              ## XML5: "Tag attribute name before state".
1765              $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1766            }
1767                    
1768      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1769        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1757  sub _get_next_token ($) { Line 1804  sub _get_next_token ($) {
1804          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1805                        
1806            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
1807    
1808              $self->{state} = DATA_STATE;
1809              $self->{s_kwd} = '';
1810              ## reconsume
1811              return  ($self->{ct}); # start tag
1812              redo A;
1813          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1814            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1815            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1766  sub _get_next_token ($) { Line 1819  sub _get_next_token ($) {
1819              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
1820                            
1821            }            }
1822    
1823              $self->{state} = DATA_STATE;
1824              $self->{s_kwd} = '';
1825              ## reconsume
1826              return  ($self->{ct}); # end tag
1827              redo A;
1828            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1829              ## XML5: No parse error above; not defined yet.
1830              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1831              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1832              ## Reconsume.
1833              return  ($self->{ct}); # ATTLIST
1834              redo A;
1835          } else {          } else {
1836            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1837          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
1838        } else {        } else {
1839            ## XML5 [ATTLIST]: Not defined yet.
1840          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
1841                        
1842            ## XML5: Not a parse error.            ## XML5: Not a parse error.
# Line 1804  sub _get_next_token ($) { Line 1864  sub _get_next_token ($) {
1864          redo A;          redo A;
1865        }        }
1866      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1867        ## XML5: "Tag attribute value single quoted state".        ## XML5: "Tag attribute value single quoted state" and "DOCTYPE
1868          ## ATTLIST attribute value single quoted state".
1869    
1870        if ($self->{nc} == 0x0027) { # '        if ($self->{nc} == 0x0027) { # '
1871                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1872          ## XML5: "Before attribute name state" (sic).            
1873          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;            ## XML5: "DOCTYPE ATTLIST name after state".
1874              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1875              $self->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE;
1876            } else {
1877              
1878              ## XML5: "Before attribute name state" (sic).
1879              $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1880            }
1881                    
1882      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1883        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1850  sub _get_next_token ($) { Line 1918  sub _get_next_token ($) {
1918          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1919                        
1920            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
1921    
1922              $self->{state} = DATA_STATE;
1923              $self->{s_kwd} = '';
1924              ## reconsume
1925              return  ($self->{ct}); # start tag
1926              redo A;
1927          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1928            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1929            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1859  sub _get_next_token ($) { Line 1933  sub _get_next_token ($) {
1933              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
1934                            
1935            }            }
1936    
1937              $self->{state} = DATA_STATE;
1938              $self->{s_kwd} = '';
1939              ## reconsume
1940              return  ($self->{ct}); # end tag
1941              redo A;
1942            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1943              ## XML5: No parse error above; not defined yet.
1944              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1945              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1946              ## Reconsume.
1947              return  ($self->{ct}); # ATTLIST
1948              redo A;
1949          } else {          } else {
1950            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1951          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
1952        } else {        } else {
1953            ## XML5 [ATTLIST]: Not defined yet.
1954          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
1955                        
1956            ## XML5: Not a parse error.            ## XML5: Not a parse error.
# Line 1900  sub _get_next_token ($) { Line 1981  sub _get_next_token ($) {
1981        ## XML5: "Tag attribute value unquoted state".        ## XML5: "Tag attribute value unquoted state".
1982    
1983        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
1984                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1985          ## XML5: "Tag attribute name before state".            
1986          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;            push @{$self->{ct}->{attrdefs}}, $self->{ca};
1987              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
1988            } else {
1989              
1990              ## XML5: "Tag attribute name before state".
1991              $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1992            }
1993                    
1994      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1995        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1943  sub _get_next_token ($) { Line 2030  sub _get_next_token ($) {
2030          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
2031                        
2032            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
2033    
2034              $self->{state} = DATA_STATE;
2035              $self->{s_kwd} = '';
2036              
2037        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2038          $self->{line_prev} = $self->{line};
2039          $self->{column_prev} = $self->{column};
2040          $self->{column}++;
2041          $self->{nc}
2042              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
2043        } else {
2044          $self->{set_nc}->($self);
2045        }
2046      
2047              return  ($self->{ct}); # start tag
2048              redo A;
2049          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
2050            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
2051            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1952  sub _get_next_token ($) { Line 2055  sub _get_next_token ($) {
2055              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
2056                            
2057            }            }
2058          } else {  
2059            die "$0: $self->{ct}->{type}: Unknown token type";            $self->{state} = DATA_STATE;
2060          }            $self->{s_kwd} = '';
2061          $self->{state} = DATA_STATE;            
         $self->{s_kwd} = '';  
           
2062      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2063        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
2064        $self->{column_prev} = $self->{column};        $self->{column_prev} = $self->{column};
# Line 1968  sub _get_next_token ($) { Line 2069  sub _get_next_token ($) {
2069        $self->{set_nc}->($self);        $self->{set_nc}->($self);
2070      }      }
2071        
2072              return  ($self->{ct}); # end tag
2073          return  ($self->{ct}); # start tag or end tag            redo A;
2074            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
2075          redo A;            push @{$self->{ct}->{attrdefs}}, $self->{ca};
2076              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2077              
2078        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2079          $self->{line_prev} = $self->{line};
2080          $self->{column_prev} = $self->{column};
2081          $self->{column}++;
2082          $self->{nc}
2083              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
2084        } else {
2085          $self->{set_nc}->($self);
2086        }
2087      
2088              return  ($self->{ct}); # ATTLIST
2089              redo A;
2090            } else {
2091              die "$0: $self->{ct}->{type}: Unknown token type";
2092            }
2093        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');  
2094          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
2095                        
2096              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');
2097            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
2098    
2099              $self->{state} = DATA_STATE;
2100              $self->{s_kwd} = '';
2101              ## reconsume
2102              return  ($self->{ct}); # start tag
2103              redo A;
2104          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
2105              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');
2106            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
2107            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
2108                            
# Line 1986  sub _get_next_token ($) { Line 2111  sub _get_next_token ($) {
2111              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
2112                            
2113            }            }
2114    
2115              $self->{state} = DATA_STATE;
2116              $self->{s_kwd} = '';
2117              ## reconsume
2118              return  ($self->{ct}); # end tag
2119              redo A;
2120            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
2121              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
2122              push @{$self->{ct}->{attrdefs}}, $self->{ca};
2123              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2124              ## Reconsume.
2125              return  ($self->{ct}); # ATTLIST
2126              redo A;
2127          } else {          } else {
2128            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
2129          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
2130        } else {        } else {
2131          if ({          if ({
2132               0x0022 => 1, # "               0x0022 => 1, # "
# Line 2188  sub _get_next_token ($) { Line 2319  sub _get_next_token ($) {
2319          redo A;          redo A;
2320        }        }
2321      } elsif ($self->{state} == BOGUS_COMMENT_STATE) {      } elsif ($self->{state} == BOGUS_COMMENT_STATE) {
2322          ## XML5: "Bogus comment state" and "DOCTYPE bogus comment state".
2323    
2324        ## NOTE: Unlike spec's "bogus comment state", this implementation        ## NOTE: Unlike spec's "bogus comment state", this implementation
2325        ## consumes characters one-by-one basis.        ## consumes characters one-by-one basis.
2326                
# Line 2249  sub _get_next_token ($) { Line 2382  sub _get_next_token ($) {
2382          redo A;          redo A;
2383        }        }
2384      } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {      } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {
2385        ## XML5: "Markup declaration state" and "DOCTYPE markup        ## XML5: "Markup declaration state".
       ## declaration state".  
2386                
2387        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2388                    
# Line 2648  sub _get_next_token ($) { Line 2780  sub _get_next_token ($) {
2780          redo A;          redo A;
2781        }        }
2782      } elsif ($self->{state} == COMMENT_STATE) {      } elsif ($self->{state} == COMMENT_STATE) {
2783          ## XML5: "Comment state" and "DOCTYPE comment state".
2784    
2785        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2786                    
2787          $self->{state} = COMMENT_END_DASH_STATE;          $self->{state} = COMMENT_END_DASH_STATE;
# Line 2700  sub _get_next_token ($) { Line 2834  sub _get_next_token ($) {
2834          redo A;          redo A;
2835        }        }
2836      } elsif ($self->{state} == COMMENT_END_DASH_STATE) {      } elsif ($self->{state} == COMMENT_END_DASH_STATE) {
2837        ## XML5: "comment dash state".        ## XML5: "Comment dash state" and "DOCTYPE comment dash state".
2838    
2839        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2840                    
# Line 2750  sub _get_next_token ($) { Line 2884  sub _get_next_token ($) {
2884          redo A;          redo A;
2885        }        }
2886      } elsif ($self->{state} == COMMENT_END_STATE) {      } elsif ($self->{state} == COMMENT_END_STATE) {
2887          ## XML5: "Comment end state" and "DOCTYPE comment end state".
2888    
2889        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2890          if ($self->{in_subset}) {          if ($self->{in_subset}) {
2891                        
# Line 4720  sub _get_next_token ($) { Line 4856  sub _get_next_token ($) {
4856      ## XML-only states      ## XML-only states
4857    
4858      } elsif ($self->{state} == PI_STATE) {      } elsif ($self->{state} == PI_STATE) {
4859          ## XML5: "Pi state" and "DOCTYPE pi state".
4860    
4861        if ($is_space->{$self->{nc}} or        if ($is_space->{$self->{nc}} or
4862            $self->{nc} == 0x003F or # ? ## XML5: Same as "Anything else"            $self->{nc} == 0x003F or # ?
4863            $self->{nc} == -1) {            $self->{nc} == -1) {
4864            ## XML5: U+003F: "pi state": Same as "Anything else"; "DOCTYPE
4865            ## pi state": Switch to the "DOCTYPE pi after state".  EOF:
4866            ## "DOCTYPE pi state": Parse error, switch to the "data
4867            ## state".
4868          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare pio', ## TODO: type          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare pio', ## TODO: type
4869                          line => $self->{line_prev},                          line => $self->{line_prev},
4870                          column => $self->{column_prev}                          column => $self->{column_prev}
# Line 4737  sub _get_next_token ($) { Line 4879  sub _get_next_token ($) {
4879                        };                        };
4880          redo A;          redo A;
4881        } else {        } else {
4882            ## XML5: "DOCTYPE pi state": Stay in the state.
4883          $self->{ct} = {type => PI_TOKEN,          $self->{ct} = {type => PI_TOKEN,
4884                         target => chr $self->{nc},                         target => chr $self->{nc},
4885                         data => '',                         data => '',
# Line 4851  sub _get_next_token ($) { Line 4994  sub _get_next_token ($) {
4994        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4995          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type
4996          if ($self->{in_subset}) {          if ($self->{in_subset}) {
4997            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state"
4998          } else {          } else {
4999            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
5000            $self->{s_kwd} = '';            $self->{s_kwd} = '';
# Line 4879  sub _get_next_token ($) { Line 5022  sub _get_next_token ($) {
5022          redo A;          redo A;
5023        }        }
5024      } elsif ($self->{state} == PI_AFTER_STATE) {      } elsif ($self->{state} == PI_AFTER_STATE) {
5025          ## XML5: Part of "Pi after state".
5026    
5027        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
5028          if ($self->{in_subset}) {          if ($self->{in_subset}) {
5029            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
# Line 4928  sub _get_next_token ($) { Line 5073  sub _get_next_token ($) {
5073          redo A;          redo A;
5074        }        }
5075      } elsif ($self->{state} == PI_DATA_AFTER_STATE) {      } elsif ($self->{state} == PI_DATA_AFTER_STATE) {
5076        ## XML5: Same as "pi after state" in XML5        ## XML5: Same as "pi after state" and "DOCTYPE pi after state".
5077    
5078        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
5079          if ($self->{in_subset}) {          if ($self->{in_subset}) {
5080            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
# Line 5141  sub _get_next_token ($) { Line 5287  sub _get_next_token ($) {
5287        }        }
5288      } elsif ($self->{state} == DOCTYPE_TAG_STATE) {      } elsif ($self->{state} == DOCTYPE_TAG_STATE) {
5289        if ($self->{nc} == 0x0021) { # !        if ($self->{nc} == 0x0021) { # !
5290          $self->{state} = MARKUP_DECLARATION_OPEN_STATE;          $self->{state} = DOCTYPE_MARKUP_DECLARATION_OPEN_STATE;
5291                    
5292      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5293        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 5195  sub _get_next_token ($) { Line 5341  sub _get_next_token ($) {
5341        
5342          redo A;          redo A;
5343        }        }
5344        } elsif ($self->{state} == DOCTYPE_MARKUP_DECLARATION_OPEN_STATE) {
5345          ## XML5: "DOCTYPE markup declaration state".
5346          
5347          if ($self->{nc} == 0x002D) { # -
5348            $self->{state} = MD_HYPHEN_STATE;
5349            
5350        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5351          $self->{line_prev} = $self->{line};
5352          $self->{column_prev} = $self->{column};
5353          $self->{column}++;
5354          $self->{nc}
5355              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5356        } else {
5357          $self->{set_nc}->($self);
5358        }
5359      
5360            redo A;
5361          } elsif ($self->{nc} == 0x0045) { # E
5362            $self->{state} = MD_E_STATE;
5363            $self->{kwd} = chr $self->{nc};
5364            
5365        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5366          $self->{line_prev} = $self->{line};
5367          $self->{column_prev} = $self->{column};
5368          $self->{column}++;
5369          $self->{nc}
5370              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5371        } else {
5372          $self->{set_nc}->($self);
5373        }
5374      
5375            redo A;
5376          } elsif ($self->{nc} == 0x0041) { # A
5377            $self->{state} = MD_ATTLIST_STATE;
5378            $self->{kwd} = chr $self->{nc};
5379            
5380        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5381          $self->{line_prev} = $self->{line};
5382          $self->{column_prev} = $self->{column};
5383          $self->{column}++;
5384          $self->{nc}
5385              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5386        } else {
5387          $self->{set_nc}->($self);
5388        }
5389      
5390            redo A;
5391          } elsif ($self->{nc} == 0x004E) { # N
5392            $self->{state} = MD_NOTATION_STATE;
5393            $self->{kwd} = chr $self->{nc};
5394            
5395        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5396          $self->{line_prev} = $self->{line};
5397          $self->{column_prev} = $self->{column};
5398          $self->{column}++;
5399          $self->{nc}
5400              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5401        } else {
5402          $self->{set_nc}->($self);
5403        }
5404      
5405            redo A;
5406          } else {
5407            #
5408          }
5409          
5410          ## XML5: No parse error.
5411          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5412                          line => $self->{line_prev},
5413                          column => $self->{column_prev} - 1);
5414          ## Reconsume.
5415          $self->{state} = BOGUS_COMMENT_STATE;
5416          $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded.
5417          redo A;
5418        } elsif ($self->{state} == MD_E_STATE) {
5419          if ($self->{nc} == 0x004E) { # N
5420            $self->{state} = MD_ENTITY_STATE;
5421            $self->{kwd} .= chr $self->{nc};
5422            
5423        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5424          $self->{line_prev} = $self->{line};
5425          $self->{column_prev} = $self->{column};
5426          $self->{column}++;
5427          $self->{nc}
5428              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5429        } else {
5430          $self->{set_nc}->($self);
5431        }
5432      
5433            redo A;
5434          } elsif ($self->{nc} == 0x004C) { # L
5435            ## XML5: <!ELEMENT> not supported.
5436            $self->{state} = MD_ELEMENT_STATE;
5437            $self->{kwd} .= chr $self->{nc};
5438            
5439        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5440          $self->{line_prev} = $self->{line};
5441          $self->{column_prev} = $self->{column};
5442          $self->{column}++;
5443          $self->{nc}
5444              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5445        } else {
5446          $self->{set_nc}->($self);
5447        }
5448      
5449            redo A;
5450          } else {
5451            ## XML5: No parse error.
5452            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5453                            line => $self->{line_prev},
5454                            column => $self->{column_prev} - 2
5455                                + 1 * ($self->{nc} == -1));
5456            ## Reconsume.
5457            $self->{state} = BOGUS_COMMENT_STATE;
5458            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5459            redo A;
5460          }
5461        } elsif ($self->{state} == MD_ENTITY_STATE) { ## Matches the remaining letters of the "ENTITY" keyword.
5462          if ($self->{nc} == { ## Table: keyword prefix read so far => next expected character.
5463                'EN' => 0x0054, # T
5464                'ENT' => 0x0049, # I
5465                'ENTI' => 0x0054, # T
5466              }->{$self->{kwd}}) { ## NOTE(review): a prefix missing from the table yields undef, which == treats as 0; confirm nc can never be U+0000 here.
5467            ## Stay in the state.
5468            $self->{kwd} .= chr $self->{nc};
5469            
5470        if ($self->{char_buffer_pos} < length $self->{char_buffer}) { ## Advance: take the next character from char_buffer, else ask set_nc.
5471          $self->{line_prev} = $self->{line};
5472          $self->{column_prev} = $self->{column};
5473          $self->{column}++;
5474          $self->{nc}
5475              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5476        } else {
5477          $self->{set_nc}->($self);
5478        }
5479      
5480            redo A;
5481          } elsif ($self->{kwd} eq 'ENTIT' and
5482                   $self->{nc} == 0x0059) { # Y
5483            $self->{ct} = {type => GENERAL_ENTITY_TOKEN, name => '', text => '', ## Keyword complete: start a general entity token.
5484                           line => $self->{line_prev},
5485                           column => $self->{column_prev} - 6}; ## Same start column as the error arm below: 1 + length 'ENTIT'.
5486            $self->{state} = DOCTYPE_MD_STATE;
5487            
5488        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5489          $self->{line_prev} = $self->{line};
5490          $self->{column_prev} = $self->{column};
5491          $self->{column}++;
5492          $self->{nc}
5493              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5494        } else {
5495          $self->{set_nc}->($self);
5496        }
5497      
5498            redo A;
5499          } else { ## Anything else, including EOF: report it and continue as a bogus comment.
5500            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5501                            line => $self->{line_prev},
5502                            column => $self->{column_prev} - 1
5503                                - (length $self->{kwd})
5504                                + 1 * ($self->{nc} == -1)); ## One column is added back when nc is -1 (EOF).
5505            $self->{state} = BOGUS_COMMENT_STATE;
5506            ## Reconsume.
5507            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5508            redo A;
5509          }
5510        } elsif ($self->{state} == MD_ELEMENT_STATE) { ## Matches the remaining letters of the "ELEMENT" keyword.
5511          if ($self->{nc} == { ## Table: keyword prefix read so far => next expected character.
5512                'EL' => 0x0045, # E
5513                'ELE' => 0x004D, # M
5514                'ELEM' => 0x0045, # E
5515                'ELEME' => 0x004E, # N
5516              }->{$self->{kwd}}) {
5517            ## Stay in the state.
5518            $self->{kwd} .= chr $self->{nc};
5519            
5520        if ($self->{char_buffer_pos} < length $self->{char_buffer}) { ## Advance: take the next character from char_buffer, else ask set_nc.
5521          $self->{line_prev} = $self->{line};
5522          $self->{column_prev} = $self->{column};
5523          $self->{column}++;
5524          $self->{nc}
5525              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5526        } else {
5527          $self->{set_nc}->($self);
5528        }
5529      
5530            redo A;
5531          } elsif ($self->{kwd} eq 'ELEMEN' and
5532                   $self->{nc} == 0x0054) { # T
5533            $self->{ct} = {type => ELEMENT_TOKEN, name => '', ## Keyword complete: start an element declaration token.
5534                           line => $self->{line_prev},
5535                           column => $self->{column_prev} - 7}; ## 1 + length 'ELEMEN', the same start column the error arm below computes (was - 6, the "ENTITY" offset).
5536            $self->{state} = DOCTYPE_MD_STATE;
5537            
5538        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5539          $self->{line_prev} = $self->{line};
5540          $self->{column_prev} = $self->{column};
5541          $self->{column}++;
5542          $self->{nc}
5543              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5544        } else {
5545          $self->{set_nc}->($self);
5546        }
5547      
5548            redo A;
5549          } else { ## Anything else, including EOF: report it and continue as a bogus comment.
5550            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5551                            line => $self->{line_prev},
5552                            column => $self->{column_prev} - 1
5553                                - (length $self->{kwd})
5554                                + 1 * ($self->{nc} == -1)); ## One column is added back when nc is -1 (EOF).
5555            $self->{state} = BOGUS_COMMENT_STATE;
5556            ## Reconsume.
5557            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5558            redo A;
5559          }
5560        } elsif ($self->{state} == MD_ATTLIST_STATE) { ## Matches the remaining letters of the "ATTLIST" keyword.
5561          if ($self->{nc} == { ## Table: keyword prefix read so far => next expected character.
5562                'A' => 0x0054, # T
5563                'AT' => 0x0054, # T
5564                'ATT' => 0x004C, # L
5565                'ATTL' => 0x0049, # I
5566                'ATTLI' => 0x0053, # S
5567              }->{$self->{kwd}}) {
5568            ## Stay in the state.
5569            $self->{kwd} .= chr $self->{nc};
5570            
5571        if ($self->{char_buffer_pos} < length $self->{char_buffer}) { ## Advance: take the next character from char_buffer, else ask set_nc.
5572          $self->{line_prev} = $self->{line};
5573          $self->{column_prev} = $self->{column};
5574          $self->{column}++;
5575          $self->{nc}
5576              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5577        } else {
5578          $self->{set_nc}->($self);
5579        }
5580      
5581            redo A;
5582          } elsif ($self->{kwd} eq 'ATTLIS' and
5583                   $self->{nc} == 0x0054) { # T
5584            $self->{ct} = {type => ATTLIST_TOKEN, name => '', ## Keyword complete: start an attribute list declaration token.
5585                           attrdefs => [],
5586                           line => $self->{line_prev},
5587                           column => $self->{column_prev} - 7}; ## 1 + length 'ATTLIS', the same start column the error arm below computes (was - 6, the "ENTITY" offset).
5588            $self->{state} = DOCTYPE_MD_STATE;
5589            
5590        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5591          $self->{line_prev} = $self->{line};
5592          $self->{column_prev} = $self->{column};
5593          $self->{column}++;
5594          $self->{nc}
5595              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5596        } else {
5597          $self->{set_nc}->($self);
5598        }
5599      
5600            redo A;
5601          } else { ## Anything else, including EOF: report it and continue as a bogus comment.
5602            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5603                            line => $self->{line_prev},
5604                            column => $self->{column_prev} - 1
5605                                 - (length $self->{kwd})
5606                                 + 1 * ($self->{nc} == -1)); ## One column is added back when nc is -1 (EOF).
5607            $self->{state} = BOGUS_COMMENT_STATE;
5608            ## Reconsume.
5609            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5610            redo A;
5611          }
5612        } elsif ($self->{state} == MD_NOTATION_STATE) { ## Matches the remaining letters of the "NOTATION" keyword.
5613          if ($self->{nc} == { ## Table: keyword prefix read so far => next expected character.
5614                'N' => 0x004F, # O
5615                'NO' => 0x0054, # T
5616                'NOT' => 0x0041, # A
5617                'NOTA' => 0x0054, # T
5618                'NOTAT' => 0x0049, # I
5619                'NOTATI' => 0x004F, # O
5620              }->{$self->{kwd}}) {
5621            ## Stay in the state.
5622            $self->{kwd} .= chr $self->{nc};
5623            
5624        if ($self->{char_buffer_pos} < length $self->{char_buffer}) { ## Advance: take the next character from char_buffer, else ask set_nc.
5625          $self->{line_prev} = $self->{line};
5626          $self->{column_prev} = $self->{column};
5627          $self->{column}++;
5628          $self->{nc}
5629              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5630        } else {
5631          $self->{set_nc}->($self);
5632        }
5633      
5634            redo A;
5635          } elsif ($self->{kwd} eq 'NOTATIO' and
5636                   $self->{nc} == 0x004E) { # N
5637            $self->{ct} = {type => NOTATION_TOKEN, name => '', ## Keyword complete: start a notation declaration token.
5638                           line => $self->{line_prev},
5639                           column => $self->{column_prev} - 8}; ## 1 + length 'NOTATIO', the same start column the error arm below computes (was - 6, the "ENTITY" offset).
5640            $self->{state} = DOCTYPE_MD_STATE;
5641            
5642        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5643          $self->{line_prev} = $self->{line};
5644          $self->{column_prev} = $self->{column};
5645          $self->{column}++;
5646          $self->{nc}
5647              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5648        } else {
5649          $self->{set_nc}->($self);
5650        }
5651      
5652            redo A;
5653          } else { ## Anything else, including EOF: report it and continue as a bogus comment.
5654            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5655                            line => $self->{line_prev},
5656                            column => $self->{column_prev} - 1
5657                                - (length $self->{kwd})
5658                                + 1 * ($self->{nc} == -1)); ## One column is added back when nc is -1 (EOF).
5659            $self->{state} = BOGUS_COMMENT_STATE;
5660            ## Reconsume.
5661            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5662            redo A;
5663          }
5664        } elsif ($self->{state} == DOCTYPE_MD_STATE) { ## Entered right after a complete declaration keyword; decides what follows it.
5665          ## XML5: "DOCTYPE ENTITY state", "DOCTYPE ATTLIST state", and
5666          ## "DOCTYPE NOTATION state".
5667    
5668          if ($is_space->{$self->{nc}}) { ## White space after the keyword: go on to look for the name.
5669            ## XML5: [NOTATION] Switch to the "DOCTYPE NOTATION identifier state".
5670            $self->{state} = BEFORE_MD_NAME_STATE;
5671            
5672        if ($self->{char_buffer_pos} < length $self->{char_buffer}) { ## Advance: take the next character from char_buffer, else ask set_nc.
5673          $self->{line_prev} = $self->{line};
5674          $self->{column_prev} = $self->{column};
5675          $self->{column}++;
5676          $self->{nc}
5677              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5678        } else {
5679          $self->{set_nc}->($self);
5680        }
5681      
5682            redo A;
5683          } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
5684                   $self->{nc} == 0x0025) { # %
5685            ## XML5: Switch to the "DOCTYPE bogus comment state".  Here the "%" directly after the keyword is reported, then handled like a "%" that followed white space.
5686            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before md name'); ## TODO: type
5687            $self->{state} = DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE;
5688            
5689        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5690          $self->{line_prev} = $self->{line};
5691          $self->{column_prev} = $self->{column};
5692          $self->{column}++;
5693          $self->{nc}
5694              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5695        } else {
5696          $self->{set_nc}->($self);
5697        }
5698      
5699            redo A;
5700          } elsif ($self->{nc} == -1) { ## EOF inside the declaration.
5701            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
5702            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
5703            ## Reconsume.  The pending declaration token is not returned from this arm.
5704            redo A;
5705          } elsif ($self->{nc} == 0x003E) { # >
5706            ## XML5: Switch to the "DOCTYPE bogus comment state".  Here the declaration ends without a name; the pending token is not returned.
5707            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md name'); ## TODO: type
5708            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5709                    
5710        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5711          $self->{line_prev} = $self->{line};
5712          $self->{column_prev} = $self->{column};
5713          $self->{column}++;
5714          $self->{nc}
5715              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5716        } else {
5717          $self->{set_nc}->($self);
5718        }
5719      
5720            redo A;
5721          } else {
5722            ## XML5: Switch to the "DOCTYPE bogus comment state".  Here the missing space is reported and the current character is reconsumed by BEFORE_MD_NAME_STATE.
5723            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before md name'); ## TODO: type
5724            $self->{state} = BEFORE_MD_NAME_STATE;
5725            redo A;
5726          }
5727        } elsif ($self->{state} == BEFORE_MD_NAME_STATE) { ## Skips white space and waits for the first character of the declaration name.
5728          ## XML5: "DOCTYPE ENTITY parameter state", "DOCTYPE ENTITY type
5729          ## before state", "DOCTYPE ATTLIST name before state".
5730    
5731          if ($is_space->{$self->{nc}}) {
5732            ## Stay in the state.
5733            
5734        if ($self->{char_buffer_pos} < length $self->{char_buffer}) { ## Advance: take the next character from char_buffer, else ask set_nc.
5735          $self->{line_prev} = $self->{line};
5736          $self->{column_prev} = $self->{column};
5737          $self->{column}++;
5738          $self->{nc}
5739              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5740        } else {
5741          $self->{set_nc}->($self);
5742        }
5743      
5744            redo A;
5745          } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
5746                   $self->{nc} == 0x0025) { # %
5747            $self->{state} = DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE; ## "%" before the name of an entity declaration; no error on this path.
5748            
5749        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5750          $self->{line_prev} = $self->{line};
5751          $self->{column_prev} = $self->{column};
5752          $self->{column}++;
5753          $self->{nc}
5754              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5755        } else {
5756          $self->{set_nc}->($self);
5757        }
5758      
5759            redo A;
5760          } elsif ($self->{nc} == 0x003E) { # >
5761            ## XML5: Same as "Anything else".  Here the declaration ends without a name; the pending token is not returned.
5762            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md name'); ## TODO: type
5763            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5764            
5765        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5766          $self->{line_prev} = $self->{line};
5767          $self->{column_prev} = $self->{column};
5768          $self->{column}++;
5769          $self->{nc}
5770              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5771        } else {
5772          $self->{set_nc}->($self);
5773        }
5774      
5775            redo A;
5776          } elsif ($self->{nc} == -1) { ## EOF inside the declaration.
5777            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
5778            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
5779            ## Reconsume.  The pending declaration token is not returned from this arm.
5780            redo A;
5781          } else {
5782            ## XML5: [ATTLIST] Not defined yet.  Any other character becomes the first character of the name.
5783            $self->{ct}->{name} .= chr $self->{nc};
5784            $self->{state} = MD_NAME_STATE;
5785            
5786        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5787          $self->{line_prev} = $self->{line};
5788          $self->{column_prev} = $self->{column};
5789          $self->{column}++;
5790          $self->{nc}
5791              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5792        } else {
5793          $self->{set_nc}->($self);
5794        }
5795      
5796            redo A;
5797          }
5798        } elsif ($self->{state} == DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE) { ## Entered after the "%" of an entity declaration has been consumed.
5799          if ($is_space->{$self->{nc}}) {
5800            ## XML5: Switch to the "DOCTYPE ENTITY parameter state".  White space after "%": retag the token as a parameter entity.
5801            $self->{ct}->{type} = PARAMETER_ENTITY_TOKEN;
5802            $self->{state} = BEFORE_MD_NAME_STATE;
5803            
5804        if ($self->{char_buffer_pos} < length $self->{char_buffer}) { ## Advance: take the next character from char_buffer, else ask set_nc.
5805          $self->{line_prev} = $self->{line};
5806          $self->{column_prev} = $self->{column};
5807          $self->{column}++;
5808          $self->{nc}
5809              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5810        } else {
5811          $self->{set_nc}->($self);
5812        }
5813      
5814            redo A;
5815          } elsif ($self->{nc} == 0x003E) { # >
5816            ## XML5: Same as "Anything else".  Here the declaration ends without a name; the pending token is not returned.
5817            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md name'); ## TODO: type
5818            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5819            
5820        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5821          $self->{line_prev} = $self->{line};
5822          $self->{column_prev} = $self->{column};
5823          $self->{column}++;
5824          $self->{nc}
5825              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5826        } else {
5827          $self->{set_nc}->($self);
5828        }
5829      
5830            redo A;
5831          } elsif ($self->{nc} == -1) { ## EOF inside the declaration.
5832            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md');
5833            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
5834            ## Reconsume.  The pending declaration token is not returned from this arm.
5835            redo A;
5836          } else {
5837            ## XML5: No parse error.  Here "%" followed by a non-space character is reported and the rest is treated as a bogus comment.
5838            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space after ENTITY percent'); ## TODO: type
5839            $self->{state} = BOGUS_COMMENT_STATE;
5840            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5841            ## Reconsume.
5842            redo A;
5843          }
5844        } elsif ($self->{state} == MD_NAME_STATE) { ## Accumulates the declaration name into the current token.
5845          ## XML5: "DOCTYPE ENTITY name state" and "DOCTYPE ATTLIST name state".
5846          
5847          if ($is_space->{$self->{nc}}) {
5848            ## TODO: The next state is ATTLIST-specific, but it is set here whatever the type of the current token is.
5849            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
5850            
5851        if ($self->{char_buffer_pos} < length $self->{char_buffer}) { ## Advance: take the next character from char_buffer, else ask set_nc.
5852          $self->{line_prev} = $self->{line};
5853          $self->{column_prev} = $self->{column};
5854          $self->{column}++;
5855          $self->{nc}
5856              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5857        } else {
5858          $self->{set_nc}->($self);
5859        }
5860      
5861            redo A;
5862          } elsif ($self->{nc} == 0x003E) { # >
5863            if ($self->{ct}->{type} == ATTLIST_TOKEN) {
5864              # An ATTLIST declaration consisting of a name only is accepted without error.
5865            } else {
5866              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md body'); ## TODO: type
5867            }
5868            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5869            
5870        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5871          $self->{line_prev} = $self->{line};
5872          $self->{column_prev} = $self->{column};
5873          $self->{column}++;
5874          $self->{nc}
5875              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5876        } else {
5877          $self->{set_nc}->($self);
5878        }
5879      
5880            return  ($self->{ct}); # ELEMENT/ENTITY/ATTLIST/NOTATION
5881            redo A; ## Not reached: follows the return above.
5882          } elsif ($self->{nc} == -1) {
5883            ## XML5: [ATTLIST] No parse error.
5884            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md');
5885            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
5886            ## Reconsume.  The token built so far is still returned.
5887            return  ($self->{ct}); # ELEMENT/ENTITY/ATTLIST/NOTATION
5888            redo A; ## Not reached: follows the return above.
5889          } else {
5890            ## XML5: [ATTLIST] Not defined yet.  Any other character extends the name.
5891            $self->{ct}->{name} .= chr $self->{nc};
5892            ## Stay in the state.
5893            
5894        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5895          $self->{line_prev} = $self->{line};
5896          $self->{column_prev} = $self->{column};
5897          $self->{column}++;
5898          $self->{nc}
5899              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5900        } else {
5901          $self->{set_nc}->($self);
5902        }
5903      
5904            redo A;
5905          }
5906        } elsif ($self->{state} == DOCTYPE_ATTLIST_NAME_AFTER_STATE) { ## After the declaration name: skip white space, then end or start an attribute definition.
5907          if ($is_space->{$self->{nc}}) {
5908            ## Stay in the state.
5909            
5910        if ($self->{char_buffer_pos} < length $self->{char_buffer}) { ## Advance: take the next character from char_buffer, else ask set_nc.
5911          $self->{line_prev} = $self->{line};
5912          $self->{column_prev} = $self->{column};
5913          $self->{column}++;
5914          $self->{nc}
5915              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5916        } else {
5917          $self->{set_nc}->($self);
5918        }
5919      
5920            redo A;
5921          } elsif ($self->{nc} == 0x003E) { # >
5922            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## End of the declaration: return the token without error.
5923            
5924        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5925          $self->{line_prev} = $self->{line};
5926          $self->{column_prev} = $self->{column};
5927          $self->{column}++;
5928          $self->{nc}
5929              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5930        } else {
5931          $self->{set_nc}->($self);
5932        }
5933      
5934            return  ($self->{ct}); # ATTLIST
5935            redo A; ## Not reached: follows the return above.
5936          } elsif ($self->{nc} == -1) {
5937            ## XML5: No parse error.  The EOF is left as the current character (no advance) and the token is returned.
5938            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
5939            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
5940            return  ($self->{ct});
5941            redo A; ## Not reached: follows the return above.
5942          } else {
5943            ## XML5: Not defined yet.  Any other character starts a new attribute definition in $self->{ca}.
5944            $self->{ca} = {name => chr ($self->{nc}), # attrdef
5945                           tokens => [],
5946                           line => $self->{line}, column => $self->{column}}; ## Position taken from line/column before the advance below.
5947            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE;
5948            
5949        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5950          $self->{line_prev} = $self->{line};
5951          $self->{column_prev} = $self->{column};
5952          $self->{column}++;
5953          $self->{nc}
5954              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5955        } else {
5956          $self->{set_nc}->($self);
5957        }
5958      
5959            redo A;
5960          }
5961        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE) { ## Accumulates the attribute name into $self->{ca}->{name}.
5962          if ($is_space->{$self->{nc}}) {
5963            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE; ## White space ends the attribute name.
5964            
5965        if ($self->{char_buffer_pos} < length $self->{char_buffer}) { ## Advance: take the next character from char_buffer, else ask set_nc.
5966          $self->{line_prev} = $self->{line};
5967          $self->{column_prev} = $self->{column};
5968          $self->{column}++;
5969          $self->{nc}
5970              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5971        } else {
5972          $self->{set_nc}->($self);
5973        }
5974      
5975            redo A;
5976          } elsif ($self->{nc} == 0x003E) { # >
5977            ## XML5: Same as "anything else".  NOTE(review): $self->{ca} is not stored into the token before it is returned from this arm; confirm that is intended.
5978            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr type'); ## TODO: type
5979            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5980            
5981        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5982          $self->{line_prev} = $self->{line};
5983          $self->{column_prev} = $self->{column};
5984          $self->{column}++;
5985          $self->{nc}
5986              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5987        } else {
5988          $self->{set_nc}->($self);
5989        }
5990      
5991            return  ($self->{ct}); # ATTLIST
5992            redo A; ## Not reached: follows the return above.
5993          } elsif ($self->{nc} == 0x0028) { # (
5994            ## XML5: Same as "anything else".  Here "(" directly after the name is reported, then handled as the start of an allowed-token list.
5995            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before paren'); ## TODO: type
5996            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
5997            
5998        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5999          $self->{line_prev} = $self->{line};
6000          $self->{column_prev} = $self->{column};
6001          $self->{column}++;
6002          $self->{nc}
6003              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6004        } else {
6005          $self->{set_nc}->($self);
6006        }
6007      
6008            redo A;
6009          } elsif ($self->{nc} == -1) {
6010            ## XML5: No parse error.  NOTE(review): this arm advances the input after EOF, whereas the EOF arms of DOCTYPE_MD_STATE and MD_NAME_STATE reconsume it; confirm which is intended.
6011            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6012            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6013            
6014        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6015          $self->{line_prev} = $self->{line};
6016          $self->{column_prev} = $self->{column};
6017          $self->{column}++;
6018          $self->{nc}
6019              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6020        } else {
6021          $self->{set_nc}->($self);
6022        }
6023      
6024            return  ($self->{ct}); # ATTLIST
6025            redo A; ## Not reached: follows the return above.
6026          } else {
6027            ## XML5: Not defined yet.  Any other character extends the attribute name.
6028            $self->{ca}->{name} .= chr $self->{nc};
6029            ## Stay in the state.
6030            
6031        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6032          $self->{line_prev} = $self->{line};
6033          $self->{column_prev} = $self->{column};
6034          $self->{column}++;
6035          $self->{nc}
6036              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6037        } else {
6038          $self->{set_nc}->($self);
6039        }
6040      
6041            redo A;
6042          }
6043        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE) { ## After an attribute name: skip white space, then expect the attribute type.
6044          if ($is_space->{$self->{nc}}) {
6045            ## Stay in the state.
6046            
6047        if ($self->{char_buffer_pos} < length $self->{char_buffer}) { ## Advance: take the next character from char_buffer, else ask set_nc.
6048          $self->{line_prev} = $self->{line};
6049          $self->{column_prev} = $self->{column};
6050          $self->{column}++;
6051          $self->{nc}
6052              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6053        } else {
6054          $self->{set_nc}->($self);
6055        }
6056      
6057            redo A;
6058          } elsif ($self->{nc} == 0x003E) { # >
6059            ## XML5: Same as "anything else".  NOTE(review): $self->{ca} is not stored into the token before it is returned from this arm; confirm that is intended.
6060            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr type'); ## TODO: type
6061            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6062            
6063        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6064          $self->{line_prev} = $self->{line};
6065          $self->{column_prev} = $self->{column};
6066          $self->{column}++;
6067          $self->{nc}
6068              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6069        } else {
6070          $self->{set_nc}->($self);
6071        }
6072      
6073            return  ($self->{ct}); # ATTLIST
6074            redo A; ## Not reached: follows the return above.
6075          } elsif ($self->{nc} == 0x0028) { # (
6076            ## XML5: Same as "anything else".  Here "(" after white space starts an allowed-token list; no error on this path.
6077            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6078            
6079        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6080          $self->{line_prev} = $self->{line};
6081          $self->{column_prev} = $self->{column};
6082          $self->{column}++;
6083          $self->{nc}
6084              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6085        } else {
6086          $self->{set_nc}->($self);
6087        }
6088      
6089            redo A;
6090          } elsif ($self->{nc} == -1) {
6091            ## XML5: No parse error.  NOTE(review): this arm advances the input after EOF, whereas the EOF arms of DOCTYPE_MD_STATE and MD_NAME_STATE reconsume it; confirm which is intended.
6092            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6093            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6094            
6095        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6096          $self->{line_prev} = $self->{line};
6097          $self->{column_prev} = $self->{column};
6098          $self->{column}++;
6099          $self->{nc}
6100              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6101        } else {
6102          $self->{set_nc}->($self);
6103        }
6104      
6105            return  ($self->{ct});
6106            redo A; ## Not reached: follows the return above.
6107          } else {
6108            ## XML5: Not defined yet.  Any other character becomes the first character of the attribute type.
6109            $self->{ca}->{type} = chr $self->{nc};
6110            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE;
6111            
6112        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6113          $self->{line_prev} = $self->{line};
6114          $self->{column_prev} = $self->{column};
6115          $self->{column}++;
6116          $self->{nc}
6117              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6118        } else {
6119          $self->{set_nc}->($self);
6120        }
6121      
6122            redo A;
6123          }
6124        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE) {
6125          if ($is_space->{$self->{nc}}) {
6126            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE;
6127            
6128        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6129          $self->{line_prev} = $self->{line};
6130          $self->{column_prev} = $self->{column};
6131          $self->{column}++;
6132          $self->{nc}
6133              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6134        } else {
6135          $self->{set_nc}->($self);
6136        }
6137      
6138            redo A;
6139          } elsif ($self->{nc} == 0x0023) { # #
6140            ## XML5: Same as "anything else".
6141            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6142            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6143            
6144        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6145          $self->{line_prev} = $self->{line};
6146          $self->{column_prev} = $self->{column};
6147          $self->{column}++;
6148          $self->{nc}
6149              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6150        } else {
6151          $self->{set_nc}->($self);
6152        }
6153      
6154            redo A;
6155          } elsif ($self->{nc} == 0x0022) { # "
6156            ## XML5: Same as "anything else".
6157            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6158            $self->{ca}->{value} = '';
6159            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6160            
6161        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6162          $self->{line_prev} = $self->{line};
6163          $self->{column_prev} = $self->{column};
6164          $self->{column}++;
6165          $self->{nc}
6166              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6167        } else {
6168          $self->{set_nc}->($self);
6169        }
6170      
6171            redo A;
6172          } elsif ($self->{nc} == 0x0027) { # '
6173            ## XML5: Same as "anything else".
6174            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6175            $self->{ca}->{value} = '';
6176            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6177            
6178        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6179          $self->{line_prev} = $self->{line};
6180          $self->{column_prev} = $self->{column};
6181          $self->{column}++;
6182          $self->{nc}
6183              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6184        } else {
6185          $self->{set_nc}->($self);
6186        }
6187      
6188            redo A;
6189          } elsif ($self->{nc} == 0x003E) { # >
6190            ## XML5: Same as "anything else".
6191            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6192            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6193            
6194        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6195          $self->{line_prev} = $self->{line};
6196          $self->{column_prev} = $self->{column};
6197          $self->{column}++;
6198          $self->{nc}
6199              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6200        } else {
6201          $self->{set_nc}->($self);
6202        }
6203      
6204            return  ($self->{ct}); # ATTLIST
6205            redo A;
6206          } elsif ($self->{nc} == 0x0028) { # (
6207            ## XML5: Same as "anything else".
6208            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before paren'); ## TODO: type
6209            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6210            
6211        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6212          $self->{line_prev} = $self->{line};
6213          $self->{column_prev} = $self->{column};
6214          $self->{column}++;
6215          $self->{nc}
6216              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6217        } else {
6218          $self->{set_nc}->($self);
6219        }
6220      
6221            redo A;
6222          } elsif ($self->{nc} == -1) {
6223            ## XML5: No parse error.
6224            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6225            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6226            
6227        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6228          $self->{line_prev} = $self->{line};
6229          $self->{column_prev} = $self->{column};
6230          $self->{column}++;
6231          $self->{nc}
6232              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6233        } else {
6234          $self->{set_nc}->($self);
6235        }
6236      
6237            return  ($self->{ct});
6238            redo A;
6239          } else {
6240            ## XML5: Not defined yet.
6241            $self->{ca}->{type} .= chr $self->{nc};
6242            ## Stay in the state.
6243            
6244        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6245          $self->{line_prev} = $self->{line};
6246          $self->{column_prev} = $self->{column};
6247          $self->{column}++;
6248          $self->{nc}
6249              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6250        } else {
6251          $self->{set_nc}->($self);
6252        }
6253      
6254            redo A;
6255          }
6256        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE) {
              ## Handler for DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE.
              ## Dispatches on the current character code $self->{nc}:
              ##   - a character found in $is_space: skipped;
              ##   - "(": switch to BEFORE_ALLOWED_TOKEN_STATE;
              ##   - "#": switch to
              ##     DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
              ##   - a quote: reset $self->{ca}->{value} to '' and switch to
              ##     the matching quoted attribute value state;
              ##   - ">" or -1: report an error, switch to
              ##     DOCTYPE_INTERNAL_SUBSET_STATE and return $self->{ct};
              ##   - anything else: report 'unquoted attr value' and
              ##     reprocess the character in ATTRIBUTE_VALUE_UNQUOTED_STATE.
              ## NOTE(review): -1 appears to be the end-of-input marker;
              ## confirm against the code that sets $self->{nc}.
              ## The |redo A| written after each |return| is never reached.
6257          if ($is_space->{$self->{nc}}) {
6258            ## Stay in the state.
6259            
            ## Fetch the next input character into $self->{nc}: while unread
            ## characters remain in $self->{char_buffer}, remember the
            ## current position in line_prev/column_prev, bump the column
            ## and take the character from the buffer; otherwise call the
            ## set_nc callback.  The same inline sequence is repeated in
            ## every other branch that consumes a character.
6260        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6261          $self->{line_prev} = $self->{line};
6262          $self->{column_prev} = $self->{column};
6263          $self->{column}++;
6264          $self->{nc}
6265              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6266        } else {
6267          $self->{set_nc}->($self);
6268        }
6269      
6270            redo A;
6271          } elsif ($self->{nc} == 0x0028) { # (
6272            ## XML5: Same as "anything else".
6273            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6274            
6275        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6276          $self->{line_prev} = $self->{line};
6277          $self->{column_prev} = $self->{column};
6278          $self->{column}++;
6279          $self->{nc}
6280              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6281        } else {
6282          $self->{set_nc}->($self);
6283        }
6284      
6285            redo A;
6286          } elsif ($self->{nc} == 0x0023) { # #
6287            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6288            
6289        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6290          $self->{line_prev} = $self->{line};
6291          $self->{column_prev} = $self->{column};
6292          $self->{column}++;
6293          $self->{nc}
6294              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6295        } else {
6296          $self->{set_nc}->($self);
6297        }
6298      
6299            redo A;
6300          } elsif ($self->{nc} == 0x0022) { # "
6301            ## XML5: Same as "anything else".
6302            $self->{ca}->{value} = '';
6303            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6304            
6305        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6306          $self->{line_prev} = $self->{line};
6307          $self->{column_prev} = $self->{column};
6308          $self->{column}++;
6309          $self->{nc}
6310              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6311        } else {
6312          $self->{set_nc}->($self);
6313        }
6314      
6315            redo A;
6316          } elsif ($self->{nc} == 0x0027) { # '
6317            ## XML5: Same as "anything else".
6318            $self->{ca}->{value} = '';
6319            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6320            
6321        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6322          $self->{line_prev} = $self->{line};
6323          $self->{column_prev} = $self->{column};
6324          $self->{column}++;
6325          $self->{nc}
6326              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6327        } else {
6328          $self->{set_nc}->($self);
6329        }
6330      
6331            redo A;
6332          } elsif ($self->{nc} == 0x003E) { # >
6333            ## XML5: Same as "anything else".
6334            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6335            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6336            
6337        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6338          $self->{line_prev} = $self->{line};
6339          $self->{column_prev} = $self->{column};
6340          $self->{column}++;
6341          $self->{nc}
6342              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6343        } else {
6344          $self->{set_nc}->($self);
6345        }
6346      
6347            return  ($self->{ct}); # ATTLIST
6348            redo A;
6349          } elsif ($self->{nc} == -1) {
6350            ## XML5: No parse error.
6351            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6352            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6353            
6354        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6355          $self->{line_prev} = $self->{line};
6356          $self->{column_prev} = $self->{column};
6357          $self->{column}++;
6358          $self->{nc}
6359              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6360        } else {
6361          $self->{set_nc}->($self);
6362        }
6363      
6364            return  ($self->{ct});
6365            redo A;
6366          } else {
6367            ## XML5: Switch to the "DOCTYPE bogus comment state".
6368            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
6369            $self->{ca}->{value} = '';
6370            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
6371            ## Reconsume.
                ## (No character is fetched here, so the unquoted-value state
                ## sees the same $self->{nc}.)
6372            redo A;
6373          }
6374        } elsif ($self->{state} == BEFORE_ALLOWED_TOKEN_STATE) {
              ## Handler for BEFORE_ALLOWED_TOKEN_STATE: waits for the first
              ## character of an entry in $self->{ca}->{tokens}.
              ##   - a character found in $is_space: skipped;
              ##   - "|": 'empty allowed token' error, stay in this state;
              ##   - ")": 'empty allowed token' error, switch to
              ##     AFTER_ALLOWED_TOKENS_STATE;
              ##   - ">" or -1: report an error, switch to
              ##     DOCTYPE_INTERNAL_SUBSET_STATE and return $self->{ct};
              ##   - anything else: becomes the first character of a new
              ##     token; switch to ALLOWED_TOKEN_STATE.
              ## NOTE(review): -1 appears to be the end-of-input marker;
              ## confirm against the code that sets $self->{nc}.
              ## The |redo A| written after each |return| is never reached.
6375          if ($is_space->{$self->{nc}}) {
6376            ## Stay in the state.
6377            
            ## Fetch the next input character into $self->{nc}: while unread
            ## characters remain in $self->{char_buffer}, remember the
            ## current position in line_prev/column_prev, bump the column
            ## and take the character from the buffer; otherwise call the
            ## set_nc callback.  The same inline sequence is repeated in
            ## every other branch of this handler.
6378        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6379          $self->{line_prev} = $self->{line};
6380          $self->{column_prev} = $self->{column};
6381          $self->{column}++;
6382          $self->{nc}
6383              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6384        } else {
6385          $self->{set_nc}->($self);
6386        }
6387      
6388            redo A;
6389          } elsif ($self->{nc} == 0x007C) { # |
6390            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty allowed token'); ## TODO: type
6391            ## Stay in the state.
6392            
6393        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6394          $self->{line_prev} = $self->{line};
6395          $self->{column_prev} = $self->{column};
6396          $self->{column}++;
6397          $self->{nc}
6398              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6399        } else {
6400          $self->{set_nc}->($self);
6401        }
6402      
6403            redo A;
6404          } elsif ($self->{nc} == 0x0029) { # )
6405            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty allowed token'); ## TODO: type
6406            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
6407            
6408        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6409          $self->{line_prev} = $self->{line};
6410          $self->{column_prev} = $self->{column};
6411          $self->{column}++;
6412          $self->{nc}
6413              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6414        } else {
6415          $self->{set_nc}->($self);
6416        }
6417      
6418            redo A;
6419          } elsif ($self->{nc} == 0x003E) { # >
6420            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
6421            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6422            
6423        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6424          $self->{line_prev} = $self->{line};
6425          $self->{column_prev} = $self->{column};
6426          $self->{column}++;
6427          $self->{nc}
6428              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6429        } else {
6430          $self->{set_nc}->($self);
6431        }
6432      
6433            return  ($self->{ct}); # ATTLIST
6434            redo A;
6435          } elsif ($self->{nc} == -1) {
6436            ## XML5: No parse error.
6437            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6438            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6439            
6440        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6441          $self->{line_prev} = $self->{line};
6442          $self->{column_prev} = $self->{column};
6443          $self->{column}++;
6444          $self->{nc}
6445              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6446        } else {
6447          $self->{set_nc}->($self);
6448        }
6449      
6450            return  ($self->{ct});
6451            redo A;
6452          } else {
                ## Open a new entry in the token list, seeded with the
                ## current character.
6453            push @{$self->{ca}->{tokens}}, chr $self->{nc};
6454            $self->{state} = ALLOWED_TOKEN_STATE;
6455            
6456        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6457          $self->{line_prev} = $self->{line};
6458          $self->{column_prev} = $self->{column};
6459          $self->{column}++;
6460          $self->{nc}
6461              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6462        } else {
6463          $self->{set_nc}->($self);
6464        }
6465      
6466            redo A;
6467          }
6468        } elsif ($self->{state} == ALLOWED_TOKEN_STATE) {
              ## Handler for ALLOWED_TOKEN_STATE: extends the last entry of
              ## $self->{ca}->{tokens} one character at a time.
              ##   - a character found in $is_space: switch to
              ##     AFTER_ALLOWED_TOKEN_STATE;
              ##   - "|": switch to BEFORE_ALLOWED_TOKEN_STATE;
              ##   - ")": switch to AFTER_ALLOWED_TOKENS_STATE;
              ##   - ">" or -1: report an error, switch to
              ##     DOCTYPE_INTERNAL_SUBSET_STATE and return $self->{ct};
              ##   - anything else: appended to the last token.
              ## NOTE(review): -1 appears to be the end-of-input marker;
              ## confirm against the code that sets $self->{nc}.
              ## The |redo A| written after each |return| is never reached.
6469          if ($is_space->{$self->{nc}}) {
6470            $self->{state} = AFTER_ALLOWED_TOKEN_STATE;
6471            
            ## Fetch the next input character into $self->{nc}: while unread
            ## characters remain in $self->{char_buffer}, remember the
            ## current position in line_prev/column_prev, bump the column
            ## and take the character from the buffer; otherwise call the
            ## set_nc callback.  The same inline sequence is repeated in
            ## every other branch of this handler.
6472        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6473          $self->{line_prev} = $self->{line};
6474          $self->{column_prev} = $self->{column};
6475          $self->{column}++;
6476          $self->{nc}
6477              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6478        } else {
6479          $self->{set_nc}->($self);
6480        }
6481      
6482            redo A;
6483          } elsif ($self->{nc} == 0x007C) { # |
6484            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6485            
6486        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6487          $self->{line_prev} = $self->{line};
6488          $self->{column_prev} = $self->{column};
6489          $self->{column}++;
6490          $self->{nc}
6491              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6492        } else {
6493          $self->{set_nc}->($self);
6494        }
6495      
6496            redo A;
6497          } elsif ($self->{nc} == 0x0029) { # )
6498            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
6499            
6500        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6501          $self->{line_prev} = $self->{line};
6502          $self->{column_prev} = $self->{column};
6503          $self->{column}++;
6504          $self->{nc}
6505              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6506        } else {
6507          $self->{set_nc}->($self);
6508        }
6509      
6510            redo A;
6511          } elsif ($self->{nc} == 0x003E) { # >
6512            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
6513            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6514            
6515        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6516          $self->{line_prev} = $self->{line};
6517          $self->{column_prev} = $self->{column};
6518          $self->{column}++;
6519          $self->{nc}
6520              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6521        } else {
6522          $self->{set_nc}->($self);
6523        }
6524      
6525            return  ($self->{ct}); # ATTLIST
6526            redo A;
6527          } elsif ($self->{nc} == -1) {
6528            ## XML5: No parse error.
6529            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6530            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6531            
6532        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6533          $self->{line_prev} = $self->{line};
6534          $self->{column_prev} = $self->{column};
6535          $self->{column}++;
6536          $self->{nc}
6537              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6538        } else {
6539          $self->{set_nc}->($self);
6540        }
6541      
6542            return  ($self->{ct});
6543            redo A;
6544          } else {
                ## Grow the most recently pushed token by one character.
6545            $self->{ca}->{tokens}->[-1] .= chr $self->{nc};
6546            ## Stay in the state.
6547            
6548        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6549          $self->{line_prev} = $self->{line};
6550          $self->{column_prev} = $self->{column};
6551          $self->{column}++;
6552          $self->{nc}
6553              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6554        } else {
6555          $self->{set_nc}->($self);
6556        }
6557      
6558            redo A;
6559          }
6560        } elsif ($self->{state} == AFTER_ALLOWED_TOKEN_STATE) {
              ## Handler for AFTER_ALLOWED_TOKEN_STATE, which
              ## ALLOWED_TOKEN_STATE enters when it meets a character found
              ## in $is_space.
              ##   - a character found in $is_space: skipped;
              ##   - "|": switch to BEFORE_ALLOWED_TOKEN_STATE;
              ##   - ")": switch to AFTER_ALLOWED_TOKENS_STATE;
              ##   - ">" or -1: report an error, switch to
              ##     DOCTYPE_INTERNAL_SUBSET_STATE and return $self->{ct};
              ##   - anything else: 'space in allowed token' error; the
              ##     character is glued onto the last token after a single
              ##     space and ALLOWED_TOKEN_STATE resumes.
              ## NOTE(review): -1 appears to be the end-of-input marker;
              ## confirm against the code that sets $self->{nc}.
              ## The |redo A| written after each |return| is never reached.
6561          if ($is_space->{$self->{nc}}) {
6562            ## Stay in the state.
6563            
            ## Fetch the next input character into $self->{nc}: while unread
            ## characters remain in $self->{char_buffer}, remember the
            ## current position in line_prev/column_prev, bump the column
            ## and take the character from the buffer; otherwise call the
            ## set_nc callback.  The same inline sequence is repeated in
            ## every other branch of this handler.
6564        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6565          $self->{line_prev} = $self->{line};
6566          $self->{column_prev} = $self->{column};
6567          $self->{column}++;
6568          $self->{nc}
6569              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6570        } else {
6571          $self->{set_nc}->($self);
6572        }
6573      
6574            redo A;
6575          } elsif ($self->{nc} == 0x007C) { # |
6576            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6577            
6578        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6579          $self->{line_prev} = $self->{line};
6580          $self->{column_prev} = $self->{column};
6581          $self->{column}++;
6582          $self->{nc}
6583              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6584        } else {
6585          $self->{set_nc}->($self);
6586        }
6587      
6588            redo A;
6589          } elsif ($self->{nc} == 0x0029) { # )
6590            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
6591            
6592        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6593          $self->{line_prev} = $self->{line};
6594          $self->{column_prev} = $self->{column};
6595          $self->{column}++;
6596          $self->{nc}
6597              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6598        } else {
6599          $self->{set_nc}->($self);
6600        }
6601      
6602            redo A;
6603          } elsif ($self->{nc} == 0x003E) { # >
6604            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
6605            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6606            
6607        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6608          $self->{line_prev} = $self->{line};
6609          $self->{column_prev} = $self->{column};
6610          $self->{column}++;
6611          $self->{nc}
6612              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6613        } else {
6614          $self->{set_nc}->($self);
6615        }
6616      
6617            return  ($self->{ct}); # ATTLIST
6618            redo A;
6619          } elsif ($self->{nc} == -1) {
6620            ## XML5: No parse error.
6621            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6622            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6623            
6624        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6625          $self->{line_prev} = $self->{line};
6626          $self->{column_prev} = $self->{column};
6627          $self->{column}++;
6628          $self->{nc}
6629              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6630        } else {
6631          $self->{set_nc}->($self);
6632        }
6633      
6634            return  ($self->{ct});
6635            redo A;
6636          } else {
                ## The error is reported at the previously recorded position
                ## (line_prev/column_prev), not at the current character.
6637            $self->{parse_error}->(level => $self->{level}->{must}, type => 'space in allowed token', ## TODO: type
6638                            line => $self->{line_prev},
6639                            column => $self->{column_prev});
                ## However many space characters were skipped, exactly one
                ## ' ' is inserted before the current character.
6640            $self->{ca}->{tokens}->[-1] .= ' ' . chr $self->{nc};
6641            $self->{state} = ALLOWED_TOKEN_STATE;
6642            
6643        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6644          $self->{line_prev} = $self->{line};
6645          $self->{column_prev} = $self->{column};
6646          $self->{column}++;
6647          $self->{nc}
6648              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6649        } else {
6650          $self->{set_nc}->($self);
6651        }
6652      
6653            redo A;
6654          }
6655        } elsif ($self->{state} == AFTER_ALLOWED_TOKENS_STATE) {
              ## Handler for AFTER_ALLOWED_TOKENS_STATE, entered from the
              ## allowed-token states when ")" is seen.
              ##   - a character found in $is_space: switch to
              ##     BEFORE_ATTR_DEFAULT_STATE;
              ##   - "#" or a quote: 'no space before default value' error,
              ##     then the same transition BEFORE_ATTR_DEFAULT_STATE
              ##     makes for that character;
              ##   - ">" or -1: report an error, switch to
              ##     DOCTYPE_INTERNAL_SUBSET_STATE and return $self->{ct};
              ##   - anything else: report 'unquoted attr value' and
              ##     reprocess the character in ATTRIBUTE_VALUE_UNQUOTED_STATE.
              ## NOTE(review): -1 appears to be the end-of-input marker;
              ## confirm against the code that sets $self->{nc}.
              ## The |redo A| written after each |return| is never reached.
6656          if ($is_space->{$self->{nc}}) {
6657            $self->{state} = BEFORE_ATTR_DEFAULT_STATE;
6658            
            ## Fetch the next input character into $self->{nc}: while unread
            ## characters remain in $self->{char_buffer}, remember the
            ## current position in line_prev/column_prev, bump the column
            ## and take the character from the buffer; otherwise call the
            ## set_nc callback.  The same inline sequence is repeated in
            ## every other branch that consumes a character.
6659        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6660          $self->{line_prev} = $self->{line};
6661          $self->{column_prev} = $self->{column};
6662          $self->{column}++;
6663          $self->{nc}
6664              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6665        } else {
6666          $self->{set_nc}->($self);
6667        }
6668      
6669            redo A;
6670          } elsif ($self->{nc} == 0x0023) { # #
6671            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6672            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6673            
6674        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6675          $self->{line_prev} = $self->{line};
6676          $self->{column_prev} = $self->{column};
6677          $self->{column}++;
6678          $self->{nc}
6679              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6680        } else {
6681          $self->{set_nc}->($self);
6682        }
6683      
6684            redo A;
6685          } elsif ($self->{nc} == 0x0022) { # "
6686            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6687            $self->{ca}->{value} = '';
6688            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6689            
6690        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6691          $self->{line_prev} = $self->{line};
6692          $self->{column_prev} = $self->{column};
6693          $self->{column}++;
6694          $self->{nc}
6695              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6696        } else {
6697          $self->{set_nc}->($self);
6698        }
6699      
6700            redo A;
6701          } elsif ($self->{nc} == 0x0027) { # '
6702            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6703            $self->{ca}->{value} = '';
6704            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6705            
6706        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6707          $self->{line_prev} = $self->{line};
6708          $self->{column_prev} = $self->{column};
6709          $self->{column}++;
6710          $self->{nc}
6711              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6712        } else {
6713          $self->{set_nc}->($self);
6714        }
6715      
6716            redo A;
6717          } elsif ($self->{nc} == 0x003E) { # >
6718            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6719            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6720            
6721        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6722          $self->{line_prev} = $self->{line};
6723          $self->{column_prev} = $self->{column};
6724          $self->{column}++;
6725          $self->{nc}
6726              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6727        } else {
6728          $self->{set_nc}->($self);
6729        }
6730      
6731            return  ($self->{ct}); # ATTLIST
6732            redo A;
6733          } elsif ($self->{nc} == -1) {
6734            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6735            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6736            
6737        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6738          $self->{line_prev} = $self->{line};
6739          $self->{column_prev} = $self->{column};
6740          $self->{column}++;
6741          $self->{nc}
6742              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6743        } else {
6744          $self->{set_nc}->($self);
6745        }
6746      
6747            return  ($self->{ct});
6748            redo A;
6749          } else {
6750            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
                ## NOTE(review): unlike the corresponding branch of
                ## DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE, this one does
                ## not reset $self->{ca}->{value} to '' before entering the
                ## unquoted-value state -- confirm that this is intended.
6751            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
6752            ## Reconsume.
6753            redo A;
6754          }
6755        } elsif ($self->{state} == BEFORE_ATTR_DEFAULT_STATE) {
              ## Handler for BEFORE_ATTR_DEFAULT_STATE, entered from
              ## AFTER_ALLOWED_TOKENS_STATE on a character found in $is_space.
              ##   - a character found in $is_space: skipped;
              ##   - "#": switch to
              ##     DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
              ##   - a quote: reset $self->{ca}->{value} to '' and switch to
              ##     the matching quoted attribute value state;
              ##   - ">" or -1: report an error, switch to
              ##     DOCTYPE_INTERNAL_SUBSET_STATE and return $self->{ct};
              ##   - anything else: report 'unquoted attr value' and
              ##     reprocess the character in ATTRIBUTE_VALUE_UNQUOTED_STATE.
              ## NOTE(review): -1 appears to be the end-of-input marker;
              ## confirm against the code that sets $self->{nc}.
              ## The |redo A| written after each |return| is never reached.
6756          if ($is_space->{$self->{nc}}) {
6757            ## Stay in the state.
6758            
            ## Fetch the next input character into $self->{nc}: while unread
            ## characters remain in $self->{char_buffer}, remember the
            ## current position in line_prev/column_prev, bump the column
            ## and take the character from the buffer; otherwise call the
            ## set_nc callback.  The same inline sequence is repeated in
            ## every other branch that consumes a character.
6759        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6760          $self->{line_prev} = $self->{line};
6761          $self->{column_prev} = $self->{column};
6762          $self->{column}++;
6763          $self->{nc}
6764              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6765        } else {
6766          $self->{set_nc}->($self);
6767        }
6768      
6769            redo A;
6770          } elsif ($self->{nc} == 0x0023) { # #
6771            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6772            
6773        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6774          $self->{line_prev} = $self->{line};
6775          $self->{column_prev} = $self->{column};
6776          $self->{column}++;
6777          $self->{nc}
6778              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6779        } else {
6780          $self->{set_nc}->($self);
6781        }
6782      
6783            redo A;
6784          } elsif ($self->{nc} == 0x0022) { # "
6785            $self->{ca}->{value} = '';
6786            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6787            
6788        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6789          $self->{line_prev} = $self->{line};
6790          $self->{column_prev} = $self->{column};
6791          $self->{column}++;
6792          $self->{nc}
6793              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6794        } else {
6795          $self->{set_nc}->($self);
6796        }
6797      
6798            redo A;
6799          } elsif ($self->{nc} == 0x0027) { # '
6800            $self->{ca}->{value} = '';
6801            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6802            
6803        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6804          $self->{line_prev} = $self->{line};
6805          $self->{column_prev} = $self->{column};
6806          $self->{column}++;
6807          $self->{nc}
6808              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6809        } else {
6810          $self->{set_nc}->($self);
6811        }
6812      
6813            redo A;
6814          } elsif ($self->{nc} == 0x003E) { # >
6815            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6816            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6817            
6818        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6819          $self->{line_prev} = $self->{line};
6820          $self->{column_prev} = $self->{column};
6821          $self->{column}++;
6822          $self->{nc}
6823              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6824        } else {
6825          $self->{set_nc}->($self);
6826        }
6827      
6828            return  ($self->{ct}); # ATTLIST
6829            redo A;
6830          } elsif ($self->{nc} == -1) {
6831            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6832            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6833            
6834        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6835          $self->{line_prev} = $self->{line};
6836          $self->{column_prev} = $self->{column};
6837          $self->{column}++;
6838          $self->{nc}
6839              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6840        } else {
6841          $self->{set_nc}->($self);
6842        }
6843      
6844            return  ($self->{ct});
6845            redo A;
6846          } else {
6847            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
                ## NOTE(review): unlike the corresponding branch of
                ## DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE, this one does
                ## not reset $self->{ca}->{value} to '' before entering the
                ## unquoted-value state -- confirm that this is intended.
6848            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
6849            ## Reconsume.
6850            redo A;
6851          }
6852        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE) {
              ## Handler for DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE,
              ## which the preceding states enter after consuming "#".
              ##   - a character found in $is_space: 'no default type' error;
              ##     the current token is replaced by an empty COMMENT_TOKEN
              ##     and the character is reprocessed in BOGUS_COMMENT_STATE;
              ##   - a quote: reset $self->{ca}->{value} to '' and switch to
              ##     the matching quoted attribute value state;
              ##   - ">" or -1: report an error, switch to
              ##     DOCTYPE_INTERNAL_SUBSET_STATE and return $self->{ct};
              ##   - anything else: becomes the first character of
              ##     $self->{ca}->{default}; switch to
              ##     DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE.
              ## NOTE(review): -1 appears to be the end-of-input marker;
              ## confirm against the code that sets $self->{nc}.
              ## The |redo A| written after each |return| is never reached.
6853          if ($is_space->{$self->{nc}}) {
6854            ## XML5: No parse error.
6855            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no default type'); ## TODO: type
6856            $self->{state} = BOGUS_COMMENT_STATE;
                ## Overwriting $self->{ct} drops the token that was being
                ## built up to this point.
6857            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
6858            ## Reconsume.
6859            redo A;
6860          } elsif ($self->{nc} == 0x0022) { # "
6861            ## XML5: Same as "anything else".
6862            $self->{ca}->{value} = '';
6863            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6864            
            ## Fetch the next input character into $self->{nc}: while unread
            ## characters remain in $self->{char_buffer}, remember the
            ## current position in line_prev/column_prev, bump the column
            ## and take the character from the buffer; otherwise call the
            ## set_nc callback.  The same inline sequence is repeated in
            ## every other branch that consumes a character.
6865        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6866          $self->{line_prev} = $self->{line};
6867          $self->{column_prev} = $self->{column};
6868          $self->{column}++;
6869          $self->{nc}
6870              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6871        } else {
6872          $self->{set_nc}->($self);
6873        }
6874      
6875            redo A;
6876          } elsif ($self->{nc} == 0x0027) { # '
6877            ## XML5: Same as "anything else".
6878            $self->{ca}->{value} = '';
6879            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6880            
6881        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6882          $self->{line_prev} = $self->{line};
6883          $self->{column_prev} = $self->{column};
6884          $self->{column}++;
6885          $self->{nc}
6886              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6887        } else {
6888          $self->{set_nc}->($self);
6889        }
6890      
6891            redo A;
6892          } elsif ($self->{nc} == 0x003E) { # >
6893            ## XML5: Same as "anything else".
6894            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6895            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6896            
6897        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6898          $self->{line_prev} = $self->{line};
6899          $self->{column_prev} = $self->{column};
6900          $self->{column}++;
6901          $self->{nc}
6902              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6903        } else {
6904          $self->{set_nc}->($self);
6905        }
6906      
6907            return  ($self->{ct}); # ATTLIST
6908            redo A;
6909          } elsif ($self->{nc} == -1) {
6910            ## XML5: No parse error.
6911            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6912            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6913            
6914        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6915          $self->{line_prev} = $self->{line};
6916          $self->{column_prev} = $self->{column};
6917          $self->{column}++;
6918          $self->{nc}
6919              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6920        } else {
6921          $self->{set_nc}->($self);
6922        }
6923      
6924            return  ($self->{ct});
6925            redo A;
6926          } else {
                ## Start collecting the text that follows "#", beginning with
                ## the current character.
6927            $self->{ca}->{default} = chr $self->{nc};
6928            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE;
6929            
6930        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6931          $self->{line_prev} = $self->{line};
6932          $self->{column_prev} = $self->{column};
6933          $self->{column}++;
6934          $self->{nc}
6935              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6936        } else {
6937          $self->{set_nc}->($self);
6938        }
6939      
6940            redo A;
6941          }
6942        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE) {
6943          if ($is_space->{$self->{nc}}) {
6944            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE;
6945            
6946        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6947          $self->{line_prev} = $self->{line};
6948          $self->{column_prev} = $self->{column};
6949          $self->{column}++;
6950          $self->{nc}
6951              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6952        } else {
6953          $self->{set_nc}->($self);
6954        }
6955      
6956            redo A;
6957          } elsif ($self->{nc} == 0x0022) { # "
6958            ## XML5: Same as "anything else".
6959            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6960            $self->{ca}->{value} = '';
6961            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6962            
6963        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6964          $self->{line_prev} = $self->{line};
6965          $self->{column_prev} = $self->{column};
6966          $self->{column}++;
6967          $self->{nc}
6968              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6969        } else {
6970          $self->{set_nc}->($self);
6971        }
6972      
6973            redo A;
6974          } elsif ($self->{nc} == 0x0027) { # '
6975            ## XML5: Same as "anything else".
6976            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6977            $self->{ca}->{value} = '';
6978            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6979            
6980        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6981          $self->{line_prev} = $self->{line};
6982          $self->{column_prev} = $self->{column};
6983          $self->{column}++;
6984          $self->{nc}
6985              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6986        } else {
6987          $self->{set_nc}->($self);
6988        }
6989      
6990            redo A;
6991          } elsif ($self->{nc} == 0x003E) { # >
6992            ## XML5: Same as "anything else".
6993            push @{$self->{ct}->{attrdefs}}, $self->{ca};
6994            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6995            
6996        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6997          $self->{line_prev} = $self->{line};
6998          $self->{column_prev} = $self->{column};
6999          $self->{column}++;
7000          $self->{nc}
7001              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7002        } else {
7003          $self->{set_nc}->($self);
7004        }
7005      
7006            return  ($self->{ct}); # ATTLIST
7007            redo A;
7008          } elsif ($self->{nc} == -1) {
7009            ## XML5: No parse error.
7010            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7011            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7012            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7013            
7014        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7015          $self->{line_prev} = $self->{line};
7016          $self->{column_prev} = $self->{column};
7017          $self->{column}++;
7018          $self->{nc}
7019              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7020        } else {
7021          $self->{set_nc}->($self);
7022        }
7023      
7024            return  ($self->{ct});
7025            redo A;
7026          } else {
7027            $self->{ca}->{default} .= chr $self->{nc};
7028            ## Stay in the state.
7029            
7030        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7031          $self->{line_prev} = $self->{line};
7032          $self->{column_prev} = $self->{column};
7033          $self->{column}++;
7034          $self->{nc}
7035              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7036        } else {
7037          $self->{set_nc}->($self);
7038        }
7039      
7040            redo A;
7041          }
7042        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE) {
7043          if ($is_space->{$self->{nc}}) {
7044            ## Stay in the state.
7045            
7046        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7047          $self->{line_prev} = $self->{line};
7048          $self->{column_prev} = $self->{column};
7049          $self->{column}++;
7050          $self->{nc}
7051              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7052        } else {
7053          $self->{set_nc}->($self);
7054        }
7055      
7056            redo A;
7057          } elsif ($self->{nc} == 0x0022) { # "
7058            $self->{ca}->{value} = '';
7059            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7060            
7061        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7062          $self->{line_prev} = $self->{line};
7063          $self->{column_prev} = $self->{column};
7064          $self->{column}++;
7065          $self->{nc}
7066              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7067        } else {
7068          $self->{set_nc}->($self);
7069        }
7070      
7071            redo A;
7072          } elsif ($self->{nc} == 0x0027) { # '
7073            $self->{ca}->{value} = '';
7074            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7075            
7076        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7077          $self->{line_prev} = $self->{line};
7078          $self->{column_prev} = $self->{column};
7079          $self->{column}++;
7080          $self->{nc}
7081              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7082        } else {
7083          $self->{set_nc}->($self);
7084        }
7085      
7086            redo A;
7087          } elsif ($self->{nc} == 0x003E) { # >
7088            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7089            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7090            
7091        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7092          $self->{line_prev} = $self->{line};
7093          $self->{column_prev} = $self->{column};
7094          $self->{column}++;
7095          $self->{nc}
7096              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7097        } else {
7098          $self->{set_nc}->($self);
7099        }
7100      
7101            return  ($self->{ct}); # ATTLIST
7102            redo A;
7103          } elsif ($self->{nc} == -1) {
7104            ## XML5: No parse error.
7105            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7106            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7107            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7108            
7109        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7110          $self->{line_prev} = $self->{line};
7111          $self->{column_prev} = $self->{column};
7112          $self->{column}++;
7113          $self->{nc}
7114              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7115        } else {
7116          $self->{set_nc}->($self);
7117        }
7118      
7119            return  ($self->{ct});
7120            redo A;
7121          } else {
7122            ## XML5: Not defined yet.
7123            if ($self->{ca}->{default} eq 'FIXED') {
7124              $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
7125            } else {
7126              push @{$self->{ct}->{attrdefs}}, $self->{ca};
7127              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7128            }
7129            ## Reconsume.
7130            redo A;
7131          }
7132        } elsif ($self->{state} == AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE) {
7133          if ($is_space->{$self->{nc}} or
7134              $self->{nc} == -1 or
7135              $self->{nc} == 0x003E) { # >
7136            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7137            ## Reconsume.
7138            redo A;
7139          } else {
7140            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before attr name'); ## TODO: type
7141            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7142            ## Reconsume.
7143            redo A;
7144          }      
7145      } else {      } else {
7146        die "$0: $self->{state}: Unknown state";        die "$0: $self->{state}: Unknown state";
7147      }      }
# Line 5206  sub _get_next_token ($) { Line 7152  sub _get_next_token ($) {
7152    
7153  1;  1;
7154  ## $Date$  ## $Date$
7155                                    

Legend:
Removed from v.1.13  
changed lines
  Added in v.1.15

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24