/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm

Parent Directory | Revision Log | View Patch

revision 1.14 by wakaba, Fri Oct 17 07:14:29 2008 UTC | revision 1.19 by wakaba, Sun Oct 19 07:19:00 2008 UTC
# Line 164  sub BEFORE_MD_NAME_STATE () { 68 } Line 164  sub BEFORE_MD_NAME_STATE () { 68 }
164  sub MD_NAME_STATE () { 69 }  sub MD_NAME_STATE () { 69 }
165  sub DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE () { 70 }  sub DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE () { 70 }
166  sub DOCTYPE_ATTLIST_NAME_AFTER_STATE () { 71 }  sub DOCTYPE_ATTLIST_NAME_AFTER_STATE () { 71 }
167    sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE () { 72 }
168    sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE () { 73 }
169    sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE () { 74 }
170    sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE () { 75 }
171    sub BEFORE_ALLOWED_TOKEN_STATE () { 76 }
172    sub ALLOWED_TOKEN_STATE () { 77 }
173    sub AFTER_ALLOWED_TOKEN_STATE () { 78 }
174    sub AFTER_ALLOWED_TOKENS_STATE () { 79 }
175    sub BEFORE_ATTR_DEFAULT_STATE () { 80 }
176    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE () { 81 }
177    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE () { 82 }
178    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE () { 83 }
179    sub AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE () { 84 }
180    sub BEFORE_NDATA_STATE () { 85 }
181    sub NDATA_STATE () { 86 }
182    sub AFTER_NDATA_STATE () { 87 }
183    sub BEFORE_NOTATION_NAME_STATE () { 88 }
184    sub NOTATION_NAME_STATE () { 89 }
185    sub AFTER_NOTATION_NAME_STATE () { 90 }
186    sub DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE () { 91 }
187    sub DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE () { 92 }
188    sub ENTITY_VALUE_ENTITY_STATE () { 93 }
189    sub BOGUS_MD_STATE () { 94 }
190    
191  ## Tree constructor state constants (see Whatpm::HTML for the full  ## Tree constructor state constants (see Whatpm::HTML for the full
192  ## list and descriptions)  ## list and descriptions)
# Line 1737  sub _get_next_token ($) { Line 1760  sub _get_next_token ($) {
1760          redo A;          redo A;
1761        }        }
1762      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1763        ## XML5: "Tag attribute value double quoted state".        ## XML5: "Tag attribute value double quoted state" and "DOCTYPE
1764          ## ATTLIST attribute value double quoted state".
1765                
1766        if ($self->{nc} == 0x0022) { # "        if ($self->{nc} == 0x0022) { # "
1767                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1768          ## XML5: "Tag attribute name before state".            
1769          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;            ## XML5: "DOCTYPE ATTLIST name after state".
1770              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1771              $self->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE;
1772            } else {
1773              
1774              ## XML5: "Tag attribute name before state".
1775              $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1776            }
1777                    
1778      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1779        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1783  sub _get_next_token ($) { Line 1814  sub _get_next_token ($) {
1814          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1815                        
1816            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
1817    
1818              $self->{state} = DATA_STATE;
1819              $self->{s_kwd} = '';
1820              ## reconsume
1821              return  ($self->{ct}); # start tag
1822              redo A;
1823          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1824            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1825            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1792  sub _get_next_token ($) { Line 1829  sub _get_next_token ($) {
1829              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
1830                            
1831            }            }
1832    
1833              $self->{state} = DATA_STATE;
1834              $self->{s_kwd} = '';
1835              ## reconsume
1836              return  ($self->{ct}); # end tag
1837              redo A;
1838            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1839              ## XML5: No parse error above; not defined yet.
1840              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1841              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1842              ## Reconsume.
1843              return  ($self->{ct}); # ATTLIST
1844              redo A;
1845          } else {          } else {
1846            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1847          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
1848        } else {        } else {
1849            ## XML5 [ATTLIST]: Not defined yet.
1850          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
1851                        
1852            ## XML5: Not a parse error.            ## XML5: Not a parse error.
# Line 1830  sub _get_next_token ($) { Line 1874  sub _get_next_token ($) {
1874          redo A;          redo A;
1875        }        }
1876      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1877        ## XML5: "Tag attribute value single quoted state".        ## XML5: "Tag attribute value single quoted state" and "DOCTYPE
1878          ## ATTLIST attribute value single quoted state".
1879    
1880        if ($self->{nc} == 0x0027) { # '        if ($self->{nc} == 0x0027) { # '
1881                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1882          ## XML5: "Before attribute name state" (sic).            
1883          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;            ## XML5: "DOCTYPE ATTLIST name after state".
1884              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1885              $self->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE;
1886            } else {
1887              
1888              ## XML5: "Before attribute name state" (sic).
1889              $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1890            }
1891                    
1892      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1893        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1876  sub _get_next_token ($) { Line 1928  sub _get_next_token ($) {
1928          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1929                        
1930            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
1931    
1932              $self->{state} = DATA_STATE;
1933              $self->{s_kwd} = '';
1934              ## reconsume
1935              return  ($self->{ct}); # start tag
1936              redo A;
1937          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1938            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1939            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1885  sub _get_next_token ($) { Line 1943  sub _get_next_token ($) {
1943              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
1944                            
1945            }            }
1946    
1947              $self->{state} = DATA_STATE;
1948              $self->{s_kwd} = '';
1949              ## reconsume
1950              return  ($self->{ct}); # end tag
1951              redo A;
1952            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1953              ## XML5: No parse error above; not defined yet.
1954              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1955              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1956              ## Reconsume.
1957              return  ($self->{ct}); # ATTLIST
1958              redo A;
1959          } else {          } else {
1960            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1961          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
1962        } else {        } else {
1963            ## XML5 [ATTLIST]: Not defined yet.
1964          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
1965                        
1966            ## XML5: Not a parse error.            ## XML5: Not a parse error.
# Line 1926  sub _get_next_token ($) { Line 1991  sub _get_next_token ($) {
1991        ## XML5: "Tag attribute value unquoted state".        ## XML5: "Tag attribute value unquoted state".
1992    
1993        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
1994                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1995          ## XML5: "Tag attribute name before state".            
1996          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;            push @{$self->{ct}->{attrdefs}}, $self->{ca};
1997              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
1998            } else {
1999              
2000              ## XML5: "Tag attribute name before state".
2001              $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
2002            }
2003                    
2004      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2005        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1969  sub _get_next_token ($) { Line 2040  sub _get_next_token ($) {
2040          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
2041                        
2042            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
2043    
2044              $self->{state} = DATA_STATE;
2045              $self->{s_kwd} = '';
2046              
2047        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2048          $self->{line_prev} = $self->{line};
2049          $self->{column_prev} = $self->{column};
2050          $self->{column}++;
2051          $self->{nc}
2052              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
2053        } else {
2054          $self->{set_nc}->($self);
2055        }
2056      
2057              return  ($self->{ct}); # start tag
2058              redo A;
2059          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
2060            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
2061            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1978  sub _get_next_token ($) { Line 2065  sub _get_next_token ($) {
2065              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
2066                            
2067            }            }
2068          } else {  
2069            die "$0: $self->{ct}->{type}: Unknown token type";            $self->{state} = DATA_STATE;
2070          }            $self->{s_kwd} = '';
2071          $self->{state} = DATA_STATE;            
         $self->{s_kwd} = '';  
           
2072      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2073        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
2074        $self->{column_prev} = $self->{column};        $self->{column_prev} = $self->{column};
# Line 1994  sub _get_next_token ($) { Line 2079  sub _get_next_token ($) {
2079        $self->{set_nc}->($self);        $self->{set_nc}->($self);
2080      }      }
2081        
2082              return  ($self->{ct}); # end tag
2083          return  ($self->{ct}); # start tag or end tag            redo A;
2084            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
2085          redo A;            push @{$self->{ct}->{attrdefs}}, $self->{ca};
2086              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2087              
2088        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2089          $self->{line_prev} = $self->{line};
2090          $self->{column_prev} = $self->{column};
2091          $self->{column}++;
2092          $self->{nc}
2093              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
2094        } else {
2095          $self->{set_nc}->($self);
2096        }
2097      
2098              return  ($self->{ct}); # ATTLIST
2099              redo A;
2100            } else {
2101              die "$0: $self->{ct}->{type}: Unknown token type";
2102            }
2103        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');  
2104          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
2105                        
2106              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');
2107            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
2108    
2109              $self->{state} = DATA_STATE;
2110              $self->{s_kwd} = '';
2111              ## reconsume
2112              return  ($self->{ct}); # start tag
2113              redo A;
2114          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
2115              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');
2116            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
2117            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
2118                            
# Line 2012  sub _get_next_token ($) { Line 2121  sub _get_next_token ($) {
2121              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
2122                            
2123            }            }
2124    
2125              $self->{state} = DATA_STATE;
2126              $self->{s_kwd} = '';
2127              ## reconsume
2128              return  ($self->{ct}); # end tag
2129              redo A;
2130            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
2131              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
2132              push @{$self->{ct}->{attrdefs}}, $self->{ca};
2133              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2134              ## Reconsume.
2135              return  ($self->{ct}); # ATTLIST
2136              redo A;
2137          } else {          } else {
2138            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
2139          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
2140        } else {        } else {
2141          if ({          if ({
2142               0x0022 => 1, # "               0x0022 => 1, # "
# Line 3077  sub _get_next_token ($) { Line 3192  sub _get_next_token ($) {
3192        
3193          redo A;          redo A;
3194        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3195            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3196              
3197              $self->{state} = DATA_STATE;
3198              $self->{s_kwd} = '';
3199            } else {
3200              
3201              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md def'); ## TODO: type
3202              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3203            }
3204                    
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3205                    
3206      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3207        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3091  sub _get_next_token ($) { Line 3213  sub _get_next_token ($) {
3213        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3214      }      }
3215        
3216            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         return  ($self->{ct}); # DOCTYPE  
   
3217          redo A;          redo A;
3218        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3219            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3220              
3221              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3222              $self->{state} = DATA_STATE;
3223              $self->{s_kwd} = '';
3224              $self->{ct}->{quirks} = 1;
3225            } else {
3226              
3227              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3228              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3229            }
3230                    
3231          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          ## Reconsume.
3232          $self->{state} = DATA_STATE;          return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{s_kwd} = '';  
         ## reconsume  
   
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3233          redo A;          redo A;
3234        } elsif ($self->{nc} == 0x0050 or # P        } elsif ($self->{nc} == 0x0050 or # P
3235                 $self->{nc} == 0x0070) { # p                 $self->{nc} == 0x0070) { # p
# Line 3140  sub _get_next_token ($) { Line 3265  sub _get_next_token ($) {
3265      }      }
3266        
3267          redo A;          redo A;
3268        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [        } elsif ($self->{nc} == 0x0022 and # "
3269                   ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN or
3270                    $self->{ct}->{type} == PARAMETER_ENTITY_TOKEN)) {
3271            
3272            $self->{state} = DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE;
3273            $self->{ct}->{value} = ''; # ENTITY
3274            
3275        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3276          $self->{line_prev} = $self->{line};
3277          $self->{column_prev} = $self->{column};
3278          $self->{column}++;
3279          $self->{nc}
3280              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3281        } else {
3282          $self->{set_nc}->($self);
3283        }
3284      
3285            redo A;
3286          } elsif ($self->{nc} == 0x0027 and # '
3287                   ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN or
3288                    $self->{ct}->{type} == PARAMETER_ENTITY_TOKEN)) {
3289            
3290            $self->{state} = DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE;
3291            $self->{ct}->{value} = ''; # ENTITY
3292            
3293        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3294          $self->{line_prev} = $self->{line};
3295          $self->{column_prev} = $self->{column};
3296          $self->{column}++;
3297          $self->{nc}
3298              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3299        } else {
3300          $self->{set_nc}->($self);
3301        }
3302      
3303            redo A;
3304          } elsif ($self->{is_xml} and
3305                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3306                   $self->{nc} == 0x005B) { # [
3307                    
3308          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3309          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
# Line 3159  sub _get_next_token ($) { Line 3322  sub _get_next_token ($) {
3322          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
3323          redo A;          redo A;
3324        } else {        } else {
3325                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name'); ## TODO: type
3326          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name');  
3327          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3328              
3329              $self->{ct}->{quirks} = 1;
3330              $self->{state} = BOGUS_DOCTYPE_STATE;
3331            } else {
3332              
3333              $self->{state} = BOGUS_MD_STATE;
3334            }
3335    
         $self->{state} = BOGUS_DOCTYPE_STATE;  
3336                    
3337      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3338        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3235  sub _get_next_token ($) { Line 3404  sub _get_next_token ($) {
3404        
3405          redo A;          redo A;
3406        } else {        } else {
3407                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name', ## TODO: type
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name',  
3408                          line => $self->{line_prev},                          line => $self->{line_prev},
3409                          column => $self->{column_prev} + 1 - length $self->{kwd});                          column => $self->{column_prev} + 1 - length $self->{kwd});
3410          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3411              
3412          $self->{state} = BOGUS_DOCTYPE_STATE;            $self->{ct}->{quirks} = 1;
3413              $self->{state} = BOGUS_DOCTYPE_STATE;
3414            } else {
3415              
3416              $self->{state} = BOGUS_MD_STATE;
3417            }
3418          ## Reconsume.          ## Reconsume.
3419          redo A;          redo A;
3420        }        }
# Line 3303  sub _get_next_token ($) { Line 3476  sub _get_next_token ($) {
3476        
3477          redo A;          redo A;
3478        } else {        } else {
3479                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name', ## TODO: type
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name',  
3480                          line => $self->{line_prev},                          line => $self->{line_prev},
3481                          column => $self->{column_prev} + 1 - length $self->{kwd});                          column => $self->{column_prev} + 1 - length $self->{kwd});
3482          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3483              
3484          $self->{state} = BOGUS_DOCTYPE_STATE;            $self->{ct}->{quirks} = 1;
3485              $self->{state} = BOGUS_DOCTYPE_STATE;
3486            } else {
3487              
3488              $self->{state} = BOGUS_MD_STATE;
3489            }
3490          ## Reconsume.          ## Reconsume.
3491          redo A;          redo A;
3492        }        }
# Line 3362  sub _get_next_token ($) { Line 3539  sub _get_next_token ($) {
3539        
3540          redo A;          redo A;
3541        } elsif ($self->{nc} eq 0x003E) { # >        } elsif ($self->{nc} eq 0x003E) { # >
           
3542          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');
3543            
3544          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3545          $self->{s_kwd} = '';            
3546              $self->{state} = DATA_STATE;
3547              $self->{s_kwd} = '';
3548              $self->{ct}->{quirks} = 1;
3549            } else {
3550              
3551              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3552            }
3553            
3554                    
3555      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3556        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3378  sub _get_next_token ($) { Line 3562  sub _get_next_token ($) {
3562        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3563      }      }
3564        
3565            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3566          redo A;          redo A;
3567        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3568            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3569              
3570              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3571              $self->{state} = DATA_STATE;
3572              $self->{s_kwd} = '';
3573              $self->{ct}->{quirks} = 1;
3574            } else {
3575              
3576              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3577              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3578            }
3579                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');  
   
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3580          ## reconsume          ## reconsume
   
         $self->{ct}->{quirks} = 1;  
3581          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
3582          redo A;          redo A;
3583        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [        } elsif ($self->{is_xml} and
3584                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3585                   $self->{nc} == 0x005B) { # [
3586                    
3587          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');
3588          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
# Line 3415  sub _get_next_token ($) { Line 3602  sub _get_next_token ($) {
3602          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
3603          redo A;          redo A;
3604        } else {        } else {
           
3605          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC');
         $self->{ct}->{quirks} = 1;  
3606    
3607          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3608              
3609              $self->{ct}->{quirks} = 1;
3610              $self->{state} = BOGUS_DOCTYPE_STATE;
3611            } else {
3612              
3613              $self->{state} = BOGUS_MD_STATE;
3614            }
3615    
3616                    
3617      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3618        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3450  sub _get_next_token ($) { Line 3643  sub _get_next_token ($) {
3643        
3644          redo A;          redo A;
3645        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
3646          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3647    
3648          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3649          $self->{s_kwd} = '';            
3650              $self->{state} = DATA_STATE;
3651              $self->{s_kwd} = '';
3652              $self->{ct}->{quirks} = 1;
3653            } else {
3654              
3655              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3656            }
3657    
3658                    
3659      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3660        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3466  sub _get_next_token ($) { Line 3666  sub _get_next_token ($) {
3666        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3667      }      }
3668        
3669            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3670          redo A;          redo A;
3671        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
3672          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3673    
3674          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3675          $self->{s_kwd} = '';            
3676          ## reconsume            $self->{state} = DATA_STATE;
3677              $self->{s_kwd} = '';
3678          $self->{ct}->{quirks} = 1;            $self->{ct}->{quirks} = 1;
3679            } else {
3680              
3681              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3682            }
3683            
3684            ## Reconsume.
3685          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
3686          redo A;          redo A;
3687        } else {        } else {
3688                    
3689          $self->{ct}->{pubid} # DOCTYPE          $self->{ct}->{pubid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
3690          $self->{read_until}->($self->{ct}->{pubid}, q[">],          $self->{read_until}->($self->{ct}->{pubid}, q[">],
3691                                length $self->{ct}->{pubid});                                length $self->{ct}->{pubid});
3692    
# Line 3521  sub _get_next_token ($) { Line 3721  sub _get_next_token ($) {
3721        
3722          redo A;          redo A;
3723        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
3724          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3725    
3726          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3727          $self->{s_kwd} = '';            
3728              $self->{state} = DATA_STATE;
3729              $self->{s_kwd} = '';
3730              $self->{ct}->{quirks} = 1;
3731            } else {
3732              
3733              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3734            }
3735    
3736                    
3737      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3738        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3537  sub _get_next_token ($) { Line 3744  sub _get_next_token ($) {
3744        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3745      }      }
3746        
3747            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3748          redo A;          redo A;
3749        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
3750          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3751    
3752          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3753          $self->{s_kwd} = '';            
3754              $self->{state} = DATA_STATE;
3755              $self->{s_kwd} = '';
3756              $self->{ct}->{quirks} = 1;
3757            } else {
3758              
3759              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3760            }
3761          
3762          ## reconsume          ## reconsume
3763            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3764          redo A;          redo A;
3765        } else {        } else {
3766                    
3767          $self->{ct}->{pubid} # DOCTYPE          $self->{ct}->{pubid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
3768          $self->{read_until}->($self->{ct}->{pubid}, q['>],          $self->{read_until}->($self->{ct}->{pubid}, q['>],
3769                                length $self->{ct}->{pubid});                                length $self->{ct}->{pubid});
3770    
# Line 3593  sub _get_next_token ($) { Line 3800  sub _get_next_token ($) {
3800          redo A;          redo A;
3801        } elsif ($self->{nc} == 0x0022) { # "        } elsif ($self->{nc} == 0x0022) { # "
3802                    
3803          $self->{ct}->{sysid} = ''; # DOCTYPE          $self->{ct}->{sysid} = ''; # DOCTYPE/ENTITY/NOTATION
3804          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
3805                    
3806      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3609  sub _get_next_token ($) { Line 3816  sub _get_next_token ($) {
3816          redo A;          redo A;
3817        } elsif ($self->{nc} == 0x0027) { # '        } elsif ($self->{nc} == 0x0027) { # '
3818                    
3819          $self->{ct}->{sysid} = ''; # DOCTYPE          $self->{ct}->{sysid} = ''; # DOCTYPE/ENTITY/NOTATION
3820          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
3821                    
3822      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3624  sub _get_next_token ($) { Line 3831  sub _get_next_token ($) {
3831        
3832          redo A;          redo A;
3833        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3834          if ($self->{is_xml}) {          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3835                        if ($self->{is_xml}) {
3836            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');              
3837                $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
3838              } else {
3839                
3840              }
3841              $self->{state} = DATA_STATE;
3842              $self->{s_kwd} = '';
3843          } else {          } else {
3844                        if ($self->{ct}->{type} == NOTATION_TOKEN) {
3845                
3846              } else {
3847                
3848                $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');            
3849              }
3850              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3851          }          }
3852          $self->{state} = DATA_STATE;          
         $self->{s_kwd} = '';  
3853                    
3854      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3855        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3643  sub _get_next_token ($) { Line 3861  sub _get_next_token ($) {
3861        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3862      }      }
3863        
3864            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         return  ($self->{ct}); # DOCTYPE  
   
3865          redo A;          redo A;
3866        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3867            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3868              
3869              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3870              
3871              $self->{state} = DATA_STATE;
3872              $self->{s_kwd} = '';
3873              $self->{ct}->{quirks} = 1;
3874            } else {
3875              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3876              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3877            }
3878                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');  
   
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3879          ## reconsume          ## reconsume
3880            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3881          redo A;          redo A;
3882        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [        } elsif ($self->{is_xml} and
3883                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3884                   $self->{nc} == 0x005B) { # [
3885                    
3886          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
3887          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
# Line 3679  sub _get_next_token ($) { Line 3901  sub _get_next_token ($) {
3901          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
3902          redo A;          redo A;
3903        } else {        } else {
           
3904          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC literal');
         $self->{ct}->{quirks} = 1;  
3905    
3906          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3907              
3908              $self->{ct}->{quirks} = 1;
3909              $self->{state} = BOGUS_DOCTYPE_STATE;
3910            } else {
3911              
3912              $self->{state} = BOGUS_MD_STATE;
3913            }
3914    
3915                    
3916      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3917        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3746  sub _get_next_token ($) { Line 3974  sub _get_next_token ($) {
3974        
3975          redo A;          redo A;
3976        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
3977          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3978                    
3979      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3980        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3762  sub _get_next_token ($) { Line 3987  sub _get_next_token ($) {
3987      }      }
3988        
3989    
3990          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3991          return  ($self->{ct}); # DOCTYPE            
3992              $self->{state} = DATA_STATE;
3993              $self->{s_kwd} = '';
3994              $self->{ct}->{quirks} = 1;
3995            } else {
3996              
3997              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3998            }
3999    
4000            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4001          redo A;          redo A;
4002        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4003            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4004              
4005              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
4006              $self->{state} = DATA_STATE;
4007              $self->{s_kwd} = '';
4008              $self->{ct}->{quirks} = 1;
4009            } else {
4010              
4011              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
4012              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4013            }
4014                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');  
   
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
4015          ## reconsume          ## reconsume
4016            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
4017          redo A;          redo A;
4018        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [        } elsif ($self->{is_xml} and
4019                   $self->{ct}->{type} == DOCTYPE_TOKEN and
4020                   $self->{nc} == 0x005B) { # [
4021                    
4022          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
4023    
# Line 3799  sub _get_next_token ($) { Line 4038  sub _get_next_token ($) {
4038          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
4039          redo A;          redo A;
4040        } else {        } else {
           
4041          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM');
         $self->{ct}->{quirks} = 1;  
4042    
4043          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4044                        
4045              $self->{ct}->{quirks} = 1;
4046              $self->{state} = BOGUS_DOCTYPE_STATE;
4047            } else {
4048              
4049              $self->{state} = BOGUS_MD_STATE;
4050            }
4051    
4052                    
4053      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4054        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3834  sub _get_next_token ($) { Line 4079  sub _get_next_token ($) {
4079        
4080          redo A;          redo A;
4081        } elsif (not $self->{is_xml} and $self->{nc} == 0x003E) { # >        } elsif (not $self->{is_xml} and $self->{nc} == 0x003E) { # >
           
4082          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4083    
4084          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4085          $self->{s_kwd} = '';            
4086              $self->{state} = DATA_STATE;
4087              $self->{s_kwd} = '';
4088              $self->{ct}->{quirks} = 1;
4089            } else {
4090              
4091              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4092            }
4093            
4094                    
4095      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4096        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3850  sub _get_next_token ($) { Line 4102  sub _get_next_token ($) {
4102        $self->{set_nc}->($self);        $self->{set_nc}->($self);
4103      }      }
4104        
4105            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
4106          redo A;          redo A;
4107        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
4108          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4109    
4110          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4111          $self->{s_kwd} = '';            
4112              $self->{state} = DATA_STATE;
4113              $self->{s_kwd} = '';
4114              $self->{ct}->{quirks} = 1;
4115            } else {
4116              
4117              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4118            }
4119            
4120          ## reconsume          ## reconsume
4121            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
4122          redo A;          redo A;
4123        } else {        } else {
4124                    
4125          $self->{ct}->{sysid} # DOCTYPE          $self->{ct}->{sysid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
4126          $self->{read_until}->($self->{ct}->{sysid}, q[">],          $self->{read_until}->($self->{ct}->{sysid}, q[">],
4127                                length $self->{ct}->{sysid});                                length $self->{ct}->{sysid});
4128    
# Line 3927  sub _get_next_token ($) { Line 4179  sub _get_next_token ($) {
4179    
4180          redo A;          redo A;
4181        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
4182          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4183    
4184          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4185          $self->{s_kwd} = '';            
4186          ## reconsume            $self->{state} = DATA_STATE;
4187              $self->{s_kwd} = '';
4188          $self->{ct}->{quirks} = 1;            $self->{ct}->{quirks} = 1;
4189          return  ($self->{ct}); # DOCTYPE          } else {
4190              
4191              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4192            }
4193    
4194            ## reconsume
4195            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4196          redo A;          redo A;
4197        } else {        } else {
4198                    
4199          $self->{ct}->{sysid} # DOCTYPE          $self->{ct}->{sysid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
4200          $self->{read_until}->($self->{ct}->{sysid}, q['>],          $self->{read_until}->($self->{ct}->{sysid}, q['>],
4201                                length $self->{ct}->{sysid});                                length $self->{ct}->{sysid});
4202    
# Line 3961  sub _get_next_token ($) { Line 4216  sub _get_next_token ($) {
4216        }        }
4217      } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {      } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
4218        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
4219                    if ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN) {
4220          ## Stay in the state            
4221              $self->{state} = BEFORE_NDATA_STATE;
4222            } else {
4223              
4224              ## Stay in the state
4225            }
4226                    
4227      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4228        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3976  sub _get_next_token ($) { Line 4236  sub _get_next_token ($) {
4236        
4237          redo A;          redo A;
4238        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
4239            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4240              
4241              $self->{state} = DATA_STATE;
4242              $self->{s_kwd} = '';
4243            } else {
4244              
4245              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4246            }
4247    
4248                    
4249          $self->{state} = DATA_STATE;      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4250          $self->{s_kwd} = '';        $self->{line_prev} = $self->{line};
4251          $self->{column_prev} = $self->{column};
4252          $self->{column}++;
4253          $self->{nc}
4254              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4255        } else {
4256          $self->{set_nc}->($self);
4257        }
4258      
4259            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4260            redo A;
4261          } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
4262                   ($self->{nc} == 0x004E or # N
4263                    $self->{nc} == 0x006E)) { # n
4264            
4265            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before NDATA'); ## TODO: type
4266            $self->{state} = NDATA_STATE;
4267            $self->{kwd} = chr $self->{nc};
4268                    
4269      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4270        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3990  sub _get_next_token ($) { Line 4276  sub _get_next_token ($) {
4276        $self->{set_nc}->($self);        $self->{set_nc}->($self);
4277      }      }
4278        
   
         return  ($self->{ct}); # DOCTYPE  
   
4279          redo A;          redo A;
4280        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4281                    if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4282          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');            
4283          $self->{state} = DATA_STATE;            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
4284          $self->{s_kwd} = '';            $self->{state} = DATA_STATE;
4285          ## reconsume            $self->{s_kwd} = '';
4286              $self->{ct}->{quirks} = 1;
4287          $self->{ct}->{quirks} = 1;          } else {
4288          return  ($self->{ct}); # DOCTYPE            
4289              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
4290              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4291            }
4292    
4293            ## reconsume
4294            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4295          redo A;          redo A;
4296        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [        } elsif ($self->{is_xml} and
4297                   $self->{ct}->{type} == DOCTYPE_TOKEN and
4298                   $self->{nc} == 0x005B) { # [
4299                    
4300          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4301          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
# Line 4024  sub _get_next_token ($) { Line 4314  sub _get_next_token ($) {
4314          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
4315          redo A;          redo A;
4316        } else {        } else {
           
4317          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');
         #$self->{ct}->{quirks} = 1;  
4318    
4319          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4320              
4321              #$self->{ct}->{quirks} = 1;
4322              $self->{state} = BOGUS_DOCTYPE_STATE;
4323            } else {
4324              
4325              $self->{state} = BOGUS_MD_STATE;
4326            }
4327    
4328            
4329        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4330          $self->{line_prev} = $self->{line};
4331          $self->{column_prev} = $self->{column};
4332          $self->{column}++;
4333          $self->{nc}
4334              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4335        } else {
4336          $self->{set_nc}->($self);
4337        }
4338      
4339            redo A;
4340          }
4341        } elsif ($self->{state} == BEFORE_NDATA_STATE) {
4342          if ($is_space->{$self->{nc}}) {
4343            
4344            ## Stay in the state.
4345            
4346        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4347          $self->{line_prev} = $self->{line};
4348          $self->{column_prev} = $self->{column};
4349          $self->{column}++;
4350          $self->{nc}
4351              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4352        } else {
4353          $self->{set_nc}->($self);
4354        }
4355      
4356            redo A;
4357          } elsif ($self->{nc} == 0x003E) { # >
4358            
4359            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4360            
4361        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4362          $self->{line_prev} = $self->{line};
4363          $self->{column_prev} = $self->{column};
4364          $self->{column}++;
4365          $self->{nc}
4366              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4367        } else {
4368          $self->{set_nc}->($self);
4369        }
4370      
4371            return  ($self->{ct}); # ENTITY
4372            redo A;
4373          } elsif ($self->{nc} == 0x004E or # N
4374                   $self->{nc} == 0x006E) { # n
4375            
4376            $self->{state} = NDATA_STATE;
4377            $self->{kwd} = chr $self->{nc};
4378            
4379        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4380          $self->{line_prev} = $self->{line};
4381          $self->{column_prev} = $self->{column};
4382          $self->{column}++;
4383          $self->{nc}
4384              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4385        } else {
4386          $self->{set_nc}->($self);
4387        }
4388      
4389            redo A;
4390          } elsif ($self->{nc} == -1) {
4391            
4392            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
4393            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4394            ## reconsume
4395            return  ($self->{ct}); # ENTITY
4396            redo A;
4397          } else {
4398            
4399            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');
4400            $self->{state} = BOGUS_MD_STATE;
4401                    
4402      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4403        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 5253  sub _get_next_token ($) { Line 5622  sub _get_next_token ($) {
5622      }      }
5623        
5624          redo A;          redo A;
5625        } elsif ($self->{nc} == 0x0045) { # E        } elsif ($self->{nc} == 0x0045 or # E
5626                   $self->{nc} == 0x0065) { # e
5627          $self->{state} = MD_E_STATE;          $self->{state} = MD_E_STATE;
5628          $self->{kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
5629                    
# Line 5268  sub _get_next_token ($) { Line 5638  sub _get_next_token ($) {
5638      }      }
5639        
5640          redo A;          redo A;
5641        } elsif ($self->{nc} == 0x0041) { # A        } elsif ($self->{nc} == 0x0041 or # A
5642                   $self->{nc} == 0x0061) { # a
5643          $self->{state} = MD_ATTLIST_STATE;          $self->{state} = MD_ATTLIST_STATE;
5644          $self->{kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
5645                    
# Line 5283  sub _get_next_token ($) { Line 5654  sub _get_next_token ($) {
5654      }      }
5655        
5656          redo A;          redo A;
5657        } elsif ($self->{nc} == 0x004E) { # N        } elsif ($self->{nc} == 0x004E or # N
5658                   $self->{nc} == 0x006E) { # n
5659          $self->{state} = MD_NOTATION_STATE;          $self->{state} = MD_NOTATION_STATE;
5660          $self->{kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
5661                    
# Line 5311  sub _get_next_token ($) { Line 5683  sub _get_next_token ($) {
5683        $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded.        $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded.
5684        redo A;        redo A;
5685      } elsif ($self->{state} == MD_E_STATE) {      } elsif ($self->{state} == MD_E_STATE) {
5686        if ($self->{nc} == 0x004E) { # N        if ($self->{nc} == 0x004E or # N
5687              $self->{nc} == 0x006E) { # n
5688          $self->{state} = MD_ENTITY_STATE;          $self->{state} = MD_ENTITY_STATE;
5689          $self->{kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
5690                    
# Line 5326  sub _get_next_token ($) { Line 5699  sub _get_next_token ($) {
5699      }      }
5700        
5701          redo A;          redo A;
5702        } elsif ($self->{nc} == 0x004C) { # L        } elsif ($self->{nc} == 0x004C or # L
5703                   $self->{nc} == 0x006C) { # l
5704          ## XML5: <!ELEMENT> not supported.          ## XML5: <!ELEMENT> not supported.
5705          $self->{state} = MD_ELEMENT_STATE;          $self->{state} = MD_ELEMENT_STATE;
5706          $self->{kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
# Line 5354  sub _get_next_token ($) { Line 5728  sub _get_next_token ($) {
5728          redo A;          redo A;
5729        }        }
5730      } elsif ($self->{state} == MD_ENTITY_STATE) {      } elsif ($self->{state} == MD_ENTITY_STATE) {
5731        if ($self->{nc} == {        if ($self->{nc} == [
5732              'EN' => 0x0054, # T              undef,
5733              'ENT' => 0x0049, # I              undef,
5734              'ENTI' => 0x0054, # T              0x0054, # T
5735            }->{$self->{kwd}}) {              0x0049, # I
5736                0x0054, # T
5737              ]->[length $self->{kwd}] or
5738              $self->{nc} == [
5739                undef,
5740                undef,
5741                0x0074, # t
5742                0x0069, # i
5743                0x0074, # t
5744              ]->[length $self->{kwd}]) {
5745          ## Stay in the state.          ## Stay in the state.
5746          $self->{kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
5747                    
# Line 5373  sub _get_next_token ($) { Line 5756  sub _get_next_token ($) {
5756      }      }
5757        
5758          redo A;          redo A;
5759        } elsif ($self->{kwd} eq 'ENTIT' and        } elsif ((length $self->{kwd}) == 5 and
5760                 $self->{nc} == 0x0059) { # Y                 ($self->{nc} == 0x0059 or # Y
5761          $self->{ct} = {type => GENERAL_ENTITY_TOKEN, name => '', text => '',                  $self->{nc} == 0x0079)) { # y
5762            if ($self->{kwd} ne 'ENTIT' or $self->{nc} == 0x0079) {
5763              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
5764                              text => 'ENTITY',
5765                              line => $self->{line_prev},
5766                              column => $self->{column_prev} - 4);
5767            }
5768            $self->{ct} = {type => GENERAL_ENTITY_TOKEN, name => '',
5769                         line => $self->{line_prev},                         line => $self->{line_prev},
5770                         column => $self->{column_prev} - 6};                         column => $self->{column_prev} - 6};
5771          $self->{state} = DOCTYPE_MD_STATE;          $self->{state} = DOCTYPE_MD_STATE;
# Line 5403  sub _get_next_token ($) { Line 5793  sub _get_next_token ($) {
5793          redo A;          redo A;
5794        }        }
5795      } elsif ($self->{state} == MD_ELEMENT_STATE) {      } elsif ($self->{state} == MD_ELEMENT_STATE) {
5796        if ($self->{nc} == {        if ($self->{nc} == [
5797              'EL' => 0x0045, # E             undef,
5798              'ELE' => 0x004D, # M             undef,
5799              'ELEM' => 0x0045, # E             0x0045, # E
5800              'ELEME' => 0x004E, # N             0x004D, # M
5801            }->{$self->{kwd}}) {             0x0045, # E
5802               0x004E, # N
5803              ]->[length $self->{kwd}] or
5804              $self->{nc} == [
5805               undef,
5806               undef,
5807               0x0065, # e
5808               0x006D, # m
5809               0x0065, # e
5810               0x006E, # n
5811              ]->[length $self->{kwd}]) {
5812          ## Stay in the state.          ## Stay in the state.
5813          $self->{kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
5814                    
# Line 5423  sub _get_next_token ($) { Line 5823  sub _get_next_token ($) {
5823      }      }
5824        
5825          redo A;          redo A;
5826        } elsif ($self->{kwd} eq 'ELEMEN' and        } elsif ((length $self->{kwd}) == 6 and
5827                 $self->{nc} == 0x0054) { # T                 ($self->{nc} == 0x0054 or # T
5828                    $self->{nc} == 0x0074)) { # t
5829            if ($self->{kwd} ne 'ELEMEN' or $self->{nc} == 0x0074) {
5830              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
5831                              text => 'ELEMENT',
5832                              line => $self->{line_prev},
5833                              column => $self->{column_prev} - 5);
5834            }
5835          $self->{ct} = {type => ELEMENT_TOKEN, name => '',          $self->{ct} = {type => ELEMENT_TOKEN, name => '',
5836                         line => $self->{line_prev},                         line => $self->{line_prev},
5837                         column => $self->{column_prev} - 6};                         column => $self->{column_prev} - 6};
# Line 5453  sub _get_next_token ($) { Line 5860  sub _get_next_token ($) {
5860          redo A;          redo A;
5861        }        }
5862      } elsif ($self->{state} == MD_ATTLIST_STATE) {      } elsif ($self->{state} == MD_ATTLIST_STATE) {
5863        if ($self->{nc} == {        if ($self->{nc} == [
5864              'A' => 0x0054, # T             undef,
5865              'AT' => 0x0054, # T             0x0054, # T
5866              'ATT' => 0x004C, # L             0x0054, # T
5867              'ATTL' => 0x0049, # I             0x004C, # L
5868              'ATTLI' => 0x0053, # S             0x0049, # I
5869            }->{$self->{kwd}}) {             0x0053, # S
5870              ]->[length $self->{kwd}] or
5871              $self->{nc} == [
5872               undef,
5873               0x0074, # t
5874               0x0074, # t
5875               0x006C, # l
5876               0x0069, # i
5877               0x0073, # s
5878              ]->[length $self->{kwd}]) {
5879          ## Stay in the state.          ## Stay in the state.
5880          $self->{kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
5881                    
# Line 5474  sub _get_next_token ($) { Line 5890  sub _get_next_token ($) {
5890      }      }
5891        
5892          redo A;          redo A;
5893        } elsif ($self->{kwd} eq 'ATTLIS' and        } elsif ((length $self->{kwd}) == 6 and
5894                 $self->{nc} == 0x0054) { # T                 ($self->{nc} == 0x0054 or # T
5895                    $self->{nc} == 0x0074)) { # t
5896            if ($self->{kwd} ne 'ATTLIS' or $self->{nc} == 0x0074) {
5897              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
5898                              text => 'ATTLIST',
5899                              line => $self->{line_prev},
5900                              column => $self->{column_prev} - 5);
5901            }
5902          $self->{ct} = {type => ATTLIST_TOKEN, name => '',          $self->{ct} = {type => ATTLIST_TOKEN, name => '',
5903                           attrdefs => [],
5904                         line => $self->{line_prev},                         line => $self->{line_prev},
5905                         column => $self->{column_prev} - 6};                         column => $self->{column_prev} - 6};
5906          $self->{state} = DOCTYPE_MD_STATE;          $self->{state} = DOCTYPE_MD_STATE;
# Line 5504  sub _get_next_token ($) { Line 5928  sub _get_next_token ($) {
5928          redo A;          redo A;
5929        }        }
5930      } elsif ($self->{state} == MD_NOTATION_STATE) {      } elsif ($self->{state} == MD_NOTATION_STATE) {
5931        if ($self->{nc} == {        if ($self->{nc} == [
5932              'N' => 0x004F, # O             undef,
5933              'NO' => 0x0054, # T             0x004F, # O
5934              'NOT' => 0x0041, # A             0x0054, # T
5935              'NOTA' => 0x0054, # T             0x0041, # A
5936              'NOTAT' => 0x0049, # I             0x0054, # T
5937              'NOTATI' => 0x004F, # O             0x0049, # I
5938            }->{$self->{kwd}}) {             0x004F, # O
5939              ]->[length $self->{kwd}] or
5940              $self->{nc} == [
5941               undef,
5942               0x006F, # o
5943               0x0074, # t
5944               0x0061, # a
5945               0x0074, # t
5946               0x0069, # i
5947               0x006F, # o
5948              ]->[length $self->{kwd}]) {
5949          ## Stay in the state.          ## Stay in the state.
5950          $self->{kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
5951                    
# Line 5526  sub _get_next_token ($) { Line 5960  sub _get_next_token ($) {
5960      }      }
5961        
5962          redo A;          redo A;
5963        } elsif ($self->{kwd} eq 'NOTATIO' and        } elsif ((length $self->{kwd}) == 7 and
5964                 $self->{nc} == 0x004E) { # N                 ($self->{nc} == 0x004E or # N
5965                    $self->{nc} == 0x006E)) { # n
5966            if ($self->{kwd} ne 'NOTATIO' or $self->{nc} == 0x006E) {
5967              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
5968                              text => 'NOTATION',
5969                              line => $self->{line_prev},
5970                              column => $self->{column_prev} - 6);
5971            }
5972          $self->{ct} = {type => NOTATION_TOKEN, name => '',          $self->{ct} = {type => NOTATION_TOKEN, name => '',
5973                         line => $self->{line_prev},                         line => $self->{line_prev},
5974                         column => $self->{column_prev} - 6};                         column => $self->{column_prev} - 6};
# Line 5739  sub _get_next_token ($) { Line 6180  sub _get_next_token ($) {
6180        ## XML5: "DOCTYPE ENTITY name state" and "DOCTYPE ATTLIST name state".        ## XML5: "DOCTYPE ENTITY name state" and "DOCTYPE ATTLIST name state".
6181                
6182        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
6183          ## TODO:          if ($self->{ct}->{type} == ATTLIST_TOKEN) {
6184          $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
6185            } elsif ($self->{ct}->{type} == ELEMENT_TOKEN) {
6186              ## TODO: ...
6187              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
6188            } else { # ENTITY/NOTATION
6189              $self->{state} = AFTER_DOCTYPE_NAME_STATE;
6190            }
6191                    
6192      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6193        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 5757  sub _get_next_token ($) { Line 6204  sub _get_next_token ($) {
6204          if ($self->{ct}->{type} == ATTLIST_TOKEN) {          if ($self->{ct}->{type} == ATTLIST_TOKEN) {
6205            #            #
6206          } else {          } else {
6207            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md body'); ## TODO: type            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md def'); ## TODO: type
6208          }          }
6209          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6210                    
# Line 5831  sub _get_next_token ($) { Line 6278  sub _get_next_token ($) {
6278          ## XML5: No parse error.          ## XML5: No parse error.
6279          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6280          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6281            return  ($self->{ct});
6282            redo A;
6283          } else {
6284            ## XML5: Not defined yet.
6285            $self->{ca} = {name => chr ($self->{nc}), # attrdef
6286                           tokens => [],
6287                           line => $self->{line}, column => $self->{column}};
6288            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE;
6289            
6290        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6291          $self->{line_prev} = $self->{line};
6292          $self->{column_prev} = $self->{column};
6293          $self->{column}++;
6294          $self->{nc}
6295              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6296        } else {
6297          $self->{set_nc}->($self);
6298        }
6299      
6300            redo A;
6301          }
6302        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE) {
6303          if ($is_space->{$self->{nc}}) {
6304            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE;
6305            
6306        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6307          $self->{line_prev} = $self->{line};
6308          $self->{column_prev} = $self->{column};
6309          $self->{column}++;
6310          $self->{nc}
6311              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6312        } else {
6313          $self->{set_nc}->($self);
6314        }
6315      
6316            redo A;
6317          } elsif ($self->{nc} == 0x003E) { # >
6318            ## XML5: Same as "anything else".
6319            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr type'); ## TODO: type
6320            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6321            
6322        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6323          $self->{line_prev} = $self->{line};
6324          $self->{column_prev} = $self->{column};
6325          $self->{column}++;
6326          $self->{nc}
6327              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6328        } else {
6329          $self->{set_nc}->($self);
6330        }
6331      
6332            return  ($self->{ct}); # ATTLIST
6333            redo A;
6334          } elsif ($self->{nc} == 0x0028) { # (
6335            ## XML5: Same as "anything else".
6336            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before paren'); ## TODO: type
6337            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6338            
6339        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6340          $self->{line_prev} = $self->{line};
6341          $self->{column_prev} = $self->{column};
6342          $self->{column}++;
6343          $self->{nc}
6344              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6345        } else {
6346          $self->{set_nc}->($self);
6347        }
6348      
6349            redo A;
6350          } elsif ($self->{nc} == -1) {
6351            ## XML5: No parse error.
6352            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6353            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6354            
6355        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6356          $self->{line_prev} = $self->{line};
6357          $self->{column_prev} = $self->{column};
6358          $self->{column}++;
6359          $self->{nc}
6360              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6361        } else {
6362          $self->{set_nc}->($self);
6363        }
6364      
6365            return  ($self->{ct}); # ATTLIST
6366            redo A;
6367          } else {
6368            ## XML5: Not defined yet.
6369            $self->{ca}->{name} .= chr $self->{nc};
6370            ## Stay in the state.
6371            
6372        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6373          $self->{line_prev} = $self->{line};
6374          $self->{column_prev} = $self->{column};
6375          $self->{column}++;
6376          $self->{nc}
6377              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6378        } else {
6379          $self->{set_nc}->($self);
6380        }
6381      
6382            redo A;
6383          }
6384        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE) {
6385          if ($is_space->{$self->{nc}}) {
6386            ## Stay in the state.
6387            
6388        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6389          $self->{line_prev} = $self->{line};
6390          $self->{column_prev} = $self->{column};
6391          $self->{column}++;
6392          $self->{nc}
6393              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6394        } else {
6395          $self->{set_nc}->($self);
6396        }
6397      
6398            redo A;
6399          } elsif ($self->{nc} == 0x003E) { # >
6400            ## XML5: Same as "anything else".
6401            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr type'); ## TODO: type
6402            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6403            
6404        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6405          $self->{line_prev} = $self->{line};
6406          $self->{column_prev} = $self->{column};
6407          $self->{column}++;
6408          $self->{nc}
6409              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6410        } else {
6411          $self->{set_nc}->($self);
6412        }
6413      
6414            return  ($self->{ct}); # ATTLIST
6415            redo A;
6416          } elsif ($self->{nc} == 0x0028) { # (
6417            ## XML5: Same as "anything else".
6418            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6419            
6420        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6421          $self->{line_prev} = $self->{line};
6422          $self->{column_prev} = $self->{column};
6423          $self->{column}++;
6424          $self->{nc}
6425              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6426        } else {
6427          $self->{set_nc}->($self);
6428        }
6429      
6430            redo A;
6431          } elsif ($self->{nc} == -1) {
6432            ## XML5: No parse error.
6433            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6434            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6435            
6436        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6437          $self->{line_prev} = $self->{line};
6438          $self->{column_prev} = $self->{column};
6439          $self->{column}++;
6440          $self->{nc}
6441              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6442        } else {
6443          $self->{set_nc}->($self);
6444        }
6445      
6446            return  ($self->{ct});
6447            redo A;
6448          } else {
6449            ## XML5: Not defined yet.
6450            $self->{ca}->{type} = chr $self->{nc};
6451            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE;
6452            
6453        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6454          $self->{line_prev} = $self->{line};
6455          $self->{column_prev} = $self->{column};
6456          $self->{column}++;
6457          $self->{nc}
6458              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6459        } else {
6460          $self->{set_nc}->($self);
6461        }
6462      
6463            redo A;
6464          }
6465        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE) {
6466          if ($is_space->{$self->{nc}}) {
6467            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE;
6468            
6469        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6470          $self->{line_prev} = $self->{line};
6471          $self->{column_prev} = $self->{column};
6472          $self->{column}++;
6473          $self->{nc}
6474              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6475        } else {
6476          $self->{set_nc}->($self);
6477        }
6478      
6479            redo A;
6480          } elsif ($self->{nc} == 0x0023) { # #
6481            ## XML5: Same as "anything else".
6482            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6483            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6484            
6485        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6486          $self->{line_prev} = $self->{line};
6487          $self->{column_prev} = $self->{column};
6488          $self->{column}++;
6489          $self->{nc}
6490              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6491        } else {
6492          $self->{set_nc}->($self);
6493        }
6494      
6495            redo A;
6496          } elsif ($self->{nc} == 0x0022) { # "
6497            ## XML5: Same as "anything else".
6498            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6499            $self->{ca}->{value} = '';
6500            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6501            
6502        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6503          $self->{line_prev} = $self->{line};
6504          $self->{column_prev} = $self->{column};
6505          $self->{column}++;
6506          $self->{nc}
6507              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6508        } else {
6509          $self->{set_nc}->($self);
6510        }
6511      
6512            redo A;
6513          } elsif ($self->{nc} == 0x0027) { # '
6514            ## XML5: Same as "anything else".
6515            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6516            $self->{ca}->{value} = '';
6517            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6518            
6519        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6520          $self->{line_prev} = $self->{line};
6521          $self->{column_prev} = $self->{column};
6522          $self->{column}++;
6523          $self->{nc}
6524              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6525        } else {
6526          $self->{set_nc}->($self);
6527        }
6528      
6529            redo A;
6530          } elsif ($self->{nc} == 0x003E) { # >
6531            ## XML5: Same as "anything else".
6532            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6533            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6534            
6535        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6536          $self->{line_prev} = $self->{line};
6537          $self->{column_prev} = $self->{column};
6538          $self->{column}++;
6539          $self->{nc}
6540              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6541        } else {
6542          $self->{set_nc}->($self);
6543        }
6544      
6545            return  ($self->{ct}); # ATTLIST
6546            redo A;
6547          } elsif ($self->{nc} == 0x0028) { # (
6548            ## XML5: Same as "anything else".
6549            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before paren'); ## TODO: type
6550            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6551            
6552        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6553          $self->{line_prev} = $self->{line};
6554          $self->{column_prev} = $self->{column};
6555          $self->{column}++;
6556          $self->{nc}
6557              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6558        } else {
6559          $self->{set_nc}->($self);
6560        }
6561      
6562            redo A;
6563          } elsif ($self->{nc} == -1) {
6564            ## XML5: No parse error.
6565            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6566            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6567            
6568        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6569          $self->{line_prev} = $self->{line};
6570          $self->{column_prev} = $self->{column};
6571          $self->{column}++;
6572          $self->{nc}
6573              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6574        } else {
6575          $self->{set_nc}->($self);
6576        }
6577      
6578            return  ($self->{ct});
6579            redo A;
6580          } else {
6581            ## XML5: Not defined yet.
6582            $self->{ca}->{type} .= chr $self->{nc};
6583            ## Stay in the state.
6584            
6585        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6586          $self->{line_prev} = $self->{line};
6587          $self->{column_prev} = $self->{column};
6588          $self->{column}++;
6589          $self->{nc}
6590              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6591        } else {
6592          $self->{set_nc}->($self);
6593        }
6594      
6595            redo A;
6596          }
6597        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE) {
6598          if ($is_space->{$self->{nc}}) {
6599            ## Stay in the state.
6600            
6601        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6602          $self->{line_prev} = $self->{line};
6603          $self->{column_prev} = $self->{column};
6604          $self->{column}++;
6605          $self->{nc}
6606              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6607        } else {
6608          $self->{set_nc}->($self);
6609        }
6610      
6611            redo A;
6612          } elsif ($self->{nc} == 0x0028) { # (
6613            ## XML5: Same as "anything else".
6614            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6615            
6616        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6617          $self->{line_prev} = $self->{line};
6618          $self->{column_prev} = $self->{column};
6619          $self->{column}++;
6620          $self->{nc}
6621              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6622        } else {
6623          $self->{set_nc}->($self);
6624        }
6625      
6626            redo A;
6627          } elsif ($self->{nc} == 0x0023) { # #
6628            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6629            
6630        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6631          $self->{line_prev} = $self->{line};
6632          $self->{column_prev} = $self->{column};
6633          $self->{column}++;
6634          $self->{nc}
6635              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6636        } else {
6637          $self->{set_nc}->($self);
6638        }
6639      
6640            redo A;
6641          } elsif ($self->{nc} == 0x0022) { # "
6642            ## XML5: Same as "anything else".
6643            $self->{ca}->{value} = '';
6644            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6645            
6646        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6647          $self->{line_prev} = $self->{line};
6648          $self->{column_prev} = $self->{column};
6649          $self->{column}++;
6650          $self->{nc}
6651              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6652        } else {
6653          $self->{set_nc}->($self);
6654        }
6655      
6656            redo A;
6657          } elsif ($self->{nc} == 0x0027) { # '
6658            ## XML5: Same as "anything else".
6659            $self->{ca}->{value} = '';
6660            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6661            
6662        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6663          $self->{line_prev} = $self->{line};
6664          $self->{column_prev} = $self->{column};
6665          $self->{column}++;
6666          $self->{nc}
6667              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6668        } else {
6669          $self->{set_nc}->($self);
6670        }
6671      
6672            redo A;
6673          } elsif ($self->{nc} == 0x003E) { # >
6674            ## XML5: Same as "anything else".
6675            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6676            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6677            
6678        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6679          $self->{line_prev} = $self->{line};
6680          $self->{column_prev} = $self->{column};
6681          $self->{column}++;
6682          $self->{nc}
6683              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6684        } else {
6685          $self->{set_nc}->($self);
6686        }
6687      
6688            return  ($self->{ct}); # ATTLIST
6689            redo A;
6690          } elsif ($self->{nc} == -1) {
6691            ## XML5: No parse error.
6692            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6693            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6694            
6695        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6696          $self->{line_prev} = $self->{line};
6697          $self->{column_prev} = $self->{column};
6698          $self->{column}++;
6699          $self->{nc}
6700              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6701        } else {
6702          $self->{set_nc}->($self);
6703        }
6704      
6705            return  ($self->{ct});
6706            redo A;
6707          } else {
6708            ## XML5: Switch to the "DOCTYPE bogus comment state".
6709            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
6710            $self->{ca}->{value} = '';
6711            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
6712            ## Reconsume.
6713            redo A;
6714          }
6715        } elsif ($self->{state} == BEFORE_ALLOWED_TOKEN_STATE) {
6716          if ($is_space->{$self->{nc}}) {
6717            ## Stay in the state.
6718            
6719        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6720          $self->{line_prev} = $self->{line};
6721          $self->{column_prev} = $self->{column};
6722          $self->{column}++;
6723          $self->{nc}
6724              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6725        } else {
6726          $self->{set_nc}->($self);
6727        }
6728      
6729            redo A;
6730          } elsif ($self->{nc} == 0x007C) { # |
6731            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty allowed token'); ## TODO: type
6732            ## Stay in the state.
6733            
6734        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6735          $self->{line_prev} = $self->{line};
6736          $self->{column_prev} = $self->{column};
6737          $self->{column}++;
6738          $self->{nc}
6739              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6740        } else {
6741          $self->{set_nc}->($self);
6742        }
6743      
6744            redo A;
6745          } elsif ($self->{nc} == 0x0029) { # )
6746            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty allowed token'); ## TODO: type
6747            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
6748            
6749        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6750          $self->{line_prev} = $self->{line};
6751          $self->{column_prev} = $self->{column};
6752          $self->{column}++;
6753          $self->{nc}
6754              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6755        } else {
6756          $self->{set_nc}->($self);
6757        }
6758      
6759            redo A;
6760          } elsif ($self->{nc} == 0x003E) { # >
6761            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
6762            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6763            
6764        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6765          $self->{line_prev} = $self->{line};
6766          $self->{column_prev} = $self->{column};
6767          $self->{column}++;
6768          $self->{nc}
6769              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6770        } else {
6771          $self->{set_nc}->($self);
6772        }
6773      
6774            return  ($self->{ct}); # ATTLIST
6775            redo A;
6776          } elsif ($self->{nc} == -1) {
6777            ## XML5: No parse error.
6778            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6779            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6780            
6781        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6782          $self->{line_prev} = $self->{line};
6783          $self->{column_prev} = $self->{column};
6784          $self->{column}++;
6785          $self->{nc}
6786              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6787        } else {
6788          $self->{set_nc}->($self);
6789        }
6790      
6791            return  ($self->{ct});
6792            redo A;
6793          } else {
6794            push @{$self->{ca}->{tokens}}, chr $self->{nc};
6795            $self->{state} = ALLOWED_TOKEN_STATE;
6796            
6797        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6798          $self->{line_prev} = $self->{line};
6799          $self->{column_prev} = $self->{column};
6800          $self->{column}++;
6801          $self->{nc}
6802              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6803        } else {
6804          $self->{set_nc}->($self);
6805        }
6806      
6807            redo A;
6808          }
6809        } elsif ($self->{state} == ALLOWED_TOKEN_STATE) {
6810          if ($is_space->{$self->{nc}}) {
6811            $self->{state} = AFTER_ALLOWED_TOKEN_STATE;
6812            
6813        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6814          $self->{line_prev} = $self->{line};
6815          $self->{column_prev} = $self->{column};
6816          $self->{column}++;
6817          $self->{nc}
6818              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6819        } else {
6820          $self->{set_nc}->($self);
6821        }
6822      
6823            redo A;
6824          } elsif ($self->{nc} == 0x007C) { # |
6825            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6826            
6827        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6828          $self->{line_prev} = $self->{line};
6829          $self->{column_prev} = $self->{column};
6830          $self->{column}++;
6831          $self->{nc}
6832              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6833        } else {
6834          $self->{set_nc}->($self);
6835        }
6836      
6837            redo A;
6838          } elsif ($self->{nc} == 0x0029) { # )
6839            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
6840            
6841        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6842          $self->{line_prev} = $self->{line};
6843          $self->{column_prev} = $self->{column};
6844          $self->{column}++;
6845          $self->{nc}
6846              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6847        } else {
6848          $self->{set_nc}->($self);
6849        }
6850      
6851            redo A;
6852          } elsif ($self->{nc} == 0x003E) { # >
6853            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
6854            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6855            
6856        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6857          $self->{line_prev} = $self->{line};
6858          $self->{column_prev} = $self->{column};
6859          $self->{column}++;
6860          $self->{nc}
6861              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6862        } else {
6863          $self->{set_nc}->($self);
6864        }
6865      
6866            return  ($self->{ct}); # ATTLIST
6867            redo A;
6868          } elsif ($self->{nc} == -1) {
6869            ## XML5: No parse error.
6870            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6871            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6872            
6873        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6874          $self->{line_prev} = $self->{line};
6875          $self->{column_prev} = $self->{column};
6876          $self->{column}++;
6877          $self->{nc}
6878              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6879        } else {
6880          $self->{set_nc}->($self);
6881        }
6882      
6883            return  ($self->{ct});
6884            redo A;
6885          } else {
6886            $self->{ca}->{tokens}->[-1] .= chr $self->{nc};
6887            ## Stay in the state.
6888            
6889        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6890          $self->{line_prev} = $self->{line};
6891          $self->{column_prev} = $self->{column};
6892          $self->{column}++;
6893          $self->{nc}
6894              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6895        } else {
6896          $self->{set_nc}->($self);
6897        }
6898      
6899            redo A;
6900          }
6901        } elsif ($self->{state} == AFTER_ALLOWED_TOKEN_STATE) {
6902          if ($is_space->{$self->{nc}}) {
6903            ## Stay in the state.
6904            
6905        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6906          $self->{line_prev} = $self->{line};
6907          $self->{column_prev} = $self->{column};
6908          $self->{column}++;
6909          $self->{nc}
6910              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6911        } else {
6912          $self->{set_nc}->($self);
6913        }
6914      
6915            redo A;
6916          } elsif ($self->{nc} == 0x007C) { # |
6917            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6918            
6919        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6920          $self->{line_prev} = $self->{line};
6921          $self->{column_prev} = $self->{column};
6922          $self->{column}++;
6923          $self->{nc}
6924              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6925        } else {
6926          $self->{set_nc}->($self);
6927        }
6928      
6929            redo A;
6930          } elsif ($self->{nc} == 0x0029) { # )
6931            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
6932            
6933        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6934          $self->{line_prev} = $self->{line};
6935          $self->{column_prev} = $self->{column};
6936          $self->{column}++;
6937          $self->{nc}
6938              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6939        } else {
6940          $self->{set_nc}->($self);
6941        }
6942      
6943            redo A;
6944          } elsif ($self->{nc} == 0x003E) { # >
6945            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
6946            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6947            
6948        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6949          $self->{line_prev} = $self->{line};
6950          $self->{column_prev} = $self->{column};
6951          $self->{column}++;
6952          $self->{nc}
6953              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6954        } else {
6955          $self->{set_nc}->($self);
6956        }
6957      
6958            return  ($self->{ct}); # ATTLIST
6959            redo A;
6960          } elsif ($self->{nc} == -1) {
6961            ## XML5: No parse error.
6962            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6963            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6964            
6965        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6966          $self->{line_prev} = $self->{line};
6967          $self->{column_prev} = $self->{column};
6968          $self->{column}++;
6969          $self->{nc}
6970              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6971        } else {
6972          $self->{set_nc}->($self);
6973        }
6974      
6975            return  ($self->{ct});
6976            redo A;
6977          } else {
6978            $self->{parse_error}->(level => $self->{level}->{must}, type => 'space in allowed token', ## TODO: type
6979                            line => $self->{line_prev},
6980                            column => $self->{column_prev});
6981            $self->{ca}->{tokens}->[-1] .= ' ' . chr $self->{nc};
6982            $self->{state} = ALLOWED_TOKEN_STATE;
6983            
6984        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6985          $self->{line_prev} = $self->{line};
6986          $self->{column_prev} = $self->{column};
6987          $self->{column}++;
6988          $self->{nc}
6989              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6990        } else {
6991          $self->{set_nc}->($self);
6992        }
6993      
6994            redo A;
6995          }
6996        } elsif ($self->{state} == AFTER_ALLOWED_TOKENS_STATE) {
6997          if ($is_space->{$self->{nc}}) {
6998            $self->{state} = BEFORE_ATTR_DEFAULT_STATE;
6999            
7000        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7001          $self->{line_prev} = $self->{line};
7002          $self->{column_prev} = $self->{column};
7003          $self->{column}++;
7004          $self->{nc}
7005              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7006        } else {
7007          $self->{set_nc}->($self);
7008        }
7009      
7010            redo A;
7011          } elsif ($self->{nc} == 0x0023) { # #
7012            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7013            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
7014            
7015        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7016          $self->{line_prev} = $self->{line};
7017          $self->{column_prev} = $self->{column};
7018          $self->{column}++;
7019          $self->{nc}
7020              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7021        } else {
7022          $self->{set_nc}->($self);
7023        }
7024      
7025            redo A;
7026          } elsif ($self->{nc} == 0x0022) { # "
7027            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7028            $self->{ca}->{value} = '';
7029            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7030            
7031        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7032          $self->{line_prev} = $self->{line};
7033          $self->{column_prev} = $self->{column};
7034          $self->{column}++;
7035          $self->{nc}
7036              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7037        } else {
7038          $self->{set_nc}->($self);
7039        }
7040      
7041            redo A;
7042          } elsif ($self->{nc} == 0x0027) { # '
7043            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7044            $self->{ca}->{value} = '';
7045            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7046            
7047        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7048          $self->{line_prev} = $self->{line};
7049          $self->{column_prev} = $self->{column};
7050          $self->{column}++;
7051          $self->{nc}
7052              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7053        } else {
7054          $self->{set_nc}->($self);
7055        }
7056      
7057            redo A;
7058          } elsif ($self->{nc} == 0x003E) { # >
7059            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
7060            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7061            
7062        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7063          $self->{line_prev} = $self->{line};
7064          $self->{column_prev} = $self->{column};
7065          $self->{column}++;
7066          $self->{nc}
7067              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7068        } else {
7069          $self->{set_nc}->($self);
7070        }
7071      
7072            return  ($self->{ct}); # ATTLIST
7073            redo A;
7074          } elsif ($self->{nc} == -1) {
7075            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7076            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7077            
7078        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7079          $self->{line_prev} = $self->{line};
7080          $self->{column_prev} = $self->{column};
7081          $self->{column}++;
7082          $self->{nc}
7083              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7084        } else {
7085          $self->{set_nc}->($self);
7086        }
7087      
7088            return  ($self->{ct});
7089            redo A;
7090          } else {
7091            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
7092            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
7093            ## Reconsume.
7094            redo A;
7095          }
7096        } elsif ($self->{state} == BEFORE_ATTR_DEFAULT_STATE) {
7097          if ($is_space->{$self->{nc}}) {
7098            ## Stay in the state.
7099            
7100        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7101          $self->{line_prev} = $self->{line};
7102          $self->{column_prev} = $self->{column};
7103          $self->{column}++;
7104          $self->{nc}
7105              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7106        } else {
7107          $self->{set_nc}->($self);
7108        }
7109      
7110            redo A;
7111          } elsif ($self->{nc} == 0x0023) { # #
7112            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
7113            
7114        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7115          $self->{line_prev} = $self->{line};
7116          $self->{column_prev} = $self->{column};
7117          $self->{column}++;
7118          $self->{nc}
7119              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7120        } else {
7121          $self->{set_nc}->($self);
7122        }
7123      
7124            redo A;
7125          } elsif ($self->{nc} == 0x0022) { # "
7126            $self->{ca}->{value} = '';
7127            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7128            
7129        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7130          $self->{line_prev} = $self->{line};
7131          $self->{column_prev} = $self->{column};
7132          $self->{column}++;
7133          $self->{nc}
7134              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7135        } else {
7136          $self->{set_nc}->($self);
7137        }
7138      
7139            redo A;
7140          } elsif ($self->{nc} == 0x0027) { # '
7141            $self->{ca}->{value} = '';
7142            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7143            
7144        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7145          $self->{line_prev} = $self->{line};
7146          $self->{column_prev} = $self->{column};
7147          $self->{column}++;
7148          $self->{nc}
7149              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7150        } else {
7151          $self->{set_nc}->($self);
7152        }
7153      
7154            redo A;
7155          } elsif ($self->{nc} == 0x003E) { # >
7156            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
7157            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7158            
7159        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7160          $self->{line_prev} = $self->{line};
7161          $self->{column_prev} = $self->{column};
7162          $self->{column}++;
7163          $self->{nc}
7164              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7165        } else {
7166          $self->{set_nc}->($self);
7167        }
7168      
7169            return  ($self->{ct}); # ATTLIST
7170            redo A;
7171          } elsif ($self->{nc} == -1) {
7172            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7173            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7174            
7175        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7176          $self->{line_prev} = $self->{line};
7177          $self->{column_prev} = $self->{column};
7178          $self->{column}++;
7179          $self->{nc}
7180              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7181        } else {
7182          $self->{set_nc}->($self);
7183        }
7184      
7185            return  ($self->{ct});
7186            redo A;
7187          } else {
7188            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
7189            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
7190            ## Reconsume.
7191            redo A;
7192          }
7193        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE) {
7194          if ($is_space->{$self->{nc}}) {
7195            ## XML5: No parse error.
7196            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no default type'); ## TODO: type
7197            $self->{state} = BOGUS_MD_STATE;
7198            ## Reconsume.
7199            redo A;
7200          } elsif ($self->{nc} == 0x0022) { # "
7201            ## XML5: Same as "anything else".
7202            $self->{ca}->{value} = '';
7203            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7204            
7205        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7206          $self->{line_prev} = $self->{line};
7207          $self->{column_prev} = $self->{column};
7208          $self->{column}++;
7209          $self->{nc}
7210              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7211        } else {
7212          $self->{set_nc}->($self);
7213        }
7214      
7215            redo A;
7216          } elsif ($self->{nc} == 0x0027) { # '
7217            ## XML5: Same as "anything else".
7218            $self->{ca}->{value} = '';
7219            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7220            
7221        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7222          $self->{line_prev} = $self->{line};
7223          $self->{column_prev} = $self->{column};
7224          $self->{column}++;
7225          $self->{nc}
7226              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7227        } else {
7228          $self->{set_nc}->($self);
7229        }
7230      
7231            redo A;
7232          } elsif ($self->{nc} == 0x003E) { # >
7233            ## XML5: Same as "anything else".
7234            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
7235            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7236            
7237        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7238          $self->{line_prev} = $self->{line};
7239          $self->{column_prev} = $self->{column};
7240          $self->{column}++;
7241          $self->{nc}
7242              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7243        } else {
7244          $self->{set_nc}->($self);
7245        }
7246      
7247            return  ($self->{ct}); # ATTLIST
7248            redo A;
7249          } elsif ($self->{nc} == -1) {
7250            ## XML5: No parse error.
7251            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7252            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7253            
7254        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7255          $self->{line_prev} = $self->{line};
7256          $self->{column_prev} = $self->{column};
7257          $self->{column}++;
7258          $self->{nc}
7259              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7260        } else {
7261          $self->{set_nc}->($self);
7262        }
7263      
7264            return  ($self->{ct});
7265            redo A;
7266          } else {
7267            $self->{ca}->{default} = chr $self->{nc};
7268            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE;
7269            
7270        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7271          $self->{line_prev} = $self->{line};
7272          $self->{column_prev} = $self->{column};
7273          $self->{column}++;
7274          $self->{nc}
7275              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7276        } else {
7277          $self->{set_nc}->($self);
7278        }
7279      
7280            redo A;
7281          }
7282        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE) {
7283          if ($is_space->{$self->{nc}}) {
7284            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE;
7285            
7286        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7287          $self->{line_prev} = $self->{line};
7288          $self->{column_prev} = $self->{column};
7289          $self->{column}++;
7290          $self->{nc}
7291              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7292        } else {
7293          $self->{set_nc}->($self);
7294        }
7295      
7296            redo A;
7297          } elsif ($self->{nc} == 0x0022) { # "
7298            ## XML5: Same as "anything else".
7299            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7300            $self->{ca}->{value} = '';
7301            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7302            
7303        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7304          $self->{line_prev} = $self->{line};
7305          $self->{column_prev} = $self->{column};
7306          $self->{column}++;
7307          $self->{nc}
7308              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7309        } else {
7310          $self->{set_nc}->($self);
7311        }
7312      
7313            redo A;
7314          } elsif ($self->{nc} == 0x0027) { # '
7315            ## XML5: Same as "anything else".
7316            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7317            $self->{ca}->{value} = '';
7318            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7319            
7320        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7321          $self->{line_prev} = $self->{line};
7322          $self->{column_prev} = $self->{column};
7323          $self->{column}++;
7324          $self->{nc}
7325              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7326        } else {
7327          $self->{set_nc}->($self);
7328        }
7329      
7330            redo A;
7331          } elsif ($self->{nc} == 0x003E) { # >
7332            ## XML5: Same as "anything else".
7333            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7334            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7335            
7336        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7337          $self->{line_prev} = $self->{line};
7338          $self->{column_prev} = $self->{column};
7339          $self->{column}++;
7340          $self->{nc}
7341              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7342        } else {
7343          $self->{set_nc}->($self);
7344        }
7345      
7346            return  ($self->{ct}); # ATTLIST
7347            redo A;
7348          } elsif ($self->{nc} == -1) {
7349            ## XML5: No parse error.
7350            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7351            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7352            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7353            
7354        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7355          $self->{line_prev} = $self->{line};
7356          $self->{column_prev} = $self->{column};
7357          $self->{column}++;
7358          $self->{nc}
7359              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7360        } else {
7361          $self->{set_nc}->($self);
7362        }
7363      
7364            return  ($self->{ct});
7365            redo A;
7366          } else {
7367            $self->{ca}->{default} .= chr $self->{nc};
7368            ## Stay in the state.
7369            
7370        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7371          $self->{line_prev} = $self->{line};
7372          $self->{column_prev} = $self->{column};
7373          $self->{column}++;
7374          $self->{nc}
7375              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7376        } else {
7377          $self->{set_nc}->($self);
7378        }
7379      
7380            redo A;
7381          }
7382        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE) {
7383          if ($is_space->{$self->{nc}}) {
7384            ## Stay in the state.
7385            
7386        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7387          $self->{line_prev} = $self->{line};
7388          $self->{column_prev} = $self->{column};
7389          $self->{column}++;
7390          $self->{nc}
7391              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7392        } else {
7393          $self->{set_nc}->($self);
7394        }
7395      
7396            redo A;
7397          } elsif ($self->{nc} == 0x0022) { # "
7398            $self->{ca}->{value} = '';
7399            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7400            
7401        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7402          $self->{line_prev} = $self->{line};
7403          $self->{column_prev} = $self->{column};
7404          $self->{column}++;
7405          $self->{nc}
7406              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7407        } else {
7408          $self->{set_nc}->($self);
7409        }
7410      
7411            redo A;
7412          } elsif ($self->{nc} == 0x0027) { # '
7413            $self->{ca}->{value} = '';
7414            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7415            
7416        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7417          $self->{line_prev} = $self->{line};
7418          $self->{column_prev} = $self->{column};
7419          $self->{column}++;
7420          $self->{nc}
7421              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7422        } else {
7423          $self->{set_nc}->($self);
7424        }
7425      
7426            redo A;
7427          } elsif ($self->{nc} == 0x003E) { # >
7428            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7429            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7430            
7431        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7432          $self->{line_prev} = $self->{line};
7433          $self->{column_prev} = $self->{column};
7434          $self->{column}++;
7435          $self->{nc}
7436              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7437        } else {
7438          $self->{set_nc}->($self);
7439        }
7440      
7441            return  ($self->{ct}); # ATTLIST
7442            redo A;
7443          } elsif ($self->{nc} == -1) {
7444            ## XML5: No parse error.
7445            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7446            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7447            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7448            
7449        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7450          $self->{line_prev} = $self->{line};
7451          $self->{column_prev} = $self->{column};
7452          $self->{column}++;
7453          $self->{nc}
7454              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7455        } else {
7456          $self->{set_nc}->($self);
7457        }
7458      
7459            return  ($self->{ct});
7460          redo A;          redo A;
7461        } else {        } else {
7462          ## XML5: Not defined yet.          ## XML5: Not defined yet.
7463            if ($self->{ca}->{default} eq 'FIXED') {
7464              $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
7465            } else {
7466              push @{$self->{ct}->{attrdefs}}, $self->{ca};
7467              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7468            }
7469            ## Reconsume.
7470            redo A;
7471          }
7472        } elsif ($self->{state} == AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE) {
7473          if ($is_space->{$self->{nc}} or
7474              $self->{nc} == -1 or
7475              $self->{nc} == 0x003E) { # >
7476            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7477            ## Reconsume.
7478            redo A;
7479          } else {
7480            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before attr name'); ## TODO: type
7481            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7482            ## Reconsume.
7483            redo A;
7484          }
7485        } elsif ($self->{state} == NDATA_STATE) {
7486          ## ASCII case-insensitive
7487          if ($self->{nc} == [
7488                undef,
7489                0x0044, # D
7490                0x0041, # A
7491                0x0054, # T
7492              ]->[length $self->{kwd}] or
7493              $self->{nc} == [
7494                undef,
7495                0x0064, # d
7496                0x0061, # a
7497                0x0074, # t
7498              ]->[length $self->{kwd}]) {
7499            
7500            ## Stay in the state.
7501            $self->{kwd} .= chr $self->{nc};
7502            
7503        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7504          $self->{line_prev} = $self->{line};
7505          $self->{column_prev} = $self->{column};
7506          $self->{column}++;
7507          $self->{nc}
7508              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7509        } else {
7510          $self->{set_nc}->($self);
7511        }
7512      
7513            redo A;
7514          } elsif ((length $self->{kwd}) == 4 and
7515                   ($self->{nc} == 0x0041 or # A
7516                    $self->{nc} == 0x0061)) { # a
7517            if ($self->{kwd} ne 'NDAT' or $self->{nc} == 0x0061) { # a
7518              
7519              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
7520                              text => 'NDATA',
7521                              line => $self->{line_prev},
7522                              column => $self->{column_prev} - 4);
7523            } else {
7524              
7525            }
7526            $self->{state} = AFTER_NDATA_STATE;
7527            
7528        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7529          $self->{line_prev} = $self->{line};
7530          $self->{column_prev} = $self->{column};
7531          $self->{column}++;
7532          $self->{nc}
7533              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7534        } else {
7535          $self->{set_nc}->($self);
7536        }
7537      
7538            redo A;
7539          } else {
7540            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after literal', ## TODO: type
7541                            line => $self->{line_prev},
7542                            column => $self->{column_prev} + 1
7543                                - length $self->{kwd});
7544            
7545            $self->{state} = BOGUS_MD_STATE;
7546            ## Reconsume.
7547            redo A;
7548          }
7549        } elsif ($self->{state} == AFTER_NDATA_STATE) {
7550          if ($is_space->{$self->{nc}}) {
7551            $self->{state} = BEFORE_NOTATION_NAME_STATE;
7552            
7553        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7554          $self->{line_prev} = $self->{line};
7555          $self->{column_prev} = $self->{column};
7556          $self->{column}++;
7557          $self->{nc}
7558              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7559        } else {
7560          $self->{set_nc}->($self);
7561        }
7562      
7563            redo A;
7564          } elsif ($self->{nc} == 0x003E) { # >
7565            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no notation name'); ## TODO: type
7566            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7567            
7568        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7569          $self->{line_prev} = $self->{line};
7570          $self->{column_prev} = $self->{column};
7571          $self->{column}++;
7572          $self->{nc}
7573              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7574        } else {
7575          $self->{set_nc}->($self);
7576        }
7577      
7578            return  ($self->{ct}); # ENTITY
7579            redo A;
7580          } elsif ($self->{nc} == -1) {
7581            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7582            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7583            
7584        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7585          $self->{line_prev} = $self->{line};
7586          $self->{column_prev} = $self->{column};
7587          $self->{column}++;
7588          $self->{nc}
7589              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7590        } else {
7591          $self->{set_nc}->($self);
7592        }
7593      
7594            return  ($self->{ct}); # ENTITY
7595            redo A;
7596          } else {
7597            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after literal', ## TODO: type
7598                            line => $self->{line_prev},
7599                            column => $self->{column_prev} + 1
7600                                - length $self->{kwd});
7601            $self->{state} = BOGUS_MD_STATE;
7602            ## Reconsume.
7603            redo A;
7604          }
7605        } elsif ($self->{state} == BEFORE_NOTATION_NAME_STATE) {
7606          if ($is_space->{$self->{nc}}) {
7607            ## Stay in the state.
7608            
7609        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7610          $self->{line_prev} = $self->{line};
7611          $self->{column_prev} = $self->{column};
7612          $self->{column}++;
7613          $self->{nc}
7614              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7615        } else {
7616          $self->{set_nc}->($self);
7617        }
7618      
7619            redo A;
7620          } elsif ($self->{nc} == 0x003E) { # >
7621            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no notation name'); ## TODO: type
7622            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7623            
7624        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7625          $self->{line_prev} = $self->{line};
7626          $self->{column_prev} = $self->{column};
7627          $self->{column}++;
7628          $self->{nc}
7629              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7630        } else {
7631          $self->{set_nc}->($self);
7632        }
7633      
7634            return  ($self->{ct}); # ENTITY
7635            redo A;
7636          } elsif ($self->{nc} == -1) {
7637            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7638            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7639            
7640        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7641          $self->{line_prev} = $self->{line};
7642          $self->{column_prev} = $self->{column};
7643          $self->{column}++;
7644          $self->{nc}
7645              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7646        } else {
7647          $self->{set_nc}->($self);
7648        }
7649      
7650            return  ($self->{ct}); # ENTITY
7651            redo A;
7652          } else {
7653            $self->{ct}->{notation} = chr $self->{nc}; # ENTITY
7654            $self->{state} = NOTATION_NAME_STATE;
7655            
7656        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7657          $self->{line_prev} = $self->{line};
7658          $self->{column_prev} = $self->{column};
7659          $self->{column}++;
7660          $self->{nc}
7661              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7662        } else {
7663          $self->{set_nc}->($self);
7664        }
7665      
7666            redo A;
7667          }
7668        } elsif ($self->{state} == NOTATION_NAME_STATE) {
7669          if ($is_space->{$self->{nc}}) {
7670            $self->{state} = AFTER_NOTATION_NAME_STATE;
7671            
7672        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7673          $self->{line_prev} = $self->{line};
7674          $self->{column_prev} = $self->{column};
7675          $self->{column}++;
7676          $self->{nc}
7677              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7678        } else {
7679          $self->{set_nc}->($self);
7680        }
7681      
7682            redo A;
7683          } elsif ($self->{nc} == 0x003E) { # >
7684            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7685            
7686        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7687          $self->{line_prev} = $self->{line};
7688          $self->{column_prev} = $self->{column};
7689          $self->{column}++;
7690          $self->{nc}
7691              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7692        } else {
7693          $self->{set_nc}->($self);
7694        }
7695      
7696            return  ($self->{ct}); # ENTITY
7697            redo A;
7698          } elsif ($self->{nc} == -1) {
7699            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7700            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7701            
7702        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7703          $self->{line_prev} = $self->{line};
7704          $self->{column_prev} = $self->{column};
7705          $self->{column}++;
7706          $self->{nc}
7707              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7708        } else {
7709          $self->{set_nc}->($self);
7710        }
7711      
7712            return  ($self->{ct}); # ENTITY
7713            redo A;
7714          } else {
7715            $self->{ct}->{notation} .= chr $self->{nc}; # ENTITY
7716            ## Stay in the state.
7717            
7718        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7719          $self->{line_prev} = $self->{line};
7720          $self->{column_prev} = $self->{column};
7721          $self->{column}++;
7722          $self->{nc}
7723              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7724        } else {
7725          $self->{set_nc}->($self);
7726        }
7727      
7728            redo A;
7729          }
7730        } elsif ($self->{state} == DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE) {
7731          if ($self->{nc} == 0x0022) { # "
7732            $self->{state} = AFTER_NOTATION_NAME_STATE;
7733            
7734        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7735          $self->{line_prev} = $self->{line};
7736          $self->{column_prev} = $self->{column};
7737          $self->{column}++;
7738          $self->{nc}
7739              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7740        } else {
7741          $self->{set_nc}->($self);
7742        }
7743      
7744            redo A;
7745          } elsif ($self->{nc} == 0x0026) { # &
7746            $self->{prev_state} = $self->{state};
7747            $self->{state} = ENTITY_VALUE_ENTITY_STATE;
7748            $self->{entity_add} = 0x0022; # "
7749            
7750        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7751          $self->{line_prev} = $self->{line};
7752          $self->{column_prev} = $self->{column};
7753          $self->{column}++;
7754          $self->{nc}
7755              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7756        } else {
7757          $self->{set_nc}->($self);
7758        }
7759      
7760            redo A;
7761    ## TODO: %
7762          } elsif ($self->{nc} == -1) {
7763            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed entity value'); ## TODO: type
7764            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7765            ## Reconsume.
7766            return  ($self->{ct}); # ENTITY
7767            redo A;
7768          } else {
7769            $self->{ct}->{value} .= chr $self->{nc}; # ENTITY
7770            
7771        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7772          $self->{line_prev} = $self->{line};
7773          $self->{column_prev} = $self->{column};
7774          $self->{column}++;
7775          $self->{nc}
7776              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7777        } else {
7778          $self->{set_nc}->($self);
7779        }
7780      
7781            redo A;
7782          }
7783        } elsif ($self->{state} == DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE) {
7784          if ($self->{nc} == 0x0027) { # '
7785            $self->{state} = AFTER_NOTATION_NAME_STATE;
7786            
7787        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7788          $self->{line_prev} = $self->{line};
7789          $self->{column_prev} = $self->{column};
7790          $self->{column}++;
7791          $self->{nc}
7792              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7793        } else {
7794          $self->{set_nc}->($self);
7795        }
7796      
7797            redo A;
7798          } elsif ($self->{nc} == 0x0026) { # &
7799            $self->{prev_state} = $self->{state};
7800            $self->{state} = ENTITY_VALUE_ENTITY_STATE;
7801            $self->{entity_add} = 0x0027; # '
7802            
7803        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7804          $self->{line_prev} = $self->{line};
7805          $self->{column_prev} = $self->{column};
7806          $self->{column}++;
7807          $self->{nc}
7808              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7809        } else {
7810          $self->{set_nc}->($self);
7811        }
7812      
7813            redo A;
7814    ## TODO: %
7815          } elsif ($self->{nc} == -1) {
7816            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed entity value'); ## TODO: type
7817            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7818            ## Reconsume.
7819            return  ($self->{ct}); # ENTITY
7820            redo A;
7821          } else {
7822            $self->{ct}->{value} .= chr $self->{nc}; # ENTITY
7823            
7824        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7825          $self->{line_prev} = $self->{line};
7826          $self->{column_prev} = $self->{column};
7827          $self->{column}++;
7828          $self->{nc}
7829              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7830        } else {
7831          $self->{set_nc}->($self);
7832        }
7833      
7834            redo A;
7835          }
7836        } elsif ($self->{state} == ENTITY_VALUE_ENTITY_STATE) {
7837          ## TODO: XMLize
7838    
7839          ## TODO: ...        if ($is_space->{$self->{nc}} or
7840              {
7841                0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
7842                $self->{entity_add} => 1,
7843              }->{$self->{nc}}) {
7844            ## Don't consume
7845            ## No error
7846            ## Return nothing.
7847            #
7848          } elsif ($self->{nc} == 0x0023) { # #
7849            $self->{ca} = $self->{ct};
7850            $self->{state} = ENTITY_HASH_STATE;
7851            $self->{kwd} = '#';
7852            
7853        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7854          $self->{line_prev} = $self->{line};
7855          $self->{column_prev} = $self->{column};
7856          $self->{column}++;
7857          $self->{nc}
7858              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7859        } else {
7860          $self->{set_nc}->($self);
7861        }
7862      
7863            redo A;
7864          } elsif ((0x0041 <= $self->{nc} and
7865                    $self->{nc} <= 0x005A) or # A..Z
7866                   (0x0061 <= $self->{nc} and
7867                    $self->{nc} <= 0x007A)) { # a..z
7868            #
7869          } else {
7870            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero');
7871            ## Return nothing.
7872            #
7873          }
7874    
7875          $self->{state} = BOGUS_COMMENT_STATE;        $self->{ct}->{value} .= '&';
7876          $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded        $self->{state} = $self->{prev_state};
7877          ## Reconsume.
7878          redo A;
7879        } elsif ($self->{state} == AFTER_NOTATION_NAME_STATE) {
7880          if ($is_space->{$self->{nc}}) {
7881            ## Stay in the state.
7882            
7883        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7884          $self->{line_prev} = $self->{line};
7885          $self->{column_prev} = $self->{column};
7886          $self->{column}++;
7887          $self->{nc}
7888              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7889        } else {
7890          $self->{set_nc}->($self);
7891        }
7892      
7893            redo A;
7894          } elsif ($self->{nc} == 0x003E) { # >
7895            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7896            
7897        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7898          $self->{line_prev} = $self->{line};
7899          $self->{column_prev} = $self->{column};
7900          $self->{column}++;
7901          $self->{nc}
7902              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7903        } else {
7904          $self->{set_nc}->($self);
7905        }
7906      
7907            return  ($self->{ct}); # ENTITY
7908            redo A;
7909          } elsif ($self->{nc} == -1) {
7910            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7911            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7912            
7913        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7914          $self->{line_prev} = $self->{line};
7915          $self->{column_prev} = $self->{column};
7916          $self->{column}++;
7917          $self->{nc}
7918              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7919        } else {
7920          $self->{set_nc}->($self);
7921        }
7922      
7923            return  ($self->{ct}); # ENTITY
7924            redo A;
7925          } else {
7926            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after notation name'); ## TODO: type
7927            $self->{state} = BOGUS_MD_STATE;
7928          ## Reconsume.          ## Reconsume.
7929          redo A;          redo A;
7930        }        }
7931        } elsif ($self->{state} == BOGUS_MD_STATE) {
7932          if ($self->{nc} == 0x003E) { # >
7933            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7934            
7935        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7936          $self->{line_prev} = $self->{line};
7937          $self->{column_prev} = $self->{column};
7938          $self->{column}++;
7939          $self->{nc}
7940              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7941        } else {
7942          $self->{set_nc}->($self);
7943        }
7944      
7945            return  ($self->{ct}); # ATTLIST/ENTITY/NOTATION
7946            redo A;
7947          } elsif ($self->{nc} == -1) {
7948            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7949            ## Reconsume.
7950            return  ($self->{ct}); # ATTLIST/ENTITY/NOTATION
7951            redo A;
7952          } else {
7953            ## Stay in the state.
7954            
7955        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7956          $self->{line_prev} = $self->{line};
7957          $self->{column_prev} = $self->{column};
7958          $self->{column}++;
7959          $self->{nc}
7960              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7961        } else {
7962          $self->{set_nc}->($self);
7963        }
7964      
7965            redo A;
7966          }
7967      } else {      } else {
7968        die "$0: $self->{state}: Unknown state";        die "$0: $self->{state}: Unknown state";
7969      }      }
# Line 5853  sub _get_next_token ($) { Line 7974  sub _get_next_token ($) {
7974    
7975  1;  1;
7976  ## $Date$  ## $Date$
7977                                    

Legend:
Removed from v.1.14  
changed lines
  Added in v.1.19

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24