/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm

Parent Directory | Revision Log | View Patch

revision 1.14 by wakaba, Fri Oct 17 07:14:29 2008 UTC revision 1.18 by wakaba, Sun Oct 19 06:14:57 2008 UTC
# Line 164  sub BEFORE_MD_NAME_STATE () { 68 } Line 164  sub BEFORE_MD_NAME_STATE () { 68 }
164  sub MD_NAME_STATE () { 69 }  sub MD_NAME_STATE () { 69 }
165  sub DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE () { 70 }  sub DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE () { 70 }
166  sub DOCTYPE_ATTLIST_NAME_AFTER_STATE () { 71 }  sub DOCTYPE_ATTLIST_NAME_AFTER_STATE () { 71 }
167    sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE () { 72 }
168    sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE () { 73 }
169    sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE () { 74 }
170    sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE () { 75 }
171    sub BEFORE_ALLOWED_TOKEN_STATE () { 76 }
172    sub ALLOWED_TOKEN_STATE () { 77 }
173    sub AFTER_ALLOWED_TOKEN_STATE () { 78 }
174    sub AFTER_ALLOWED_TOKENS_STATE () { 79 }
175    sub BEFORE_ATTR_DEFAULT_STATE () { 80 }
176    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE () { 81 }
177    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE () { 82 }
178    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE () { 83 }
179    sub AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE () { 84 }
180    sub BEFORE_NDATA_STATE () { 85 }
181    sub NDATA_STATE () { 86 }
182    sub AFTER_NDATA_STATE () { 87 }
183    sub BEFORE_NOTATION_NAME_STATE () { 88 }
184    sub NOTATION_NAME_STATE () { 89 }
185    sub AFTER_NOTATION_NAME_STATE () { 90 }
186    sub BOGUS_MD_STATE () { 91 }
187    
188  ## Tree constructor state constants (see Whatpm::HTML for the full  ## Tree constructor state constants (see Whatpm::HTML for the full
189  ## list and descriptions)  ## list and descriptions)
# Line 1737  sub _get_next_token ($) { Line 1757  sub _get_next_token ($) {
1757          redo A;          redo A;
1758        }        }
1759      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1760        ## XML5: "Tag attribute value double quoted state".        ## XML5: "Tag attribute value double quoted state" and "DOCTYPE
1761          ## ATTLIST attribute value double quoted state".
1762                
1763        if ($self->{nc} == 0x0022) { # "        if ($self->{nc} == 0x0022) { # "
1764                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1765          ## XML5: "Tag attribute name before state".            
1766          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;            ## XML5: "DOCTYPE ATTLIST name after state".
1767              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1768              $self->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE;
1769            } else {
1770              
1771              ## XML5: "Tag attribute name before state".
1772              $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1773            }
1774                    
1775      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1776        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1783  sub _get_next_token ($) { Line 1811  sub _get_next_token ($) {
1811          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1812                        
1813            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
1814    
1815              $self->{state} = DATA_STATE;
1816              $self->{s_kwd} = '';
1817              ## reconsume
1818              return  ($self->{ct}); # start tag
1819              redo A;
1820          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1821            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1822            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1792  sub _get_next_token ($) { Line 1826  sub _get_next_token ($) {
1826              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
1827                            
1828            }            }
1829    
1830              $self->{state} = DATA_STATE;
1831              $self->{s_kwd} = '';
1832              ## reconsume
1833              return  ($self->{ct}); # end tag
1834              redo A;
1835            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1836              ## XML5: No parse error above; not defined yet.
1837              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1838              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1839              ## Reconsume.
1840              return  ($self->{ct}); # ATTLIST
1841              redo A;
1842          } else {          } else {
1843            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1844          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
1845        } else {        } else {
1846            ## XML5 [ATTLIST]: Not defined yet.
1847          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
1848                        
1849            ## XML5: Not a parse error.            ## XML5: Not a parse error.
# Line 1830  sub _get_next_token ($) { Line 1871  sub _get_next_token ($) {
1871          redo A;          redo A;
1872        }        }
1873      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1874        ## XML5: "Tag attribute value single quoted state".        ## XML5: "Tag attribute value single quoted state" and "DOCTYPE
1875          ## ATTLIST attribute value single quoted state".
1876    
1877        if ($self->{nc} == 0x0027) { # '        if ($self->{nc} == 0x0027) { # '
1878                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1879          ## XML5: "Before attribute name state" (sic).            
1880          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;            ## XML5: "DOCTYPE ATTLIST name after state".
1881              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1882              $self->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE;
1883            } else {
1884              
1885              ## XML5: "Before attribute name state" (sic).
1886              $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1887            }
1888                    
1889      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1890        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1876  sub _get_next_token ($) { Line 1925  sub _get_next_token ($) {
1925          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1926                        
1927            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
1928    
1929              $self->{state} = DATA_STATE;
1930              $self->{s_kwd} = '';
1931              ## reconsume
1932              return  ($self->{ct}); # start tag
1933              redo A;
1934          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1935            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1936            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1885  sub _get_next_token ($) { Line 1940  sub _get_next_token ($) {
1940              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
1941                            
1942            }            }
1943    
1944              $self->{state} = DATA_STATE;
1945              $self->{s_kwd} = '';
1946              ## reconsume
1947              return  ($self->{ct}); # end tag
1948              redo A;
1949            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1950              ## XML5: No parse error above; not defined yet.
1951              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1952              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1953              ## Reconsume.
1954              return  ($self->{ct}); # ATTLIST
1955              redo A;
1956          } else {          } else {
1957            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1958          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
1959        } else {        } else {
1960            ## XML5 [ATTLIST]: Not defined yet.
1961          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
1962                        
1963            ## XML5: Not a parse error.            ## XML5: Not a parse error.
# Line 1926  sub _get_next_token ($) { Line 1988  sub _get_next_token ($) {
1988        ## XML5: "Tag attribute value unquoted state".        ## XML5: "Tag attribute value unquoted state".
1989    
1990        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
1991                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1992          ## XML5: "Tag attribute name before state".            
1993          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;            push @{$self->{ct}->{attrdefs}}, $self->{ca};
1994              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
1995            } else {
1996              
1997              ## XML5: "Tag attribute name before state".
1998              $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1999            }
2000                    
2001      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2002        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1969  sub _get_next_token ($) { Line 2037  sub _get_next_token ($) {
2037          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
2038                        
2039            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
2040    
2041              $self->{state} = DATA_STATE;
2042              $self->{s_kwd} = '';
2043              
2044        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2045          $self->{line_prev} = $self->{line};
2046          $self->{column_prev} = $self->{column};
2047          $self->{column}++;
2048          $self->{nc}
2049              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
2050        } else {
2051          $self->{set_nc}->($self);
2052        }
2053      
2054              return  ($self->{ct}); # start tag
2055              redo A;
2056          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
2057            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
2058            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1978  sub _get_next_token ($) { Line 2062  sub _get_next_token ($) {
2062              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
2063                            
2064            }            }
2065          } else {  
2066            die "$0: $self->{ct}->{type}: Unknown token type";            $self->{state} = DATA_STATE;
2067          }            $self->{s_kwd} = '';
2068          $self->{state} = DATA_STATE;            
         $self->{s_kwd} = '';  
           
2069      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2070        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
2071        $self->{column_prev} = $self->{column};        $self->{column_prev} = $self->{column};
# Line 1994  sub _get_next_token ($) { Line 2076  sub _get_next_token ($) {
2076        $self->{set_nc}->($self);        $self->{set_nc}->($self);
2077      }      }
2078        
2079              return  ($self->{ct}); # end tag
2080          return  ($self->{ct}); # start tag or end tag            redo A;
2081            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
2082          redo A;            push @{$self->{ct}->{attrdefs}}, $self->{ca};
2083              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2084              
2085        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2086          $self->{line_prev} = $self->{line};
2087          $self->{column_prev} = $self->{column};
2088          $self->{column}++;
2089          $self->{nc}
2090              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
2091        } else {
2092          $self->{set_nc}->($self);
2093        }
2094      
2095              return  ($self->{ct}); # ATTLIST
2096              redo A;
2097            } else {
2098              die "$0: $self->{ct}->{type}: Unknown token type";
2099            }
2100        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');  
2101          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
2102                        
2103              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');
2104            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
2105    
2106              $self->{state} = DATA_STATE;
2107              $self->{s_kwd} = '';
2108              ## reconsume
2109              return  ($self->{ct}); # start tag
2110              redo A;
2111          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
2112              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');
2113            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
2114            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
2115                            
# Line 2012  sub _get_next_token ($) { Line 2118  sub _get_next_token ($) {
2118              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
2119                            
2120            }            }
2121    
2122              $self->{state} = DATA_STATE;
2123              $self->{s_kwd} = '';
2124              ## reconsume
2125              return  ($self->{ct}); # end tag
2126              redo A;
2127            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
2128              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
2129              push @{$self->{ct}->{attrdefs}}, $self->{ca};
2130              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2131              ## Reconsume.
2132              return  ($self->{ct}); # ATTLIST
2133              redo A;
2134          } else {          } else {
2135            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
2136          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
2137        } else {        } else {
2138          if ({          if ({
2139               0x0022 => 1, # "               0x0022 => 1, # "
# Line 3077  sub _get_next_token ($) { Line 3189  sub _get_next_token ($) {
3189        
3190          redo A;          redo A;
3191        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3192            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3193              
3194              $self->{state} = DATA_STATE;
3195              $self->{s_kwd} = '';
3196            } else {
3197              
3198              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md def'); ## TODO: type
3199              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3200            }
3201                    
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3202                    
3203      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3204        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3091  sub _get_next_token ($) { Line 3210  sub _get_next_token ($) {
3210        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3211      }      }
3212        
3213            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         return  ($self->{ct}); # DOCTYPE  
   
3214          redo A;          redo A;
3215        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3216            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3217              
3218              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3219              $self->{state} = DATA_STATE;
3220              $self->{s_kwd} = '';
3221              $self->{ct}->{quirks} = 1;
3222            } else {
3223              
3224              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3225              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3226            }
3227                    
3228          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          ## Reconsume.
3229          $self->{state} = DATA_STATE;          return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{s_kwd} = '';  
         ## reconsume  
   
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3230          redo A;          redo A;
3231        } elsif ($self->{nc} == 0x0050 or # P        } elsif ($self->{nc} == 0x0050 or # P
3232                 $self->{nc} == 0x0070) { # p                 $self->{nc} == 0x0070) { # p
# Line 3140  sub _get_next_token ($) { Line 3262  sub _get_next_token ($) {
3262      }      }
3263        
3264          redo A;          redo A;
3265        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [  ## TODO: " and ' for ENTITY
3266          } elsif ($self->{is_xml} and
3267                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3268                   $self->{nc} == 0x005B) { # [
3269                    
3270          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3271          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
# Line 3159  sub _get_next_token ($) { Line 3284  sub _get_next_token ($) {
3284          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
3285          redo A;          redo A;
3286        } else {        } else {
3287                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name'); ## TODO: type
3288          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name');  
3289          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3290              
3291              $self->{ct}->{quirks} = 1;
3292              $self->{state} = BOGUS_DOCTYPE_STATE;
3293            } else {
3294              
3295              $self->{state} = BOGUS_MD_STATE;
3296            }
3297    
         $self->{state} = BOGUS_DOCTYPE_STATE;  
3298                    
3299      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3300        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3235  sub _get_next_token ($) { Line 3366  sub _get_next_token ($) {
3366        
3367          redo A;          redo A;
3368        } else {        } else {
3369                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name', ## TODO: type
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name',  
3370                          line => $self->{line_prev},                          line => $self->{line_prev},
3371                          column => $self->{column_prev} + 1 - length $self->{kwd});                          column => $self->{column_prev} + 1 - length $self->{kwd});
3372          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3373              
3374          $self->{state} = BOGUS_DOCTYPE_STATE;            $self->{ct}->{quirks} = 1;
3375              $self->{state} = BOGUS_DOCTYPE_STATE;
3376            } else {
3377              
3378              $self->{state} = BOGUS_MD_STATE;
3379            }
3380          ## Reconsume.          ## Reconsume.
3381          redo A;          redo A;
3382        }        }
# Line 3303  sub _get_next_token ($) { Line 3438  sub _get_next_token ($) {
3438        
3439          redo A;          redo A;
3440        } else {        } else {
3441                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name', ## TODO: type
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name',  
3442                          line => $self->{line_prev},                          line => $self->{line_prev},
3443                          column => $self->{column_prev} + 1 - length $self->{kwd});                          column => $self->{column_prev} + 1 - length $self->{kwd});
3444          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3445              
3446          $self->{state} = BOGUS_DOCTYPE_STATE;            $self->{ct}->{quirks} = 1;
3447              $self->{state} = BOGUS_DOCTYPE_STATE;
3448            } else {
3449              
3450              $self->{state} = BOGUS_MD_STATE;
3451            }
3452          ## Reconsume.          ## Reconsume.
3453          redo A;          redo A;
3454        }        }
# Line 3362  sub _get_next_token ($) { Line 3501  sub _get_next_token ($) {
3501        
3502          redo A;          redo A;
3503        } elsif ($self->{nc} eq 0x003E) { # >        } elsif ($self->{nc} eq 0x003E) { # >
           
3504          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');
3505            
3506          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3507          $self->{s_kwd} = '';            
3508              $self->{state} = DATA_STATE;
3509              $self->{s_kwd} = '';
3510              $self->{ct}->{quirks} = 1;
3511            } else {
3512              
3513              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3514            }
3515            
3516                    
3517      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3518        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3378  sub _get_next_token ($) { Line 3524  sub _get_next_token ($) {
3524        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3525      }      }
3526        
3527            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3528          redo A;          redo A;
3529        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3530            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3531              
3532              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3533              $self->{state} = DATA_STATE;
3534              $self->{s_kwd} = '';
3535              $self->{ct}->{quirks} = 1;
3536            } else {
3537              
3538              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3539              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3540            }
3541                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');  
   
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3542          ## reconsume          ## reconsume
   
         $self->{ct}->{quirks} = 1;  
3543          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
3544          redo A;          redo A;
3545        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [        } elsif ($self->{is_xml} and
3546                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3547                   $self->{nc} == 0x005B) { # [
3548                    
3549          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');
3550          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
# Line 3415  sub _get_next_token ($) { Line 3564  sub _get_next_token ($) {
3564          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
3565          redo A;          redo A;
3566        } else {        } else {
           
3567          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC');
         $self->{ct}->{quirks} = 1;  
3568    
3569          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3570              
3571              $self->{ct}->{quirks} = 1;
3572              $self->{state} = BOGUS_DOCTYPE_STATE;
3573            } else {
3574              
3575              $self->{state} = BOGUS_MD_STATE;
3576            }
3577    
3578                    
3579      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3580        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3450  sub _get_next_token ($) { Line 3605  sub _get_next_token ($) {
3605        
3606          redo A;          redo A;
3607        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
3608          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3609    
3610          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3611          $self->{s_kwd} = '';            
3612              $self->{state} = DATA_STATE;
3613              $self->{s_kwd} = '';
3614              $self->{ct}->{quirks} = 1;
3615            } else {
3616              
3617              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3618            }
3619    
3620                    
3621      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3622        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3466  sub _get_next_token ($) { Line 3628  sub _get_next_token ($) {
3628        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3629      }      }
3630        
3631            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3632          redo A;          redo A;
3633        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
3634          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3635    
3636          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3637          $self->{s_kwd} = '';            
3638          ## reconsume            $self->{state} = DATA_STATE;
3639              $self->{s_kwd} = '';
3640          $self->{ct}->{quirks} = 1;            $self->{ct}->{quirks} = 1;
3641            } else {
3642              
3643              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3644            }
3645            
3646            ## Reconsume.
3647          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
3648          redo A;          redo A;
3649        } else {        } else {
3650                    
3651          $self->{ct}->{pubid} # DOCTYPE          $self->{ct}->{pubid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
3652          $self->{read_until}->($self->{ct}->{pubid}, q[">],          $self->{read_until}->($self->{ct}->{pubid}, q[">],
3653                                length $self->{ct}->{pubid});                                length $self->{ct}->{pubid});
3654    
# Line 3521  sub _get_next_token ($) { Line 3683  sub _get_next_token ($) {
3683        
3684          redo A;          redo A;
3685        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
3686          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3687    
3688          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3689          $self->{s_kwd} = '';            
3690              $self->{state} = DATA_STATE;
3691              $self->{s_kwd} = '';
3692              $self->{ct}->{quirks} = 1;
3693            } else {
3694              
3695              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3696            }
3697    
3698                    
3699      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3700        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3537  sub _get_next_token ($) { Line 3706  sub _get_next_token ($) {
3706        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3707      }      }
3708        
3709            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3710          redo A;          redo A;
3711        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
3712          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3713    
3714          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3715          $self->{s_kwd} = '';            
3716              $self->{state} = DATA_STATE;
3717              $self->{s_kwd} = '';
3718              $self->{ct}->{quirks} = 1;
3719            } else {
3720              
3721              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3722            }
3723          
3724          ## reconsume          ## reconsume
3725            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3726          redo A;          redo A;
3727        } else {        } else {
3728                    
3729          $self->{ct}->{pubid} # DOCTYPE          $self->{ct}->{pubid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
3730          $self->{read_until}->($self->{ct}->{pubid}, q['>],          $self->{read_until}->($self->{ct}->{pubid}, q['>],
3731                                length $self->{ct}->{pubid});                                length $self->{ct}->{pubid});
3732    
# Line 3593  sub _get_next_token ($) { Line 3762  sub _get_next_token ($) {
3762          redo A;          redo A;
3763        } elsif ($self->{nc} == 0x0022) { # "        } elsif ($self->{nc} == 0x0022) { # "
3764                    
3765          $self->{ct}->{sysid} = ''; # DOCTYPE          $self->{ct}->{sysid} = ''; # DOCTYPE/ENTITY/NOTATION
3766          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
3767                    
3768      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3609  sub _get_next_token ($) { Line 3778  sub _get_next_token ($) {
3778          redo A;          redo A;
3779        } elsif ($self->{nc} == 0x0027) { # '        } elsif ($self->{nc} == 0x0027) { # '
3780                    
3781          $self->{ct}->{sysid} = ''; # DOCTYPE          $self->{ct}->{sysid} = ''; # DOCTYPE/ENTITY/NOTATION
3782          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
3783                    
3784      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3624  sub _get_next_token ($) { Line 3793  sub _get_next_token ($) {
3793        
3794          redo A;          redo A;
3795        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3796          if ($self->{is_xml}) {          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3797                        if ($self->{is_xml}) {
3798            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');              
3799                $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
3800              } else {
3801                
3802              }
3803              $self->{state} = DATA_STATE;
3804              $self->{s_kwd} = '';
3805          } else {          } else {
3806                        if ($self->{ct}->{type} == NOTATION_TOKEN) {
3807                
3808              } else {
3809                
3810                $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');            
3811              }
3812              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3813          }          }
3814          $self->{state} = DATA_STATE;          
         $self->{s_kwd} = '';  
3815                    
3816      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3817        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3643  sub _get_next_token ($) { Line 3823  sub _get_next_token ($) {
3823        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3824      }      }
3825        
3826            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         return  ($self->{ct}); # DOCTYPE  
   
3827          redo A;          redo A;
3828        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3829            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3830              
3831              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3832              
3833              $self->{state} = DATA_STATE;
3834              $self->{s_kwd} = '';
3835              $self->{ct}->{quirks} = 1;
3836            } else {
3837              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3838              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3839            }
3840                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');  
   
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3841          ## reconsume          ## reconsume
3842            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3843          redo A;          redo A;
3844        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [        } elsif ($self->{is_xml} and
3845                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3846                   $self->{nc} == 0x005B) { # [
3847                    
3848          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
3849          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
# Line 3679  sub _get_next_token ($) { Line 3863  sub _get_next_token ($) {
3863          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
3864          redo A;          redo A;
3865        } else {        } else {
           
3866          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC literal');
         $self->{ct}->{quirks} = 1;  
3867    
3868          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3869              
3870              $self->{ct}->{quirks} = 1;
3871              $self->{state} = BOGUS_DOCTYPE_STATE;
3872            } else {
3873              
3874              $self->{state} = BOGUS_MD_STATE;
3875            }
3876    
3877                    
3878      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3879        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3746  sub _get_next_token ($) { Line 3936  sub _get_next_token ($) {
3936        
3937          redo A;          redo A;
3938        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
3939          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3940                    
3941      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3942        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3762  sub _get_next_token ($) { Line 3949  sub _get_next_token ($) {
3949      }      }
3950        
3951    
3952          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3953          return  ($self->{ct}); # DOCTYPE            
3954              $self->{state} = DATA_STATE;
3955              $self->{s_kwd} = '';
3956              $self->{ct}->{quirks} = 1;
3957            } else {
3958              
3959              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3960            }
3961    
3962            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
3963          redo A;          redo A;
3964        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3965            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3966              
3967              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3968              $self->{state} = DATA_STATE;
3969              $self->{s_kwd} = '';
3970              $self->{ct}->{quirks} = 1;
3971            } else {
3972              
3973              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3974              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3975            }
3976                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');  
   
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3977          ## reconsume          ## reconsume
3978            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3979          redo A;          redo A;
3980        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [        } elsif ($self->{is_xml} and
3981                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3982                   $self->{nc} == 0x005B) { # [
3983                    
3984          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
3985    
# Line 3799  sub _get_next_token ($) { Line 4000  sub _get_next_token ($) {
4000          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
4001          redo A;          redo A;
4002        } else {        } else {
           
4003          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM');
         $self->{ct}->{quirks} = 1;  
4004    
4005          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4006                        
4007              $self->{ct}->{quirks} = 1;
4008              $self->{state} = BOGUS_DOCTYPE_STATE;
4009            } else {
4010              
4011              $self->{state} = BOGUS_MD_STATE;
4012            }
4013    
4014                    
4015      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4016        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3834  sub _get_next_token ($) { Line 4041  sub _get_next_token ($) {
4041        
4042          redo A;          redo A;
4043        } elsif (not $self->{is_xml} and $self->{nc} == 0x003E) { # >        } elsif (not $self->{is_xml} and $self->{nc} == 0x003E) { # >
           
4044          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4045    
4046          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4047          $self->{s_kwd} = '';            
4048              $self->{state} = DATA_STATE;
4049              $self->{s_kwd} = '';
4050              $self->{ct}->{quirks} = 1;
4051            } else {
4052              
4053              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4054            }
4055            
4056                    
4057      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4058        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3850  sub _get_next_token ($) { Line 4064  sub _get_next_token ($) {
4064        $self->{set_nc}->($self);        $self->{set_nc}->($self);
4065      }      }
4066        
4067            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
4068          redo A;          redo A;
4069        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
4070          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4071    
4072          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4073          $self->{s_kwd} = '';            
4074              $self->{state} = DATA_STATE;
4075              $self->{s_kwd} = '';
4076              $self->{ct}->{quirks} = 1;
4077            } else {
4078              
4079              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4080            }
4081            
4082          ## reconsume          ## reconsume
4083            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
4084          redo A;          redo A;
4085        } else {        } else {
4086                    
4087          $self->{ct}->{sysid} # DOCTYPE          $self->{ct}->{sysid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
4088          $self->{read_until}->($self->{ct}->{sysid}, q[">],          $self->{read_until}->($self->{ct}->{sysid}, q[">],
4089                                length $self->{ct}->{sysid});                                length $self->{ct}->{sysid});
4090    
# Line 3927  sub _get_next_token ($) { Line 4141  sub _get_next_token ($) {
4141    
4142          redo A;          redo A;
4143        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
4144          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4145    
4146          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4147          $self->{s_kwd} = '';            
4148          ## reconsume            $self->{state} = DATA_STATE;
4149              $self->{s_kwd} = '';
4150          $self->{ct}->{quirks} = 1;            $self->{ct}->{quirks} = 1;
4151          return  ($self->{ct}); # DOCTYPE          } else {
4152              
4153              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4154            }
4155    
4156            ## reconsume
4157            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4158          redo A;          redo A;
4159        } else {        } else {
4160                    
4161          $self->{ct}->{sysid} # DOCTYPE          $self->{ct}->{sysid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
4162          $self->{read_until}->($self->{ct}->{sysid}, q['>],          $self->{read_until}->($self->{ct}->{sysid}, q['>],
4163                                length $self->{ct}->{sysid});                                length $self->{ct}->{sysid});
4164    
# Line 3961  sub _get_next_token ($) { Line 4178  sub _get_next_token ($) {
4178        }        }
4179      } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {      } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
4180        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
4181                    if ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN) {
4182          ## Stay in the state            
4183              $self->{state} = BEFORE_NDATA_STATE;
4184            } else {
4185              
4186              ## Stay in the state
4187            }
4188                    
4189      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4190        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3976  sub _get_next_token ($) { Line 4198  sub _get_next_token ($) {
4198        
4199          redo A;          redo A;
4200        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
4201            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4202              
4203              $self->{state} = DATA_STATE;
4204              $self->{s_kwd} = '';
4205            } else {
4206              
4207              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4208            }
4209    
4210                    
4211          $self->{state} = DATA_STATE;      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4212          $self->{s_kwd} = '';        $self->{line_prev} = $self->{line};
4213          $self->{column_prev} = $self->{column};
4214          $self->{column}++;
4215          $self->{nc}
4216              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4217        } else {
4218          $self->{set_nc}->($self);
4219        }
4220      
4221            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4222            redo A;
4223          } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
4224                   ($self->{nc} == 0x004E or # N
4225                    $self->{nc} == 0x006E)) { # n
4226            
4227            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before NDATA'); ## TODO: type
4228            $self->{state} = NDATA_STATE;
4229            $self->{kwd} = chr $self->{nc};
4230                    
4231      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4232        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3990  sub _get_next_token ($) { Line 4238  sub _get_next_token ($) {
4238        $self->{set_nc}->($self);        $self->{set_nc}->($self);
4239      }      }
4240        
   
         return  ($self->{ct}); # DOCTYPE  
   
4241          redo A;          redo A;
4242        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4243                    if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4244          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');            
4245          $self->{state} = DATA_STATE;            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
4246          $self->{s_kwd} = '';            $self->{state} = DATA_STATE;
4247          ## reconsume            $self->{s_kwd} = '';
4248              $self->{ct}->{quirks} = 1;
4249          $self->{ct}->{quirks} = 1;          } else {
4250          return  ($self->{ct}); # DOCTYPE            
4251              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
4252              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4253            }
4254    
4255            ## reconsume
4256            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4257          redo A;          redo A;
4258        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [        } elsif ($self->{is_xml} and
4259                   $self->{ct}->{type} == DOCTYPE_TOKEN and
4260                   $self->{nc} == 0x005B) { # [
4261                    
4262          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4263          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
# Line 4024  sub _get_next_token ($) { Line 4276  sub _get_next_token ($) {
4276          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
4277          redo A;          redo A;
4278        } else {        } else {
           
4279          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');
         #$self->{ct}->{quirks} = 1;  
4280    
4281          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4282              
4283              #$self->{ct}->{quirks} = 1;
4284              $self->{state} = BOGUS_DOCTYPE_STATE;
4285            } else {
4286              
4287              $self->{state} = BOGUS_MD_STATE;
4288            }
4289    
4290            
4291        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4292          $self->{line_prev} = $self->{line};
4293          $self->{column_prev} = $self->{column};
4294          $self->{column}++;
4295          $self->{nc}
4296              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4297        } else {
4298          $self->{set_nc}->($self);
4299        }
4300      
4301            redo A;
4302          }
4303        } elsif ($self->{state} == BEFORE_NDATA_STATE) {
4304          if ($is_space->{$self->{nc}}) {
4305            
4306            ## Stay in the state.
4307            
4308        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4309          $self->{line_prev} = $self->{line};
4310          $self->{column_prev} = $self->{column};
4311          $self->{column}++;
4312          $self->{nc}
4313              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4314        } else {
4315          $self->{set_nc}->($self);
4316        }
4317      
4318            redo A;
4319          } elsif ($self->{nc} == 0x003E) { # >
4320            
4321            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4322            
4323        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4324          $self->{line_prev} = $self->{line};
4325          $self->{column_prev} = $self->{column};
4326          $self->{column}++;
4327          $self->{nc}
4328              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4329        } else {
4330          $self->{set_nc}->($self);
4331        }
4332      
4333            return  ($self->{ct}); # ENTITY
4334            redo A;
4335          } elsif ($self->{nc} == 0x004E or # N
4336                   $self->{nc} == 0x006E) { # n
4337            
4338            $self->{state} = NDATA_STATE;
4339            $self->{kwd} = chr $self->{nc};
4340            
4341        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4342          $self->{line_prev} = $self->{line};
4343          $self->{column_prev} = $self->{column};
4344          $self->{column}++;
4345          $self->{nc}
4346              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4347        } else {
4348          $self->{set_nc}->($self);
4349        }
4350      
4351            redo A;
4352          } elsif ($self->{nc} == -1) {
4353            
4354            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
4355            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4356            ## reconsume
4357            return  ($self->{ct}); # ENTITY
4358            redo A;
4359          } else {
4360            
4361            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');
4362            $self->{state} = BOGUS_MD_STATE;
4363                    
4364      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4365        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 5253  sub _get_next_token ($) { Line 5584  sub _get_next_token ($) {
5584      }      }
5585        
5586          redo A;          redo A;
5587        } elsif ($self->{nc} == 0x0045) { # E        } elsif ($self->{nc} == 0x0045 or # E
5588                   $self->{nc} == 0x0065) { # e
5589          $self->{state} = MD_E_STATE;          $self->{state} = MD_E_STATE;
5590          $self->{kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
5591                    
# Line 5268  sub _get_next_token ($) { Line 5600  sub _get_next_token ($) {
5600      }      }
5601        
5602          redo A;          redo A;
5603        } elsif ($self->{nc} == 0x0041) { # A        } elsif ($self->{nc} == 0x0041 or # A
5604                   $self->{nc} == 0x0061) { # a
5605          $self->{state} = MD_ATTLIST_STATE;          $self->{state} = MD_ATTLIST_STATE;
5606          $self->{kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
5607                    
# Line 5283  sub _get_next_token ($) { Line 5616  sub _get_next_token ($) {
5616      }      }
5617        
5618          redo A;          redo A;
5619        } elsif ($self->{nc} == 0x004E) { # N        } elsif ($self->{nc} == 0x004E or # N
5620                   $self->{nc} == 0x006E) { # n
5621          $self->{state} = MD_NOTATION_STATE;          $self->{state} = MD_NOTATION_STATE;
5622          $self->{kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
5623                    
# Line 5311  sub _get_next_token ($) { Line 5645  sub _get_next_token ($) {
5645        $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded.        $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded.
5646        redo A;        redo A;
5647      } elsif ($self->{state} == MD_E_STATE) {      } elsif ($self->{state} == MD_E_STATE) {
5648        if ($self->{nc} == 0x004E) { # N        if ($self->{nc} == 0x004E or # N
5649              $self->{nc} == 0x006E) { # n
5650          $self->{state} = MD_ENTITY_STATE;          $self->{state} = MD_ENTITY_STATE;
5651          $self->{kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
5652                    
# Line 5326  sub _get_next_token ($) { Line 5661  sub _get_next_token ($) {
5661      }      }
5662        
5663          redo A;          redo A;
5664        } elsif ($self->{nc} == 0x004C) { # L        } elsif ($self->{nc} == 0x004C or # L
5665                   $self->{nc} == 0x006C) { # l
5666          ## XML5: <!ELEMENT> not supported.          ## XML5: <!ELEMENT> not supported.
5667          $self->{state} = MD_ELEMENT_STATE;          $self->{state} = MD_ELEMENT_STATE;
5668          $self->{kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
# Line 5354  sub _get_next_token ($) { Line 5690  sub _get_next_token ($) {
5690          redo A;          redo A;
5691        }        }
5692      } elsif ($self->{state} == MD_ENTITY_STATE) {      } elsif ($self->{state} == MD_ENTITY_STATE) {
5693        if ($self->{nc} == {        if ($self->{nc} == [
5694              'EN' => 0x0054, # T              undef,
5695              'ENT' => 0x0049, # I              undef,
5696              'ENTI' => 0x0054, # T              0x0054, # T
5697            }->{$self->{kwd}}) {              0x0049, # I
5698                0x0054, # T
5699              ]->[length $self->{kwd}] or
5700              $self->{nc} == [
5701                undef,
5702                undef,
5703                0x0074, # t
5704                0x0069, # i
5705                0x0074, # t
5706              ]->[length $self->{kwd}]) {
5707          ## Stay in the state.          ## Stay in the state.
5708          $self->{kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
5709                    
# Line 5373  sub _get_next_token ($) { Line 5718  sub _get_next_token ($) {
5718      }      }
5719        
5720          redo A;          redo A;
5721        } elsif ($self->{kwd} eq 'ENTIT' and        } elsif ((length $self->{kwd}) == 5 and
5722                 $self->{nc} == 0x0059) { # Y                 ($self->{nc} == 0x0059 or # Y
5723          $self->{ct} = {type => GENERAL_ENTITY_TOKEN, name => '', text => '',                  $self->{nc} == 0x0079)) { # y
5724            if ($self->{kwd} ne 'ENTIT' or $self->{nc} == 0x0079) {
5725              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
5726                              text => 'ENTITY',
5727                              line => $self->{line_prev},
5728                              column => $self->{column_prev} - 4);
5729            }
5730            $self->{ct} = {type => GENERAL_ENTITY_TOKEN, name => '',
5731                         line => $self->{line_prev},                         line => $self->{line_prev},
5732                         column => $self->{column_prev} - 6};                         column => $self->{column_prev} - 6};
5733          $self->{state} = DOCTYPE_MD_STATE;          $self->{state} = DOCTYPE_MD_STATE;
# Line 5403  sub _get_next_token ($) { Line 5755  sub _get_next_token ($) {
5755          redo A;          redo A;
5756        }        }
5757      } elsif ($self->{state} == MD_ELEMENT_STATE) {      } elsif ($self->{state} == MD_ELEMENT_STATE) {
5758        if ($self->{nc} == {        if ($self->{nc} == [
5759              'EL' => 0x0045, # E             undef,
5760              'ELE' => 0x004D, # M             undef,
5761              'ELEM' => 0x0045, # E             0x0045, # E
5762              'ELEME' => 0x004E, # N             0x004D, # M
5763            }->{$self->{kwd}}) {             0x0045, # E
5764               0x004E, # N
5765              ]->[length $self->{kwd}] or
5766              $self->{nc} == [
5767               undef,
5768               undef,
5769               0x0065, # e
5770               0x006D, # m
5771               0x0065, # e
5772               0x006E, # n
5773              ]->[length $self->{kwd}]) {
5774          ## Stay in the state.          ## Stay in the state.
5775          $self->{kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
5776                    
# Line 5423  sub _get_next_token ($) { Line 5785  sub _get_next_token ($) {
5785      }      }
5786        
5787          redo A;          redo A;
5788        } elsif ($self->{kwd} eq 'ELEMEN' and        } elsif ((length $self->{kwd}) == 6 and
5789                 $self->{nc} == 0x0054) { # T                 ($self->{nc} == 0x0054 or # T
5790                    $self->{nc} == 0x0074)) { # t
5791            if ($self->{kwd} ne 'ELEMEN' or $self->{nc} == 0x0074) {
5792              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
5793                              text => 'ELEMENT',
5794                              line => $self->{line_prev},
5795                              column => $self->{column_prev} - 5);
5796            }
5797          $self->{ct} = {type => ELEMENT_TOKEN, name => '',          $self->{ct} = {type => ELEMENT_TOKEN, name => '',
5798                         line => $self->{line_prev},                         line => $self->{line_prev},
5799                         column => $self->{column_prev} - 6};                         column => $self->{column_prev} - 6};
# Line 5453  sub _get_next_token ($) { Line 5822  sub _get_next_token ($) {
5822          redo A;          redo A;
5823        }        }
5824      } elsif ($self->{state} == MD_ATTLIST_STATE) {      } elsif ($self->{state} == MD_ATTLIST_STATE) {
5825        if ($self->{nc} == {        if ($self->{nc} == [
5826              'A' => 0x0054, # T             undef,
5827              'AT' => 0x0054, # T             0x0054, # T
5828              'ATT' => 0x004C, # L             0x0054, # T
5829              'ATTL' => 0x0049, # I             0x004C, # L
5830              'ATTLI' => 0x0053, # S             0x0049, # I
5831            }->{$self->{kwd}}) {             0x0053, # S
5832              ]->[length $self->{kwd}] or
5833              $self->{nc} == [
5834               undef,
5835               0x0074, # t
5836               0x0074, # t
5837               0x006C, # l
5838               0x0069, # i
5839               0x0073, # s
5840              ]->[length $self->{kwd}]) {
5841          ## Stay in the state.          ## Stay in the state.
5842          $self->{kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
5843                    
# Line 5474  sub _get_next_token ($) { Line 5852  sub _get_next_token ($) {
5852      }      }
5853        
5854          redo A;          redo A;
5855        } elsif ($self->{kwd} eq 'ATTLIS' and        } elsif ((length $self->{kwd}) == 6 and
5856                 $self->{nc} == 0x0054) { # T                 ($self->{nc} == 0x0054 or # T
5857                    $self->{nc} == 0x0074)) { # t
5858            if ($self->{kwd} ne 'ATTLIS' or $self->{nc} == 0x0074) {
5859              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
5860                              text => 'ATTLIST',
5861                              line => $self->{line_prev},
5862                              column => $self->{column_prev} - 5);
5863            }
5864          $self->{ct} = {type => ATTLIST_TOKEN, name => '',          $self->{ct} = {type => ATTLIST_TOKEN, name => '',
5865                           attrdefs => [],
5866                         line => $self->{line_prev},                         line => $self->{line_prev},
5867                         column => $self->{column_prev} - 6};                         column => $self->{column_prev} - 6};
5868          $self->{state} = DOCTYPE_MD_STATE;          $self->{state} = DOCTYPE_MD_STATE;
# Line 5504  sub _get_next_token ($) { Line 5890  sub _get_next_token ($) {
5890          redo A;          redo A;
5891        }        }
5892      } elsif ($self->{state} == MD_NOTATION_STATE) {      } elsif ($self->{state} == MD_NOTATION_STATE) {
5893        if ($self->{nc} == {        if ($self->{nc} == [
5894              'N' => 0x004F, # O             undef,
5895              'NO' => 0x0054, # T             0x004F, # O
5896              'NOT' => 0x0041, # A             0x0054, # T
5897              'NOTA' => 0x0054, # T             0x0041, # A
5898              'NOTAT' => 0x0049, # I             0x0054, # T
5899              'NOTATI' => 0x004F, # O             0x0049, # I
5900            }->{$self->{kwd}}) {             0x004F, # O
5901              ]->[length $self->{kwd}] or
5902              $self->{nc} == [
5903               undef,
5904               0x006F, # o
5905               0x0074, # t
5906               0x0061, # a
5907               0x0074, # t
5908               0x0069, # i
5909               0x006F, # o
5910              ]->[length $self->{kwd}]) {
5911          ## Stay in the state.          ## Stay in the state.
5912          $self->{kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
5913                    
# Line 5526  sub _get_next_token ($) { Line 5922  sub _get_next_token ($) {
5922      }      }
5923        
5924          redo A;          redo A;
5925        } elsif ($self->{kwd} eq 'NOTATIO' and        } elsif ((length $self->{kwd}) == 7 and
5926                 $self->{nc} == 0x004E) { # N                 ($self->{nc} == 0x004E or # N
5927                    $self->{nc} == 0x006E)) { # n
5928            if ($self->{kwd} ne 'NOTATIO' or $self->{nc} == 0x006E) {
5929              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
5930                              text => 'NOTATION',
5931                              line => $self->{line_prev},
5932                              column => $self->{column_prev} - 6);
5933            }
5934          $self->{ct} = {type => NOTATION_TOKEN, name => '',          $self->{ct} = {type => NOTATION_TOKEN, name => '',
5935                         line => $self->{line_prev},                         line => $self->{line_prev},
5936                         column => $self->{column_prev} - 6};                         column => $self->{column_prev} - 6};
# Line 5739  sub _get_next_token ($) { Line 6142  sub _get_next_token ($) {
6142        ## XML5: "DOCTYPE ENTITY name state" and "DOCTYPE ATTLIST name state".        ## XML5: "DOCTYPE ENTITY name state" and "DOCTYPE ATTLIST name state".
6143                
6144        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
6145          ## TODO:          if ($self->{ct}->{type} == ATTLIST_TOKEN) {
6146          $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
6147            } elsif ($self->{ct}->{type} == ELEMENT_TOKEN) {
6148              ## TODO: ...
6149              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
6150            } else { # ENTITY/NOTATION
6151              $self->{state} = AFTER_DOCTYPE_NAME_STATE;
6152            }
6153                    
6154      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6155        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 5757  sub _get_next_token ($) { Line 6166  sub _get_next_token ($) {
6166          if ($self->{ct}->{type} == ATTLIST_TOKEN) {          if ($self->{ct}->{type} == ATTLIST_TOKEN) {
6167            #            #
6168          } else {          } else {
6169            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md body'); ## TODO: type            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md def'); ## TODO: type
6170          }          }
6171          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6172                    
# Line 5831  sub _get_next_token ($) { Line 6240  sub _get_next_token ($) {
6240          ## XML5: No parse error.          ## XML5: No parse error.
6241          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6242          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6243            return  ($self->{ct});
6244            redo A;
6245          } else {
6246            ## XML5: Not defined yet.
6247            $self->{ca} = {name => chr ($self->{nc}), # attrdef
6248                           tokens => [],
6249                           line => $self->{line}, column => $self->{column}};
6250            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE;
6251            
6252        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6253          $self->{line_prev} = $self->{line};
6254          $self->{column_prev} = $self->{column};
6255          $self->{column}++;
6256          $self->{nc}
6257              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6258        } else {
6259          $self->{set_nc}->($self);
6260        }
6261      
6262            redo A;
6263          }
6264        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE) {
6265          if ($is_space->{$self->{nc}}) {
6266            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE;
6267            
6268        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6269          $self->{line_prev} = $self->{line};
6270          $self->{column_prev} = $self->{column};
6271          $self->{column}++;
6272          $self->{nc}
6273              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6274        } else {
6275          $self->{set_nc}->($self);
6276        }
6277      
6278            redo A;
6279          } elsif ($self->{nc} == 0x003E) { # >
6280            ## XML5: Same as "anything else".
6281            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr type'); ## TODO: type
6282            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6283            
6284        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6285          $self->{line_prev} = $self->{line};
6286          $self->{column_prev} = $self->{column};
6287          $self->{column}++;
6288          $self->{nc}
6289              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6290        } else {
6291          $self->{set_nc}->($self);
6292        }
6293      
6294            return  ($self->{ct}); # ATTLIST
6295            redo A;
6296          } elsif ($self->{nc} == 0x0028) { # (
6297            ## XML5: Same as "anything else".
6298            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before paren'); ## TODO: type
6299            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6300            
6301        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6302          $self->{line_prev} = $self->{line};
6303          $self->{column_prev} = $self->{column};
6304          $self->{column}++;
6305          $self->{nc}
6306              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6307        } else {
6308          $self->{set_nc}->($self);
6309        }
6310      
6311            redo A;
6312          } elsif ($self->{nc} == -1) {
6313            ## XML5: No parse error.
6314            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6315            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6316            
6317        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6318          $self->{line_prev} = $self->{line};
6319          $self->{column_prev} = $self->{column};
6320          $self->{column}++;
6321          $self->{nc}
6322              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6323        } else {
6324          $self->{set_nc}->($self);
6325        }
6326      
6327            return  ($self->{ct}); # ATTLIST
6328            redo A;
6329          } else {
6330            ## XML5: Not defined yet.
6331            $self->{ca}->{name} .= chr $self->{nc};
6332            ## Stay in the state.
6333            
6334        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6335          $self->{line_prev} = $self->{line};
6336          $self->{column_prev} = $self->{column};
6337          $self->{column}++;
6338          $self->{nc}
6339              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6340        } else {
6341          $self->{set_nc}->($self);
6342        }
6343      
6344            redo A;
6345          }
6346        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE) {
6347          if ($is_space->{$self->{nc}}) {
6348            ## Stay in the state.
6349            
6350        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6351          $self->{line_prev} = $self->{line};
6352          $self->{column_prev} = $self->{column};
6353          $self->{column}++;
6354          $self->{nc}
6355              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6356        } else {
6357          $self->{set_nc}->($self);
6358        }
6359      
6360            redo A;
6361          } elsif ($self->{nc} == 0x003E) { # >
6362            ## XML5: Same as "anything else".
6363            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr type'); ## TODO: type
6364            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6365            
6366        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6367          $self->{line_prev} = $self->{line};
6368          $self->{column_prev} = $self->{column};
6369          $self->{column}++;
6370          $self->{nc}
6371              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6372        } else {
6373          $self->{set_nc}->($self);
6374        }
6375      
6376            return  ($self->{ct}); # ATTLIST
6377            redo A;
6378          } elsif ($self->{nc} == 0x0028) { # (
6379            ## XML5: Same as "anything else".
6380            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6381            
6382        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6383          $self->{line_prev} = $self->{line};
6384          $self->{column_prev} = $self->{column};
6385          $self->{column}++;
6386          $self->{nc}
6387              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6388        } else {
6389          $self->{set_nc}->($self);
6390        }
6391      
6392            redo A;
6393          } elsif ($self->{nc} == -1) {
6394            ## XML5: No parse error.
6395            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6396            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6397            
6398        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6399          $self->{line_prev} = $self->{line};
6400          $self->{column_prev} = $self->{column};
6401          $self->{column}++;
6402          $self->{nc}
6403              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6404        } else {
6405          $self->{set_nc}->($self);
6406        }
6407      
6408            return  ($self->{ct});
6409            redo A;
6410          } else {
6411            ## XML5: Not defined yet.
6412            $self->{ca}->{type} = chr $self->{nc};
6413            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE;
6414            
6415        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6416          $self->{line_prev} = $self->{line};
6417          $self->{column_prev} = $self->{column};
6418          $self->{column}++;
6419          $self->{nc}
6420              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6421        } else {
6422          $self->{set_nc}->($self);
6423        }
6424      
6425            redo A;
6426          }
6427        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE) {
6428          if ($is_space->{$self->{nc}}) {
6429            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE;
6430            
6431        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6432          $self->{line_prev} = $self->{line};
6433          $self->{column_prev} = $self->{column};
6434          $self->{column}++;
6435          $self->{nc}
6436              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6437        } else {
6438          $self->{set_nc}->($self);
6439        }
6440      
6441            redo A;
6442          } elsif ($self->{nc} == 0x0023) { # #
6443            ## XML5: Same as "anything else".
6444            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6445            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6446            
6447        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6448          $self->{line_prev} = $self->{line};
6449          $self->{column_prev} = $self->{column};
6450          $self->{column}++;
6451          $self->{nc}
6452              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6453        } else {
6454          $self->{set_nc}->($self);
6455        }
6456      
6457            redo A;
6458          } elsif ($self->{nc} == 0x0022) { # "
6459            ## XML5: Same as "anything else".
6460            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6461            $self->{ca}->{value} = '';
6462            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6463            
6464        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6465          $self->{line_prev} = $self->{line};
6466          $self->{column_prev} = $self->{column};
6467          $self->{column}++;
6468          $self->{nc}
6469              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6470        } else {
6471          $self->{set_nc}->($self);
6472        }
6473      
6474            redo A;
6475          } elsif ($self->{nc} == 0x0027) { # '
6476            ## XML5: Same as "anything else".
6477            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6478            $self->{ca}->{value} = '';
6479            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6480            
6481        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6482          $self->{line_prev} = $self->{line};
6483          $self->{column_prev} = $self->{column};
6484          $self->{column}++;
6485          $self->{nc}
6486              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6487        } else {
6488          $self->{set_nc}->($self);
6489        }
6490      
6491            redo A;
6492          } elsif ($self->{nc} == 0x003E) { # >
6493            ## XML5: Same as "anything else".
6494            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6495            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6496            
6497        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6498          $self->{line_prev} = $self->{line};
6499          $self->{column_prev} = $self->{column};
6500          $self->{column}++;
6501          $self->{nc}
6502              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6503        } else {
6504          $self->{set_nc}->($self);
6505        }
6506      
6507            return  ($self->{ct}); # ATTLIST
6508            redo A;
6509          } elsif ($self->{nc} == 0x0028) { # (
6510            ## XML5: Same as "anything else".
6511            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before paren'); ## TODO: type
6512            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6513            
6514        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6515          $self->{line_prev} = $self->{line};
6516          $self->{column_prev} = $self->{column};
6517          $self->{column}++;
6518          $self->{nc}
6519              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6520        } else {
6521          $self->{set_nc}->($self);
6522        }
6523      
6524            redo A;
6525          } elsif ($self->{nc} == -1) {
6526            ## XML5: No parse error.
6527            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6528            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6529            
6530        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6531          $self->{line_prev} = $self->{line};
6532          $self->{column_prev} = $self->{column};
6533          $self->{column}++;
6534          $self->{nc}
6535              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6536        } else {
6537          $self->{set_nc}->($self);
6538        }
6539      
6540            return  ($self->{ct});
6541            redo A;
6542          } else {
6543            ## XML5: Not defined yet.
6544            $self->{ca}->{type} .= chr $self->{nc};
6545            ## Stay in the state.
6546            
6547        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6548          $self->{line_prev} = $self->{line};
6549          $self->{column_prev} = $self->{column};
6550          $self->{column}++;
6551          $self->{nc}
6552              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6553        } else {
6554          $self->{set_nc}->($self);
6555        }
6556      
6557            redo A;
6558          }
6559        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE) {
6560          if ($is_space->{$self->{nc}}) {
6561            ## Stay in the state.
6562            
6563        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6564          $self->{line_prev} = $self->{line};
6565          $self->{column_prev} = $self->{column};
6566          $self->{column}++;
6567          $self->{nc}
6568              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6569        } else {
6570          $self->{set_nc}->($self);
6571        }
6572      
6573            redo A;
6574          } elsif ($self->{nc} == 0x0028) { # (
6575            ## XML5: Same as "anything else".
6576            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6577            
6578        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6579          $self->{line_prev} = $self->{line};
6580          $self->{column_prev} = $self->{column};
6581          $self->{column}++;
6582          $self->{nc}
6583              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6584        } else {
6585          $self->{set_nc}->($self);
6586        }
6587      
6588            redo A;
6589          } elsif ($self->{nc} == 0x0023) { # #
6590            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6591            
6592        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6593          $self->{line_prev} = $self->{line};
6594          $self->{column_prev} = $self->{column};
6595          $self->{column}++;
6596          $self->{nc}
6597              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6598        } else {
6599          $self->{set_nc}->($self);
6600        }
6601      
6602            redo A;
6603          } elsif ($self->{nc} == 0x0022) { # "
6604            ## XML5: Same as "anything else".
6605            $self->{ca}->{value} = '';
6606            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6607            
6608        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6609          $self->{line_prev} = $self->{line};
6610          $self->{column_prev} = $self->{column};
6611          $self->{column}++;
6612          $self->{nc}
6613              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6614        } else {
6615          $self->{set_nc}->($self);
6616        }
6617      
6618            redo A;
6619          } elsif ($self->{nc} == 0x0027) { # '
6620            ## XML5: Same as "anything else".
6621            $self->{ca}->{value} = '';
6622            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6623            
6624        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6625          $self->{line_prev} = $self->{line};
6626          $self->{column_prev} = $self->{column};
6627          $self->{column}++;
6628          $self->{nc}
6629              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6630        } else {
6631          $self->{set_nc}->($self);
6632        }
6633      
6634            redo A;
6635          } elsif ($self->{nc} == 0x003E) { # >
6636            ## XML5: Same as "anything else".
6637            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6638            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6639            
6640        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6641          $self->{line_prev} = $self->{line};
6642          $self->{column_prev} = $self->{column};
6643          $self->{column}++;
6644          $self->{nc}
6645              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6646        } else {
6647          $self->{set_nc}->($self);
6648        }
6649      
6650            return  ($self->{ct}); # ATTLIST
6651            redo A;
6652          } elsif ($self->{nc} == -1) {
6653            ## XML5: No parse error.
6654            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6655            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6656            
6657        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6658          $self->{line_prev} = $self->{line};
6659          $self->{column_prev} = $self->{column};
6660          $self->{column}++;
6661          $self->{nc}
6662              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6663        } else {
6664          $self->{set_nc}->($self);
6665        }
6666      
6667            return  ($self->{ct});
6668            redo A;
6669          } else {
6670            ## XML5: Switch to the "DOCTYPE bogus comment state".
6671            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
6672            $self->{ca}->{value} = '';
6673            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
6674            ## Reconsume.
6675            redo A;
6676          }
6677        } elsif ($self->{state} == BEFORE_ALLOWED_TOKEN_STATE) {
6678          if ($is_space->{$self->{nc}}) {
6679            ## Stay in the state.
6680            
6681        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6682          $self->{line_prev} = $self->{line};
6683          $self->{column_prev} = $self->{column};
6684          $self->{column}++;
6685          $self->{nc}
6686              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6687        } else {
6688          $self->{set_nc}->($self);
6689        }
6690      
6691            redo A;
6692          } elsif ($self->{nc} == 0x007C) { # |
6693            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty allowed token'); ## TODO: type
6694            ## Stay in the state.
6695            
6696        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6697          $self->{line_prev} = $self->{line};
6698          $self->{column_prev} = $self->{column};
6699          $self->{column}++;
6700          $self->{nc}
6701              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6702        } else {
6703          $self->{set_nc}->($self);
6704        }
6705      
6706            redo A;
6707          } elsif ($self->{nc} == 0x0029) { # )
6708            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty allowed token'); ## TODO: type
6709            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
6710            
6711        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6712          $self->{line_prev} = $self->{line};
6713          $self->{column_prev} = $self->{column};
6714          $self->{column}++;
6715          $self->{nc}
6716              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6717        } else {
6718          $self->{set_nc}->($self);
6719        }
6720      
6721            redo A;
6722          } elsif ($self->{nc} == 0x003E) { # >
6723            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
6724            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6725            
6726        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6727          $self->{line_prev} = $self->{line};
6728          $self->{column_prev} = $self->{column};
6729          $self->{column}++;
6730          $self->{nc}
6731              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6732        } else {
6733          $self->{set_nc}->($self);
6734        }
6735      
6736            return  ($self->{ct}); # ATTLIST
6737            redo A;
6738          } elsif ($self->{nc} == -1) {
6739            ## XML5: No parse error.
6740            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6741            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6742            
6743        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6744          $self->{line_prev} = $self->{line};
6745          $self->{column_prev} = $self->{column};
6746          $self->{column}++;
6747          $self->{nc}
6748              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6749        } else {
6750          $self->{set_nc}->($self);
6751        }
6752      
6753            return  ($self->{ct});
6754            redo A;
6755          } else {
6756            push @{$self->{ca}->{tokens}}, chr $self->{nc};
6757            $self->{state} = ALLOWED_TOKEN_STATE;
6758            
6759        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6760          $self->{line_prev} = $self->{line};
6761          $self->{column_prev} = $self->{column};
6762          $self->{column}++;
6763          $self->{nc}
6764              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6765        } else {
6766          $self->{set_nc}->($self);
6767        }
6768      
6769            redo A;
6770          }
6771        } elsif ($self->{state} == ALLOWED_TOKEN_STATE) {
6772          if ($is_space->{$self->{nc}}) {
6773            $self->{state} = AFTER_ALLOWED_TOKEN_STATE;
6774            
6775        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6776          $self->{line_prev} = $self->{line};
6777          $self->{column_prev} = $self->{column};
6778          $self->{column}++;
6779          $self->{nc}
6780              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6781        } else {
6782          $self->{set_nc}->($self);
6783        }
6784      
6785            redo A;
6786          } elsif ($self->{nc} == 0x007C) { # |
6787            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6788            
6789        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6790          $self->{line_prev} = $self->{line};
6791          $self->{column_prev} = $self->{column};
6792          $self->{column}++;
6793          $self->{nc}
6794              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6795        } else {
6796          $self->{set_nc}->($self);
6797        }
6798      
6799            redo A;
6800          } elsif ($self->{nc} == 0x0029) { # )
6801            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
6802            
6803        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6804          $self->{line_prev} = $self->{line};
6805          $self->{column_prev} = $self->{column};
6806          $self->{column}++;
6807          $self->{nc}
6808              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6809        } else {
6810          $self->{set_nc}->($self);
6811        }
6812      
6813            redo A;
6814          } elsif ($self->{nc} == 0x003E) { # >
6815            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
6816            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6817            
6818        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6819          $self->{line_prev} = $self->{line};
6820          $self->{column_prev} = $self->{column};
6821          $self->{column}++;
6822          $self->{nc}
6823              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6824        } else {
6825          $self->{set_nc}->($self);
6826        }
6827      
6828            return  ($self->{ct}); # ATTLIST
6829            redo A;
6830          } elsif ($self->{nc} == -1) {
6831            ## XML5: No parse error.
6832            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6833            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6834            
6835        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6836          $self->{line_prev} = $self->{line};
6837          $self->{column_prev} = $self->{column};
6838          $self->{column}++;
6839          $self->{nc}
6840              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6841        } else {
6842          $self->{set_nc}->($self);
6843        }
6844      
6845            return  ($self->{ct});
6846            redo A;
6847          } else {
6848            $self->{ca}->{tokens}->[-1] .= chr $self->{nc};
6849            ## Stay in the state.
6850            
6851        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6852          $self->{line_prev} = $self->{line};
6853          $self->{column_prev} = $self->{column};
6854          $self->{column}++;
6855          $self->{nc}
6856              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6857        } else {
6858          $self->{set_nc}->($self);
6859        }
6860      
6861            redo A;
6862          }
6863        } elsif ($self->{state} == AFTER_ALLOWED_TOKEN_STATE) {
6864          if ($is_space->{$self->{nc}}) {
6865            ## Stay in the state.
6866            
6867        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6868          $self->{line_prev} = $self->{line};
6869          $self->{column_prev} = $self->{column};
6870          $self->{column}++;
6871          $self->{nc}
6872              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6873        } else {
6874          $self->{set_nc}->($self);
6875        }
6876      
6877            redo A;
6878          } elsif ($self->{nc} == 0x007C) { # |
6879            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6880            
6881        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6882          $self->{line_prev} = $self->{line};
6883          $self->{column_prev} = $self->{column};
6884          $self->{column}++;
6885          $self->{nc}
6886              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6887        } else {
6888          $self->{set_nc}->($self);
6889        }
6890      
6891            redo A;
6892          } elsif ($self->{nc} == 0x0029) { # )
6893            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
6894            
6895        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6896          $self->{line_prev} = $self->{line};
6897          $self->{column_prev} = $self->{column};
6898          $self->{column}++;
6899          $self->{nc}
6900              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6901        } else {
6902          $self->{set_nc}->($self);
6903        }
6904      
6905            redo A;
6906          } elsif ($self->{nc} == 0x003E) { # >
6907            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
6908            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6909            
6910        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6911          $self->{line_prev} = $self->{line};
6912          $self->{column_prev} = $self->{column};
6913          $self->{column}++;
6914          $self->{nc}
6915              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6916        } else {
6917          $self->{set_nc}->($self);
6918        }
6919      
6920            return  ($self->{ct}); # ATTLIST
6921            redo A;
6922          } elsif ($self->{nc} == -1) {
6923            ## XML5: No parse error.
6924            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6925            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6926            
6927        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6928          $self->{line_prev} = $self->{line};
6929          $self->{column_prev} = $self->{column};
6930          $self->{column}++;
6931          $self->{nc}
6932              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6933        } else {
6934          $self->{set_nc}->($self);
6935        }
6936      
6937            return  ($self->{ct});
6938            redo A;
6939          } else {
6940            $self->{parse_error}->(level => $self->{level}->{must}, type => 'space in allowed token', ## TODO: type
6941                            line => $self->{line_prev},
6942                            column => $self->{column_prev});
6943            $self->{ca}->{tokens}->[-1] .= ' ' . chr $self->{nc};
6944            $self->{state} = ALLOWED_TOKEN_STATE;
6945            
6946        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6947          $self->{line_prev} = $self->{line};
6948          $self->{column_prev} = $self->{column};
6949          $self->{column}++;
6950          $self->{nc}
6951              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6952        } else {
6953          $self->{set_nc}->($self);
6954        }
6955      
6956            redo A;
6957          }
6958        } elsif ($self->{state} == AFTER_ALLOWED_TOKENS_STATE) {
6959          if ($is_space->{$self->{nc}}) {
6960            $self->{state} = BEFORE_ATTR_DEFAULT_STATE;
6961            
6962        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6963          $self->{line_prev} = $self->{line};
6964          $self->{column_prev} = $self->{column};
6965          $self->{column}++;
6966          $self->{nc}
6967              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6968        } else {
6969          $self->{set_nc}->($self);
6970        }
6971      
6972            redo A;
6973          } elsif ($self->{nc} == 0x0023) { # #
6974            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6975            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6976            
6977        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6978          $self->{line_prev} = $self->{line};
6979          $self->{column_prev} = $self->{column};
6980          $self->{column}++;
6981          $self->{nc}
6982              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6983        } else {
6984          $self->{set_nc}->($self);
6985        }
6986      
6987            redo A;
6988          } elsif ($self->{nc} == 0x0022) { # "
6989            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6990            $self->{ca}->{value} = '';
6991            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6992            
6993        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6994          $self->{line_prev} = $self->{line};
6995          $self->{column_prev} = $self->{column};
6996          $self->{column}++;
6997          $self->{nc}
6998              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6999        } else {
7000          $self->{set_nc}->($self);
7001        }
7002      
7003            redo A;
7004          } elsif ($self->{nc} == 0x0027) { # '
7005            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7006            $self->{ca}->{value} = '';
7007            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7008            
7009        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7010          $self->{line_prev} = $self->{line};
7011          $self->{column_prev} = $self->{column};
7012          $self->{column}++;
7013          $self->{nc}
7014              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7015        } else {
7016          $self->{set_nc}->($self);
7017        }
7018      
7019            redo A;
7020          } elsif ($self->{nc} == 0x003E) { # >
7021            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
7022            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7023            
7024        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7025          $self->{line_prev} = $self->{line};
7026          $self->{column_prev} = $self->{column};
7027          $self->{column}++;
7028          $self->{nc}
7029              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7030        } else {
7031          $self->{set_nc}->($self);
7032        }
7033      
7034            return  ($self->{ct}); # ATTLIST
7035            redo A;
7036          } elsif ($self->{nc} == -1) {
7037            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7038            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7039            
7040        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7041          $self->{line_prev} = $self->{line};
7042          $self->{column_prev} = $self->{column};
7043          $self->{column}++;
7044          $self->{nc}
7045              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7046        } else {
7047          $self->{set_nc}->($self);
7048        }
7049      
7050            return  ($self->{ct});
7051            redo A;
7052          } else {
7053            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
7054            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
7055            ## Reconsume.
7056            redo A;
7057          }
7058        } elsif ($self->{state} == BEFORE_ATTR_DEFAULT_STATE) {
7059          if ($is_space->{$self->{nc}}) {
7060            ## Stay in the state.
7061            
7062        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7063          $self->{line_prev} = $self->{line};
7064          $self->{column_prev} = $self->{column};
7065          $self->{column}++;
7066          $self->{nc}
7067              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7068        } else {
7069          $self->{set_nc}->($self);
7070        }
7071      
7072            redo A;
7073          } elsif ($self->{nc} == 0x0023) { # #
7074            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
7075            
7076        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7077          $self->{line_prev} = $self->{line};
7078          $self->{column_prev} = $self->{column};
7079          $self->{column}++;
7080          $self->{nc}
7081              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7082        } else {
7083          $self->{set_nc}->($self);
7084        }
7085      
7086            redo A;
7087          } elsif ($self->{nc} == 0x0022) { # "
7088            $self->{ca}->{value} = '';
7089            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7090            
7091        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7092          $self->{line_prev} = $self->{line};
7093          $self->{column_prev} = $self->{column};
7094          $self->{column}++;
7095          $self->{nc}
7096              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7097        } else {
7098          $self->{set_nc}->($self);
7099        }
7100      
7101            redo A;
7102          } elsif ($self->{nc} == 0x0027) { # '
7103            $self->{ca}->{value} = '';
7104            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7105            
7106        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7107          $self->{line_prev} = $self->{line};
7108          $self->{column_prev} = $self->{column};
7109          $self->{column}++;
7110          $self->{nc}
7111              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7112        } else {
7113          $self->{set_nc}->($self);
7114        }
7115      
7116            redo A;
7117          } elsif ($self->{nc} == 0x003E) { # >
7118            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
7119            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7120            
7121        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7122          $self->{line_prev} = $self->{line};
7123          $self->{column_prev} = $self->{column};
7124          $self->{column}++;
7125          $self->{nc}
7126              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7127        } else {
7128          $self->{set_nc}->($self);
7129        }
7130      
7131            return  ($self->{ct}); # ATTLIST
7132            redo A;
7133          } elsif ($self->{nc} == -1) {
7134            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7135            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7136            
7137        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7138          $self->{line_prev} = $self->{line};
7139          $self->{column_prev} = $self->{column};
7140          $self->{column}++;
7141          $self->{nc}
7142              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7143        } else {
7144          $self->{set_nc}->($self);
7145        }
7146      
7147            return  ($self->{ct});
7148            redo A;
7149          } else {
7150            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
7151            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
7152            ## Reconsume.
7153            redo A;
7154          }
7155        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE) {
7156          if ($is_space->{$self->{nc}}) {
7157            ## XML5: No parse error.
7158            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no default type'); ## TODO: type
7159            $self->{state} = BOGUS_MD_STATE;
7160            ## Reconsume.
7161            redo A;
7162          } elsif ($self->{nc} == 0x0022) { # "
7163            ## XML5: Same as "anything else".
7164            $self->{ca}->{value} = '';
7165            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7166            
7167        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7168          $self->{line_prev} = $self->{line};
7169          $self->{column_prev} = $self->{column};
7170          $self->{column}++;
7171          $self->{nc}
7172              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7173        } else {
7174          $self->{set_nc}->($self);
7175        }
7176      
7177            redo A;
7178          } elsif ($self->{nc} == 0x0027) { # '
7179            ## XML5: Same as "anything else".
7180            $self->{ca}->{value} = '';
7181            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7182            
7183        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7184          $self->{line_prev} = $self->{line};
7185          $self->{column_prev} = $self->{column};
7186          $self->{column}++;
7187          $self->{nc}
7188              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7189        } else {
7190          $self->{set_nc}->($self);
7191        }
7192      
7193            redo A;
7194          } elsif ($self->{nc} == 0x003E) { # >
7195            ## XML5: Same as "anything else".
7196            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
7197            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7198            
7199        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7200          $self->{line_prev} = $self->{line};
7201          $self->{column_prev} = $self->{column};
7202          $self->{column}++;
7203          $self->{nc}
7204              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7205        } else {
7206          $self->{set_nc}->($self);
7207        }
7208      
7209            return  ($self->{ct}); # ATTLIST
7210            redo A;
7211          } elsif ($self->{nc} == -1) {
7212            ## XML5: No parse error.
7213            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7214            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7215            
7216        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7217          $self->{line_prev} = $self->{line};
7218          $self->{column_prev} = $self->{column};
7219          $self->{column}++;
7220          $self->{nc}
7221              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7222        } else {
7223          $self->{set_nc}->($self);
7224        }
7225      
7226            return  ($self->{ct});
7227            redo A;
7228          } else {
7229            $self->{ca}->{default} = chr $self->{nc};
7230            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE;
7231            
7232        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7233          $self->{line_prev} = $self->{line};
7234          $self->{column_prev} = $self->{column};
7235          $self->{column}++;
7236          $self->{nc}
7237              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7238        } else {
7239          $self->{set_nc}->($self);
7240        }
7241      
7242            redo A;
7243          }
7244        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE) {
7245          if ($is_space->{$self->{nc}}) {
7246            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE;
7247            
7248        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7249          $self->{line_prev} = $self->{line};
7250          $self->{column_prev} = $self->{column};
7251          $self->{column}++;
7252          $self->{nc}
7253              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7254        } else {
7255          $self->{set_nc}->($self);
7256        }
7257      
7258            redo A;
7259          } elsif ($self->{nc} == 0x0022) { # "
7260            ## XML5: Same as "anything else".
7261            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7262            $self->{ca}->{value} = '';
7263            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7264            
7265        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7266          $self->{line_prev} = $self->{line};
7267          $self->{column_prev} = $self->{column};
7268          $self->{column}++;
7269          $self->{nc}
7270              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7271        } else {
7272          $self->{set_nc}->($self);
7273        }
7274      
7275            redo A;
7276          } elsif ($self->{nc} == 0x0027) { # '
7277            ## XML5: Same as "anything else".
7278            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7279            $self->{ca}->{value} = '';
7280            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7281            
7282        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7283          $self->{line_prev} = $self->{line};
7284          $self->{column_prev} = $self->{column};
7285          $self->{column}++;
7286          $self->{nc}
7287              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7288        } else {
7289          $self->{set_nc}->($self);
7290        }
7291      
7292            redo A;
7293          } elsif ($self->{nc} == 0x003E) { # >
7294            ## XML5: Same as "anything else".
7295            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7296            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7297            
7298        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7299          $self->{line_prev} = $self->{line};
7300          $self->{column_prev} = $self->{column};
7301          $self->{column}++;
7302          $self->{nc}
7303              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7304        } else {
7305          $self->{set_nc}->($self);
7306        }
7307      
7308            return  ($self->{ct}); # ATTLIST
7309            redo A;
7310          } elsif ($self->{nc} == -1) {
7311            ## XML5: No parse error.
7312            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7313            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7314            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7315            
7316        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7317          $self->{line_prev} = $self->{line};
7318          $self->{column_prev} = $self->{column};
7319          $self->{column}++;
7320          $self->{nc}
7321              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7322        } else {
7323          $self->{set_nc}->($self);
7324        }
7325      
7326            return  ($self->{ct});
7327            redo A;
7328          } else {
7329            $self->{ca}->{default} .= chr $self->{nc};
7330            ## Stay in the state.
7331            
7332        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7333          $self->{line_prev} = $self->{line};
7334          $self->{column_prev} = $self->{column};
7335          $self->{column}++;
7336          $self->{nc}
7337              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7338        } else {
7339          $self->{set_nc}->($self);
7340        }
7341      
7342            redo A;
7343          }
7344        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE) {
7345          if ($is_space->{$self->{nc}}) {
7346            ## Stay in the state.
7347            
7348        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7349          $self->{line_prev} = $self->{line};
7350          $self->{column_prev} = $self->{column};
7351          $self->{column}++;
7352          $self->{nc}
7353              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7354        } else {
7355          $self->{set_nc}->($self);
7356        }
7357      
7358            redo A;
7359          } elsif ($self->{nc} == 0x0022) { # "
7360            $self->{ca}->{value} = '';
7361            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7362            
7363        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7364          $self->{line_prev} = $self->{line};
7365          $self->{column_prev} = $self->{column};
7366          $self->{column}++;
7367          $self->{nc}
7368              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7369        } else {
7370          $self->{set_nc}->($self);
7371        }
7372      
7373            redo A;
7374          } elsif ($self->{nc} == 0x0027) { # '
7375            $self->{ca}->{value} = '';
7376            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7377            
7378        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7379          $self->{line_prev} = $self->{line};
7380          $self->{column_prev} = $self->{column};
7381          $self->{column}++;
7382          $self->{nc}
7383              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7384        } else {
7385          $self->{set_nc}->($self);
7386        }
7387      
7388            redo A;
7389          } elsif ($self->{nc} == 0x003E) { # >
7390            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7391            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7392            
7393        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7394          $self->{line_prev} = $self->{line};
7395          $self->{column_prev} = $self->{column};
7396          $self->{column}++;
7397          $self->{nc}
7398              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7399        } else {
7400          $self->{set_nc}->($self);
7401        }
7402      
7403            return  ($self->{ct}); # ATTLIST
7404            redo A;
7405          } elsif ($self->{nc} == -1) {
7406            ## XML5: No parse error.
7407            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7408            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7409            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7410            
7411        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7412          $self->{line_prev} = $self->{line};
7413          $self->{column_prev} = $self->{column};
7414          $self->{column}++;
7415          $self->{nc}
7416              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7417        } else {
7418          $self->{set_nc}->($self);
7419        }
7420      
7421            return  ($self->{ct});
7422          redo A;
7423        } else {
7424          ## XML5: Not defined yet.
7425            if ($self->{ca}->{default} eq 'FIXED') {
7426              $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
7427            } else {
7428              push @{$self->{ct}->{attrdefs}}, $self->{ca};
7429              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7430            }
7431            ## Reconsume.
7432            redo A;
7433          }
7434        } elsif ($self->{state} == AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE) {
7435          if ($is_space->{$self->{nc}} or
7436              $self->{nc} == -1 or
7437              $self->{nc} == 0x003E) { # >
7438            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7439            ## Reconsume.
7440            redo A;
7441          } else {
7442            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before attr name'); ## TODO: type
7443            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7444            ## Reconsume.
7445            redo A;
7446          }
7447        } elsif ($self->{state} == NDATA_STATE) {
7448          ## ASCII case-insensitive
7449          if ($self->{nc} == [
7450                undef,
7451                0x0044, # D
7452                0x0041, # A
7453                0x0054, # T
7454              ]->[length $self->{kwd}] or
7455              $self->{nc} == [
7456                undef,
7457                0x0064, # d
7458                0x0061, # a
7459                0x0074, # t
7460              ]->[length $self->{kwd}]) {
7461            
7462            ## Stay in the state.
7463            $self->{kwd} .= chr $self->{nc};
7464            
7465        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7466          $self->{line_prev} = $self->{line};
7467          $self->{column_prev} = $self->{column};
7468          $self->{column}++;
7469          $self->{nc}
7470              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7471        } else {
7472          $self->{set_nc}->($self);
7473        }
7474      
7475            redo A;
7476          } elsif ((length $self->{kwd}) == 4 and
7477                   ($self->{nc} == 0x0041 or # A
7478                    $self->{nc} == 0x0061)) { # a
7479            if ($self->{kwd} ne 'NDAT' or $self->{nc} == 0x0061) { # a
7480              
7481              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
7482                              text => 'NDATA',
7483                              line => $self->{line_prev},
7484                              column => $self->{column_prev} - 4);
7485            } else {
7486              
7487            }
7488            $self->{state} = AFTER_NDATA_STATE;
7489            
7490        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7491          $self->{line_prev} = $self->{line};
7492          $self->{column_prev} = $self->{column};
7493          $self->{column}++;
7494          $self->{nc}
7495              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7496        } else {
7497          $self->{set_nc}->($self);
7498        }
7499      
7500            redo A;
7501          } else {
7502            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after literal', ## TODO: type
7503                            line => $self->{line_prev},
7504                            column => $self->{column_prev} + 1
7505                                - length $self->{kwd});
7506            
7507            $self->{state} = BOGUS_MD_STATE;
7508            ## Reconsume.
7509            redo A;
7510          }
7511        } elsif ($self->{state} == AFTER_NDATA_STATE) {
7512          if ($is_space->{$self->{nc}}) {
7513            $self->{state} = BEFORE_NOTATION_NAME_STATE;
7514            
7515        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7516          $self->{line_prev} = $self->{line};
7517          $self->{column_prev} = $self->{column};
7518          $self->{column}++;
7519          $self->{nc}
7520              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7521        } else {
7522          $self->{set_nc}->($self);
7523        }
7524      
7525            redo A;
7526          } elsif ($self->{nc} == 0x003E) { # >
7527            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no notation name'); ## TODO: type
7528            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7529            
7530        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7531          $self->{line_prev} = $self->{line};
7532          $self->{column_prev} = $self->{column};
7533          $self->{column}++;
7534          $self->{nc}
7535              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7536        } else {
7537          $self->{set_nc}->($self);
7538        }
7539      
7540            return  ($self->{ct}); # ENTITY
7541            redo A;
7542          } elsif ($self->{nc} == -1) {
7543            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7544            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7545            
7546        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7547          $self->{line_prev} = $self->{line};
7548          $self->{column_prev} = $self->{column};
7549          $self->{column}++;
7550          $self->{nc}
7551              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7552        } else {
7553          $self->{set_nc}->($self);
7554        }
7555      
7556            return  ($self->{ct}); # ENTITY
7557            redo A;
7558          } else {
7559            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after literal', ## TODO: type
7560                            line => $self->{line_prev},
7561                            column => $self->{column_prev} + 1
7562                                - length $self->{kwd});
7563            $self->{state} = BOGUS_MD_STATE;
7564            ## Reconsume.
7565            redo A;
7566          }
7567        } elsif ($self->{state} == BEFORE_NOTATION_NAME_STATE) {
7568          if ($is_space->{$self->{nc}}) {
7569            ## Stay in the state.
7570            
7571        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7572          $self->{line_prev} = $self->{line};
7573          $self->{column_prev} = $self->{column};
7574          $self->{column}++;
7575          $self->{nc}
7576              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7577        } else {
7578          $self->{set_nc}->($self);
7579        }
7580      
7581            redo A;
7582          } elsif ($self->{nc} == 0x003E) { # >
7583            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no notation name'); ## TODO: type
7584            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7585            
7586        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7587          $self->{line_prev} = $self->{line};
7588          $self->{column_prev} = $self->{column};
7589          $self->{column}++;
7590          $self->{nc}
7591              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7592        } else {
7593          $self->{set_nc}->($self);
7594        }
7595      
7596            return  ($self->{ct}); # ENTITY
7597            redo A;
7598          } elsif ($self->{nc} == -1) {
7599            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7600            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7601            
7602        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7603          $self->{line_prev} = $self->{line};
7604          $self->{column_prev} = $self->{column};
7605          $self->{column}++;
7606          $self->{nc}
7607              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7608        } else {
7609          $self->{set_nc}->($self);
7610        }
7611      
7612            return  ($self->{ct}); # ENTITY
7613            redo A;
7614          } else {
7615            $self->{ct}->{notation} = chr $self->{nc}; # ENTITY
7616            $self->{state} = NOTATION_NAME_STATE;
7617            
7618        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7619          $self->{line_prev} = $self->{line};
7620          $self->{column_prev} = $self->{column};
7621          $self->{column}++;
7622          $self->{nc}
7623              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7624        } else {
7625          $self->{set_nc}->($self);
7626        }
7627      
7628            redo A;
7629          }
7630        } elsif ($self->{state} == NOTATION_NAME_STATE) {
7631          if ($is_space->{$self->{nc}}) {
7632            $self->{state} = AFTER_NOTATION_NAME_STATE;
7633            
7634        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7635          $self->{line_prev} = $self->{line};
7636          $self->{column_prev} = $self->{column};
7637          $self->{column}++;
7638          $self->{nc}
7639              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7640        } else {
7641          $self->{set_nc}->($self);
7642        }
7643      
7644            redo A;
7645          } elsif ($self->{nc} == 0x003E) { # >
7646            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7647            
7648        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7649          $self->{line_prev} = $self->{line};
7650          $self->{column_prev} = $self->{column};
7651          $self->{column}++;
7652          $self->{nc}
7653              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7654        } else {
7655          $self->{set_nc}->($self);
7656        }
7657      
7658            return  ($self->{ct}); # ENTITY
7659            redo A;
7660          } elsif ($self->{nc} == -1) {
7661            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7662            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7663            
7664        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7665          $self->{line_prev} = $self->{line};
7666          $self->{column_prev} = $self->{column};
7667          $self->{column}++;
7668          $self->{nc}
7669              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7670        } else {
7671          $self->{set_nc}->($self);
7672        }
7673      
7674            return  ($self->{ct}); # ENTITY
7675            redo A;
7676          } else {
7677            $self->{ct}->{notation} .= chr $self->{nc}; # ENTITY
7678            ## Stay in the state.
7679            
7680        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7681          $self->{line_prev} = $self->{line};
7682          $self->{column_prev} = $self->{column};
7683          $self->{column}++;
7684          $self->{nc}
7685              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7686        } else {
7687          $self->{set_nc}->($self);
7688        }
7689      
7690            redo A;
7691          }
7692        } elsif ($self->{state} == AFTER_NOTATION_NAME_STATE) {
7693          if ($is_space->{$self->{nc}}) {
7694            ## Stay in the state.
7695            
7696        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7697          $self->{line_prev} = $self->{line};
7698          $self->{column_prev} = $self->{column};
7699          $self->{column}++;
7700          $self->{nc}
7701              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7702        } else {
7703          $self->{set_nc}->($self);
7704        }
7705      
7706            redo A;
7707          } elsif ($self->{nc} == 0x003E) { # >
7708            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7709            
7710        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7711          $self->{line_prev} = $self->{line};
7712          $self->{column_prev} = $self->{column};
7713          $self->{column}++;
7714          $self->{nc}
7715              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7716        } else {
7717          $self->{set_nc}->($self);
7718        }
7719      
7720            return  ($self->{ct}); # ENTITY
7721            redo A;
7722          } elsif ($self->{nc} == -1) {
7723            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7724            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7725            
7726        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7727          $self->{line_prev} = $self->{line};
7728          $self->{column_prev} = $self->{column};
7729          $self->{column}++;
7730          $self->{nc}
7731              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7732        } else {
7733          $self->{set_nc}->($self);
7734        }
7735      
7736            return  ($self->{ct}); # ENTITY
7737            redo A;
7738          } else {
7739            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after notation name'); ## TODO: type
7740            $self->{state} = BOGUS_MD_STATE;
7741            ## Reconsume.
7742            redo A;
7743          }
7744    
         ## TODO: ...  
7745    
7746          $self->{state} = BOGUS_COMMENT_STATE;      } elsif ($self->{state} == BOGUS_MD_STATE) {
7747          $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded        if ($self->{nc} == 0x003E) { # >
7748            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7749            
7750        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7751          $self->{line_prev} = $self->{line};
7752          $self->{column_prev} = $self->{column};
7753          $self->{column}++;
7754          $self->{nc}
7755              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7756        } else {
7757          $self->{set_nc}->($self);
7758        }
7759      
7760            return  ($self->{ct}); # ATTLIST/ENTITY/NOTATION
7761            redo A;
7762          } elsif ($self->{nc} == -1) {
7763            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7764          ## Reconsume.          ## Reconsume.
7765            return  ($self->{ct}); # ATTLIST/ENTITY/NOTATION
7766            redo A;
7767          } else {
7768            ## Stay in the state.
7769            
7770        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7771          $self->{line_prev} = $self->{line};
7772          $self->{column_prev} = $self->{column};
7773          $self->{column}++;
7774          $self->{nc}
7775              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7776        } else {
7777          $self->{set_nc}->($self);
7778        }
7779      
7780          redo A;          redo A;
7781        }        }
   
7782      } else {      } else {
7783        die "$0: $self->{state}: Unknown state";        die "$0: $self->{state}: Unknown state";
7784      }      }
# Line 5853  sub _get_next_token ($) { Line 7789  sub _get_next_token ($) {
7789    
7790  1;  1;
7791  ## $Date$  ## $Date$
7792                                    

Legend:
Removed from v.1.14  
changed lines
  Added in v.1.18

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24