/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm

Parent Directory | Revision Log | View Patch

revision 1.14 by wakaba, Fri Oct 17 07:14:29 2008 UTC | revision 1.16 by wakaba, Sat Oct 18 11:34:49 2008 UTC
# Line 164  sub BEFORE_MD_NAME_STATE () { 68 } Line 164  sub BEFORE_MD_NAME_STATE () { 68 }
164  sub MD_NAME_STATE () { 69 }  sub MD_NAME_STATE () { 69 }
165  sub DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE () { 70 }  sub DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE () { 70 }
166  sub DOCTYPE_ATTLIST_NAME_AFTER_STATE () { 71 }  sub DOCTYPE_ATTLIST_NAME_AFTER_STATE () { 71 }
167    sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE () { 72 }
168    sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE () { 73 }
169    sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE () { 74 }
170    sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE () { 75 }
171    sub BEFORE_ALLOWED_TOKEN_STATE () { 76 }
172    sub ALLOWED_TOKEN_STATE () { 77 }
173    sub AFTER_ALLOWED_TOKEN_STATE () { 78 }
174    sub AFTER_ALLOWED_TOKENS_STATE () { 79 }
175    sub BEFORE_ATTR_DEFAULT_STATE () { 80 }
176    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE () { 81 }
177    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE () { 82 }
178    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE () { 83 }
179    sub AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE () { 84 }
180    sub BOGUS_MD_STATE () { 85 }
181    
182  ## Tree constructor state constants (see Whatpm::HTML for the full  ## Tree constructor state constants (see Whatpm::HTML for the full
183  ## list and descriptions)  ## list and descriptions)
# Line 1737  sub _get_next_token ($) { Line 1751  sub _get_next_token ($) {
1751          redo A;          redo A;
1752        }        }
1753      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1754        ## XML5: "Tag attribute value double quoted state".        ## XML5: "Tag attribute value double quoted state" and "DOCTYPE
1755          ## ATTLIST attribute value double quoted state".
1756                
1757        if ($self->{nc} == 0x0022) { # "        if ($self->{nc} == 0x0022) { # "
1758                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1759          ## XML5: "Tag attribute name before state".            
1760          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;            ## XML5: "DOCTYPE ATTLIST name after state".
1761              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1762              $self->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE;
1763            } else {
1764              
1765              ## XML5: "Tag attribute name before state".
1766              $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1767            }
1768                    
1769      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1770        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1783  sub _get_next_token ($) { Line 1805  sub _get_next_token ($) {
1805          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1806                        
1807            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
1808    
1809              $self->{state} = DATA_STATE;
1810              $self->{s_kwd} = '';
1811              ## reconsume
1812              return  ($self->{ct}); # start tag
1813              redo A;
1814          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1815            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1816            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1792  sub _get_next_token ($) { Line 1820  sub _get_next_token ($) {
1820              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
1821                            
1822            }            }
1823    
1824              $self->{state} = DATA_STATE;
1825              $self->{s_kwd} = '';
1826              ## reconsume
1827              return  ($self->{ct}); # end tag
1828              redo A;
1829            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1830              ## XML5: No parse error above; not defined yet.
1831              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1832              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1833              ## Reconsume.
1834              return  ($self->{ct}); # ATTLIST
1835              redo A;
1836          } else {          } else {
1837            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1838          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
1839        } else {        } else {
1840            ## XML5 [ATTLIST]: Not defined yet.
1841          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
1842                        
1843            ## XML5: Not a parse error.            ## XML5: Not a parse error.
# Line 1830  sub _get_next_token ($) { Line 1865  sub _get_next_token ($) {
1865          redo A;          redo A;
1866        }        }
1867      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1868        ## XML5: "Tag attribute value single quoted state".        ## XML5: "Tag attribute value single quoted state" and "DOCTYPE
1869          ## ATTLIST attribute value single quoted state".
1870    
1871        if ($self->{nc} == 0x0027) { # '        if ($self->{nc} == 0x0027) { # '
1872                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1873          ## XML5: "Before attribute name state" (sic).            
1874          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;            ## XML5: "DOCTYPE ATTLIST name after state".
1875              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1876              $self->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE;
1877            } else {
1878              
1879              ## XML5: "Before attribute name state" (sic).
1880              $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1881            }
1882                    
1883      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1884        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1876  sub _get_next_token ($) { Line 1919  sub _get_next_token ($) {
1919          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1920                        
1921            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
1922    
1923              $self->{state} = DATA_STATE;
1924              $self->{s_kwd} = '';
1925              ## reconsume
1926              return  ($self->{ct}); # start tag
1927              redo A;
1928          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1929            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1930            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1885  sub _get_next_token ($) { Line 1934  sub _get_next_token ($) {
1934              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
1935                            
1936            }            }
1937    
1938              $self->{state} = DATA_STATE;
1939              $self->{s_kwd} = '';
1940              ## reconsume
1941              return  ($self->{ct}); # end tag
1942              redo A;
1943            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1944              ## XML5: No parse error above; not defined yet.
1945              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1946              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1947              ## Reconsume.
1948              return  ($self->{ct}); # ATTLIST
1949              redo A;
1950          } else {          } else {
1951            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1952          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
1953        } else {        } else {
1954            ## XML5 [ATTLIST]: Not defined yet.
1955          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
1956                        
1957            ## XML5: Not a parse error.            ## XML5: Not a parse error.
# Line 1926  sub _get_next_token ($) { Line 1982  sub _get_next_token ($) {
1982        ## XML5: "Tag attribute value unquoted state".        ## XML5: "Tag attribute value unquoted state".
1983    
1984        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
1985                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1986          ## XML5: "Tag attribute name before state".            
1987          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;            push @{$self->{ct}->{attrdefs}}, $self->{ca};
1988              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
1989            } else {
1990              
1991              ## XML5: "Tag attribute name before state".
1992              $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1993            }
1994                    
1995      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1996        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1969  sub _get_next_token ($) { Line 2031  sub _get_next_token ($) {
2031          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
2032                        
2033            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
2034    
2035              $self->{state} = DATA_STATE;
2036              $self->{s_kwd} = '';
2037              
2038        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2039          $self->{line_prev} = $self->{line};
2040          $self->{column_prev} = $self->{column};
2041          $self->{column}++;
2042          $self->{nc}
2043              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
2044        } else {
2045          $self->{set_nc}->($self);
2046        }
2047      
2048              return  ($self->{ct}); # start tag
2049              redo A;
2050          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
2051            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
2052            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1978  sub _get_next_token ($) { Line 2056  sub _get_next_token ($) {
2056              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
2057                            
2058            }            }
2059          } else {  
2060            die "$0: $self->{ct}->{type}: Unknown token type";            $self->{state} = DATA_STATE;
2061          }            $self->{s_kwd} = '';
2062          $self->{state} = DATA_STATE;            
         $self->{s_kwd} = '';  
           
2063      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2064        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
2065        $self->{column_prev} = $self->{column};        $self->{column_prev} = $self->{column};
# Line 1994  sub _get_next_token ($) { Line 2070  sub _get_next_token ($) {
2070        $self->{set_nc}->($self);        $self->{set_nc}->($self);
2071      }      }
2072        
2073              return  ($self->{ct}); # end tag
2074          return  ($self->{ct}); # start tag or end tag            redo A;
2075            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
2076          redo A;            push @{$self->{ct}->{attrdefs}}, $self->{ca};
2077              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2078              
2079        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2080          $self->{line_prev} = $self->{line};
2081          $self->{column_prev} = $self->{column};
2082          $self->{column}++;
2083          $self->{nc}
2084              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
2085        } else {
2086          $self->{set_nc}->($self);
2087        }
2088      
2089              return  ($self->{ct}); # ATTLIST
2090              redo A;
2091            } else {
2092              die "$0: $self->{ct}->{type}: Unknown token type";
2093            }
2094        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');  
2095          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
2096                        
2097              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');
2098            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
2099    
2100              $self->{state} = DATA_STATE;
2101              $self->{s_kwd} = '';
2102              ## reconsume
2103              return  ($self->{ct}); # start tag
2104              redo A;
2105          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
2106              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');
2107            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
2108            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
2109                            
# Line 2012  sub _get_next_token ($) { Line 2112  sub _get_next_token ($) {
2112              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
2113                            
2114            }            }
2115    
2116              $self->{state} = DATA_STATE;
2117              $self->{s_kwd} = '';
2118              ## reconsume
2119              return  ($self->{ct}); # end tag
2120              redo A;
2121            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
2122              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
2123              push @{$self->{ct}->{attrdefs}}, $self->{ca};
2124              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2125              ## Reconsume.
2126              return  ($self->{ct}); # ATTLIST
2127              redo A;
2128          } else {          } else {
2129            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
2130          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
2131        } else {        } else {
2132          if ({          if ({
2133               0x0022 => 1, # "               0x0022 => 1, # "
# Line 3077  sub _get_next_token ($) { Line 3183  sub _get_next_token ($) {
3183        
3184          redo A;          redo A;
3185        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3186            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3187              
3188              $self->{state} = DATA_STATE;
3189              $self->{s_kwd} = '';
3190            } else {
3191              
3192              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md def'); ## TODO: type
3193              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3194            }
3195                    
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3196                    
3197      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3198        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3091  sub _get_next_token ($) { Line 3204  sub _get_next_token ($) {
3204        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3205      }      }
3206        
3207            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         return  ($self->{ct}); # DOCTYPE  
   
3208          redo A;          redo A;
3209        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3210            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3211              
3212              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3213              $self->{state} = DATA_STATE;
3214              $self->{s_kwd} = '';
3215              $self->{ct}->{quirks} = 1;
3216            } else {
3217              
3218              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3219              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3220            }
3221                    
3222          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          ## Reconsume.
3223          $self->{state} = DATA_STATE;          return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{s_kwd} = '';  
         ## reconsume  
   
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3224          redo A;          redo A;
3225        } elsif ($self->{nc} == 0x0050 or # P        } elsif ($self->{nc} == 0x0050 or # P
3226                 $self->{nc} == 0x0070) { # p                 $self->{nc} == 0x0070) { # p
# Line 3140  sub _get_next_token ($) { Line 3256  sub _get_next_token ($) {
3256      }      }
3257        
3258          redo A;          redo A;
3259        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [  ## TODO: " and ' for ENTITY
3260          } elsif ($self->{is_xml} and
3261                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3262                   $self->{nc} == 0x005B) { # [
3263                    
3264          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3265          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
# Line 3159  sub _get_next_token ($) { Line 3278  sub _get_next_token ($) {
3278          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
3279          redo A;          redo A;
3280        } else {        } else {
3281                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name'); ## TODO: type
3282          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name');  
3283          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3284              
3285              $self->{ct}->{quirks} = 1;
3286              $self->{state} = BOGUS_DOCTYPE_STATE;
3287            } else {
3288              
3289              $self->{state} = BOGUS_MD_STATE;
3290            }
3291    
         $self->{state} = BOGUS_DOCTYPE_STATE;  
3292                    
3293      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3294        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3235  sub _get_next_token ($) { Line 3360  sub _get_next_token ($) {
3360        
3361          redo A;          redo A;
3362        } else {        } else {
3363                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name', ## TODO: type
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name',  
3364                          line => $self->{line_prev},                          line => $self->{line_prev},
3365                          column => $self->{column_prev} + 1 - length $self->{kwd});                          column => $self->{column_prev} + 1 - length $self->{kwd});
3366          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3367              
3368          $self->{state} = BOGUS_DOCTYPE_STATE;            $self->{ct}->{quirks} = 1;
3369              $self->{state} = BOGUS_DOCTYPE_STATE;
3370            } else {
3371              
3372              $self->{state} = BOGUS_MD_STATE;
3373            }
3374          ## Reconsume.          ## Reconsume.
3375          redo A;          redo A;
3376        }        }
# Line 3303  sub _get_next_token ($) { Line 3432  sub _get_next_token ($) {
3432        
3433          redo A;          redo A;
3434        } else {        } else {
3435                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name', ## TODO: type
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name',  
3436                          line => $self->{line_prev},                          line => $self->{line_prev},
3437                          column => $self->{column_prev} + 1 - length $self->{kwd});                          column => $self->{column_prev} + 1 - length $self->{kwd});
3438          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3439              
3440          $self->{state} = BOGUS_DOCTYPE_STATE;            $self->{ct}->{quirks} = 1;
3441              $self->{state} = BOGUS_DOCTYPE_STATE;
3442            } else {
3443              
3444              $self->{state} = BOGUS_MD_STATE;
3445            }
3446          ## Reconsume.          ## Reconsume.
3447          redo A;          redo A;
3448        }        }
# Line 3362  sub _get_next_token ($) { Line 3495  sub _get_next_token ($) {
3495        
3496          redo A;          redo A;
3497        } elsif ($self->{nc} eq 0x003E) { # >        } elsif ($self->{nc} eq 0x003E) { # >
           
3498          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');
3499            
3500          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3501          $self->{s_kwd} = '';            
3502              $self->{state} = DATA_STATE;
3503              $self->{s_kwd} = '';
3504              $self->{ct}->{quirks} = 1;
3505            } else {
3506              
3507              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3508            }
3509            
3510                    
3511      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3512        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3378  sub _get_next_token ($) { Line 3518  sub _get_next_token ($) {
3518        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3519      }      }
3520        
3521            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3522          redo A;          redo A;
3523        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3524            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3525              
3526              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3527              $self->{state} = DATA_STATE;
3528              $self->{s_kwd} = '';
3529              $self->{ct}->{quirks} = 1;
3530            } else {
3531              
3532              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3533              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3534            }
3535                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');  
   
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3536          ## reconsume          ## reconsume
   
         $self->{ct}->{quirks} = 1;  
3537          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
3538          redo A;          redo A;
3539        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [        } elsif ($self->{is_xml} and
3540                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3541                   $self->{nc} == 0x005B) { # [
3542                    
3543          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');
3544          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
# Line 3415  sub _get_next_token ($) { Line 3558  sub _get_next_token ($) {
3558          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
3559          redo A;          redo A;
3560        } else {        } else {
           
3561          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC');
         $self->{ct}->{quirks} = 1;  
3562    
3563          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3564              
3565              $self->{ct}->{quirks} = 1;
3566              $self->{state} = BOGUS_DOCTYPE_STATE;
3567            } else {
3568              
3569              $self->{state} = BOGUS_MD_STATE;
3570            }
3571    
3572                    
3573      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3574        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3450  sub _get_next_token ($) { Line 3599  sub _get_next_token ($) {
3599        
3600          redo A;          redo A;
3601        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
3602          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3603    
3604          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3605          $self->{s_kwd} = '';            
3606              $self->{state} = DATA_STATE;
3607              $self->{s_kwd} = '';
3608              $self->{ct}->{quirks} = 1;
3609            } else {
3610              
3611              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3612            }
3613    
3614                    
3615      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3616        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3466  sub _get_next_token ($) { Line 3622  sub _get_next_token ($) {
3622        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3623      }      }
3624        
3625            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3626          redo A;          redo A;
3627        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
3628          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3629    
3630          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3631          $self->{s_kwd} = '';            
3632          ## reconsume            $self->{state} = DATA_STATE;
3633              $self->{s_kwd} = '';
3634          $self->{ct}->{quirks} = 1;            $self->{ct}->{quirks} = 1;
3635            } else {
3636              
3637              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3638            }
3639            
3640            ## Reconsume.
3641          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
3642          redo A;          redo A;
3643        } else {        } else {
3644                    
3645          $self->{ct}->{pubid} # DOCTYPE          $self->{ct}->{pubid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
3646          $self->{read_until}->($self->{ct}->{pubid}, q[">],          $self->{read_until}->($self->{ct}->{pubid}, q[">],
3647                                length $self->{ct}->{pubid});                                length $self->{ct}->{pubid});
3648    
# Line 3521  sub _get_next_token ($) { Line 3677  sub _get_next_token ($) {
3677        
3678          redo A;          redo A;
3679        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
3680          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3681    
3682          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3683          $self->{s_kwd} = '';            
3684              $self->{state} = DATA_STATE;
3685              $self->{s_kwd} = '';
3686              $self->{ct}->{quirks} = 1;
3687            } else {
3688              
3689              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3690            }
3691    
3692                    
3693      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3694        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3537  sub _get_next_token ($) { Line 3700  sub _get_next_token ($) {
3700        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3701      }      }
3702        
3703            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3704          redo A;          redo A;
3705        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
3706          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3707    
3708          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3709          $self->{s_kwd} = '';            
3710              $self->{state} = DATA_STATE;
3711              $self->{s_kwd} = '';
3712              $self->{ct}->{quirks} = 1;
3713            } else {
3714              
3715              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3716            }
3717          
3718          ## reconsume          ## reconsume
3719            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3720          redo A;          redo A;
3721        } else {        } else {
3722                    
3723          $self->{ct}->{pubid} # DOCTYPE          $self->{ct}->{pubid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
3724          $self->{read_until}->($self->{ct}->{pubid}, q['>],          $self->{read_until}->($self->{ct}->{pubid}, q['>],
3725                                length $self->{ct}->{pubid});                                length $self->{ct}->{pubid});
3726    
# Line 3593  sub _get_next_token ($) { Line 3756  sub _get_next_token ($) {
3756          redo A;          redo A;
3757        } elsif ($self->{nc} == 0x0022) { # "        } elsif ($self->{nc} == 0x0022) { # "
3758                    
3759          $self->{ct}->{sysid} = ''; # DOCTYPE          $self->{ct}->{sysid} = ''; # DOCTYPE/ENTITY/NOTATION
3760          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
3761                    
3762      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3609  sub _get_next_token ($) { Line 3772  sub _get_next_token ($) {
3772          redo A;          redo A;
3773        } elsif ($self->{nc} == 0x0027) { # '        } elsif ($self->{nc} == 0x0027) { # '
3774                    
3775          $self->{ct}->{sysid} = ''; # DOCTYPE          $self->{ct}->{sysid} = ''; # DOCTYPE/ENTITY/NOTATION
3776          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
3777                    
3778      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3624  sub _get_next_token ($) { Line 3787  sub _get_next_token ($) {
3787        
3788          redo A;          redo A;
3789        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3790          if ($self->{is_xml}) {          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3791                        if ($self->{is_xml}) {
3792            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');              
3793                $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
3794              } else {
3795                
3796              }
3797              $self->{state} = DATA_STATE;
3798              $self->{s_kwd} = '';
3799          } else {          } else {
3800                        if ($self->{ct}->{type} == NOTATION_TOKEN) {
3801                
3802              } else {
3803                
3804                $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');            
3805              }
3806              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3807          }          }
3808          $self->{state} = DATA_STATE;          
         $self->{s_kwd} = '';  
3809                    
3810      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3811        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3643  sub _get_next_token ($) { Line 3817  sub _get_next_token ($) {
3817        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3818      }      }
3819        
3820            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         return  ($self->{ct}); # DOCTYPE  
   
3821          redo A;          redo A;
3822        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3823            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3824              
3825              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3826              
3827              $self->{state} = DATA_STATE;
3828              $self->{s_kwd} = '';
3829              $self->{ct}->{quirks} = 1;
3830            } else {
3831              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3832              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3833            }
3834                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');  
   
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3835          ## reconsume          ## reconsume
3836            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3837          redo A;          redo A;
3838        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [        } elsif ($self->{is_xml} and
3839                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3840                   $self->{nc} == 0x005B) { # [
3841                    
3842          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
3843          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
# Line 3679  sub _get_next_token ($) { Line 3857  sub _get_next_token ($) {
3857          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
3858          redo A;          redo A;
3859        } else {        } else {
           
3860          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC literal');
         $self->{ct}->{quirks} = 1;  
3861    
3862          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3863              
3864              $self->{ct}->{quirks} = 1;
3865              $self->{state} = BOGUS_DOCTYPE_STATE;
3866            } else {
3867              
3868              $self->{state} = BOGUS_MD_STATE;
3869            }
3870    
3871                    
3872      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3873        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3746  sub _get_next_token ($) { Line 3930  sub _get_next_token ($) {
3930        
3931          redo A;          redo A;
3932        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
3933          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3934                    
3935      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3936        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3762  sub _get_next_token ($) { Line 3943  sub _get_next_token ($) {
3943      }      }
3944        
3945    
3946          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3947          return  ($self->{ct}); # DOCTYPE            
3948              $self->{state} = DATA_STATE;
3949              $self->{s_kwd} = '';
3950              $self->{ct}->{quirks} = 1;
3951            } else {
3952              
3953              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3954            }
3955    
3956            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
3957          redo A;          redo A;
3958        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3959            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3960              
3961              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3962              $self->{state} = DATA_STATE;
3963              $self->{s_kwd} = '';
3964              $self->{ct}->{quirks} = 1;
3965            } else {
3966              
3967              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3968              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3969            }
3970                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');  
   
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3971          ## reconsume          ## reconsume
3972            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3973          redo A;          redo A;
3974        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [        } elsif ($self->{is_xml} and
3975                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3976                   $self->{nc} == 0x005B) { # [
3977                    
3978          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
3979    
# Line 3799  sub _get_next_token ($) { Line 3994  sub _get_next_token ($) {
3994          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
3995          redo A;          redo A;
3996        } else {        } else {
           
3997          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM');
         $self->{ct}->{quirks} = 1;  
3998    
3999          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4000                        
4001              $self->{ct}->{quirks} = 1;
4002              $self->{state} = BOGUS_DOCTYPE_STATE;
4003            } else {
4004              
4005              $self->{state} = BOGUS_MD_STATE;
4006            }
4007    
4008                    
4009      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4010        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3834  sub _get_next_token ($) { Line 4035  sub _get_next_token ($) {
4035        
4036          redo A;          redo A;
4037        } elsif (not $self->{is_xml} and $self->{nc} == 0x003E) { # >        } elsif (not $self->{is_xml} and $self->{nc} == 0x003E) { # >
           
4038          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4039    
4040          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4041          $self->{s_kwd} = '';            
4042              $self->{state} = DATA_STATE;
4043              $self->{s_kwd} = '';
4044              $self->{ct}->{quirks} = 1;
4045            } else {
4046              
4047              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4048            }
4049            
4050                    
4051      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4052        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3850  sub _get_next_token ($) { Line 4058  sub _get_next_token ($) {
4058        $self->{set_nc}->($self);        $self->{set_nc}->($self);
4059      }      }
4060        
4061            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
4062          redo A;          redo A;
4063        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
4064          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4065    
4066          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4067          $self->{s_kwd} = '';            
4068              $self->{state} = DATA_STATE;
4069              $self->{s_kwd} = '';
4070              $self->{ct}->{quirks} = 1;
4071            } else {
4072              
4073              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4074            }
4075            
4076          ## reconsume          ## reconsume
4077            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
4078          redo A;          redo A;
4079        } else {        } else {
4080                    
4081          $self->{ct}->{sysid} # DOCTYPE          $self->{ct}->{sysid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
4082          $self->{read_until}->($self->{ct}->{sysid}, q[">],          $self->{read_until}->($self->{ct}->{sysid}, q[">],
4083                                length $self->{ct}->{sysid});                                length $self->{ct}->{sysid});
4084    
# Line 3927  sub _get_next_token ($) { Line 4135  sub _get_next_token ($) {
4135    
4136          redo A;          redo A;
4137        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
4138          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4139    
4140          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4141          $self->{s_kwd} = '';            
4142          ## reconsume            $self->{state} = DATA_STATE;
4143              $self->{s_kwd} = '';
4144          $self->{ct}->{quirks} = 1;            $self->{ct}->{quirks} = 1;
4145          return  ($self->{ct}); # DOCTYPE          } else {
4146              
4147              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4148            }
4149    
4150            ## reconsume
4151            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4152          redo A;          redo A;
4153        } else {        } else {
4154                    
4155          $self->{ct}->{sysid} # DOCTYPE          $self->{ct}->{sysid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
4156          $self->{read_until}->($self->{ct}->{sysid}, q['>],          $self->{read_until}->($self->{ct}->{sysid}, q['>],
4157                                length $self->{ct}->{sysid});                                length $self->{ct}->{sysid});
4158    
# Line 3976  sub _get_next_token ($) { Line 4187  sub _get_next_token ($) {
4187        
4188          redo A;          redo A;
4189        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
4190                    if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4191          $self->{state} = DATA_STATE;            
4192          $self->{s_kwd} = '';            $self->{state} = DATA_STATE;
4193              $self->{s_kwd} = '';
4194            } else {
4195              
4196              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4197            }
4198    
4199                    
4200      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4201        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3990  sub _get_next_token ($) { Line 4207  sub _get_next_token ($) {
4207        $self->{set_nc}->($self);        $self->{set_nc}->($self);
4208      }      }
4209        
4210            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         return  ($self->{ct}); # DOCTYPE  
   
4211          redo A;          redo A;
4212    ## TODO: "NDATA"
4213        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4214                    if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4215          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');            
4216          $self->{state} = DATA_STATE;            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
4217          $self->{s_kwd} = '';            $self->{state} = DATA_STATE;
4218          ## reconsume            $self->{s_kwd} = '';
4219              $self->{ct}->{quirks} = 1;
4220          $self->{ct}->{quirks} = 1;          } else {
4221          return  ($self->{ct}); # DOCTYPE            
4222              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
4223              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4224            }
4225    
4226            ## reconsume
4227            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4228          redo A;          redo A;
4229        } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [        } elsif ($self->{is_xml} and
4230                   $self->{ct}->{type} == DOCTYPE_TOKEN and
4231                   $self->{nc} == 0x005B) { # [
4232                    
4233          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4234          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
# Line 4024  sub _get_next_token ($) { Line 4247  sub _get_next_token ($) {
4247          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
4248          redo A;          redo A;
4249        } else {        } else {
           
4250          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');
         #$self->{ct}->{quirks} = 1;  
4251    
4252          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4253              
4254              #$self->{ct}->{quirks} = 1;
4255              $self->{state} = BOGUS_DOCTYPE_STATE;
4256            } else {
4257              
4258              $self->{state} = BOGUS_MD_STATE;
4259            }
4260    
4261                    
4262      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4263        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 5477  sub _get_next_token ($) { Line 5706  sub _get_next_token ($) {
5706        } elsif ($self->{kwd} eq 'ATTLIS' and        } elsif ($self->{kwd} eq 'ATTLIS' and
5707                 $self->{nc} == 0x0054) { # T                 $self->{nc} == 0x0054) { # T
5708          $self->{ct} = {type => ATTLIST_TOKEN, name => '',          $self->{ct} = {type => ATTLIST_TOKEN, name => '',
5709                           attrdefs => [],
5710                         line => $self->{line_prev},                         line => $self->{line_prev},
5711                         column => $self->{column_prev} - 6};                         column => $self->{column_prev} - 6};
5712          $self->{state} = DOCTYPE_MD_STATE;          $self->{state} = DOCTYPE_MD_STATE;
# Line 5739  sub _get_next_token ($) { Line 5969  sub _get_next_token ($) {
5969        ## XML5: "DOCTYPE ENTITY name state" and "DOCTYPE ATTLIST name state".        ## XML5: "DOCTYPE ENTITY name state" and "DOCTYPE ATTLIST name state".
5970                
5971        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
5972          ## TODO:          if ($self->{ct}->{type} == ATTLIST_TOKEN) {
5973          $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
5974            } elsif ($self->{ct}->{type} == ELEMENT_TOKEN) {
5975              ## TODO: ...
5976              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
5977            } else { # ENTITY/NOTATION
5978              $self->{state} = AFTER_DOCTYPE_NAME_STATE;
5979            }
5980                    
5981      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5982        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 5757  sub _get_next_token ($) { Line 5993  sub _get_next_token ($) {
5993          if ($self->{ct}->{type} == ATTLIST_TOKEN) {          if ($self->{ct}->{type} == ATTLIST_TOKEN) {
5994            #            #
5995          } else {          } else {
5996            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md body'); ## TODO: type            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md def'); ## TODO: type
5997          }          }
5998          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5999                    
# Line 5831  sub _get_next_token ($) { Line 6067  sub _get_next_token ($) {
6067          ## XML5: No parse error.          ## XML5: No parse error.
6068          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6069          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6070            return  ($self->{ct});
6071          redo A;          redo A;
6072        } else {        } else {
6073          ## XML5: Not defined yet.          ## XML5: Not defined yet.
6074            $self->{ca} = {name => chr ($self->{nc}), # attrdef
6075          ## TODO: ...                         tokens => [],
6076                           line => $self->{line}, column => $self->{column}};
6077          $self->{state} = BOGUS_COMMENT_STATE;          $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE;
6078          $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded          
6079        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6080          $self->{line_prev} = $self->{line};
6081          $self->{column_prev} = $self->{column};
6082          $self->{column}++;
6083          $self->{nc}
6084              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6085        } else {
6086          $self->{set_nc}->($self);
6087        }
6088      
6089            redo A;
6090          }
6091        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE) {
6092          if ($is_space->{$self->{nc}}) {
6093            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE;
6094            
6095        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6096          $self->{line_prev} = $self->{line};
6097          $self->{column_prev} = $self->{column};
6098          $self->{column}++;
6099          $self->{nc}
6100              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6101        } else {
6102          $self->{set_nc}->($self);
6103        }
6104      
6105            redo A;
6106          } elsif ($self->{nc} == 0x003E) { # >
6107            ## XML5: Same as "anything else".
6108            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr type'); ## TODO: type
6109            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6110            
6111        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6112          $self->{line_prev} = $self->{line};
6113          $self->{column_prev} = $self->{column};
6114          $self->{column}++;
6115          $self->{nc}
6116              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6117        } else {
6118          $self->{set_nc}->($self);
6119        }
6120      
6121            return  ($self->{ct}); # ATTLIST
6122            redo A;
6123          } elsif ($self->{nc} == 0x0028) { # (
6124            ## XML5: Same as "anything else".
6125            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before paren'); ## TODO: type
6126            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6127            
6128        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6129          $self->{line_prev} = $self->{line};
6130          $self->{column_prev} = $self->{column};
6131          $self->{column}++;
6132          $self->{nc}
6133              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6134        } else {
6135          $self->{set_nc}->($self);
6136        }
6137      
6138            redo A;
6139          } elsif ($self->{nc} == -1) {
6140            ## XML5: No parse error.
6141            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6142            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6143            
6144        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6145          $self->{line_prev} = $self->{line};
6146          $self->{column_prev} = $self->{column};
6147          $self->{column}++;
6148          $self->{nc}
6149              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6150        } else {
6151          $self->{set_nc}->($self);
6152        }
6153      
6154            return  ($self->{ct}); # ATTLIST
6155            redo A;
6156          } else {
6157            ## XML5: Not defined yet.
6158            $self->{ca}->{name} .= chr $self->{nc};
6159            ## Stay in the state.
6160            
6161        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6162          $self->{line_prev} = $self->{line};
6163          $self->{column_prev} = $self->{column};
6164          $self->{column}++;
6165          $self->{nc}
6166              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6167        } else {
6168          $self->{set_nc}->($self);
6169        }
6170      
6171            redo A;
6172          }
6173        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE) {
6174          if ($is_space->{$self->{nc}}) {
6175            ## Stay in the state.
6176            
6177        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6178          $self->{line_prev} = $self->{line};
6179          $self->{column_prev} = $self->{column};
6180          $self->{column}++;
6181          $self->{nc}
6182              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6183        } else {
6184          $self->{set_nc}->($self);
6185        }
6186      
6187            redo A;
6188          } elsif ($self->{nc} == 0x003E) { # >
6189            ## XML5: Same as "anything else".
6190            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr type'); ## TODO: type
6191            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6192            
6193        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6194          $self->{line_prev} = $self->{line};
6195          $self->{column_prev} = $self->{column};
6196          $self->{column}++;
6197          $self->{nc}
6198              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6199        } else {
6200          $self->{set_nc}->($self);
6201        }
6202      
6203            return  ($self->{ct}); # ATTLIST
6204            redo A;
6205          } elsif ($self->{nc} == 0x0028) { # (
6206            ## XML5: Same as "anything else".
6207            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6208            
6209        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6210          $self->{line_prev} = $self->{line};
6211          $self->{column_prev} = $self->{column};
6212          $self->{column}++;
6213          $self->{nc}
6214              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6215        } else {
6216          $self->{set_nc}->($self);
6217        }
6218      
6219            redo A;
6220          } elsif ($self->{nc} == -1) {
6221            ## XML5: No parse error.
6222            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6223            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6224            
6225        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6226          $self->{line_prev} = $self->{line};
6227          $self->{column_prev} = $self->{column};
6228          $self->{column}++;
6229          $self->{nc}
6230              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6231        } else {
6232          $self->{set_nc}->($self);
6233        }
6234      
6235            return  ($self->{ct});
6236            redo A;
6237          } else {
6238            ## XML5: Not defined yet.
6239            $self->{ca}->{type} = chr $self->{nc};
6240            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE;
6241            
6242        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6243          $self->{line_prev} = $self->{line};
6244          $self->{column_prev} = $self->{column};
6245          $self->{column}++;
6246          $self->{nc}
6247              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6248        } else {
6249          $self->{set_nc}->($self);
6250        }
6251      
6252            redo A;
6253          }
6254        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE) {
6255          if ($is_space->{$self->{nc}}) {
6256            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE;
6257            
6258        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6259          $self->{line_prev} = $self->{line};
6260          $self->{column_prev} = $self->{column};
6261          $self->{column}++;
6262          $self->{nc}
6263              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6264        } else {
6265          $self->{set_nc}->($self);
6266        }
6267      
6268            redo A;
6269          } elsif ($self->{nc} == 0x0023) { # #
6270            ## XML5: Same as "anything else".
6271            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6272            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6273            
6274        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6275          $self->{line_prev} = $self->{line};
6276          $self->{column_prev} = $self->{column};
6277          $self->{column}++;
6278          $self->{nc}
6279              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6280        } else {
6281          $self->{set_nc}->($self);
6282        }
6283      
6284            redo A;
6285          } elsif ($self->{nc} == 0x0022) { # "
6286            ## XML5: Same as "anything else".
6287            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6288            $self->{ca}->{value} = '';
6289            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6290            
6291        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6292          $self->{line_prev} = $self->{line};
6293          $self->{column_prev} = $self->{column};
6294          $self->{column}++;
6295          $self->{nc}
6296              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6297        } else {
6298          $self->{set_nc}->($self);
6299        }
6300      
6301            redo A;
6302          } elsif ($self->{nc} == 0x0027) { # '
6303            ## XML5: Same as "anything else".
6304            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6305            $self->{ca}->{value} = '';
6306            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6307            
6308        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6309          $self->{line_prev} = $self->{line};
6310          $self->{column_prev} = $self->{column};
6311          $self->{column}++;
6312          $self->{nc}
6313              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6314        } else {
6315          $self->{set_nc}->($self);
6316        }
6317      
6318            redo A;
6319          } elsif ($self->{nc} == 0x003E) { # >
6320            ## XML5: Same as "anything else".
6321            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6322            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6323            
6324        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6325          $self->{line_prev} = $self->{line};
6326          $self->{column_prev} = $self->{column};
6327          $self->{column}++;
6328          $self->{nc}
6329              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6330        } else {
6331          $self->{set_nc}->($self);
6332        }
6333      
6334            return  ($self->{ct}); # ATTLIST
6335            redo A;
6336          } elsif ($self->{nc} == 0x0028) { # (
6337            ## XML5: Same as "anything else".
6338            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before paren'); ## TODO: type
6339            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6340            
6341        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6342          $self->{line_prev} = $self->{line};
6343          $self->{column_prev} = $self->{column};
6344          $self->{column}++;
6345          $self->{nc}
6346              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6347        } else {
6348          $self->{set_nc}->($self);
6349        }
6350      
6351            redo A;
6352          } elsif ($self->{nc} == -1) {
6353            ## XML5: No parse error.
6354            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6355            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6356            
6357        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6358          $self->{line_prev} = $self->{line};
6359          $self->{column_prev} = $self->{column};
6360          $self->{column}++;
6361          $self->{nc}
6362              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6363        } else {
6364          $self->{set_nc}->($self);
6365        }
6366      
6367            return  ($self->{ct});
6368            redo A;
6369          } else {
6370            ## XML5: Not defined yet.
6371            $self->{ca}->{type} .= chr $self->{nc};
6372            ## Stay in the state.
6373            
6374        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6375          $self->{line_prev} = $self->{line};
6376          $self->{column_prev} = $self->{column};
6377          $self->{column}++;
6378          $self->{nc}
6379              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6380        } else {
6381          $self->{set_nc}->($self);
6382        }
6383      
6384            redo A;
6385          }
6386        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE) {
6387          if ($is_space->{$self->{nc}}) {
6388            ## Stay in the state.
6389            
6390        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6391          $self->{line_prev} = $self->{line};
6392          $self->{column_prev} = $self->{column};
6393          $self->{column}++;
6394          $self->{nc}
6395              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6396        } else {
6397          $self->{set_nc}->($self);
6398        }
6399      
6400            redo A;
6401          } elsif ($self->{nc} == 0x0028) { # (
6402            ## XML5: Same as "anything else".
6403            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6404            
6405        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6406          $self->{line_prev} = $self->{line};
6407          $self->{column_prev} = $self->{column};
6408          $self->{column}++;
6409          $self->{nc}
6410              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6411        } else {
6412          $self->{set_nc}->($self);
6413        }
6414      
6415            redo A;
6416          } elsif ($self->{nc} == 0x0023) { # #
6417            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6418            
6419        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6420          $self->{line_prev} = $self->{line};
6421          $self->{column_prev} = $self->{column};
6422          $self->{column}++;
6423          $self->{nc}
6424              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6425        } else {
6426          $self->{set_nc}->($self);
6427        }
6428      
6429            redo A;
6430          } elsif ($self->{nc} == 0x0022) { # "
6431            ## XML5: Same as "anything else".
6432            $self->{ca}->{value} = '';
6433            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6434            
6435        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6436          $self->{line_prev} = $self->{line};
6437          $self->{column_prev} = $self->{column};
6438          $self->{column}++;
6439          $self->{nc}
6440              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6441        } else {
6442          $self->{set_nc}->($self);
6443        }
6444      
6445            redo A;
6446          } elsif ($self->{nc} == 0x0027) { # '
6447            ## XML5: Same as "anything else".
6448            $self->{ca}->{value} = '';
6449            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6450            
6451        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6452          $self->{line_prev} = $self->{line};
6453          $self->{column_prev} = $self->{column};
6454          $self->{column}++;
6455          $self->{nc}
6456              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6457        } else {
6458          $self->{set_nc}->($self);
6459        }
6460      
6461            redo A;
6462          } elsif ($self->{nc} == 0x003E) { # >
6463            ## XML5: Same as "anything else".
6464            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6465            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6466            
6467        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6468          $self->{line_prev} = $self->{line};
6469          $self->{column_prev} = $self->{column};
6470          $self->{column}++;
6471          $self->{nc}
6472              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6473        } else {
6474          $self->{set_nc}->($self);
6475        }
6476      
6477            return  ($self->{ct}); # ATTLIST
6478            redo A;
6479          } elsif ($self->{nc} == -1) {
6480            ## XML5: No parse error.
6481            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6482            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6483            
6484        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6485          $self->{line_prev} = $self->{line};
6486          $self->{column_prev} = $self->{column};
6487          $self->{column}++;
6488          $self->{nc}
6489              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6490        } else {
6491          $self->{set_nc}->($self);
6492        }
6493      
6494            return  ($self->{ct});
6495            redo A;
6496          } else {
6497            ## XML5: Switch to the "DOCTYPE bogus comment state".
6498            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
6499            $self->{ca}->{value} = '';
6500            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
6501            ## Reconsume.
6502            redo A;
6503          }
6504        } elsif ($self->{state} == BEFORE_ALLOWED_TOKEN_STATE) {
6505          if ($is_space->{$self->{nc}}) {
6506            ## Stay in the state.
6507            
6508        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6509          $self->{line_prev} = $self->{line};
6510          $self->{column_prev} = $self->{column};
6511          $self->{column}++;
6512          $self->{nc}
6513              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6514        } else {
6515          $self->{set_nc}->($self);
6516        }
6517      
6518            redo A;
6519          } elsif ($self->{nc} == 0x007C) { # |
6520            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty allowed token'); ## TODO: type
6521            ## Stay in the state.
6522            
6523        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6524          $self->{line_prev} = $self->{line};
6525          $self->{column_prev} = $self->{column};
6526          $self->{column}++;
6527          $self->{nc}
6528              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6529        } else {
6530          $self->{set_nc}->($self);
6531        }
6532      
6533            redo A;
6534          } elsif ($self->{nc} == 0x0029) { # )
6535            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty allowed token'); ## TODO: type
6536            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
6537            
6538        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6539          $self->{line_prev} = $self->{line};
6540          $self->{column_prev} = $self->{column};
6541          $self->{column}++;
6542          $self->{nc}
6543              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6544        } else {
6545          $self->{set_nc}->($self);
6546        }
6547      
6548            redo A;
6549          } elsif ($self->{nc} == 0x003E) { # >
6550            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
6551            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6552            
6553        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6554          $self->{line_prev} = $self->{line};
6555          $self->{column_prev} = $self->{column};
6556          $self->{column}++;
6557          $self->{nc}
6558              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6559        } else {
6560          $self->{set_nc}->($self);
6561        }
6562      
6563            return  ($self->{ct}); # ATTLIST
6564            redo A;
6565          } elsif ($self->{nc} == -1) {
6566            ## XML5: No parse error.
6567            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6568            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6569            
6570        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6571          $self->{line_prev} = $self->{line};
6572          $self->{column_prev} = $self->{column};
6573          $self->{column}++;
6574          $self->{nc}
6575              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6576        } else {
6577          $self->{set_nc}->($self);
6578        }
6579      
6580            return  ($self->{ct});
6581            redo A;
6582          } else {
6583            push @{$self->{ca}->{tokens}}, chr $self->{nc};
6584            $self->{state} = ALLOWED_TOKEN_STATE;
6585            
6586        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6587          $self->{line_prev} = $self->{line};
6588          $self->{column_prev} = $self->{column};
6589          $self->{column}++;
6590          $self->{nc}
6591              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6592        } else {
6593          $self->{set_nc}->($self);
6594        }
6595      
6596            redo A;
6597          }
6598        } elsif ($self->{state} == ALLOWED_TOKEN_STATE) {
6599          if ($is_space->{$self->{nc}}) {
6600            $self->{state} = AFTER_ALLOWED_TOKEN_STATE;
6601            
6602        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6603          $self->{line_prev} = $self->{line};
6604          $self->{column_prev} = $self->{column};
6605          $self->{column}++;
6606          $self->{nc}
6607              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6608        } else {
6609          $self->{set_nc}->($self);
6610        }
6611      
6612            redo A;
6613          } elsif ($self->{nc} == 0x007C) { # |
6614            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6615            
6616        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6617          $self->{line_prev} = $self->{line};
6618          $self->{column_prev} = $self->{column};
6619          $self->{column}++;
6620          $self->{nc}
6621              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6622        } else {
6623          $self->{set_nc}->($self);
6624        }
6625      
6626            redo A;
6627          } elsif ($self->{nc} == 0x0029) { # )
6628            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
6629            
6630        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6631          $self->{line_prev} = $self->{line};
6632          $self->{column_prev} = $self->{column};
6633          $self->{column}++;
6634          $self->{nc}
6635              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6636        } else {
6637          $self->{set_nc}->($self);
6638        }
6639      
6640            redo A;
6641          } elsif ($self->{nc} == 0x003E) { # >
6642            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
6643            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6644            
6645        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6646          $self->{line_prev} = $self->{line};
6647          $self->{column_prev} = $self->{column};
6648          $self->{column}++;
6649          $self->{nc}
6650              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6651        } else {
6652          $self->{set_nc}->($self);
6653        }
6654      
6655            return  ($self->{ct}); # ATTLIST
6656            redo A;
6657          } elsif ($self->{nc} == -1) {
6658            ## XML5: No parse error.
6659            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6660            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6661            
6662        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6663          $self->{line_prev} = $self->{line};
6664          $self->{column_prev} = $self->{column};
6665          $self->{column}++;
6666          $self->{nc}
6667              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6668        } else {
6669          $self->{set_nc}->($self);
6670        }
6671      
6672            return  ($self->{ct});
6673            redo A;
6674          } else {
6675            $self->{ca}->{tokens}->[-1] .= chr $self->{nc};
6676            ## Stay in the state.
6677            
6678        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6679          $self->{line_prev} = $self->{line};
6680          $self->{column_prev} = $self->{column};
6681          $self->{column}++;
6682          $self->{nc}
6683              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6684        } else {
6685          $self->{set_nc}->($self);
6686        }
6687      
6688            redo A;
6689          }
6690        } elsif ($self->{state} == AFTER_ALLOWED_TOKEN_STATE) {
6691          if ($is_space->{$self->{nc}}) {
6692            ## Stay in the state.
6693            
6694        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6695          $self->{line_prev} = $self->{line};
6696          $self->{column_prev} = $self->{column};
6697          $self->{column}++;
6698          $self->{nc}
6699              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6700        } else {
6701          $self->{set_nc}->($self);
6702        }
6703      
6704            redo A;
6705          } elsif ($self->{nc} == 0x007C) { # |
6706            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6707            
6708        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6709          $self->{line_prev} = $self->{line};
6710          $self->{column_prev} = $self->{column};
6711          $self->{column}++;
6712          $self->{nc}
6713              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6714        } else {
6715          $self->{set_nc}->($self);
6716        }
6717      
6718            redo A;
6719          } elsif ($self->{nc} == 0x0029) { # )
6720            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
6721            
6722        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6723          $self->{line_prev} = $self->{line};
6724          $self->{column_prev} = $self->{column};
6725          $self->{column}++;
6726          $self->{nc}
6727              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6728        } else {
6729          $self->{set_nc}->($self);
6730        }
6731      
6732            redo A;
6733          } elsif ($self->{nc} == 0x003E) { # >
6734            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
6735            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6736            
6737        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6738          $self->{line_prev} = $self->{line};
6739          $self->{column_prev} = $self->{column};
6740          $self->{column}++;
6741          $self->{nc}
6742              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6743        } else {
6744          $self->{set_nc}->($self);
6745        }
6746      
6747            return  ($self->{ct}); # ATTLIST
6748            redo A;
6749          } elsif ($self->{nc} == -1) {
6750            ## XML5: No parse error.
6751            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6752            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6753            
6754        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6755          $self->{line_prev} = $self->{line};
6756          $self->{column_prev} = $self->{column};
6757          $self->{column}++;
6758          $self->{nc}
6759              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6760        } else {
6761          $self->{set_nc}->($self);
6762        }
6763      
6764            return  ($self->{ct});
6765            redo A;
6766          } else {
6767            $self->{parse_error}->(level => $self->{level}->{must}, type => 'space in allowed token', ## TODO: type
6768                            line => $self->{line_prev},
6769                            column => $self->{column_prev});
6770            $self->{ca}->{tokens}->[-1] .= ' ' . chr $self->{nc};
6771            $self->{state} = ALLOWED_TOKEN_STATE;
6772            
6773        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6774          $self->{line_prev} = $self->{line};
6775          $self->{column_prev} = $self->{column};
6776          $self->{column}++;
6777          $self->{nc}
6778              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6779        } else {
6780          $self->{set_nc}->($self);
6781        }
6782      
6783            redo A;
6784          }
6785        } elsif ($self->{state} == AFTER_ALLOWED_TOKENS_STATE) {
6786          if ($is_space->{$self->{nc}}) {
6787            $self->{state} = BEFORE_ATTR_DEFAULT_STATE;
6788            
6789        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6790          $self->{line_prev} = $self->{line};
6791          $self->{column_prev} = $self->{column};
6792          $self->{column}++;
6793          $self->{nc}
6794              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6795        } else {
6796          $self->{set_nc}->($self);
6797        }
6798      
6799            redo A;
6800          } elsif ($self->{nc} == 0x0023) { # #
6801            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6802            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6803            
6804        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6805          $self->{line_prev} = $self->{line};
6806          $self->{column_prev} = $self->{column};
6807          $self->{column}++;
6808          $self->{nc}
6809              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6810        } else {
6811          $self->{set_nc}->($self);
6812        }
6813      
6814            redo A;
6815          } elsif ($self->{nc} == 0x0022) { # "
6816            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6817            $self->{ca}->{value} = '';
6818            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6819            
6820        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6821          $self->{line_prev} = $self->{line};
6822          $self->{column_prev} = $self->{column};
6823          $self->{column}++;
6824          $self->{nc}
6825              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6826        } else {
6827          $self->{set_nc}->($self);
6828        }
6829      
6830            redo A;
6831          } elsif ($self->{nc} == 0x0027) { # '
6832            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6833            $self->{ca}->{value} = '';
6834            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6835            
6836        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6837          $self->{line_prev} = $self->{line};
6838          $self->{column_prev} = $self->{column};
6839          $self->{column}++;
6840          $self->{nc}
6841              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6842        } else {
6843          $self->{set_nc}->($self);
6844        }
6845      
6846            redo A;
6847          } elsif ($self->{nc} == 0x003E) { # >
6848            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6849            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6850            
6851        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6852          $self->{line_prev} = $self->{line};
6853          $self->{column_prev} = $self->{column};
6854          $self->{column}++;
6855          $self->{nc}
6856              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6857        } else {
6858          $self->{set_nc}->($self);
6859        }
6860      
6861            return  ($self->{ct}); # ATTLIST
6862            redo A;
6863          } elsif ($self->{nc} == -1) {
6864            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6865            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6866            
6867        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6868          $self->{line_prev} = $self->{line};
6869          $self->{column_prev} = $self->{column};
6870          $self->{column}++;
6871          $self->{nc}
6872              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6873        } else {
6874          $self->{set_nc}->($self);
6875        }
6876      
6877            return  ($self->{ct});
6878            redo A;
6879          } else {
6880            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
6881            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
6882            ## Reconsume.
6883            redo A;
6884          }
6885        } elsif ($self->{state} == BEFORE_ATTR_DEFAULT_STATE) {
6886          if ($is_space->{$self->{nc}}) {
6887            ## Stay in the state.
6888            
6889        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6890          $self->{line_prev} = $self->{line};
6891          $self->{column_prev} = $self->{column};
6892          $self->{column}++;
6893          $self->{nc}
6894              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6895        } else {
6896          $self->{set_nc}->($self);
6897        }
6898      
6899            redo A;
6900          } elsif ($self->{nc} == 0x0023) { # #
6901            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6902            
6903        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6904          $self->{line_prev} = $self->{line};
6905          $self->{column_prev} = $self->{column};
6906          $self->{column}++;
6907          $self->{nc}
6908              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6909        } else {
6910          $self->{set_nc}->($self);
6911        }
6912      
6913            redo A;
6914          } elsif ($self->{nc} == 0x0022) { # "
6915            $self->{ca}->{value} = '';
6916            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6917            
6918        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6919          $self->{line_prev} = $self->{line};
6920          $self->{column_prev} = $self->{column};
6921          $self->{column}++;
6922          $self->{nc}
6923              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6924        } else {
6925          $self->{set_nc}->($self);
6926        }
6927      
6928            redo A;
6929          } elsif ($self->{nc} == 0x0027) { # '
6930            $self->{ca}->{value} = '';
6931            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6932            
6933        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6934          $self->{line_prev} = $self->{line};
6935          $self->{column_prev} = $self->{column};
6936          $self->{column}++;
6937          $self->{nc}
6938              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6939        } else {
6940          $self->{set_nc}->($self);
6941        }
6942      
6943            redo A;
6944          } elsif ($self->{nc} == 0x003E) { # >
6945            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6946            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6947            
6948        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6949          $self->{line_prev} = $self->{line};
6950          $self->{column_prev} = $self->{column};
6951          $self->{column}++;
6952          $self->{nc}
6953              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6954        } else {
6955          $self->{set_nc}->($self);
6956        }
6957      
6958            return  ($self->{ct}); # ATTLIST
6959            redo A;
6960          } elsif ($self->{nc} == -1) {
6961            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6962            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6963            
6964        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6965          $self->{line_prev} = $self->{line};
6966          $self->{column_prev} = $self->{column};
6967          $self->{column}++;
6968          $self->{nc}
6969              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6970        } else {
6971          $self->{set_nc}->($self);
6972        }
6973      
6974            return  ($self->{ct});
6975            redo A;
6976          } else {
6977            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
6978            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
6979            ## Reconsume.
6980            redo A;
6981          }
6982        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE) {
6983          if ($is_space->{$self->{nc}}) {
6984            ## XML5: No parse error.
6985            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no default type'); ## TODO: type
6986            $self->{state} = BOGUS_MD_STATE;
6987            ## Reconsume.
6988            redo A;
6989          } elsif ($self->{nc} == 0x0022) { # "
6990            ## XML5: Same as "anything else".
6991            $self->{ca}->{value} = '';
6992            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6993            
6994        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6995          $self->{line_prev} = $self->{line};
6996          $self->{column_prev} = $self->{column};
6997          $self->{column}++;
6998          $self->{nc}
6999              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7000        } else {
7001          $self->{set_nc}->($self);
7002        }
7003      
7004            redo A;
7005          } elsif ($self->{nc} == 0x0027) { # '
7006            ## XML5: Same as "anything else".
7007            $self->{ca}->{value} = '';
7008            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7009            
7010        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7011          $self->{line_prev} = $self->{line};
7012          $self->{column_prev} = $self->{column};
7013          $self->{column}++;
7014          $self->{nc}
7015              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7016        } else {
7017          $self->{set_nc}->($self);
7018        }
7019      
7020            redo A;
7021          } elsif ($self->{nc} == 0x003E) { # >
7022            ## XML5: Same as "anything else".
7023            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
7024            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7025            
7026        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7027          $self->{line_prev} = $self->{line};
7028          $self->{column_prev} = $self->{column};
7029          $self->{column}++;
7030          $self->{nc}
7031              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7032        } else {
7033          $self->{set_nc}->($self);
7034        }
7035      
7036            return  ($self->{ct}); # ATTLIST
7037            redo A;
7038          } elsif ($self->{nc} == -1) {
7039            ## XML5: No parse error.
7040            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7041            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7042            
7043        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7044          $self->{line_prev} = $self->{line};
7045          $self->{column_prev} = $self->{column};
7046          $self->{column}++;
7047          $self->{nc}
7048              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7049        } else {
7050          $self->{set_nc}->($self);
7051        }
7052      
7053            return  ($self->{ct});
7054            redo A;
7055          } else {
7056            $self->{ca}->{default} = chr $self->{nc};
7057            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE;
7058            
7059        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7060          $self->{line_prev} = $self->{line};
7061          $self->{column_prev} = $self->{column};
7062          $self->{column}++;
7063          $self->{nc}
7064              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7065        } else {
7066          $self->{set_nc}->($self);
7067        }
7068      
7069            redo A;
7070          }
7071        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE) {
7072          if ($is_space->{$self->{nc}}) {
7073            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE;
7074            
7075        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7076          $self->{line_prev} = $self->{line};
7077          $self->{column_prev} = $self->{column};
7078          $self->{column}++;
7079          $self->{nc}
7080              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7081        } else {
7082          $self->{set_nc}->($self);
7083        }
7084      
7085            redo A;
7086          } elsif ($self->{nc} == 0x0022) { # "
7087            ## XML5: Same as "anything else".
7088            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7089            $self->{ca}->{value} = '';
7090            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7091            
7092        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7093          $self->{line_prev} = $self->{line};
7094          $self->{column_prev} = $self->{column};
7095          $self->{column}++;
7096          $self->{nc}
7097              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7098        } else {
7099          $self->{set_nc}->($self);
7100        }
7101      
7102            redo A;
7103          } elsif ($self->{nc} == 0x0027) { # '
7104            ## XML5: Same as "anything else".
7105            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7106            $self->{ca}->{value} = '';
7107            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7108            
7109        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7110          $self->{line_prev} = $self->{line};
7111          $self->{column_prev} = $self->{column};
7112          $self->{column}++;
7113          $self->{nc}
7114              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7115        } else {
7116          $self->{set_nc}->($self);
7117        }
7118      
7119            redo A;
7120          } elsif ($self->{nc} == 0x003E) { # >
7121            ## XML5: Same as "anything else".
7122            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7123            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7124            
7125        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7126          $self->{line_prev} = $self->{line};
7127          $self->{column_prev} = $self->{column};
7128          $self->{column}++;
7129          $self->{nc}
7130              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7131        } else {
7132          $self->{set_nc}->($self);
7133        }
7134      
7135            return  ($self->{ct}); # ATTLIST
7136            redo A;
7137          } elsif ($self->{nc} == -1) {
7138            ## XML5: No parse error.
7139            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7140            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7141            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7142            
7143        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7144          $self->{line_prev} = $self->{line};
7145          $self->{column_prev} = $self->{column};
7146          $self->{column}++;
7147          $self->{nc}
7148              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7149        } else {
7150          $self->{set_nc}->($self);
7151        }
7152      
7153            return  ($self->{ct});
7154            redo A;
7155          } else {
7156            $self->{ca}->{default} .= chr $self->{nc};
7157            ## Stay in the state.
7158            
7159        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7160          $self->{line_prev} = $self->{line};
7161          $self->{column_prev} = $self->{column};
7162          $self->{column}++;
7163          $self->{nc}
7164              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7165        } else {
7166          $self->{set_nc}->($self);
7167        }
7168      
7169            redo A;
7170          }
7171        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE) {
7172          if ($is_space->{$self->{nc}}) {
7173            ## Stay in the state.
7174            
7175        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7176          $self->{line_prev} = $self->{line};
7177          $self->{column_prev} = $self->{column};
7178          $self->{column}++;
7179          $self->{nc}
7180              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7181        } else {
7182          $self->{set_nc}->($self);
7183        }
7184      
7185            redo A;
7186          } elsif ($self->{nc} == 0x0022) { # "
7187            $self->{ca}->{value} = '';
7188            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7189            
7190        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7191          $self->{line_prev} = $self->{line};
7192          $self->{column_prev} = $self->{column};
7193          $self->{column}++;
7194          $self->{nc}
7195              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7196        } else {
7197          $self->{set_nc}->($self);
7198        }
7199      
7200            redo A;
7201          } elsif ($self->{nc} == 0x0027) { # '
7202            $self->{ca}->{value} = '';
7203            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7204            
7205        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7206          $self->{line_prev} = $self->{line};
7207          $self->{column_prev} = $self->{column};
7208          $self->{column}++;
7209          $self->{nc}
7210              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7211        } else {
7212          $self->{set_nc}->($self);
7213        }
7214      
7215            redo A;
7216          } elsif ($self->{nc} == 0x003E) { # >
7217            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7218            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7219            
7220        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7221          $self->{line_prev} = $self->{line};
7222          $self->{column_prev} = $self->{column};
7223          $self->{column}++;
7224          $self->{nc}
7225              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7226        } else {
7227          $self->{set_nc}->($self);
7228        }
7229      
7230            return  ($self->{ct}); # ATTLIST
7231            redo A;
7232          } elsif ($self->{nc} == -1) {
7233            ## XML5: No parse error.
7234            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7235            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7236            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7237            
7238        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7239          $self->{line_prev} = $self->{line};
7240          $self->{column_prev} = $self->{column};
7241          $self->{column}++;
7242          $self->{nc}
7243              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7244        } else {
7245          $self->{set_nc}->($self);
7246        }
7247      
7248            return  ($self->{ct});
7249            redo A;
7250          } else {
7251            ## XML5: Not defined yet.
7252            if ($self->{ca}->{default} eq 'FIXED') {
7253              $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
7254            } else {
7255              push @{$self->{ct}->{attrdefs}}, $self->{ca};
7256              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7257            }
7258            ## Reconsume.
7259            redo A;
7260          }
7261        } elsif ($self->{state} == AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE) {
7262          if ($is_space->{$self->{nc}} or
7263              $self->{nc} == -1 or
7264              $self->{nc} == 0x003E) { # >
7265            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7266            ## Reconsume.
7267            redo A;
7268          } else {
7269            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before attr name'); ## TODO: type
7270            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7271          ## Reconsume.          ## Reconsume.
7272          redo A;          redo A;
7273        }        }
7274    
7275        } elsif ($self->{state} == BOGUS_MD_STATE) {
7276          if ($self->{nc} == 0x003E) { # >
7277            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7278            
7279        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7280          $self->{line_prev} = $self->{line};
7281          $self->{column_prev} = $self->{column};
7282          $self->{column}++;
7283          $self->{nc}
7284              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7285        } else {
7286          $self->{set_nc}->($self);
7287        }
7288      
7289            return  ($self->{ct}); # ATTLIST/ENTITY/NOTATION
7290            redo A;
7291          } elsif ($self->{nc} == -1) {
7292            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7293            ## Reconsume.
7294            return  ($self->{ct}); # ATTLIST/ENTITY/NOTATION
7295            redo A;
7296          } else {
7297            ## Stay in the state.
7298            
7299        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7300          $self->{line_prev} = $self->{line};
7301          $self->{column_prev} = $self->{column};
7302          $self->{column}++;
7303          $self->{nc}
7304              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7305        } else {
7306          $self->{set_nc}->($self);
7307        }
7308      
7309            redo A;
7310          }
7311      } else {      } else {
7312        die "$0: $self->{state}: Unknown state";        die "$0: $self->{state}: Unknown state";
7313      }      }
# Line 5853  sub _get_next_token ($) { Line 7318  sub _get_next_token ($) {
7318    
7319  1;  1;
7320  ## $Date$  ## $Date$
7321                                    

Legend:
Removed from v.1.14  
changed lines
  Added in v.1.16

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24