/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.11 by wakaba, Wed Oct 15 10:50:38 2008 UTC revision 1.21 by wakaba, Sun Oct 19 09:25:21 2008 UTC
# Line 15  BEGIN { Line 15  BEGIN {
15      CHARACTER_TOKEN      CHARACTER_TOKEN
16      PI_TOKEN      PI_TOKEN
17      ABORT_TOKEN      ABORT_TOKEN
18        END_OF_DOCTYPE_TOKEN
19        ATTLIST_TOKEN
20        ELEMENT_TOKEN
21        GENERAL_ENTITY_TOKEN
22        PARAMETER_ENTITY_TOKEN
23        NOTATION_TOKEN
24    );    );
25        
26    our %EXPORT_TAGS = (    our %EXPORT_TAGS = (
# Line 27  BEGIN { Line 33  BEGIN {
33        CHARACTER_TOKEN        CHARACTER_TOKEN
34        PI_TOKEN        PI_TOKEN
35        ABORT_TOKEN        ABORT_TOKEN
36          END_OF_DOCTYPE_TOKEN
37          ATTLIST_TOKEN
38          ELEMENT_TOKEN
39          GENERAL_ENTITY_TOKEN
40          PARAMETER_ENTITY_TOKEN
41          NOTATION_TOKEN
42      )],      )],
43    );    );
44  }  }
45    
46    ## NOTE: Differences from the XML5 draft are marked as "XML5:".
47    
48  ## Token types  ## Token types
49    
50  sub DOCTYPE_TOKEN () { 1 }  sub DOCTYPE_TOKEN () { 1 } ## XML5: No DOCTYPE token.
51  sub COMMENT_TOKEN () { 2 }  sub COMMENT_TOKEN () { 2 }
52  sub START_TAG_TOKEN () { 3 }  sub START_TAG_TOKEN () { 3 }
53  sub END_TAG_TOKEN () { 4 }  sub END_TAG_TOKEN () { 4 }
54  sub END_OF_FILE_TOKEN () { 5 }  sub END_OF_FILE_TOKEN () { 5 }
55  sub CHARACTER_TOKEN () { 6 }  sub CHARACTER_TOKEN () { 6 }
56  sub PI_TOKEN () { 7 } # XML5  sub PI_TOKEN () { 7 } ## NOTE: XML only.
57  sub ABORT_TOKEN () { 8 } # Not a token actually  sub ABORT_TOKEN () { 8 } ## NOTE: For internal processing.
58    sub END_OF_DOCTYPE_TOKEN () { 9 } ## NOTE: XML only.
59    sub ATTLIST_TOKEN () { 10 } ## NOTE: XML only.
60    sub ELEMENT_TOKEN () { 11 } ## NOTE: XML only.
61    sub GENERAL_ENTITY_TOKEN () { 12 } ## NOTE: XML only.
62    sub PARAMETER_ENTITY_TOKEN () { 13 } ## NOTE: XML only.
63    sub NOTATION_TOKEN () { 14 } ## NOTE: XML only.
64    
65    ## XML5: XML5 has "empty tag token".  In this implementation, it is
66    ## represented as a start tag token with $self->{self_closing} flag
67    ## set to true.
68    
69    ## XML5: XML5 has "short end tag token".  In this implementation, it
70    ## is represented as an end tag token with $token->{tag_name} set
71    ## to an empty string.
72    
73  package Whatpm::HTML;  package Whatpm::HTML;
74    
# Line 114  sub HEXREF_HEX_STATE () { 48 } Line 142  sub HEXREF_HEX_STATE () { 48 }
142  sub ENTITY_NAME_STATE () { 49 }  sub ENTITY_NAME_STATE () { 49 }
143  sub PCDATA_STATE () { 50 } # "data state" in the spec  sub PCDATA_STATE () { 50 } # "data state" in the spec
144    
145  ## XML states  ## XML-only states
146  sub PI_STATE () { 51 }  sub PI_STATE () { 51 }
147  sub PI_TARGET_STATE () { 52 }  sub PI_TARGET_STATE () { 52 }
148  sub PI_TARGET_AFTER_STATE () { 53 }  sub PI_TARGET_AFTER_STATE () { 53 }
149  sub PI_DATA_STATE () { 54 }  sub PI_DATA_STATE () { 54 }
150  sub PI_AFTER_STATE () { 55 }  sub PI_AFTER_STATE () { 55 }
151  sub PI_DATA_AFTER_STATE () { 56 }  sub PI_DATA_AFTER_STATE () { 56 }
152    sub DOCTYPE_INTERNAL_SUBSET_STATE () { 57 }
153    sub DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 58 }
154    sub BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 59 }
155    sub DOCTYPE_TAG_STATE () { 60 }
156    sub DOCTYPE_MARKUP_DECLARATION_OPEN_STATE () { 61 }
157    sub MD_ATTLIST_STATE () { 62 }
158    sub MD_E_STATE () { 63 }
159    sub MD_ELEMENT_STATE () { 64 }
160    sub MD_ENTITY_STATE () { 65 }
161    sub MD_NOTATION_STATE () { 66 }
162    sub DOCTYPE_MD_STATE () { 67 }
163    sub BEFORE_MD_NAME_STATE () { 68 }
164    sub MD_NAME_STATE () { 69 }
165    sub DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE () { 70 }
166    sub DOCTYPE_ATTLIST_NAME_AFTER_STATE () { 71 }
167    sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE () { 72 }
168    sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE () { 73 }
169    sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE () { 74 }
170    sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE () { 75 }
171    sub BEFORE_ALLOWED_TOKEN_STATE () { 76 }
172    sub ALLOWED_TOKEN_STATE () { 77 }
173    sub AFTER_ALLOWED_TOKEN_STATE () { 78 }
174    sub AFTER_ALLOWED_TOKENS_STATE () { 79 }
175    sub BEFORE_ATTR_DEFAULT_STATE () { 80 }
176    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE () { 81 }
177    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE () { 82 }
178    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE () { 83 }
179    sub AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE () { 84 }
180    sub BEFORE_NDATA_STATE () { 85 }
181    sub NDATA_STATE () { 86 }
182    sub AFTER_NDATA_STATE () { 87 }
183    sub BEFORE_NOTATION_NAME_STATE () { 88 }
184    sub NOTATION_NAME_STATE () { 89 }
185    sub DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE () { 90 }
186    sub DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE () { 91 }
187    sub ENTITY_VALUE_ENTITY_STATE () { 92 }
188    sub AFTER_ELEMENT_NAME_STATE () { 93 }
189    sub BEFORE_ELEMENT_CONTENT_STATE () { 94 }
190    sub CONTENT_KEYWORD_STATE () { 95 }
191    sub AFTER_CM_GROUP_OPEN_STATE () { 96 }
192    sub CM_ELEMENT_NAME_STATE () { 97 }
193    sub AFTER_CM_ELEMENT_NAME_STATE () { 98 }
194    sub AFTER_CM_GROUP_CLOSE_STATE () { 99 }
195    sub AFTER_MD_DEF_STATE () { 100 }
196    sub BOGUS_MD_STATE () { 101 }
197    
198  ## Tree constructor state constants (see Whatpm::HTML for the full  ## Tree constructor state constants (see Whatpm::HTML for the full
199  ## list and descriptions)  ## list and descriptions)
# Line 186  sub _initialize_tokenizer ($) { Line 259  sub _initialize_tokenizer ($) {
259    #$self->{is_xml} (if XML)    #$self->{is_xml} (if XML)
260    
261    $self->{state} = DATA_STATE; # MUST    $self->{state} = DATA_STATE; # MUST
262    $self->{s_kwd} = ''; # state keyword    $self->{s_kwd} = ''; # Data state keyword
263      #$self->{kwd} = ''; # State-dependent keyword; initialized when used
264    #$self->{entity__value}; # initialized when used    #$self->{entity__value}; # initialized when used
265    #$self->{entity__match}; # initialized when used    #$self->{entity__match}; # initialized when used
266    $self->{content_model} = PCDATA_CONTENT_MODEL; # be    $self->{content_model} = PCDATA_CONTENT_MODEL; # be
# Line 231  sub _initialize_tokenizer ($) { Line 305  sub _initialize_tokenizer ($) {
305  ##   ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN, PI_TOKEN)  ##   ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN, PI_TOKEN)
306  ##   ->{has_reference} == 1 or 0 (CHARACTER_TOKEN)  ##   ->{has_reference} == 1 or 0 (CHARACTER_TOKEN)
307  ##   ->{last_index} (ELEMENT_TOKEN): Next attribute's index - 1.  ##   ->{last_index} (ELEMENT_TOKEN): Next attribute's index - 1.
308    ##   ->{has_internal_subset} = 1 or 0 (DOCTYPE_TOKEN)
309    
310  ## NOTE: The "self-closing flag" is held as |$self->{self_closing}|.  ## NOTE: The "self-closing flag" is held as |$self->{self_closing}|.
311  ##     |->{self_closing}| is used to save the value of |$self->{self_closing}|  ##     |->{self_closing}| is used to save the value of |$self->{self_closing}|
312  ##     while the token is pushed back to the stack.  ##     while the token is pushed back to the stack.
# Line 250  my $is_space = { Line 326  my $is_space = {
326    0x0009 => 1, # CHARACTER TABULATION (HT)    0x0009 => 1, # CHARACTER TABULATION (HT)
327    0x000A => 1, # LINE FEED (LF)    0x000A => 1, # LINE FEED (LF)
328    #0x000B => 0, # LINE TABULATION (VT)    #0x000B => 0, # LINE TABULATION (VT)
329    0x000C => 1, # FORM FEED (FF)    0x000C => 1, # FORM FEED (FF) ## XML5: Not a space character.
330    #0x000D => 1, # CARRIAGE RETURN (CR)    #0x000D => 1, # CARRIAGE RETURN (CR)
331    0x0020 => 1, # SPACE (SP)    0x0020 => 1, # SPACE (SP)
332  };  };
# Line 530  sub _get_next_token ($) { Line 606  sub _get_next_token ($) {
606            redo A;            redo A;
607          } elsif ($self->{nc} == 0x0021) { # !          } elsif ($self->{nc} == 0x0021) { # !
608                        
609            $self->{s_kwd} = '<' unless $self->{escape};            $self->{s_kwd} = $self->{escaped} ? '' : '<';
610            #            #
611          } else {          } else {
612                        
613              $self->{s_kwd} = '';
614            #            #
615          }          }
616    
617          ## reconsume          ## reconsume
618          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
         $self->{s_kwd} = '';  
619          return  ({type => CHARACTER_TOKEN, data => '<',          return  ({type => CHARACTER_TOKEN, data => '<',
620                    line => $self->{line_prev},                    line => $self->{line_prev},
621                    column => $self->{column_prev},                    column => $self->{column_prev},
# Line 720  sub _get_next_token ($) { Line 796  sub _get_next_token ($) {
796        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
797          if (defined $self->{last_stag_name}) {          if (defined $self->{last_stag_name}) {
798            $self->{state} = CDATA_RCDATA_CLOSE_TAG_STATE;            $self->{state} = CDATA_RCDATA_CLOSE_TAG_STATE;
799            $self->{s_kwd} = '';            $self->{kwd} = '';
800            ## Reconsume.            ## Reconsume.
801            redo A;            redo A;
802          } else {          } else {
# Line 873  sub _get_next_token ($) { Line 949  sub _get_next_token ($) {
949          redo A;          redo A;
950        }        }
951      } elsif ($self->{state} == CDATA_RCDATA_CLOSE_TAG_STATE) {      } elsif ($self->{state} == CDATA_RCDATA_CLOSE_TAG_STATE) {
952        my $ch = substr $self->{last_stag_name}, length $self->{s_kwd}, 1;        my $ch = substr $self->{last_stag_name}, length $self->{kwd}, 1;
953        if (length $ch) {        if (length $ch) {
954          my $CH = $ch;          my $CH = $ch;
955          $ch =~ tr/a-z/A-Z/;          $ch =~ tr/a-z/A-Z/;
# Line 881  sub _get_next_token ($) { Line 957  sub _get_next_token ($) {
957          if ($nch eq $ch or $nch eq $CH) {          if ($nch eq $ch or $nch eq $CH) {
958                        
959            ## Stay in the state.            ## Stay in the state.
960            $self->{s_kwd} .= $nch;            $self->{kwd} .= $nch;
961                        
962      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
963        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 900  sub _get_next_token ($) { Line 976  sub _get_next_token ($) {
976            $self->{s_kwd} = '';            $self->{s_kwd} = '';
977            ## Reconsume.            ## Reconsume.
978            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
979                      data => '</' . $self->{s_kwd},                      data => '</' . $self->{kwd},
980                      line => $self->{line_prev},                      line => $self->{line_prev},
981                      column => $self->{column_prev} - 1 - length $self->{s_kwd},                      column => $self->{column_prev} - 1 - length $self->{kwd},
982                     });                     });
983            redo A;            redo A;
984          }          }
# Line 918  sub _get_next_token ($) { Line 994  sub _get_next_token ($) {
994            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
995            $self->{s_kwd} = '';            $self->{s_kwd} = '';
996            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
997                      data => '</' . $self->{s_kwd},                      data => '</' . $self->{kwd},
998                      line => $self->{line_prev},                      line => $self->{line_prev},
999                      column => $self->{column_prev} - 1 - length $self->{s_kwd},                      column => $self->{column_prev} - 1 - length $self->{kwd},
1000                     });                     });
1001            redo A;            redo A;
1002          } else {          } else {
# Line 929  sub _get_next_token ($) { Line 1005  sub _get_next_token ($) {
1005                = {type => END_TAG_TOKEN,                = {type => END_TAG_TOKEN,
1006                   tag_name => $self->{last_stag_name},                   tag_name => $self->{last_stag_name},
1007                   line => $self->{line_prev},                   line => $self->{line_prev},
1008                   column => $self->{column_prev} - 1 - length $self->{s_kwd}};                   column => $self->{column_prev} - 1 - length $self->{kwd}};
1009            $self->{state} = TAG_NAME_STATE;            $self->{state} = TAG_NAME_STATE;
1010            ## Reconsume.            ## Reconsume.
1011            redo A;            redo A;
# Line 1691  sub _get_next_token ($) { Line 1767  sub _get_next_token ($) {
1767          redo A;          redo A;
1768        }        }
1769      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1770        ## XML5: "Tag attribute value double quoted state".        ## XML5: "Tag attribute value double quoted state" and "DOCTYPE
1771          ## ATTLIST attribute value double quoted state".
1772                
1773        if ($self->{nc} == 0x0022) { # "        if ($self->{nc} == 0x0022) { # "
1774                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1775          ## XML5: "Tag attribute name before state".            
1776          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;            ## XML5: "DOCTYPE ATTLIST name after state".
1777              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1778              $self->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE;
1779            } else {
1780              
1781              ## XML5: "Tag attribute name before state".
1782              $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1783            }
1784                    
1785      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1786        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1737  sub _get_next_token ($) { Line 1821  sub _get_next_token ($) {
1821          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1822                        
1823            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
1824    
1825              $self->{state} = DATA_STATE;
1826              $self->{s_kwd} = '';
1827              ## reconsume
1828              return  ($self->{ct}); # start tag
1829              redo A;
1830          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1831            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1832            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1746  sub _get_next_token ($) { Line 1836  sub _get_next_token ($) {
1836              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
1837                            
1838            }            }
1839    
1840              $self->{state} = DATA_STATE;
1841              $self->{s_kwd} = '';
1842              ## reconsume
1843              return  ($self->{ct}); # end tag
1844              redo A;
1845            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1846              ## XML5: No parse error above; not defined yet.
1847              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1848              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1849              ## Reconsume.
1850              return  ($self->{ct}); # ATTLIST
1851              redo A;
1852          } else {          } else {
1853            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1854          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
1855        } else {        } else {
1856            ## XML5 [ATTLIST]: Not defined yet.
1857          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
1858                        
1859            ## XML5: Not a parse error.            ## XML5: Not a parse error.
# Line 1784  sub _get_next_token ($) { Line 1881  sub _get_next_token ($) {
1881          redo A;          redo A;
1882        }        }
1883      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1884        ## XML5: "Tag attribute value single quoted state".        ## XML5: "Tag attribute value single quoted state" and "DOCTYPE
1885          ## ATTLIST attribute value single quoted state".
1886    
1887        if ($self->{nc} == 0x0027) { # '        if ($self->{nc} == 0x0027) { # '
1888                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1889          ## XML5: "Before attribute name state" (sic).            
1890          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;            ## XML5: "DOCTYPE ATTLIST name after state".
1891              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1892              $self->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE;
1893            } else {
1894              
1895              ## XML5: "Before attribute name state" (sic).
1896              $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1897            }
1898                    
1899      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1900        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1830  sub _get_next_token ($) { Line 1935  sub _get_next_token ($) {
1935          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1936                        
1937            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
1938    
1939              $self->{state} = DATA_STATE;
1940              $self->{s_kwd} = '';
1941              ## reconsume
1942              return  ($self->{ct}); # start tag
1943              redo A;
1944          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1945            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1946            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1839  sub _get_next_token ($) { Line 1950  sub _get_next_token ($) {
1950              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
1951                            
1952            }            }
1953    
1954              $self->{state} = DATA_STATE;
1955              $self->{s_kwd} = '';
1956              ## reconsume
1957              return  ($self->{ct}); # end tag
1958              redo A;
1959            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1960              ## XML5: No parse error above; not defined yet.
1961              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1962              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1963              ## Reconsume.
1964              return  ($self->{ct}); # ATTLIST
1965              redo A;
1966          } else {          } else {
1967            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1968          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
1969        } else {        } else {
1970            ## XML5 [ATTLIST]: Not defined yet.
1971          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <          if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
1972                        
1973            ## XML5: Not a parse error.            ## XML5: Not a parse error.
# Line 1880  sub _get_next_token ($) { Line 1998  sub _get_next_token ($) {
1998        ## XML5: "Tag attribute value unquoted state".        ## XML5: "Tag attribute value unquoted state".
1999    
2000        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
2001                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
2002          ## XML5: "Tag attribute name before state".            
2003          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;            push @{$self->{ct}->{attrdefs}}, $self->{ca};
2004              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
2005            } else {
2006              
2007              ## XML5: "Tag attribute name before state".
2008              $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
2009            }
2010                    
2011      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2012        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1923  sub _get_next_token ($) { Line 2047  sub _get_next_token ($) {
2047          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
2048                        
2049            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
2050    
2051              $self->{state} = DATA_STATE;
2052              $self->{s_kwd} = '';
2053              
2054        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2055          $self->{line_prev} = $self->{line};
2056          $self->{column_prev} = $self->{column};
2057          $self->{column}++;
2058          $self->{nc}
2059              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
2060        } else {
2061          $self->{set_nc}->($self);
2062        }
2063      
2064              return  ($self->{ct}); # start tag
2065              redo A;
2066          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
2067            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
2068            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1932  sub _get_next_token ($) { Line 2072  sub _get_next_token ($) {
2072              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
2073                            
2074            }            }
2075          } else {  
2076            die "$0: $self->{ct}->{type}: Unknown token type";            $self->{state} = DATA_STATE;
2077          }            $self->{s_kwd} = '';
2078          $self->{state} = DATA_STATE;            
         $self->{s_kwd} = '';  
           
2079      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2080        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
2081        $self->{column_prev} = $self->{column};        $self->{column_prev} = $self->{column};
# Line 1948  sub _get_next_token ($) { Line 2086  sub _get_next_token ($) {
2086        $self->{set_nc}->($self);        $self->{set_nc}->($self);
2087      }      }
2088        
2089              return  ($self->{ct}); # end tag
2090          return  ($self->{ct}); # start tag or end tag            redo A;
2091            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
2092          redo A;            push @{$self->{ct}->{attrdefs}}, $self->{ca};
2093              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2094              
2095        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2096          $self->{line_prev} = $self->{line};
2097          $self->{column_prev} = $self->{column};
2098          $self->{column}++;
2099          $self->{nc}
2100              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
2101        } else {
2102          $self->{set_nc}->($self);
2103        }
2104      
2105              return  ($self->{ct}); # ATTLIST
2106              redo A;
2107            } else {
2108              die "$0: $self->{ct}->{type}: Unknown token type";
2109            }
2110        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');  
2111          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
2112                        
2113              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');
2114            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
2115    
2116              $self->{state} = DATA_STATE;
2117              $self->{s_kwd} = '';
2118              ## reconsume
2119              return  ($self->{ct}); # start tag
2120              redo A;
2121          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
2122              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');
2123            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
2124            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
2125                            
# Line 1966  sub _get_next_token ($) { Line 2128  sub _get_next_token ($) {
2128              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
2129                            
2130            }            }
2131    
2132              $self->{state} = DATA_STATE;
2133              $self->{s_kwd} = '';
2134              ## reconsume
2135              return  ($self->{ct}); # end tag
2136              redo A;
2137            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
2138              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
2139              push @{$self->{ct}->{attrdefs}}, $self->{ca};
2140              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2141              ## Reconsume.
2142              return  ($self->{ct}); # ATTLIST
2143              redo A;
2144          } else {          } else {
2145            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
2146          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
2147        } else {        } else {
2148          if ({          if ({
2149               0x0022 => 1, # "               0x0022 => 1, # "
# Line 2168  sub _get_next_token ($) { Line 2336  sub _get_next_token ($) {
2336          redo A;          redo A;
2337        }        }
2338      } elsif ($self->{state} == BOGUS_COMMENT_STATE) {      } elsif ($self->{state} == BOGUS_COMMENT_STATE) {
2339        ## (only happen if PCDATA state)        ## XML5: "Bogus comment state" and "DOCTYPE bogus comment state".
2340    
2341        ## NOTE: Unlike spec's "bogus comment state", this implementation        ## NOTE: Unlike spec's "bogus comment state", this implementation
2342        ## consumes characters on a one-by-one basis.        ## consumes characters on a one-by-one basis.
2343                
2344        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2345                    if ($self->{in_subset}) {
2346          $self->{state} = DATA_STATE;            
2347          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2348            } else {
2349              
2350              $self->{state} = DATA_STATE;
2351              $self->{s_kwd} = '';
2352            }
2353                    
2354      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2355        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2192  sub _get_next_token ($) { Line 2365  sub _get_next_token ($) {
2365          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
2366          redo A;          redo A;
2367        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
2368                    if ($self->{in_subset}) {
2369          $self->{state} = DATA_STATE;            
2370          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2371            } else {
2372              
2373              $self->{state} = DATA_STATE;
2374              $self->{s_kwd} = '';
2375            }
2376          ## reconsume          ## reconsume
2377    
2378          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2221  sub _get_next_token ($) { Line 2399  sub _get_next_token ($) {
2399          redo A;          redo A;
2400        }        }
2401      } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {      } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {
2402        ## (only happen if PCDATA state)        ## XML5: "Markup declaration state".
2403                
2404        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2405                    
# Line 2243  sub _get_next_token ($) { Line 2421  sub _get_next_token ($) {
2421          ## ASCII case-insensitive.          ## ASCII case-insensitive.
2422                    
2423          $self->{state} = MD_DOCTYPE_STATE;          $self->{state} = MD_DOCTYPE_STATE;
2424          $self->{s_kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
2425                    
2426      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2427        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2262  sub _get_next_token ($) { Line 2440  sub _get_next_token ($) {
2440                 $self->{nc} == 0x005B) { # [                 $self->{nc} == 0x005B) { # [
2441                                                    
2442          $self->{state} = MD_CDATA_STATE;          $self->{state} = MD_CDATA_STATE;
2443          $self->{s_kwd} = '[';          $self->{kwd} = '[';
2444                    
2445      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2446        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2332  sub _get_next_token ($) { Line 2510  sub _get_next_token ($) {
2510              0x0054, # T              0x0054, # T
2511              0x0059, # Y              0x0059, # Y
2512              0x0050, # P              0x0050, # P
2513            ]->[length $self->{s_kwd}] or            ]->[length $self->{kwd}] or
2514            $self->{nc} == [            $self->{nc} == [
2515              undef,              undef,
2516              0x006F, # o              0x006F, # o
# Line 2340  sub _get_next_token ($) { Line 2518  sub _get_next_token ($) {
2518              0x0074, # t              0x0074, # t
2519              0x0079, # y              0x0079, # y
2520              0x0070, # p              0x0070, # p
2521            ]->[length $self->{s_kwd}]) {            ]->[length $self->{kwd}]) {
2522                    
2523          ## Stay in the state.          ## Stay in the state.
2524          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
2525                    
2526      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2527        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2356  sub _get_next_token ($) { Line 2534  sub _get_next_token ($) {
2534      }      }
2535        
2536          redo A;          redo A;
2537        } elsif ((length $self->{s_kwd}) == 6 and        } elsif ((length $self->{kwd}) == 6 and
2538                 ($self->{nc} == 0x0045 or # E                 ($self->{nc} == 0x0045 or # E
2539                  $self->{nc} == 0x0065)) { # e                  $self->{nc} == 0x0065)) { # e
2540          if ($self->{s_kwd} ne 'DOCTYP') {          if ($self->{is_xml} and
2541                ($self->{kwd} ne 'DOCTYP' or $self->{nc} == 0x0065)) {
2542                        
2543            ## XML5: case-sensitive.            ## XML5: case-sensitive.
2544            $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO            $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO
# Line 2391  sub _get_next_token ($) { Line 2570  sub _get_next_token ($) {
2570                                    
2571          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
2572                          line => $self->{line_prev},                          line => $self->{line_prev},
2573                          column => $self->{column_prev} - 1 - length $self->{s_kwd});                          column => $self->{column_prev} - 1 - length $self->{kwd});
2574          $self->{state} = BOGUS_COMMENT_STATE;          $self->{state} = BOGUS_COMMENT_STATE;
2575          ## Reconsume.          ## Reconsume.
2576          $self->{ct} = {type => COMMENT_TOKEN,          $self->{ct} = {type => COMMENT_TOKEN,
2577                                    data => $self->{s_kwd},                                    data => $self->{kwd},
2578                                    line => $self->{line_prev},                                    line => $self->{line_prev},
2579                                    column => $self->{column_prev} - 1 - length $self->{s_kwd},                                    column => $self->{column_prev} - 1 - length $self->{kwd},
2580                                   };                                   };
2581          redo A;          redo A;
2582        }        }
# Line 2408  sub _get_next_token ($) { Line 2587  sub _get_next_token ($) {
2587              '[CD' => 0x0041, # A              '[CD' => 0x0041, # A
2588              '[CDA' => 0x0054, # T              '[CDA' => 0x0054, # T
2589              '[CDAT' => 0x0041, # A              '[CDAT' => 0x0041, # A
2590            }->{$self->{s_kwd}}) {            }->{$self->{kwd}}) {
2591                    
2592          ## Stay in the state.          ## Stay in the state.
2593          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
2594                    
2595      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2596        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2424  sub _get_next_token ($) { Line 2603  sub _get_next_token ($) {
2603      }      }
2604        
2605          redo A;          redo A;
2606        } elsif ($self->{s_kwd} eq '[CDATA' and        } elsif ($self->{kwd} eq '[CDATA' and
2607                 $self->{nc} == 0x005B) { # [                 $self->{nc} == 0x005B) { # [
2608          if ($self->{is_xml} and          if ($self->{is_xml} and
2609              not $self->{tainted} and              not $self->{tainted} and
# Line 2459  sub _get_next_token ($) { Line 2638  sub _get_next_token ($) {
2638                    
2639          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
2640                          line => $self->{line_prev},                          line => $self->{line_prev},
2641                          column => $self->{column_prev} - 1 - length $self->{s_kwd});                          column => $self->{column_prev} - 1 - length $self->{kwd});
2642          $self->{state} = BOGUS_COMMENT_STATE;          $self->{state} = BOGUS_COMMENT_STATE;
2643          ## Reconsume.          ## Reconsume.
2644          $self->{ct} = {type => COMMENT_TOKEN,          $self->{ct} = {type => COMMENT_TOKEN,
2645                                    data => $self->{s_kwd},                                    data => $self->{kwd},
2646                                    line => $self->{line_prev},                                    line => $self->{line_prev},
2647                                    column => $self->{column_prev} - 1 - length $self->{s_kwd},                                    column => $self->{column_prev} - 1 - length $self->{kwd},
2648                                   };                                   };
2649          redo A;          redo A;
2650        }        }
# Line 2486  sub _get_next_token ($) { Line 2665  sub _get_next_token ($) {
2665        
2666          redo A;          redo A;
2667        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
2668          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');
2669          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2670          $self->{s_kwd} = '';            
2671              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2672            } else {
2673              
2674              $self->{state} = DATA_STATE;
2675              $self->{s_kwd} = '';
2676            }
2677                    
2678      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2679        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2506  sub _get_next_token ($) { Line 2690  sub _get_next_token ($) {
2690    
2691          redo A;          redo A;
2692        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2693          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2694          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2695          $self->{s_kwd} = '';            
2696              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2697            } else {
2698              
2699              $self->{state} = DATA_STATE;
2700              $self->{s_kwd} = '';
2701            }
2702          ## reconsume          ## reconsume
2703    
2704          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2550  sub _get_next_token ($) { Line 2739  sub _get_next_token ($) {
2739        
2740          redo A;          redo A;
2741        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
2742          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');
2743          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2744          $self->{s_kwd} = '';            
2745              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2746            } else {
2747              
2748              $self->{state} = DATA_STATE;
2749              $self->{s_kwd} = '';
2750            }
2751                    
2752      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2753        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2570  sub _get_next_token ($) { Line 2764  sub _get_next_token ($) {
2764    
2765          redo A;          redo A;
2766        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2767          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2768          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2769          $self->{s_kwd} = '';            
2770              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2771            } else {
2772              
2773              $self->{state} = DATA_STATE;
2774              $self->{s_kwd} = '';
2775            }
2776          ## reconsume          ## reconsume
2777    
2778          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2598  sub _get_next_token ($) { Line 2797  sub _get_next_token ($) {
2797          redo A;          redo A;
2798        }        }
2799      } elsif ($self->{state} == COMMENT_STATE) {      } elsif ($self->{state} == COMMENT_STATE) {
2800          ## XML5: "Comment state" and "DOCTYPE comment state".
2801    
2802        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2803                    
2804          $self->{state} = COMMENT_END_DASH_STATE;          $self->{state} = COMMENT_END_DASH_STATE;
# Line 2614  sub _get_next_token ($) { Line 2815  sub _get_next_token ($) {
2815        
2816          redo A;          redo A;
2817        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2818          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2819          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2820          $self->{s_kwd} = '';            
2821              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2822            } else {
2823              
2824              $self->{state} = DATA_STATE;
2825              $self->{s_kwd} = '';
2826            }
2827          ## reconsume          ## reconsume
2828    
2829          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2645  sub _get_next_token ($) { Line 2851  sub _get_next_token ($) {
2851          redo A;          redo A;
2852        }        }
2853      } elsif ($self->{state} == COMMENT_END_DASH_STATE) {      } elsif ($self->{state} == COMMENT_END_DASH_STATE) {
2854        ## XML5: "comment dash state".        ## XML5: "Comment dash state" and "DOCTYPE comment dash state".
2855    
2856        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2857                    
# Line 2663  sub _get_next_token ($) { Line 2869  sub _get_next_token ($) {
2869        
2870          redo A;          redo A;
2871        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2872          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2873          $self->{s_kwd} = '';          if ($self->{in_subset}) {
2874          $self->{state} = DATA_STATE;            
2875          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2876            } else {
2877              
2878              $self->{state} = DATA_STATE;
2879              $self->{s_kwd} = '';
2880            }
2881          ## reconsume          ## reconsume
2882    
2883          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2691  sub _get_next_token ($) { Line 2901  sub _get_next_token ($) {
2901          redo A;          redo A;
2902        }        }
2903      } elsif ($self->{state} == COMMENT_END_STATE) {      } elsif ($self->{state} == COMMENT_END_STATE) {
2904          ## XML5: "Comment end state" and "DOCTYPE comment end state".
2905    
2906        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2907                    if ($self->{in_subset}) {
2908          $self->{state} = DATA_STATE;            
2909          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2910            } else {
2911              
2912              $self->{state} = DATA_STATE;
2913              $self->{s_kwd} = '';
2914            }
2915                    
2916      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2917        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2731  sub _get_next_token ($) { Line 2948  sub _get_next_token ($) {
2948        
2949          redo A;          redo A;
2950        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2951          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2952          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2953          $self->{s_kwd} = '';            
2954              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2955            } else {
2956              
2957              $self->{state} = DATA_STATE;
2958              $self->{s_kwd} = '';
2959            }
2960          ## reconsume          ## reconsume
2961    
2962          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2779  sub _get_next_token ($) { Line 3001  sub _get_next_token ($) {
3001          redo A;          redo A;
3002        } else {        } else {
3003                    
3004            ## XML5: Unless EOF, switch to the bogus comment state.
3005          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before DOCTYPE name');
3006          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
3007          ## reconsume          ## reconsume
3008          redo A;          redo A;
3009        }        }
3010      } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {
3011          ## XML5: "DOCTYPE root name before state".
3012    
3013        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
3014                    
3015          ## Stay in the state          ## Stay in the state
# Line 2802  sub _get_next_token ($) { Line 3027  sub _get_next_token ($) {
3027          redo A;          redo A;
3028        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3029                    
3030            ## XML5: No parse error.
3031          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');
3032          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3033          $self->{s_kwd} = '';          $self->{s_kwd} = '';
# Line 2830  sub _get_next_token ($) { Line 3056  sub _get_next_token ($) {
3056          return  ($self->{ct}); # DOCTYPE (quirks)          return  ($self->{ct}); # DOCTYPE (quirks)
3057    
3058          redo A;          redo A;
3059          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
3060            
3061            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');
3062            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3063            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3064            $self->{in_subset} = 1;
3065            
3066        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3067          $self->{line_prev} = $self->{line};
3068          $self->{column_prev} = $self->{column};
3069          $self->{column}++;
3070          $self->{nc}
3071              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3072        } else {
3073          $self->{set_nc}->($self);
3074        }
3075      
3076            return  ($self->{ct}); # DOCTYPE
3077            redo A;
3078        } else {        } else {
3079                    
3080          $self->{ct}->{name} = chr $self->{nc};          $self->{ct}->{name} = chr $self->{nc};
# Line 2849  sub _get_next_token ($) { Line 3094  sub _get_next_token ($) {
3094          redo A;          redo A;
3095        }        }
3096      } elsif ($self->{state} == DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == DOCTYPE_NAME_STATE) {
3097  ## ISSUE: Redundant "First," in the spec.        ## XML5: "DOCTYPE root name state".
3098    
3099          ## ISSUE: Redundant "First," in the spec.
3100    
3101        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
3102                    
3103          $self->{state} = AFTER_DOCTYPE_NAME_STATE;          $self->{state} = AFTER_DOCTYPE_NAME_STATE;
# Line 2895  sub _get_next_token ($) { Line 3143  sub _get_next_token ($) {
3143          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
3144    
3145          redo A;          redo A;
3146          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
3147            
3148            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3149            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3150            $self->{in_subset} = 1;
3151            
3152        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3153          $self->{line_prev} = $self->{line};
3154          $self->{column_prev} = $self->{column};
3155          $self->{column}++;
3156          $self->{nc}
3157              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3158        } else {
3159          $self->{set_nc}->($self);
3160        }
3161      
3162            return  ($self->{ct}); # DOCTYPE
3163            redo A;
3164        } else {        } else {
3165                    
3166          $self->{ct}->{name}          $self->{ct}->{name}
# Line 2914  sub _get_next_token ($) { Line 3180  sub _get_next_token ($) {
3180          redo A;          redo A;
3181        }        }
3182      } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {
3183          ## XML5: Corresponding to XML5's "DOCTYPE root name after
3184          ## state", but implemented differently.
3185    
3186        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
3187                    
3188          ## Stay in the state          ## Stay in the state
# Line 2930  sub _get_next_token ($) { Line 3199  sub _get_next_token ($) {
3199        
3200          redo A;          redo A;
3201        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3202            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3203              
3204              $self->{state} = DATA_STATE;
3205              $self->{s_kwd} = '';
3206            } else {
3207              
3208              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md def'); ## TODO: type
3209              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3210            }
3211                    
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3212                    
3213      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3214        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2944  sub _get_next_token ($) { Line 3220  sub _get_next_token ($) {
3220        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3221      }      }
3222        
3223            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         return  ($self->{ct}); # DOCTYPE  
   
3224          redo A;          redo A;
3225        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3226            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3227              
3228              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3229              $self->{state} = DATA_STATE;
3230              $self->{s_kwd} = '';
3231              $self->{ct}->{quirks} = 1;
3232            } else {
3233              
3234              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3235              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3236            }
3237                    
3238          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          ## Reconsume.
3239          $self->{state} = DATA_STATE;          return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{s_kwd} = '';  
         ## reconsume  
   
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3240          redo A;          redo A;
3241        } elsif ($self->{nc} == 0x0050 or # P        } elsif ($self->{nc} == 0x0050 or # P
3242                 $self->{nc} == 0x0070) { # p                 $self->{nc} == 0x0070) { # p
3243            
3244          $self->{state} = PUBLIC_STATE;          $self->{state} = PUBLIC_STATE;
3245          $self->{s_kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
3246                    
3247      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3248        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2977  sub _get_next_token ($) { Line 3257  sub _get_next_token ($) {
3257          redo A;          redo A;
3258        } elsif ($self->{nc} == 0x0053 or # S        } elsif ($self->{nc} == 0x0053 or # S
3259                 $self->{nc} == 0x0073) { # s                 $self->{nc} == 0x0073) { # s
3260            
3261          $self->{state} = SYSTEM_STATE;          $self->{state} = SYSTEM_STATE;
3262          $self->{s_kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
3263                    
3264      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3265        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2991  sub _get_next_token ($) { Line 3272  sub _get_next_token ($) {
3272      }      }
3273        
3274          redo A;          redo A;
3275        } else {        } elsif ($self->{nc} == 0x0022 and # "
3276                   ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN or
3277                    $self->{ct}->{type} == PARAMETER_ENTITY_TOKEN)) {
3278                    
3279          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name');          $self->{state} = DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE;
3280          $self->{ct}->{quirks} = 1;          $self->{ct}->{value} = ''; # ENTITY
3281            
3282        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3283          $self->{line_prev} = $self->{line};
3284          $self->{column_prev} = $self->{column};
3285          $self->{column}++;
3286          $self->{nc}
3287              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3288        } else {
3289          $self->{set_nc}->($self);
3290        }
3291      
3292            redo A;
3293          } elsif ($self->{nc} == 0x0027 and # '
3294                   ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN or
3295                    $self->{ct}->{type} == PARAMETER_ENTITY_TOKEN)) {
3296            
3297            $self->{state} = DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE;
3298            $self->{ct}->{value} = ''; # ENTITY
3299            
3300        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3301          $self->{line_prev} = $self->{line};
3302          $self->{column_prev} = $self->{column};
3303          $self->{column}++;
3304          $self->{nc}
3305              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3306        } else {
3307          $self->{set_nc}->($self);
3308        }
3309      
3310            redo A;
3311          } elsif ($self->{is_xml} and
3312                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3313                   $self->{nc} == 0x005B) { # [
3314            
3315            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3316            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3317            $self->{in_subset} = 1;
3318            
3319        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3320          $self->{line_prev} = $self->{line};
3321          $self->{column_prev} = $self->{column};
3322          $self->{column}++;
3323          $self->{nc}
3324              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3325        } else {
3326          $self->{set_nc}->($self);
3327        }
3328      
3329            return  ($self->{ct}); # DOCTYPE
3330            redo A;
3331          } else {
3332            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name'); ## TODO: type
3333    
3334            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3335              
3336              $self->{ct}->{quirks} = 1;
3337              $self->{state} = BOGUS_DOCTYPE_STATE;
3338            } else {
3339              
3340              $self->{state} = BOGUS_MD_STATE;
3341            }
3342    
         $self->{state} = BOGUS_DOCTYPE_STATE;  
3343                    
3344      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3345        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3018  sub _get_next_token ($) { Line 3361  sub _get_next_token ($) {
3361              0x0042, # B              0x0042, # B
3362              0x004C, # L              0x004C, # L
3363              0x0049, # I              0x0049, # I
3364            ]->[length $self->{s_kwd}] or            ]->[length $self->{kwd}] or
3365            $self->{nc} == [            $self->{nc} == [
3366              undef,              undef,
3367              0x0075, # u              0x0075, # u
3368              0x0062, # b              0x0062, # b
3369              0x006C, # l              0x006C, # l
3370              0x0069, # i              0x0069, # i
3371            ]->[length $self->{s_kwd}]) {            ]->[length $self->{kwd}]) {
3372                    
3373          ## Stay in the state.          ## Stay in the state.
3374          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
3375                    
3376      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3377        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3041  sub _get_next_token ($) { Line 3384  sub _get_next_token ($) {
3384      }      }
3385        
3386          redo A;          redo A;
3387        } elsif ((length $self->{s_kwd}) == 5 and        } elsif ((length $self->{kwd}) == 5 and
3388                 ($self->{nc} == 0x0043 or # C                 ($self->{nc} == 0x0043 or # C
3389                  $self->{nc} == 0x0063)) { # c                  $self->{nc} == 0x0063)) { # c
3390                    if ($self->{is_xml} and
3391                ($self->{kwd} ne 'PUBLI' or $self->{nc} == 0x0063)) { # c
3392              
3393              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
3394                              text => 'PUBLIC',
3395                              line => $self->{line_prev},
3396                              column => $self->{column_prev} - 4);
3397            } else {
3398              
3399            }
3400          $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;          $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
3401                    
3402      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3059  sub _get_next_token ($) { Line 3411  sub _get_next_token ($) {
3411        
3412          redo A;          redo A;
3413        } else {        } else {
3414                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name', ## TODO: type
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name',  
3415                          line => $self->{line_prev},                          line => $self->{line_prev},
3416                          column => $self->{column_prev} + 1 - length $self->{s_kwd});                          column => $self->{column_prev} + 1 - length $self->{kwd});
3417          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3418              
3419          $self->{state} = BOGUS_DOCTYPE_STATE;            $self->{ct}->{quirks} = 1;
3420              $self->{state} = BOGUS_DOCTYPE_STATE;
3421            } else {
3422              
3423              $self->{state} = BOGUS_MD_STATE;
3424            }
3425          ## Reconsume.          ## Reconsume.
3426          redo A;          redo A;
3427        }        }
# Line 3077  sub _get_next_token ($) { Line 3433  sub _get_next_token ($) {
3433              0x0053, # S              0x0053, # S
3434              0x0054, # T              0x0054, # T
3435              0x0045, # E              0x0045, # E
3436            ]->[length $self->{s_kwd}] or            ]->[length $self->{kwd}] or
3437            $self->{nc} == [            $self->{nc} == [
3438              undef,              undef,
3439              0x0079, # y              0x0079, # y
3440              0x0073, # s              0x0073, # s
3441              0x0074, # t              0x0074, # t
3442              0x0065, # e              0x0065, # e
3443            ]->[length $self->{s_kwd}]) {            ]->[length $self->{kwd}]) {
3444                    
3445          ## Stay in the state.          ## Stay in the state.
3446          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
3447                    
3448      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3449        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3100  sub _get_next_token ($) { Line 3456  sub _get_next_token ($) {
3456      }      }
3457        
3458          redo A;          redo A;
3459        } elsif ((length $self->{s_kwd}) == 5 and        } elsif ((length $self->{kwd}) == 5 and
3460                 ($self->{nc} == 0x004D or # M                 ($self->{nc} == 0x004D or # M
3461                  $self->{nc} == 0x006D)) { # m                  $self->{nc} == 0x006D)) { # m
3462                    if ($self->{is_xml} and
3463                ($self->{kwd} ne 'SYSTE' or $self->{nc} == 0x006D)) { # m
3464              
3465              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
3466                              text => 'SYSTEM',
3467                              line => $self->{line_prev},
3468                              column => $self->{column_prev} - 4);
3469            } else {
3470              
3471            }
3472          $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;          $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
3473                    
3474      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3118  sub _get_next_token ($) { Line 3483  sub _get_next_token ($) {
3483        
3484          redo A;          redo A;
3485        } else {        } else {
3486                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name', ## TODO: type
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name',  
3487                          line => $self->{line_prev},                          line => $self->{line_prev},
3488                          column => $self->{column_prev} + 1 - length $self->{s_kwd});                          column => $self->{column_prev} + 1 - length $self->{kwd});
3489          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3490              
3491          $self->{state} = BOGUS_DOCTYPE_STATE;            $self->{ct}->{quirks} = 1;
3492              $self->{state} = BOGUS_DOCTYPE_STATE;
3493            } else {
3494              
3495              $self->{state} = BOGUS_MD_STATE;
3496            }
3497          ## Reconsume.          ## Reconsume.
3498          redo A;          redo A;
3499        }        }
# Line 3177  sub _get_next_token ($) { Line 3546  sub _get_next_token ($) {
3546        
3547          redo A;          redo A;
3548        } elsif ($self->{nc} eq 0x003E) { # >        } elsif ($self->{nc} eq 0x003E) { # >
           
3549          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');
3550            
3551          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3552          $self->{s_kwd} = '';            
3553              $self->{state} = DATA_STATE;
3554              $self->{s_kwd} = '';
3555              $self->{ct}->{quirks} = 1;
3556            } else {
3557              
3558              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3559            }
3560            
3561                    
3562      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3563        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3193  sub _get_next_token ($) { Line 3569  sub _get_next_token ($) {
3569        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3570      }      }
3571        
3572            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3573          redo A;          redo A;
3574        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3575            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3576              
3577              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3578              $self->{state} = DATA_STATE;
3579              $self->{s_kwd} = '';
3580              $self->{ct}->{quirks} = 1;
3581            } else {
3582              
3583              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3584              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3585            }
3586                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');  
   
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3587          ## reconsume          ## reconsume
   
         $self->{ct}->{quirks} = 1;  
3588          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
3589          redo A;          redo A;
3590        } else {        } elsif ($self->{is_xml} and
3591                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3592                   $self->{nc} == 0x005B) { # [
3593                    
3594            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');
3595            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3596            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3597            $self->{in_subset} = 1;
3598            
3599        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3600          $self->{line_prev} = $self->{line};
3601          $self->{column_prev} = $self->{column};
3602          $self->{column}++;
3603          $self->{nc}
3604              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3605        } else {
3606          $self->{set_nc}->($self);
3607        }
3608      
3609            return  ($self->{ct}); # DOCTYPE
3610            redo A;
3611          } else {
3612          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC');
         $self->{ct}->{quirks} = 1;  
3613    
3614          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3615              
3616              $self->{ct}->{quirks} = 1;
3617              $self->{state} = BOGUS_DOCTYPE_STATE;
3618            } else {
3619              
3620              $self->{state} = BOGUS_MD_STATE;
3621            }
3622    
3623                    
3624      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3625        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3246  sub _get_next_token ($) { Line 3650  sub _get_next_token ($) {
3650        
3651          redo A;          redo A;
3652        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
3653          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3654    
3655          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3656          $self->{s_kwd} = '';            
3657              $self->{state} = DATA_STATE;
3658              $self->{s_kwd} = '';
3659              $self->{ct}->{quirks} = 1;
3660            } else {
3661              
3662              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3663            }
3664    
3665                    
3666      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3667        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3262  sub _get_next_token ($) { Line 3673  sub _get_next_token ($) {
3673        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3674      }      }
3675        
3676            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3677          redo A;          redo A;
3678        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
3679          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3680    
3681          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3682          $self->{s_kwd} = '';            
3683          ## reconsume            $self->{state} = DATA_STATE;
3684              $self->{s_kwd} = '';
3685          $self->{ct}->{quirks} = 1;            $self->{ct}->{quirks} = 1;
3686            } else {
3687              
3688              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3689            }
3690            
3691            ## Reconsume.
3692          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
3693          redo A;          redo A;
3694        } else {        } else {
3695                    
3696          $self->{ct}->{pubid} # DOCTYPE          $self->{ct}->{pubid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
3697          $self->{read_until}->($self->{ct}->{pubid}, q[">],          $self->{read_until}->($self->{ct}->{pubid}, q[">],
3698                                length $self->{ct}->{pubid});                                length $self->{ct}->{pubid});
3699    
# Line 3317  sub _get_next_token ($) { Line 3728  sub _get_next_token ($) {
3728        
3729          redo A;          redo A;
3730        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
3731          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3732    
3733          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3734          $self->{s_kwd} = '';            
3735              $self->{state} = DATA_STATE;
3736              $self->{s_kwd} = '';
3737              $self->{ct}->{quirks} = 1;
3738            } else {
3739              
3740              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3741            }
3742    
3743                    
3744      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3745        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3333  sub _get_next_token ($) { Line 3751  sub _get_next_token ($) {
3751        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3752      }      }
3753        
3754            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3755          redo A;          redo A;
3756        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
3757          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3758    
3759          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3760          $self->{s_kwd} = '';            
3761              $self->{state} = DATA_STATE;
3762              $self->{s_kwd} = '';
3763              $self->{ct}->{quirks} = 1;
3764            } else {
3765              
3766              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3767            }
3768          
3769          ## reconsume          ## reconsume
3770            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3771          redo A;          redo A;
3772        } else {        } else {
3773                    
3774          $self->{ct}->{pubid} # DOCTYPE          $self->{ct}->{pubid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
3775          $self->{read_until}->($self->{ct}->{pubid}, q['>],          $self->{read_until}->($self->{ct}->{pubid}, q['>],
3776                                length $self->{ct}->{pubid});                                length $self->{ct}->{pubid});
3777    
# Line 3389  sub _get_next_token ($) { Line 3807  sub _get_next_token ($) {
3807          redo A;          redo A;
3808        } elsif ($self->{nc} == 0x0022) { # "        } elsif ($self->{nc} == 0x0022) { # "
3809                    
3810          $self->{ct}->{sysid} = ''; # DOCTYPE          $self->{ct}->{sysid} = ''; # DOCTYPE/ENTITY/NOTATION
3811          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
3812                    
3813      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3405  sub _get_next_token ($) { Line 3823  sub _get_next_token ($) {
3823          redo A;          redo A;
3824        } elsif ($self->{nc} == 0x0027) { # '        } elsif ($self->{nc} == 0x0027) { # '
3825                    
3826          $self->{ct}->{sysid} = ''; # DOCTYPE          $self->{ct}->{sysid} = ''; # DOCTYPE/ENTITY/NOTATION
3827          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
3828                    
3829      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3420  sub _get_next_token ($) { Line 3838  sub _get_next_token ($) {
3838        
3839          redo A;          redo A;
3840        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3841            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3842              if ($self->{is_xml}) {
3843                
3844                $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
3845              } else {
3846                
3847              }
3848              $self->{state} = DATA_STATE;
3849              $self->{s_kwd} = '';
3850            } else {
3851              if ($self->{ct}->{type} == NOTATION_TOKEN) {
3852                
3853              } else {
3854                
3855                $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');            
3856              }
3857              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3858            }
3859                    
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3860                    
3861      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3862        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3434  sub _get_next_token ($) { Line 3868  sub _get_next_token ($) {
3868        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3869      }      }
3870        
3871            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         return  ($self->{ct}); # DOCTYPE  
   
3872          redo A;          redo A;
3873        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3874            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3875              
3876              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3877              
3878              $self->{state} = DATA_STATE;
3879              $self->{s_kwd} = '';
3880              $self->{ct}->{quirks} = 1;
3881            } else {
3882              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3883              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3884            }
3885                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');  
   
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3886          ## reconsume          ## reconsume
3887            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
3888          $self->{ct}->{quirks} = 1;          redo A;
3889          } elsif ($self->{is_xml} and
3890                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3891                   $self->{nc} == 0x005B) { # [
3892            
3893            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
3894            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3895            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3896            $self->{in_subset} = 1;
3897            
3898        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3899          $self->{line_prev} = $self->{line};
3900          $self->{column_prev} = $self->{column};
3901          $self->{column}++;
3902          $self->{nc}
3903              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3904        } else {
3905          $self->{set_nc}->($self);
3906        }
3907      
3908          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
3909          redo A;          redo A;
3910        } else {        } else {
           
3911          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC literal');
         $self->{ct}->{quirks} = 1;  
3912    
3913          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3914              
3915              $self->{ct}->{quirks} = 1;
3916              $self->{state} = BOGUS_DOCTYPE_STATE;
3917            } else {
3918              
3919              $self->{state} = BOGUS_MD_STATE;
3920            }
3921    
3922                    
3923      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3924        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3518  sub _get_next_token ($) { Line 3981  sub _get_next_token ($) {
3981        
3982          redo A;          redo A;
3983        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
3984          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3985                    
3986      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3987        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3534  sub _get_next_token ($) { Line 3994  sub _get_next_token ($) {
3994      }      }
3995        
3996    
3997          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3998          return  ($self->{ct}); # DOCTYPE            
3999              $self->{state} = DATA_STATE;
4000              $self->{s_kwd} = '';
4001              $self->{ct}->{quirks} = 1;
4002            } else {
4003              
4004              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4005            }
4006    
4007            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4008          redo A;          redo A;
4009        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4010            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4011              
4012              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
4013              $self->{state} = DATA_STATE;
4014              $self->{s_kwd} = '';
4015              $self->{ct}->{quirks} = 1;
4016            } else {
4017              
4018              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
4019              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4020            }
4021                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');  
   
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
4022          ## reconsume          ## reconsume
4023            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4024            redo A;
4025          } elsif ($self->{is_xml} and
4026                   $self->{ct}->{type} == DOCTYPE_TOKEN and
4027                   $self->{nc} == 0x005B) { # [
4028            
4029            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
4030    
4031          $self->{ct}->{quirks} = 1;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4032            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
4033            $self->{in_subset} = 1;
4034            
4035        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4036          $self->{line_prev} = $self->{line};
4037          $self->{column_prev} = $self->{column};
4038          $self->{column}++;
4039          $self->{nc}
4040              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4041        } else {
4042          $self->{set_nc}->($self);
4043        }
4044      
4045          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
4046          redo A;          redo A;
4047        } else {        } else {
           
4048          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM');
         $self->{ct}->{quirks} = 1;  
4049    
4050          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4051                        
4052              $self->{ct}->{quirks} = 1;
4053              $self->{state} = BOGUS_DOCTYPE_STATE;
4054            } else {
4055              
4056              $self->{state} = BOGUS_MD_STATE;
4057            }
4058    
4059                    
4060      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4061        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3585  sub _get_next_token ($) { Line 4085  sub _get_next_token ($) {
4085      }      }
4086        
4087          redo A;          redo A;
4088        } elsif ($self->{nc} == 0x003E) { # >        } elsif (not $self->{is_xml} and $self->{nc} == 0x003E) { # >
           
4089          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4090    
4091          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4092          $self->{s_kwd} = '';            
4093              $self->{state} = DATA_STATE;
4094              $self->{s_kwd} = '';
4095              $self->{ct}->{quirks} = 1;
4096            } else {
4097              
4098              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4099            }
4100            
4101                    
4102      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4103        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3602  sub _get_next_token ($) { Line 4109  sub _get_next_token ($) {
4109        $self->{set_nc}->($self);        $self->{set_nc}->($self);
4110      }      }
4111        
4112            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
4113          redo A;          redo A;
4114        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
4115          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4116    
4117          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4118          $self->{s_kwd} = '';            
4119              $self->{state} = DATA_STATE;
4120              $self->{s_kwd} = '';
4121              $self->{ct}->{quirks} = 1;
4122            } else {
4123              
4124              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4125            }
4126            
4127          ## reconsume          ## reconsume
4128            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
4129          redo A;          redo A;
4130        } else {        } else {
4131                    
4132          $self->{ct}->{sysid} # DOCTYPE          $self->{ct}->{sysid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
4133          $self->{read_until}->($self->{ct}->{sysid}, q[">],          $self->{read_until}->($self->{ct}->{sysid}, q[">],
4134                                length $self->{ct}->{sysid});                                length $self->{ct}->{sysid});
4135    
# Line 3656  sub _get_next_token ($) { Line 4163  sub _get_next_token ($) {
4163      }      }
4164        
4165          redo A;          redo A;
4166        } elsif ($self->{nc} == 0x003E) { # >        } elsif (not $self->{is_xml} and $self->{nc} == 0x003E) { # >
4167                    
4168          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4169    
# Line 3679  sub _get_next_token ($) { Line 4186  sub _get_next_token ($) {
4186    
4187          redo A;          redo A;
4188        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
4189          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4190    
4191          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4192          $self->{s_kwd} = '';            
4193          ## reconsume            $self->{state} = DATA_STATE;
4194              $self->{s_kwd} = '';
4195          $self->{ct}->{quirks} = 1;            $self->{ct}->{quirks} = 1;
4196          return  ($self->{ct}); # DOCTYPE          } else {
4197              
4198              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4199            }
4200    
4201            ## reconsume
4202            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4203          redo A;          redo A;
4204        } else {        } else {
4205                    
4206          $self->{ct}->{sysid} # DOCTYPE          $self->{ct}->{sysid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
4207          $self->{read_until}->($self->{ct}->{sysid}, q['>],          $self->{read_until}->($self->{ct}->{sysid}, q['>],
4208                                length $self->{ct}->{sysid});                                length $self->{ct}->{sysid});
4209    
# Line 3713  sub _get_next_token ($) { Line 4223  sub _get_next_token ($) {
4223        }        }
4224      } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {      } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
4225        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
4226                    if ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN) {
4227          ## Stay in the state            
4228              $self->{state} = BEFORE_NDATA_STATE;
4229            } else {
4230              
4231              ## Stay in the state
4232            }
4233                    
4234      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4235        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3728  sub _get_next_token ($) { Line 4243  sub _get_next_token ($) {
4243        
4244          redo A;          redo A;
4245        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
4246            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4247              
4248              $self->{state} = DATA_STATE;
4249              $self->{s_kwd} = '';
4250            } else {
4251              
4252              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4253            }
4254    
4255                    
4256          $self->{state} = DATA_STATE;      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4257          $self->{s_kwd} = '';        $self->{line_prev} = $self->{line};
4258          $self->{column_prev} = $self->{column};
4259          $self->{column}++;
4260          $self->{nc}
4261              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4262        } else {
4263          $self->{set_nc}->($self);
4264        }
4265      
4266            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4267            redo A;
4268          } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
4269                   ($self->{nc} == 0x004E or # N
4270                    $self->{nc} == 0x006E)) { # n
4271            
4272            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before NDATA'); ## TODO: type
4273            $self->{state} = NDATA_STATE;
4274            $self->{kwd} = chr $self->{nc};
4275                    
4276      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4277        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3742  sub _get_next_token ($) { Line 4283  sub _get_next_token ($) {
4283        $self->{set_nc}->($self);        $self->{set_nc}->($self);
4284      }      }
4285        
4286            redo A;
4287          } elsif ($self->{nc} == -1) {
4288            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4289              
4290              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
4291              $self->{state} = DATA_STATE;
4292              $self->{s_kwd} = '';
4293              $self->{ct}->{quirks} = 1;
4294            } else {
4295              
4296              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
4297              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4298            }
4299    
4300            ## reconsume
4301            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4302            redo A;
4303          } elsif ($self->{is_xml} and
4304                   $self->{ct}->{type} == DOCTYPE_TOKEN and
4305                   $self->{nc} == 0x005B) { # [
4306            
4307            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4308            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
4309            $self->{in_subset} = 1;
4310            
4311        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4312          $self->{line_prev} = $self->{line};
4313          $self->{column_prev} = $self->{column};
4314          $self->{column}++;
4315          $self->{nc}
4316              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4317        } else {
4318          $self->{set_nc}->($self);
4319        }
4320      
4321          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
4322            redo A;
4323          } else {
4324            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');
4325    
4326            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4327              
4328              #$self->{ct}->{quirks} = 1;
4329              $self->{state} = BOGUS_DOCTYPE_STATE;
4330            } else {
4331              
4332              $self->{state} = BOGUS_MD_STATE;
4333            }
4334    
4335            
4336        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4337          $self->{line_prev} = $self->{line};
4338          $self->{column_prev} = $self->{column};
4339          $self->{column}++;
4340          $self->{nc}
4341              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4342        } else {
4343          $self->{set_nc}->($self);
4344        }
4345      
4346            redo A;
4347          }
4348        } elsif ($self->{state} == BEFORE_NDATA_STATE) {
4349          if ($is_space->{$self->{nc}}) {
4350            
4351            ## Stay in the state.
4352            
4353        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4354          $self->{line_prev} = $self->{line};
4355          $self->{column_prev} = $self->{column};
4356          $self->{column}++;
4357          $self->{nc}
4358              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4359        } else {
4360          $self->{set_nc}->($self);
4361        }
4362      
4363            redo A;
4364          } elsif ($self->{nc} == 0x003E) { # >
4365            
4366            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4367            
4368        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4369          $self->{line_prev} = $self->{line};
4370          $self->{column_prev} = $self->{column};
4371          $self->{column}++;
4372          $self->{nc}
4373              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4374        } else {
4375          $self->{set_nc}->($self);
4376        }
4377      
4378            return  ($self->{ct}); # ENTITY
4379            redo A;
4380          } elsif ($self->{nc} == 0x004E or # N
4381                   $self->{nc} == 0x006E) { # n
4382            
4383            $self->{state} = NDATA_STATE;
4384            $self->{kwd} = chr $self->{nc};
4385            
4386        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4387          $self->{line_prev} = $self->{line};
4388          $self->{column_prev} = $self->{column};
4389          $self->{column}++;
4390          $self->{nc}
4391              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4392        } else {
4393          $self->{set_nc}->($self);
4394        }
4395      
4396          redo A;          redo A;
4397        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4398                    
4399          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
4400          $self->{state} = DATA_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
         $self->{s_kwd} = '';  
4401          ## reconsume          ## reconsume
4402            return  ($self->{ct}); # ENTITY
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
4403          redo A;          redo A;
4404        } else {        } else {
4405                    
4406          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');
4407          #$self->{ct}->{quirks} = 1;          $self->{state} = BOGUS_MD_STATE;
   
         $self->{state} = BOGUS_DOCTYPE_STATE;  
4408                    
4409      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4410        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3796  sub _get_next_token ($) { Line 4438  sub _get_next_token ($) {
4438          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
4439    
4440          redo A;          redo A;
4441          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
4442            
4443            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4444            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
4445            $self->{in_subset} = 1;
4446            
4447        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4448          $self->{line_prev} = $self->{line};
4449          $self->{column_prev} = $self->{column};
4450          $self->{column}++;
4451          $self->{nc}
4452              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4453        } else {
4454          $self->{set_nc}->($self);
4455        }
4456      
4457            return  ($self->{ct}); # DOCTYPE
4458            redo A;
4459        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4460                    
4461          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
# Line 3808  sub _get_next_token ($) { Line 4468  sub _get_next_token ($) {
4468        } else {        } else {
4469                    
4470          my $s = '';          my $s = '';
4471          $self->{read_until}->($s, q[>], 0);          $self->{read_until}->($s, q{>[}, 0);
4472    
4473          ## Stay in the state          ## Stay in the state
4474                    
# Line 3976  sub _get_next_token ($) { Line 4636  sub _get_next_token ($) {
4636        } elsif ($self->{nc} == 0x0023) { # #        } elsif ($self->{nc} == 0x0023) { # #
4637                    
4638          $self->{state} = ENTITY_HASH_STATE;          $self->{state} = ENTITY_HASH_STATE;
4639          $self->{s_kwd} = '#';          $self->{kwd} = '#';
4640                    
4641      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4642        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3996  sub _get_next_token ($) { Line 4656  sub _get_next_token ($) {
4656                    
4657          require Whatpm::_NamedEntityList;          require Whatpm::_NamedEntityList;
4658          $self->{state} = ENTITY_NAME_STATE;          $self->{state} = ENTITY_NAME_STATE;
4659          $self->{s_kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
4660          $self->{entity__value} = $self->{s_kwd};          $self->{entity__value} = $self->{kwd};
4661          $self->{entity__match} = 0;          $self->{entity__match} = 0;
4662                    
4663      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 4043  sub _get_next_token ($) { Line 4703  sub _get_next_token ($) {
4703          redo A;          redo A;
4704        }        }
4705      } elsif ($self->{state} == ENTITY_HASH_STATE) {      } elsif ($self->{state} == ENTITY_HASH_STATE) {
4706        if ($self->{nc} == 0x0078 or # x        if ($self->{nc} == 0x0078) { # x
4707            $self->{nc} == 0x0058) { # X          
4708            $self->{state} = HEXREF_X_STATE;
4709            $self->{kwd} .= chr $self->{nc};
4710            
4711        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4712          $self->{line_prev} = $self->{line};
4713          $self->{column_prev} = $self->{column};
4714          $self->{column}++;
4715          $self->{nc}
4716              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4717        } else {
4718          $self->{set_nc}->($self);
4719        }
4720      
4721            redo A;
4722          } elsif ($self->{nc} == 0x0058) { # X
4723                    
4724            if ($self->{is_xml}) {
4725              $self->{parse_error}->(level => $self->{level}->{must}, type => 'uppercase hcro'); ## TODO: type
4726            }
4727          $self->{state} = HEXREF_X_STATE;          $self->{state} = HEXREF_X_STATE;
4728          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
4729                    
4730      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4731        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 4064  sub _get_next_token ($) { Line 4742  sub _get_next_token ($) {
4742                 $self->{nc} <= 0x0039) { # 0..9                 $self->{nc} <= 0x0039) { # 0..9
4743                    
4744          $self->{state} = NCR_NUM_STATE;          $self->{state} = NCR_NUM_STATE;
4745          $self->{s_kwd} = $self->{nc} - 0x0030;          $self->{kwd} = $self->{nc} - 0x0030;
4746                    
4747      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4748        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 4110  sub _get_next_token ($) { Line 4788  sub _get_next_token ($) {
4788        if (0x0030 <= $self->{nc} and        if (0x0030 <= $self->{nc} and
4789            $self->{nc} <= 0x0039) { # 0..9            $self->{nc} <= 0x0039) { # 0..9
4790                    
4791          $self->{s_kwd} *= 10;          $self->{kwd} *= 10;
4792          $self->{s_kwd} += $self->{nc} - 0x0030;          $self->{kwd} += $self->{nc} - 0x0030;
4793                    
4794          ## Stay in the state.          ## Stay in the state.
4795                    
# Line 4147  sub _get_next_token ($) { Line 4825  sub _get_next_token ($) {
4825          #          #
4826        }        }
4827    
4828        my $code = $self->{s_kwd};        my $code = $self->{kwd};
4829        my $l = $self->{line_prev};        my $l = $self->{line_prev};
4830        my $c = $self->{column_prev};        my $c = $self->{column_prev};
4831        if ($charref_map->{$code}) {        if ($charref_map->{$code}) {
# Line 4190  sub _get_next_token ($) { Line 4868  sub _get_next_token ($) {
4868          # 0..9, A..F, a..f          # 0..9, A..F, a..f
4869                    
4870          $self->{state} = HEXREF_HEX_STATE;          $self->{state} = HEXREF_HEX_STATE;
4871          $self->{s_kwd} = 0;          $self->{kwd} = 0;
4872          ## Reconsume.          ## Reconsume.
4873          redo A;          redo A;
4874        } else {        } else {
# Line 4208  sub _get_next_token ($) { Line 4886  sub _get_next_token ($) {
4886            $self->{s_kwd} = '';            $self->{s_kwd} = '';
4887            ## Reconsume.            ## Reconsume.
4888            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
4889                      data => '&' . $self->{s_kwd},                      data => '&' . $self->{kwd},
4890                      line => $self->{line_prev},                      line => $self->{line_prev},
4891                      column => $self->{column_prev} - length $self->{s_kwd},                      column => $self->{column_prev} - length $self->{kwd},
4892                     });                     });
4893            redo A;            redo A;
4894          } else {          } else {
4895                        
4896            $self->{ca}->{value} .= '&' . $self->{s_kwd};            $self->{ca}->{value} .= '&' . $self->{kwd};
4897            $self->{state} = $self->{prev_state};            $self->{state} = $self->{prev_state};
4898            $self->{s_kwd} = '';            $self->{s_kwd} = '';
4899            ## Reconsume.            ## Reconsume.
# Line 4226  sub _get_next_token ($) { Line 4904  sub _get_next_token ($) {
4904        if (0x0030 <= $self->{nc} and $self->{nc} <= 0x0039) {        if (0x0030 <= $self->{nc} and $self->{nc} <= 0x0039) {
4905          # 0..9          # 0..9
4906                    
4907          $self->{s_kwd} *= 0x10;          $self->{kwd} *= 0x10;
4908          $self->{s_kwd} += $self->{nc} - 0x0030;          $self->{kwd} += $self->{nc} - 0x0030;
4909          ## Stay in the state.          ## Stay in the state.
4910                    
4911      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 4244  sub _get_next_token ($) { Line 4922  sub _get_next_token ($) {
4922        } elsif (0x0061 <= $self->{nc} and        } elsif (0x0061 <= $self->{nc} and
4923                 $self->{nc} <= 0x0066) { # a..f                 $self->{nc} <= 0x0066) { # a..f
4924                    
4925          $self->{s_kwd} *= 0x10;          $self->{kwd} *= 0x10;
4926          $self->{s_kwd} += $self->{nc} - 0x0060 + 9;          $self->{kwd} += $self->{nc} - 0x0060 + 9;
4927          ## Stay in the state.          ## Stay in the state.
4928                    
4929      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 4262  sub _get_next_token ($) { Line 4940  sub _get_next_token ($) {
4940        } elsif (0x0041 <= $self->{nc} and        } elsif (0x0041 <= $self->{nc} and
4941                 $self->{nc} <= 0x0046) { # A..F                 $self->{nc} <= 0x0046) { # A..F
4942                    
4943          $self->{s_kwd} *= 0x10;          $self->{kwd} *= 0x10;
4944          $self->{s_kwd} += $self->{nc} - 0x0040 + 9;          $self->{kwd} += $self->{nc} - 0x0040 + 9;
4945          ## Stay in the state.          ## Stay in the state.
4946                    
4947      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 4300  sub _get_next_token ($) { Line 4978  sub _get_next_token ($) {
4978          #          #
4979        }        }
4980    
4981        my $code = $self->{s_kwd};        my $code = $self->{kwd};
4982        my $l = $self->{line_prev};        my $l = $self->{line_prev};
4983        my $c = $self->{column_prev};        my $c = $self->{column_prev};
4984        if ($charref_map->{$code}) {        if ($charref_map->{$code}) {
# Line 4337  sub _get_next_token ($) { Line 5015  sub _get_next_token ($) {
5015          redo A;          redo A;
5016        }        }
5017      } elsif ($self->{state} == ENTITY_NAME_STATE) {      } elsif ($self->{state} == ENTITY_NAME_STATE) {
5018        if (length $self->{s_kwd} < 30 and        if ((0x0041 <= $self->{nc} and # a
5019            ## NOTE: Some number greater than the maximum length of entity name             $self->{nc} <= 0x005A) or # x
5020            ((0x0041 <= $self->{nc} and # a            (0x0061 <= $self->{nc} and # a
5021              $self->{nc} <= 0x005A) or # x             $self->{nc} <= 0x007A) or # z
5022             (0x0061 <= $self->{nc} and # a            (0x0030 <= $self->{nc} and # 0
5023              $self->{nc} <= 0x007A) or # z             $self->{nc} <= 0x0039) or # 9
5024             (0x0030 <= $self->{nc} and # 0            $self->{nc} == 0x003B) { # ;
             $self->{nc} <= 0x0039) or # 9  
            $self->{nc} == 0x003B)) { # ;  
5025          our $EntityChar;          our $EntityChar;
5026          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
5027          if (defined $EntityChar->{$self->{s_kwd}}) {          if (defined $EntityChar->{$self->{kwd}} or
5028                $self->{ge}->{$self->{kwd}}) {
5029            if ($self->{nc} == 0x003B) { # ;            if ($self->{nc} == 0x003B) { # ;
5030                            if (defined $self->{ge}->{$self->{kwd}}) {
5031              $self->{entity__value} = $EntityChar->{$self->{s_kwd}};                if ($self->{ge}->{$self->{kwd}}->{only_text}) {
5032                    
5033                    $self->{entity__value} = $self->{ge}->{$self->{kwd}}->{value};
5034                  } else {
5035                    if (defined $self->{ge}->{$self->{kwd}}->{notation}) {
5036                      
5037                      $self->{parse_error}->(level => $self->{level}->{must}, type => 'unparsed entity', ## TODO: type
5038                                      value => $self->{kwd});
5039                    } else {
5040                      
5041                    }
5042                    $self->{entity__value} = '&' . $self->{kwd}; ## TODO: expand
5043                  }
5044                } else {
5045                  if ($self->{is_xml}) {
5046                    
5047                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'entity not declared', ## TODO: type
5048                                    value => $self->{kwd},
5049                                    level => {
5050                                              'amp;' => $self->{level}->{warn},
5051                                              'quot;' => $self->{level}->{warn},
5052                                              'lt;' => $self->{level}->{warn},
5053                                              'gt;' => $self->{level}->{warn},
5054                                              'apos;' => $self->{level}->{warn},
5055                                             }->{$self->{kwd}} ||
5056                                             $self->{level}->{must});
5057                  } else {
5058                    
5059                  }
5060                  $self->{entity__value} = $EntityChar->{$self->{kwd}};
5061                }
5062              $self->{entity__match} = 1;              $self->{entity__match} = 1;
5063                            
5064      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 4367  sub _get_next_token ($) { Line 5074  sub _get_next_token ($) {
5074              #              #
5075            } else {            } else {
5076                            
5077              $self->{entity__value} = $EntityChar->{$self->{s_kwd}};              $self->{entity__value} = $EntityChar->{$self->{kwd}};
5078              $self->{entity__match} = -1;              $self->{entity__match} = -1;
5079              ## Stay in the state.              ## Stay in the state.
5080                            
# Line 4415  sub _get_next_token ($) { Line 5122  sub _get_next_token ($) {
5122          if ($self->{prev_state} != DATA_STATE and # in attribute          if ($self->{prev_state} != DATA_STATE and # in attribute
5123              $self->{entity__match} < -1) {              $self->{entity__match} < -1) {
5124                        
5125            $data = '&' . $self->{s_kwd};            $data = '&' . $self->{kwd};
5126            #            #
5127          } else {          } else {
5128                        
# Line 4427  sub _get_next_token ($) { Line 5134  sub _get_next_token ($) {
5134                    
5135          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero',
5136                          line => $self->{line_prev},                          line => $self->{line_prev},
5137                          column => $self->{column_prev} - length $self->{s_kwd});                          column => $self->{column_prev} - length $self->{kwd});
5138          $data = '&' . $self->{s_kwd};          $data = '&' . $self->{kwd};
5139          #          #
5140        }        }
5141        
# Line 4451  sub _get_next_token ($) { Line 5158  sub _get_next_token ($) {
5158                    data => $data,                    data => $data,
5159                    has_reference => $has_ref,                    has_reference => $has_ref,
5160                    line => $self->{line_prev},                    line => $self->{line_prev},
5161                    column => $self->{column_prev} + 1 - length $self->{s_kwd},                    column => $self->{column_prev} + 1 - length $self->{kwd},
5162                   });                   });
5163          redo A;          redo A;
5164        } else {        } else {
# Line 4467  sub _get_next_token ($) { Line 5174  sub _get_next_token ($) {
5174      ## XML-only states      ## XML-only states
5175    
5176      } elsif ($self->{state} == PI_STATE) {      } elsif ($self->{state} == PI_STATE) {
5177          ## XML5: "Pi state" and "DOCTYPE pi state".
5178    
5179        if ($is_space->{$self->{nc}} or        if ($is_space->{$self->{nc}} or
5180            $self->{nc} == 0x003F or # ? ## XML5: Same as "Anything else"            $self->{nc} == 0x003F or # ?
5181            $self->{nc} == -1) {            $self->{nc} == -1) {
5182            ## XML5: U+003F: "pi state": Same as "Anything else"; "DOCTYPE
5183            ## pi state": Switch to the "DOCTYPE pi after state".  EOF:
5184            ## "DOCTYPE pi state": Parse error, switch to the "data
5185            ## state".
5186          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare pio', ## TODO: type          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare pio', ## TODO: type
5187                          line => $self->{line_prev},                          line => $self->{line_prev},
5188                          column => $self->{column_prev}                          column => $self->{column_prev}
# Line 4484  sub _get_next_token ($) { Line 5197  sub _get_next_token ($) {
5197                        };                        };
5198          redo A;          redo A;
5199        } else {        } else {
5200            ## XML5: "DOCTYPE pi state": Stay in the state.
5201          $self->{ct} = {type => PI_TOKEN,          $self->{ct} = {type => PI_TOKEN,
5202                         target => chr $self->{nc},                         target => chr $self->{nc},
5203                         data => '',                         data => '',
# Line 4521  sub _get_next_token ($) { Line 5235  sub _get_next_token ($) {
5235          redo A;          redo A;
5236        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
5237          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type
5238          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
5239          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5240            } else {
5241              $self->{state} = DATA_STATE;
5242              $self->{s_kwd} = '';
5243            }
5244          ## Reconsume.          ## Reconsume.
5245          return  ($self->{ct}); # pi          return  ($self->{ct}); # pi
5246          redo A;          redo A;
# Line 4593  sub _get_next_token ($) { Line 5311  sub _get_next_token ($) {
5311          redo A;          redo A;
5312        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
5313          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type
5314          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
5315          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state"
5316            } else {
5317              $self->{state} = DATA_STATE;
5318              $self->{s_kwd} = '';
5319            }
5320          ## Reprocess.          ## Reprocess.
5321          return  ($self->{ct}); # pi          return  ($self->{ct}); # pi
5322          redo A;          redo A;
# Line 4618  sub _get_next_token ($) { Line 5340  sub _get_next_token ($) {
5340          redo A;          redo A;
5341        }        }
5342      } elsif ($self->{state} == PI_AFTER_STATE) {      } elsif ($self->{state} == PI_AFTER_STATE) {
5343          ## XML5: Part of "Pi after state".
5344    
5345        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
5346          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
5347          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5348            } else {
5349              $self->{state} = DATA_STATE;
5350              $self->{s_kwd} = '';
5351            }
5352                    
5353      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5354        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 4663  sub _get_next_token ($) { Line 5391  sub _get_next_token ($) {
5391          redo A;          redo A;
5392        }        }
5393      } elsif ($self->{state} == PI_DATA_AFTER_STATE) {      } elsif ($self->{state} == PI_DATA_AFTER_STATE) {
5394        ## XML5: Same as "pi after state" in XML5        ## XML5: Same as "pi after state" and "DOCTYPE pi after state".
5395    
5396        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
5397          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
5398          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5399            } else {
5400              $self->{state} = DATA_STATE;
5401              $self->{s_kwd} = '';
5402            }
5403                    
5404      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5405        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 4701  sub _get_next_token ($) { Line 5434  sub _get_next_token ($) {
5434          ## Reprocess.          ## Reprocess.
5435          redo A;          redo A;
5436        }        }
5437    
5438        } elsif ($self->{state} == DOCTYPE_INTERNAL_SUBSET_STATE) {
5439          if ($self->{nc} == 0x003C) { # <
5440            $self->{state} = DOCTYPE_TAG_STATE;
5441                    
5442        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5443          $self->{line_prev} = $self->{line};
5444          $self->{column_prev} = $self->{column};
5445          $self->{column}++;
5446          $self->{nc}
5447              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5448        } else {
5449          $self->{set_nc}->($self);
5450        }
5451      
5452            redo A;
5453          } elsif ($self->{nc} == 0x0025) { # %
5454            ## XML5: Not defined yet.
5455    
5456            ## TODO:
5457            
5458        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5459          $self->{line_prev} = $self->{line};
5460          $self->{column_prev} = $self->{column};
5461          $self->{column}++;
5462          $self->{nc}
5463              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5464        } else {
5465          $self->{set_nc}->($self);
5466        }
5467      
5468            redo A;
5469          } elsif ($self->{nc} == 0x005D) { # ]
5470            delete $self->{in_subset};
5471            $self->{state} = DOCTYPE_INTERNAL_SUBSET_AFTER_STATE;
5472            
5473        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5474          $self->{line_prev} = $self->{line};
5475          $self->{column_prev} = $self->{column};
5476          $self->{column}++;
5477          $self->{nc}
5478              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5479        } else {
5480          $self->{set_nc}->($self);
5481        }
5482      
5483            redo A;
5484          } elsif ($is_space->{$self->{nc}}) {
5485            ## Stay in the state.
5486            
5487        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5488          $self->{line_prev} = $self->{line};
5489          $self->{column_prev} = $self->{column};
5490          $self->{column}++;
5491          $self->{nc}
5492              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5493        } else {
5494          $self->{set_nc}->($self);
5495        }
5496      
5497            redo A;
5498          } elsif ($self->{nc} == -1) {
5499            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed internal subset'); ## TODO: type
5500            delete $self->{in_subset};
5501            $self->{state} = DATA_STATE;
5502            $self->{s_kwd} = '';
5503            ## Reconsume.
5504            return  ({type => END_OF_DOCTYPE_TOKEN});
5505            redo A;
5506          } else {
5507            unless ($self->{internal_subset_tainted}) {
5508              ## XML5: No parse error.
5509              $self->{parse_error}->(level => $self->{level}->{must}, type => 'string in internal subset');
5510              $self->{internal_subset_tainted} = 1;
5511            }
5512            ## Stay in the state.
5513            
5514        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5515          $self->{line_prev} = $self->{line};
5516          $self->{column_prev} = $self->{column};
5517          $self->{column}++;
5518          $self->{nc}
5519              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5520        } else {
5521          $self->{set_nc}->($self);
5522        }
5523      
5524            redo A;
5525          }
5526        } elsif ($self->{state} == DOCTYPE_INTERNAL_SUBSET_AFTER_STATE) {
5527          if ($self->{nc} == 0x003E) { # >
5528            $self->{state} = DATA_STATE;
5529            $self->{s_kwd} = '';
5530            
5531        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5532          $self->{line_prev} = $self->{line};
5533          $self->{column_prev} = $self->{column};
5534          $self->{column}++;
5535          $self->{nc}
5536              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5537        } else {
5538          $self->{set_nc}->($self);
5539        }
5540      
5541            return  ({type => END_OF_DOCTYPE_TOKEN});
5542            redo A;
5543          } elsif ($self->{nc} == -1) {
5544            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
5545            $self->{state} = DATA_STATE;
5546            $self->{s_kwd} = '';
5547            ## Reconsume.
5548            return  ({type => END_OF_DOCTYPE_TOKEN});
5549            redo A;
5550          } else {
5551            ## XML5: No parse error and stay in the state.
5552            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after internal subset'); ## TODO: type
5553    
5554            $self->{state} = BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE;
5555            
5556        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5557          $self->{line_prev} = $self->{line};
5558          $self->{column_prev} = $self->{column};
5559          $self->{column}++;
5560          $self->{nc}
5561              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5562        } else {
5563          $self->{set_nc}->($self);
5564        }
5565      
5566            redo A;
5567          }
5568        } elsif ($self->{state} == BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE) {
5569          if ($self->{nc} == 0x003E) { # >
5570            $self->{state} = DATA_STATE;
5571            $self->{s_kwd} = '';
5572            
5573        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5574          $self->{line_prev} = $self->{line};
5575          $self->{column_prev} = $self->{column};
5576          $self->{column}++;
5577          $self->{nc}
5578              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5579        } else {
5580          $self->{set_nc}->($self);
5581        }
5582      
5583            return  ({type => END_OF_DOCTYPE_TOKEN});
5584            redo A;
5585          } elsif ($self->{nc} == -1) {
5586            $self->{state} = DATA_STATE;
5587            $self->{s_kwd} = '';
5588            ## Reconsume.
5589            return  ({type => END_OF_DOCTYPE_TOKEN});
5590            redo A;
5591          } else {
5592            ## Stay in the state.
5593            
5594        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5595          $self->{line_prev} = $self->{line};
5596          $self->{column_prev} = $self->{column};
5597          $self->{column}++;
5598          $self->{nc}
5599              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5600        } else {
5601          $self->{set_nc}->($self);
5602        }
5603      
5604            redo A;
5605          }
5606        } elsif ($self->{state} == DOCTYPE_TAG_STATE) {
5607          if ($self->{nc} == 0x0021) { # !
5608            $self->{state} = DOCTYPE_MARKUP_DECLARATION_OPEN_STATE;
5609            
5610        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5611          $self->{line_prev} = $self->{line};
5612          $self->{column_prev} = $self->{column};
5613          $self->{column}++;
5614          $self->{nc}
5615              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5616        } else {
5617          $self->{set_nc}->($self);
5618        }
5619      
5620            redo A;
5621          } elsif ($self->{nc} == 0x003F) { # ?
5622            $self->{state} = PI_STATE;
5623            
5624        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5625          $self->{line_prev} = $self->{line};
5626          $self->{column_prev} = $self->{column};
5627          $self->{column}++;
5628          $self->{nc}
5629              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5630        } else {
5631          $self->{set_nc}->($self);
5632        }
5633      
5634            redo A;
5635          } elsif ($self->{nc} == -1) {
5636            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago');
5637            $self->{state} = DATA_STATE;
5638            $self->{s_kwd} = '';
5639            ## Reconsume.
5640            redo A;
5641          } else {
5642            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago', ## XML5: Not a parse error.
5643                            line => $self->{line_prev},
5644                            column => $self->{column_prev});
5645            $self->{state} = BOGUS_COMMENT_STATE;
5646            $self->{ct} = {type => COMMENT_TOKEN,
5647                           data => '',
5648                          }; ## NOTE: Will be discarded.
5649            
5650        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5651          $self->{line_prev} = $self->{line};
5652          $self->{column_prev} = $self->{column};
5653          $self->{column}++;
5654          $self->{nc}
5655              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5656        } else {
5657          $self->{set_nc}->($self);
5658        }
5659      
5660            redo A;
5661          }
5662        } elsif ($self->{state} == DOCTYPE_MARKUP_DECLARATION_OPEN_STATE) {
5663          ## XML5: "DOCTYPE markup declaration state".
5664          
5665          if ($self->{nc} == 0x002D) { # -
5666            $self->{state} = MD_HYPHEN_STATE;
5667            
5668        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5669          $self->{line_prev} = $self->{line};
5670          $self->{column_prev} = $self->{column};
5671          $self->{column}++;
5672          $self->{nc}
5673              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5674        } else {
5675          $self->{set_nc}->($self);
5676        }
5677      
5678            redo A;
5679          } elsif ($self->{nc} == 0x0045 or # E
5680                   $self->{nc} == 0x0065) { # e
5681            $self->{state} = MD_E_STATE;
5682            $self->{kwd} = chr $self->{nc};
5683            
5684        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5685          $self->{line_prev} = $self->{line};
5686          $self->{column_prev} = $self->{column};
5687          $self->{column}++;
5688          $self->{nc}
5689              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5690        } else {
5691          $self->{set_nc}->($self);
5692        }
5693      
5694            redo A;
5695          } elsif ($self->{nc} == 0x0041 or # A
5696                   $self->{nc} == 0x0061) { # a
5697            $self->{state} = MD_ATTLIST_STATE;
5698            $self->{kwd} = chr $self->{nc};
5699            
5700        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5701          $self->{line_prev} = $self->{line};
5702          $self->{column_prev} = $self->{column};
5703          $self->{column}++;
5704          $self->{nc}
5705              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5706        } else {
5707          $self->{set_nc}->($self);
5708        }
5709      
5710            redo A;
5711          } elsif ($self->{nc} == 0x004E or # N
5712                   $self->{nc} == 0x006E) { # n
5713            $self->{state} = MD_NOTATION_STATE;
5714            $self->{kwd} = chr $self->{nc};
5715            
5716        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5717          $self->{line_prev} = $self->{line};
5718          $self->{column_prev} = $self->{column};
5719          $self->{column}++;
5720          $self->{nc}
5721              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5722        } else {
5723          $self->{set_nc}->($self);
5724        }
5725      
5726            redo A;
5727          } else {
5728            #
5729          }
5730          
5731          ## XML5: No parse error.
5732          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5733                          line => $self->{line_prev},
5734                          column => $self->{column_prev} - 1);
5735          ## Reconsume.
5736          $self->{state} = BOGUS_COMMENT_STATE;
5737          $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded.
5738          redo A;
5739        } elsif ($self->{state} == MD_E_STATE) {
5740          if ($self->{nc} == 0x004E or # N
5741              $self->{nc} == 0x006E) { # n
5742            $self->{state} = MD_ENTITY_STATE;
5743            $self->{kwd} .= chr $self->{nc};
5744            
5745        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5746          $self->{line_prev} = $self->{line};
5747          $self->{column_prev} = $self->{column};
5748          $self->{column}++;
5749          $self->{nc}
5750              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5751        } else {
5752          $self->{set_nc}->($self);
5753        }
5754      
5755            redo A;
5756          } elsif ($self->{nc} == 0x004C or # L
5757                   $self->{nc} == 0x006C) { # l
5758            ## XML5: <!ELEMENT> not supported.
5759            $self->{state} = MD_ELEMENT_STATE;
5760            $self->{kwd} .= chr $self->{nc};
5761            
5762        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5763          $self->{line_prev} = $self->{line};
5764          $self->{column_prev} = $self->{column};
5765          $self->{column}++;
5766          $self->{nc}
5767              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5768        } else {
5769          $self->{set_nc}->($self);
5770        }
5771      
5772            redo A;
5773          } else {
5774            ## XML5: No parse error.
5775            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5776                            line => $self->{line_prev},
5777                            column => $self->{column_prev} - 2
5778                                + 1 * ($self->{nc} == -1));
5779            ## Reconsume.
5780            $self->{state} = BOGUS_COMMENT_STATE;
5781            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5782            redo A;
5783          }
5784        } elsif ($self->{state} == MD_ENTITY_STATE) {
5785          if ($self->{nc} == [
5786                undef,
5787                undef,
5788                0x0054, # T
5789                0x0049, # I
5790                0x0054, # T
5791              ]->[length $self->{kwd}] or
5792              $self->{nc} == [
5793                undef,
5794                undef,
5795                0x0074, # t
5796                0x0069, # i
5797                0x0074, # t
5798              ]->[length $self->{kwd}]) {
5799            ## Stay in the state.
5800            $self->{kwd} .= chr $self->{nc};
5801            
5802        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5803          $self->{line_prev} = $self->{line};
5804          $self->{column_prev} = $self->{column};
5805          $self->{column}++;
5806          $self->{nc}
5807              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5808        } else {
5809          $self->{set_nc}->($self);
5810        }
5811      
5812            redo A;
5813          } elsif ((length $self->{kwd}) == 5 and
5814                   ($self->{nc} == 0x0059 or # Y
5815                    $self->{nc} == 0x0079)) { # y
5816            if ($self->{kwd} ne 'ENTIT' or $self->{nc} == 0x0079) {
5817              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
5818                              text => 'ENTITY',
5819                              line => $self->{line_prev},
5820                              column => $self->{column_prev} - 4);
5821            }
5822            $self->{ct} = {type => GENERAL_ENTITY_TOKEN, name => '',
5823                           line => $self->{line_prev},
5824                           column => $self->{column_prev} - 6};
5825            $self->{state} = DOCTYPE_MD_STATE;
5826            
5827        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5828          $self->{line_prev} = $self->{line};
5829          $self->{column_prev} = $self->{column};
5830          $self->{column}++;
5831          $self->{nc}
5832              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5833        } else {
5834          $self->{set_nc}->($self);
5835        }
5836      
5837            redo A;
5838          } else {
5839            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5840                            line => $self->{line_prev},
5841                            column => $self->{column_prev} - 1
5842                                - (length $self->{kwd})
5843                                + 1 * ($self->{nc} == -1));
5844            $self->{state} = BOGUS_COMMENT_STATE;
5845            ## Reconsume.
5846            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5847            redo A;
5848          }
5849        } elsif ($self->{state} == MD_ELEMENT_STATE) { ## Matches the rest of "<!ELEMENT"; {kwd} holds the keyword letters consumed so far.
5850          if ($self->{nc} == [
5851               undef,
5852               undef,
5853               0x0045, # E
5854               0x004D, # M
5855               0x0045, # E
5856               0x004E, # N
5857              ]->[length $self->{kwd}] or
5858              $self->{nc} == [
5859               undef,
5860               undef,
5861               0x0065, # e
5862               0x006D, # m
5863               0x0065, # e
5864               0x006E, # n
5865              ]->[length $self->{kwd}]) {
5866            ## Stay in the state.
5867            $self->{kwd} .= chr $self->{nc};
5868            
5869        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5870          $self->{line_prev} = $self->{line};
5871          $self->{column_prev} = $self->{column};
5872          $self->{column}++;
5873          $self->{nc}
5874              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5875        } else {
5876          $self->{set_nc}->($self);
5877        }
5878      
5879            redo A;
5880          } elsif ((length $self->{kwd}) == 6 and
5881                   ($self->{nc} == 0x0054 or # T
5882                    $self->{nc} == 0x0074)) { # t
5883            if ($self->{kwd} ne 'ELEMEN' or $self->{nc} == 0x0074) { ## Any lowercase letter in the keyword is reported.
5884              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
5885                              text => 'ELEMENT',
5886                              line => $self->{line_prev},
5887                              column => $self->{column_prev} - 5); ## Column of the keyword's first letter.
5888            }
5889            $self->{ct} = {type => ELEMENT_TOKEN, name => '',
5890                           line => $self->{line_prev},
5891                           column => $self->{column_prev} - 7}; ## Column of the "<" of "<!ELEMENT": {column_prev} is the "N", seven characters after "<".
5892            $self->{state} = DOCTYPE_MD_STATE;
5893            
5894        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5895          $self->{line_prev} = $self->{line};
5896          $self->{column_prev} = $self->{column};
5897          $self->{column}++;
5898          $self->{nc}
5899              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5900        } else {
5901          $self->{set_nc}->($self);
5902        }
5903      
5904            redo A;
5905          } else {
5906            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5907                            line => $self->{line_prev},
5908                            column => $self->{column_prev} - 1
5909                                - (length $self->{kwd})
5910                                + 1 * ($self->{nc} == -1));
5911            $self->{state} = BOGUS_COMMENT_STATE;
5912            ## Reconsume.
5913            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5914            redo A;
5915          }
5916        } elsif ($self->{state} == MD_ATTLIST_STATE) { ## Matches the rest of "<!ATTLIST"; {kwd} holds the keyword letters consumed so far.
5917          if ($self->{nc} == [
5918               undef,
5919               0x0054, # T
5920               0x0054, # T
5921               0x004C, # L
5922               0x0049, # I
5923               0x0053, # S
5924              ]->[length $self->{kwd}] or
5925              $self->{nc} == [
5926               undef,
5927               0x0074, # t
5928               0x0074, # t
5929               0x006C, # l
5930               0x0069, # i
5931               0x0073, # s
5932              ]->[length $self->{kwd}]) {
5933            ## Stay in the state.
5934            $self->{kwd} .= chr $self->{nc};
5935            
5936        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5937          $self->{line_prev} = $self->{line};
5938          $self->{column_prev} = $self->{column};
5939          $self->{column}++;
5940          $self->{nc}
5941              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5942        } else {
5943          $self->{set_nc}->($self);
5944        }
5945      
5946            redo A;
5947          } elsif ((length $self->{kwd}) == 6 and
5948                   ($self->{nc} == 0x0054 or # T
5949                    $self->{nc} == 0x0074)) { # t
5950            if ($self->{kwd} ne 'ATTLIS' or $self->{nc} == 0x0074) { ## Any lowercase letter in the keyword is reported.
5951              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
5952                              text => 'ATTLIST',
5953                              line => $self->{line_prev},
5954                              column => $self->{column_prev} - 5); ## Column of the keyword's first letter.
5955            }
5956            $self->{ct} = {type => ATTLIST_TOKEN, name => '',
5957                           attrdefs => [],
5958                           line => $self->{line_prev},
5959                           column => $self->{column_prev} - 7}; ## Column of the "<" of "<!ATTLIST": {column_prev} is the "S", seven characters after "<".
5960            $self->{state} = DOCTYPE_MD_STATE;
5961            
5962        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5963          $self->{line_prev} = $self->{line};
5964          $self->{column_prev} = $self->{column};
5965          $self->{column}++;
5966          $self->{nc}
5967              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5968        } else {
5969          $self->{set_nc}->($self);
5970        }
5971      
5972            redo A;
5973          } else {
5974            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5975                            line => $self->{line_prev},
5976                            column => $self->{column_prev} - 1
5977                                 - (length $self->{kwd})
5978                                 + 1 * ($self->{nc} == -1));
5979            $self->{state} = BOGUS_COMMENT_STATE;
5980            ## Reconsume.
5981            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5982            redo A;
5983          }
5984        } elsif ($self->{state} == MD_NOTATION_STATE) { ## Matches the rest of "<!NOTATION"; {kwd} holds the keyword letters consumed so far.
5985          if ($self->{nc} == [
5986               undef,
5987               0x004F, # O
5988               0x0054, # T
5989               0x0041, # A
5990               0x0054, # T
5991               0x0049, # I
5992               0x004F, # O
5993              ]->[length $self->{kwd}] or
5994              $self->{nc} == [
5995               undef,
5996               0x006F, # o
5997               0x0074, # t
5998               0x0061, # a
5999               0x0074, # t
6000               0x0069, # i
6001               0x006F, # o
6002              ]->[length $self->{kwd}]) {
6003            ## Stay in the state.
6004            $self->{kwd} .= chr $self->{nc};
6005            
6006        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6007          $self->{line_prev} = $self->{line};
6008          $self->{column_prev} = $self->{column};
6009          $self->{column}++;
6010          $self->{nc}
6011              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6012        } else {
6013          $self->{set_nc}->($self);
6014        }
6015      
6016            redo A;
6017          } elsif ((length $self->{kwd}) == 7 and
6018                   ($self->{nc} == 0x004E or # N
6019                    $self->{nc} == 0x006E)) { # n
6020            if ($self->{kwd} ne 'NOTATIO' or $self->{nc} == 0x006E) { ## Any lowercase letter in the keyword is reported.
6021              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
6022                              text => 'NOTATION',
6023                              line => $self->{line_prev},
6024                              column => $self->{column_prev} - 6); ## Column of the keyword's first letter.
6025            }
6026            $self->{ct} = {type => NOTATION_TOKEN, name => '',
6027                           line => $self->{line_prev},
6028                           column => $self->{column_prev} - 8}; ## Column of the "<" of "<!NOTATION": {column_prev} is the last "O", eight characters after "<".
6029            $self->{state} = DOCTYPE_MD_STATE;
6030            
6031        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6032          $self->{line_prev} = $self->{line};
6033          $self->{column_prev} = $self->{column};
6034          $self->{column}++;
6035          $self->{nc}
6036              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6037        } else {
6038          $self->{set_nc}->($self);
6039        }
6040      
6041            redo A;
6042          } else {
6043            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
6044                            line => $self->{line_prev},
6045                            column => $self->{column_prev} - 1
6046                                - (length $self->{kwd})
6047                                + 1 * ($self->{nc} == -1));
6048            $self->{state} = BOGUS_COMMENT_STATE;
6049            ## Reconsume.
6050            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
6051            redo A;
6052          }
6053        } elsif ($self->{state} == DOCTYPE_MD_STATE) {
6054          ## XML5: "DOCTYPE ENTITY state", "DOCTYPE ATTLIST state", and
6055          ## "DOCTYPE NOTATION state".
6056    
6057          if ($is_space->{$self->{nc}}) {
6058            ## XML5: [NOTATION] Switch to the "DOCTYPE NOTATION identifier state".
6059            $self->{state} = BEFORE_MD_NAME_STATE;
6060            
6061        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6062          $self->{line_prev} = $self->{line};
6063          $self->{column_prev} = $self->{column};
6064          $self->{column}++;
6065          $self->{nc}
6066              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6067        } else {
6068          $self->{set_nc}->($self);
6069        }
6070      
6071            redo A;
6072          } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
6073                   $self->{nc} == 0x0025) { # %
6074            ## XML5: Switch to the "DOCTYPE bogus comment state".
6075            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before md name'); ## TODO: type
6076            $self->{state} = DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE;
6077            
6078        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6079          $self->{line_prev} = $self->{line};
6080          $self->{column_prev} = $self->{column};
6081          $self->{column}++;
6082          $self->{nc}
6083              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6084        } else {
6085          $self->{set_nc}->($self);
6086        }
6087      
6088            redo A;
6089          } elsif ($self->{nc} == -1) {
6090            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6091            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6092            ## Reconsume.
6093            redo A;
6094          } elsif ($self->{nc} == 0x003E) { # >
6095            ## XML5: Switch to the "DOCTYPE bogus comment state".
6096            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md name'); ## TODO: type
6097            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6098            
6099        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6100          $self->{line_prev} = $self->{line};
6101          $self->{column_prev} = $self->{column};
6102          $self->{column}++;
6103          $self->{nc}
6104              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6105        } else {
6106          $self->{set_nc}->($self);
6107        }
6108      
6109            redo A;
6110          } else {
6111            ## XML5: Switch to the "DOCTYPE bogus comment state".
6112            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before md name'); ## TODO: type
6113            $self->{state} = BEFORE_MD_NAME_STATE;
6114            redo A;
6115          }
6116        } elsif ($self->{state} == BEFORE_MD_NAME_STATE) {
6117          ## XML5: "DOCTYPE ENTITY parameter state", "DOCTYPE ENTITY type
6118          ## before state", "DOCTYPE ATTLIST name before state".
6119    
6120          if ($is_space->{$self->{nc}}) {
6121            ## Stay in the state.
6122            
6123        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6124          $self->{line_prev} = $self->{line};
6125          $self->{column_prev} = $self->{column};
6126          $self->{column}++;
6127          $self->{nc}
6128              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6129        } else {
6130          $self->{set_nc}->($self);
6131        }
6132      
6133            redo A;
6134          } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
6135                   $self->{nc} == 0x0025) { # %
6136            $self->{state} = DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE;
6137            
6138        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6139          $self->{line_prev} = $self->{line};
6140          $self->{column_prev} = $self->{column};
6141          $self->{column}++;
6142          $self->{nc}
6143              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6144        } else {
6145          $self->{set_nc}->($self);
6146        }
6147      
6148            redo A;
6149          } elsif ($self->{nc} == 0x003E) { # >
6150            ## XML5: Same as "Anything else".
6151            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md name'); ## TODO: type
6152            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6153            
6154        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6155          $self->{line_prev} = $self->{line};
6156          $self->{column_prev} = $self->{column};
6157          $self->{column}++;
6158          $self->{nc}
6159              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6160        } else {
6161          $self->{set_nc}->($self);
6162        }
6163      
6164            redo A;
6165          } elsif ($self->{nc} == -1) {
6166            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6167            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6168            ## Reconsume.
6169            redo A;
6170          } else {
6171            ## XML5: [ATTLIST] Not defined yet.
6172            $self->{ct}->{name} .= chr $self->{nc};
6173            $self->{state} = MD_NAME_STATE;
6174            
6175        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6176          $self->{line_prev} = $self->{line};
6177          $self->{column_prev} = $self->{column};
6178          $self->{column}++;
6179          $self->{nc}
6180              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6181        } else {
6182          $self->{set_nc}->($self);
6183        }
6184      
6185            redo A;
6186          }
6187        } elsif ($self->{state} == DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE) {
6188          if ($is_space->{$self->{nc}}) {
6189            ## XML5: Switch to the "DOCTYPE ENTITY parameter state".
6190            $self->{ct}->{type} = PARAMETER_ENTITY_TOKEN;
6191            $self->{state} = BEFORE_MD_NAME_STATE;
6192            
6193        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6194          $self->{line_prev} = $self->{line};
6195          $self->{column_prev} = $self->{column};
6196          $self->{column}++;
6197          $self->{nc}
6198              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6199        } else {
6200          $self->{set_nc}->($self);
6201        }
6202      
6203            redo A;
6204          } elsif ($self->{nc} == 0x003E) { # >
6205            ## XML5: Same as "Anything else".
6206            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md name'); ## TODO: type
6207            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6208            
6209        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6210          $self->{line_prev} = $self->{line};
6211          $self->{column_prev} = $self->{column};
6212          $self->{column}++;
6213          $self->{nc}
6214              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6215        } else {
6216          $self->{set_nc}->($self);
6217        }
6218      
6219            redo A;
6220          } elsif ($self->{nc} == -1) {
6221            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md');
6222            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6223            ## Reconsume.
6224            redo A;
6225          } else {
6226            ## XML5: No parse error.
6227            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space after ENTITY percent'); ## TODO: type
6228            $self->{state} = BOGUS_COMMENT_STATE;
6229            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
6230            ## Reconsume.
6231            redo A;
6232          }
6233        } elsif ($self->{state} == MD_NAME_STATE) {
6234          ## XML5: "DOCTYPE ENTITY name state" and "DOCTYPE ATTLIST name state".
6235          
6236          if ($is_space->{$self->{nc}}) {
6237            if ($self->{ct}->{type} == ATTLIST_TOKEN) {
6238              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
6239            } elsif ($self->{ct}->{type} == ELEMENT_TOKEN) {
6240              $self->{state} = AFTER_ELEMENT_NAME_STATE;
6241            } else { # ENTITY/NOTATION
6242              $self->{state} = AFTER_DOCTYPE_NAME_STATE;
6243            }
6244            
6245        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6246          $self->{line_prev} = $self->{line};
6247          $self->{column_prev} = $self->{column};
6248          $self->{column}++;
6249          $self->{nc}
6250              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6251        } else {
6252          $self->{set_nc}->($self);
6253        }
6254      
6255            redo A;
6256          } elsif ($self->{nc} == 0x003E) { # >
6257            if ($self->{ct}->{type} == ATTLIST_TOKEN) {
6258              #
6259            } else {
6260              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md def'); ## TODO: type
6261            }
6262            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6263            
6264        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6265          $self->{line_prev} = $self->{line};
6266          $self->{column_prev} = $self->{column};
6267          $self->{column}++;
6268          $self->{nc}
6269              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6270        } else {
6271          $self->{set_nc}->($self);
6272        }
6273      
6274            return  ($self->{ct}); # ELEMENT/ENTITY/ATTLIST/NOTATION
6275            redo A;
6276          } elsif ($self->{nc} == -1) {
6277            ## XML5: [ATTLIST] No parse error.
6278            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md');
6279            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6280            ## Reconsume.
6281            return  ($self->{ct}); # ELEMENT/ENTITY/ATTLIST/NOTATION
6282            redo A;
6283          } else {
6284            ## XML5: [ATTLIST] Not defined yet.
6285            $self->{ct}->{name} .= chr $self->{nc};
6286            ## Stay in the state.
6287            
6288        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6289          $self->{line_prev} = $self->{line};
6290          $self->{column_prev} = $self->{column};
6291          $self->{column}++;
6292          $self->{nc}
6293              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6294        } else {
6295          $self->{set_nc}->($self);
6296        }
6297      
6298            redo A;
6299          }
6300        } elsif ($self->{state} == DOCTYPE_ATTLIST_NAME_AFTER_STATE) {
6301          if ($is_space->{$self->{nc}}) {
6302            ## Stay in the state.
6303            
6304        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6305          $self->{line_prev} = $self->{line};
6306          $self->{column_prev} = $self->{column};
6307          $self->{column}++;
6308          $self->{nc}
6309              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6310        } else {
6311          $self->{set_nc}->($self);
6312        }
6313      
6314            redo A;
6315          } elsif ($self->{nc} == 0x003E) { # >
6316            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6317            
6318        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6319          $self->{line_prev} = $self->{line};
6320          $self->{column_prev} = $self->{column};
6321          $self->{column}++;
6322          $self->{nc}
6323              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6324        } else {
6325          $self->{set_nc}->($self);
6326        }
6327      
6328            return  ($self->{ct}); # ATTLIST
6329            redo A;
6330          } elsif ($self->{nc} == -1) {
6331            ## XML5: No parse error.
6332            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6333            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6334            return  ($self->{ct});
6335            redo A;
6336          } else {
6337            ## XML5: Not defined yet.
6338            $self->{ca} = {name => chr ($self->{nc}), # attrdef
6339                           tokens => [],
6340                           line => $self->{line}, column => $self->{column}};
6341            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE;
6342            
6343        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6344          $self->{line_prev} = $self->{line};
6345          $self->{column_prev} = $self->{column};
6346          $self->{column}++;
6347          $self->{nc}
6348              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6349        } else {
6350          $self->{set_nc}->($self);
6351        }
6352      
6353            redo A;
6354          }
6355        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE) {
6356          if ($is_space->{$self->{nc}}) {
6357            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE;
6358            
6359        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6360          $self->{line_prev} = $self->{line};
6361          $self->{column_prev} = $self->{column};
6362          $self->{column}++;
6363          $self->{nc}
6364              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6365        } else {
6366          $self->{set_nc}->($self);
6367        }
6368      
6369            redo A;
6370          } elsif ($self->{nc} == 0x003E) { # >
6371            ## XML5: Same as "anything else".
6372            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr type'); ## TODO: type
6373            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6374            
6375        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6376          $self->{line_prev} = $self->{line};
6377          $self->{column_prev} = $self->{column};
6378          $self->{column}++;
6379          $self->{nc}
6380              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6381        } else {
6382          $self->{set_nc}->($self);
6383        }
6384      
6385            return  ($self->{ct}); # ATTLIST
6386            redo A;
6387          } elsif ($self->{nc} == 0x0028) { # (
6388            ## XML5: Same as "anything else".
6389            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before paren'); ## TODO: type
6390            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6391            
6392        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6393          $self->{line_prev} = $self->{line};
6394          $self->{column_prev} = $self->{column};
6395          $self->{column}++;
6396          $self->{nc}
6397              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6398        } else {
6399          $self->{set_nc}->($self);
6400        }
6401      
6402            redo A;
6403          } elsif ($self->{nc} == -1) {
6404            ## XML5: No parse error.
6405            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6406            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6407            
6408        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6409          $self->{line_prev} = $self->{line};
6410          $self->{column_prev} = $self->{column};
6411          $self->{column}++;
6412          $self->{nc}
6413              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6414        } else {
6415          $self->{set_nc}->($self);
6416        }
6417      
6418            return  ($self->{ct}); # ATTLIST
6419            redo A;
6420          } else {
6421            ## XML5: Not defined yet.
6422            $self->{ca}->{name} .= chr $self->{nc};
6423            ## Stay in the state.
6424            
6425        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6426          $self->{line_prev} = $self->{line};
6427          $self->{column_prev} = $self->{column};
6428          $self->{column}++;
6429          $self->{nc}
6430              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6431        } else {
6432          $self->{set_nc}->($self);
6433        }
6434      
6435            redo A;
6436          }
6437        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE) {
6438          if ($is_space->{$self->{nc}}) {
6439            ## Stay in the state.
6440            
6441        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6442          $self->{line_prev} = $self->{line};
6443          $self->{column_prev} = $self->{column};
6444          $self->{column}++;
6445          $self->{nc}
6446              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6447        } else {
6448          $self->{set_nc}->($self);
6449        }
6450      
6451            redo A;
6452          } elsif ($self->{nc} == 0x003E) { # >
6453            ## XML5: Same as "anything else".
6454            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr type'); ## TODO: type
6455            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6456            
6457        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6458          $self->{line_prev} = $self->{line};
6459          $self->{column_prev} = $self->{column};
6460          $self->{column}++;
6461          $self->{nc}
6462              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6463        } else {
6464          $self->{set_nc}->($self);
6465        }
6466      
6467            return  ($self->{ct}); # ATTLIST
6468            redo A;
6469          } elsif ($self->{nc} == 0x0028) { # (
6470            ## XML5: Same as "anything else".
6471            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6472            
6473        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6474          $self->{line_prev} = $self->{line};
6475          $self->{column_prev} = $self->{column};
6476          $self->{column}++;
6477          $self->{nc}
6478              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6479        } else {
6480          $self->{set_nc}->($self);
6481        }
6482      
6483            redo A;
6484          } elsif ($self->{nc} == -1) {
6485            ## XML5: No parse error.
6486            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6487            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6488            
6489        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6490          $self->{line_prev} = $self->{line};
6491          $self->{column_prev} = $self->{column};
6492          $self->{column}++;
6493          $self->{nc}
6494              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6495        } else {
6496          $self->{set_nc}->($self);
6497        }
6498      
6499            return  ($self->{ct});
6500            redo A;
6501          } else {
6502            ## XML5: Not defined yet.
6503            $self->{ca}->{type} = chr $self->{nc};
6504            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE;
6505            
6506        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6507          $self->{line_prev} = $self->{line};
6508          $self->{column_prev} = $self->{column};
6509          $self->{column}++;
6510          $self->{nc}
6511              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6512        } else {
6513          $self->{set_nc}->($self);
6514        }
6515      
6516            redo A;
6517          }
6518        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE) {
6519          if ($is_space->{$self->{nc}}) {
6520            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE;
6521            
6522        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6523          $self->{line_prev} = $self->{line};
6524          $self->{column_prev} = $self->{column};
6525          $self->{column}++;
6526          $self->{nc}
6527              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6528        } else {
6529          $self->{set_nc}->($self);
6530        }
6531      
6532            redo A;
6533          } elsif ($self->{nc} == 0x0023) { # #
6534            ## XML5: Same as "anything else".
6535            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6536            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6537            
6538        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6539          $self->{line_prev} = $self->{line};
6540          $self->{column_prev} = $self->{column};
6541          $self->{column}++;
6542          $self->{nc}
6543              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6544        } else {
6545          $self->{set_nc}->($self);
6546        }
6547      
6548            redo A;
6549          } elsif ($self->{nc} == 0x0022) { # "
6550            ## XML5: Same as "anything else".
6551            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6552            $self->{ca}->{value} = '';
6553            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6554            
6555        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6556          $self->{line_prev} = $self->{line};
6557          $self->{column_prev} = $self->{column};
6558          $self->{column}++;
6559          $self->{nc}
6560              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6561        } else {
6562          $self->{set_nc}->($self);
6563        }
6564      
6565            redo A;
6566          } elsif ($self->{nc} == 0x0027) { # '
6567            ## XML5: Same as "anything else".
6568            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6569            $self->{ca}->{value} = '';
6570            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6571            
6572        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6573          $self->{line_prev} = $self->{line};
6574          $self->{column_prev} = $self->{column};
6575          $self->{column}++;
6576          $self->{nc}
6577              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6578        } else {
6579          $self->{set_nc}->($self);
6580        }
6581      
6582            redo A;
6583          } elsif ($self->{nc} == 0x003E) { # >
6584            ## XML5: Same as "anything else".
6585            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6586            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6587            
6588        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6589          $self->{line_prev} = $self->{line};
6590          $self->{column_prev} = $self->{column};
6591          $self->{column}++;
6592          $self->{nc}
6593              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6594        } else {
6595          $self->{set_nc}->($self);
6596        }
6597      
6598            return  ($self->{ct}); # ATTLIST
6599            redo A;
6600          } elsif ($self->{nc} == 0x0028) { # (
6601            ## XML5: Same as "anything else".
6602            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before paren'); ## TODO: type
6603            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6604            
6605        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6606          $self->{line_prev} = $self->{line};
6607          $self->{column_prev} = $self->{column};
6608          $self->{column}++;
6609          $self->{nc}
6610              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6611        } else {
6612          $self->{set_nc}->($self);
6613        }
6614      
6615            redo A;
6616          } elsif ($self->{nc} == -1) {
6617            ## XML5: No parse error.
6618            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6619            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6620            
6621        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6622          $self->{line_prev} = $self->{line};
6623          $self->{column_prev} = $self->{column};
6624          $self->{column}++;
6625          $self->{nc}
6626              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6627        } else {
6628          $self->{set_nc}->($self);
6629        }
6630      
6631            return  ($self->{ct});
6632            redo A;
6633          } else {
6634            ## XML5: Not defined yet.
6635            $self->{ca}->{type} .= chr $self->{nc};
6636            ## Stay in the state.
6637            
6638        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6639          $self->{line_prev} = $self->{line};
6640          $self->{column_prev} = $self->{column};
6641          $self->{column}++;
6642          $self->{nc}
6643              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6644        } else {
6645          $self->{set_nc}->($self);
6646        }
6647      
6648            redo A;
6649          }
6650        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE) {
6651          if ($is_space->{$self->{nc}}) {
6652            ## Stay in the state.
6653            
6654        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6655          $self->{line_prev} = $self->{line};
6656          $self->{column_prev} = $self->{column};
6657          $self->{column}++;
6658          $self->{nc}
6659              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6660        } else {
6661          $self->{set_nc}->($self);
6662        }
6663      
6664            redo A;
6665          } elsif ($self->{nc} == 0x0028) { # (
6666            ## XML5: Same as "anything else".
6667            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6668            
6669        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6670          $self->{line_prev} = $self->{line};
6671          $self->{column_prev} = $self->{column};
6672          $self->{column}++;
6673          $self->{nc}
6674              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6675        } else {
6676          $self->{set_nc}->($self);
6677        }
6678      
6679            redo A;
6680          } elsif ($self->{nc} == 0x0023) { # #
6681            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6682            
6683        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6684          $self->{line_prev} = $self->{line};
6685          $self->{column_prev} = $self->{column};
6686          $self->{column}++;
6687          $self->{nc}
6688              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6689        } else {
6690          $self->{set_nc}->($self);
6691        }
6692      
6693            redo A;
6694          } elsif ($self->{nc} == 0x0022) { # "
6695            ## XML5: Same as "anything else".
6696            $self->{ca}->{value} = '';
6697            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6698            
6699        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6700          $self->{line_prev} = $self->{line};
6701          $self->{column_prev} = $self->{column};
6702          $self->{column}++;
6703          $self->{nc}
6704              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6705        } else {
6706          $self->{set_nc}->($self);
6707        }
6708      
6709            redo A;
6710          } elsif ($self->{nc} == 0x0027) { # '
6711            ## XML5: Same as "anything else".
6712            $self->{ca}->{value} = '';
6713            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6714            
6715        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6716          $self->{line_prev} = $self->{line};
6717          $self->{column_prev} = $self->{column};
6718          $self->{column}++;
6719          $self->{nc}
6720              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6721        } else {
6722          $self->{set_nc}->($self);
6723        }
6724      
6725            redo A;
6726          } elsif ($self->{nc} == 0x003E) { # >
6727            ## XML5: Same as "anything else".
6728            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6729            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6730            
6731        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6732          $self->{line_prev} = $self->{line};
6733          $self->{column_prev} = $self->{column};
6734          $self->{column}++;
6735          $self->{nc}
6736              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6737        } else {
6738          $self->{set_nc}->($self);
6739        }
6740      
6741            return  ($self->{ct}); # ATTLIST
6742            redo A;
6743          } elsif ($self->{nc} == -1) {
6744            ## XML5: No parse error.
6745            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6746            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6747            
6748        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6749          $self->{line_prev} = $self->{line};
6750          $self->{column_prev} = $self->{column};
6751          $self->{column}++;
6752          $self->{nc}
6753              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6754        } else {
6755          $self->{set_nc}->($self);
6756        }
6757      
6758            return  ($self->{ct});
6759            redo A;
6760          } else {
6761            ## XML5: Switch to the "DOCTYPE bogus comment state".
6762            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
6763            $self->{ca}->{value} = '';
6764            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
6765            ## Reconsume.
6766            redo A;
6767          }
6768        } elsif ($self->{state} == BEFORE_ALLOWED_TOKEN_STATE) {
6769          if ($is_space->{$self->{nc}}) {
6770            ## Stay in the state.
6771            
6772        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6773          $self->{line_prev} = $self->{line};
6774          $self->{column_prev} = $self->{column};
6775          $self->{column}++;
6776          $self->{nc}
6777              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6778        } else {
6779          $self->{set_nc}->($self);
6780        }
6781      
6782            redo A;
6783          } elsif ($self->{nc} == 0x007C) { # |
6784            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty allowed token'); ## TODO: type
6785            ## Stay in the state.
6786            
6787        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6788          $self->{line_prev} = $self->{line};
6789          $self->{column_prev} = $self->{column};
6790          $self->{column}++;
6791          $self->{nc}
6792              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6793        } else {
6794          $self->{set_nc}->($self);
6795        }
6796      
6797            redo A;
6798          } elsif ($self->{nc} == 0x0029) { # )
6799            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty allowed token'); ## TODO: type
6800            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
6801            
6802        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6803          $self->{line_prev} = $self->{line};
6804          $self->{column_prev} = $self->{column};
6805          $self->{column}++;
6806          $self->{nc}
6807              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6808        } else {
6809          $self->{set_nc}->($self);
6810        }
6811      
6812            redo A;
6813          } elsif ($self->{nc} == 0x003E) { # >
6814            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
6815            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6816            
6817        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6818          $self->{line_prev} = $self->{line};
6819          $self->{column_prev} = $self->{column};
6820          $self->{column}++;
6821          $self->{nc}
6822              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6823        } else {
6824          $self->{set_nc}->($self);
6825        }
6826      
6827            return  ($self->{ct}); # ATTLIST
6828            redo A;
6829          } elsif ($self->{nc} == -1) {
6830            ## XML5: No parse error.
6831            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6832            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6833            
6834        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6835          $self->{line_prev} = $self->{line};
6836          $self->{column_prev} = $self->{column};
6837          $self->{column}++;
6838          $self->{nc}
6839              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6840        } else {
6841          $self->{set_nc}->($self);
6842        }
6843      
6844            return  ($self->{ct});
6845            redo A;
6846          } else {
6847            push @{$self->{ca}->{tokens}}, chr $self->{nc};
6848            $self->{state} = ALLOWED_TOKEN_STATE;
6849            
6850        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6851          $self->{line_prev} = $self->{line};
6852          $self->{column_prev} = $self->{column};
6853          $self->{column}++;
6854          $self->{nc}
6855              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6856        } else {
6857          $self->{set_nc}->($self);
6858        }
6859      
6860            redo A;
6861          }
6862        } elsif ($self->{state} == ALLOWED_TOKEN_STATE) {
6863          if ($is_space->{$self->{nc}}) {
6864            $self->{state} = AFTER_ALLOWED_TOKEN_STATE;
6865            
6866        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6867          $self->{line_prev} = $self->{line};
6868          $self->{column_prev} = $self->{column};
6869          $self->{column}++;
6870          $self->{nc}
6871              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6872        } else {
6873          $self->{set_nc}->($self);
6874        }
6875      
6876            redo A;
6877          } elsif ($self->{nc} == 0x007C) { # |
6878            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6879            
6880        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6881          $self->{line_prev} = $self->{line};
6882          $self->{column_prev} = $self->{column};
6883          $self->{column}++;
6884          $self->{nc}
6885              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6886        } else {
6887          $self->{set_nc}->($self);
6888        }
6889      
6890            redo A;
6891          } elsif ($self->{nc} == 0x0029) { # )
6892            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
6893            
6894        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6895          $self->{line_prev} = $self->{line};
6896          $self->{column_prev} = $self->{column};
6897          $self->{column}++;
6898          $self->{nc}
6899              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6900        } else {
6901          $self->{set_nc}->($self);
6902        }
6903      
6904            redo A;
6905          } elsif ($self->{nc} == 0x003E) { # >
6906            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
6907            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6908            
6909        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6910          $self->{line_prev} = $self->{line};
6911          $self->{column_prev} = $self->{column};
6912          $self->{column}++;
6913          $self->{nc}
6914              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6915        } else {
6916          $self->{set_nc}->($self);
6917        }
6918      
6919            return  ($self->{ct}); # ATTLIST
6920            redo A;
6921          } elsif ($self->{nc} == -1) {
6922            ## XML5: No parse error.
6923            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6924            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6925            
6926        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6927          $self->{line_prev} = $self->{line};
6928          $self->{column_prev} = $self->{column};
6929          $self->{column}++;
6930          $self->{nc}
6931              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6932        } else {
6933          $self->{set_nc}->($self);
6934        }
6935      
6936            return  ($self->{ct});
6937            redo A;
6938          } else {
6939            $self->{ca}->{tokens}->[-1] .= chr $self->{nc};
6940            ## Stay in the state.
6941            
6942        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6943          $self->{line_prev} = $self->{line};
6944          $self->{column_prev} = $self->{column};
6945          $self->{column}++;
6946          $self->{nc}
6947              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6948        } else {
6949          $self->{set_nc}->($self);
6950        }
6951      
6952            redo A;
6953          }
6954        } elsif ($self->{state} == AFTER_ALLOWED_TOKEN_STATE) {
6955          if ($is_space->{$self->{nc}}) {
6956            ## Stay in the state.
6957            
6958        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6959          $self->{line_prev} = $self->{line};
6960          $self->{column_prev} = $self->{column};
6961          $self->{column}++;
6962          $self->{nc}
6963              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6964        } else {
6965          $self->{set_nc}->($self);
6966        }
6967      
6968            redo A;
6969          } elsif ($self->{nc} == 0x007C) { # |
6970            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6971            
6972        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6973          $self->{line_prev} = $self->{line};
6974          $self->{column_prev} = $self->{column};
6975          $self->{column}++;
6976          $self->{nc}
6977              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6978        } else {
6979          $self->{set_nc}->($self);
6980        }
6981      
6982            redo A;
6983          } elsif ($self->{nc} == 0x0029) { # )
6984            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
6985            
6986        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6987          $self->{line_prev} = $self->{line};
6988          $self->{column_prev} = $self->{column};
6989          $self->{column}++;
6990          $self->{nc}
6991              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6992        } else {
6993          $self->{set_nc}->($self);
6994        }
6995      
6996            redo A;
6997          } elsif ($self->{nc} == 0x003E) { # >
6998            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
6999            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7000            
7001        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7002          $self->{line_prev} = $self->{line};
7003          $self->{column_prev} = $self->{column};
7004          $self->{column}++;
7005          $self->{nc}
7006              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7007        } else {
7008          $self->{set_nc}->($self);
7009        }
7010      
7011            return  ($self->{ct}); # ATTLIST
7012            redo A;
7013          } elsif ($self->{nc} == -1) {
7014            ## XML5: No parse error.
7015            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7016            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7017            
7018        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7019          $self->{line_prev} = $self->{line};
7020          $self->{column_prev} = $self->{column};
7021          $self->{column}++;
7022          $self->{nc}
7023              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7024        } else {
7025          $self->{set_nc}->($self);
7026        }
7027      
7028            return  ($self->{ct});
7029            redo A;
7030          } else {
7031            $self->{parse_error}->(level => $self->{level}->{must}, type => 'space in allowed token', ## TODO: type
7032                            line => $self->{line_prev},
7033                            column => $self->{column_prev});
7034            $self->{ca}->{tokens}->[-1] .= ' ' . chr $self->{nc};
7035            $self->{state} = ALLOWED_TOKEN_STATE;
7036            
7037        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7038          $self->{line_prev} = $self->{line};
7039          $self->{column_prev} = $self->{column};
7040          $self->{column}++;
7041          $self->{nc}
7042              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7043        } else {
7044          $self->{set_nc}->($self);
7045        }
7046      
7047            redo A;
7048          }
7049        } elsif ($self->{state} == AFTER_ALLOWED_TOKENS_STATE) {
7050          if ($is_space->{$self->{nc}}) {
7051            $self->{state} = BEFORE_ATTR_DEFAULT_STATE;
7052            
7053        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7054          $self->{line_prev} = $self->{line};
7055          $self->{column_prev} = $self->{column};
7056          $self->{column}++;
7057          $self->{nc}
7058              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7059        } else {
7060          $self->{set_nc}->($self);
7061        }
7062      
7063            redo A;
7064          } elsif ($self->{nc} == 0x0023) { # #
7065            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7066            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
7067            
7068        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7069          $self->{line_prev} = $self->{line};
7070          $self->{column_prev} = $self->{column};
7071          $self->{column}++;
7072          $self->{nc}
7073              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7074        } else {
7075          $self->{set_nc}->($self);
7076        }
7077      
7078            redo A;
7079          } elsif ($self->{nc} == 0x0022) { # "
7080            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7081            $self->{ca}->{value} = '';
7082            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7083            
7084        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7085          $self->{line_prev} = $self->{line};
7086          $self->{column_prev} = $self->{column};
7087          $self->{column}++;
7088          $self->{nc}
7089              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7090        } else {
7091          $self->{set_nc}->($self);
7092        }
7093      
7094            redo A;
7095          } elsif ($self->{nc} == 0x0027) { # '
7096            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7097            $self->{ca}->{value} = '';
7098            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7099            
7100        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7101          $self->{line_prev} = $self->{line};
7102          $self->{column_prev} = $self->{column};
7103          $self->{column}++;
7104          $self->{nc}
7105              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7106        } else {
7107          $self->{set_nc}->($self);
7108        }
7109      
7110            redo A;
7111          } elsif ($self->{nc} == 0x003E) { # >
7112            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
7113            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7114            
7115        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7116          $self->{line_prev} = $self->{line};
7117          $self->{column_prev} = $self->{column};
7118          $self->{column}++;
7119          $self->{nc}
7120              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7121        } else {
7122          $self->{set_nc}->($self);
7123        }
7124      
7125            return  ($self->{ct}); # ATTLIST
7126            redo A;
7127          } elsif ($self->{nc} == -1) {
7128            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7129            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7130            
7131        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7132          $self->{line_prev} = $self->{line};
7133          $self->{column_prev} = $self->{column};
7134          $self->{column}++;
7135          $self->{nc}
7136              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7137        } else {
7138          $self->{set_nc}->($self);
7139        }
7140      
7141            return  ($self->{ct});
7142            redo A;
7143          } else {
7144            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
7145            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
7146            ## Reconsume.
7147            redo A;
7148          }
7149        } elsif ($self->{state} == BEFORE_ATTR_DEFAULT_STATE) {
7150          if ($is_space->{$self->{nc}}) {
7151            ## Stay in the state.
7152            
7153        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7154          $self->{line_prev} = $self->{line};
7155          $self->{column_prev} = $self->{column};
7156          $self->{column}++;
7157          $self->{nc}
7158              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7159        } else {
7160          $self->{set_nc}->($self);
7161        }
7162      
7163            redo A;
7164          } elsif ($self->{nc} == 0x0023) { # #
7165            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
7166            
7167        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7168          $self->{line_prev} = $self->{line};
7169          $self->{column_prev} = $self->{column};
7170          $self->{column}++;
7171          $self->{nc}
7172              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7173        } else {
7174          $self->{set_nc}->($self);
7175        }
7176      
7177            redo A;
7178          } elsif ($self->{nc} == 0x0022) { # "
7179            $self->{ca}->{value} = '';
7180            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7181            
7182        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7183          $self->{line_prev} = $self->{line};
7184          $self->{column_prev} = $self->{column};
7185          $self->{column}++;
7186          $self->{nc}
7187              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7188        } else {
7189          $self->{set_nc}->($self);
7190        }
7191      
7192            redo A;
7193          } elsif ($self->{nc} == 0x0027) { # '
7194            $self->{ca}->{value} = '';
7195            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7196            
7197        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7198          $self->{line_prev} = $self->{line};
7199          $self->{column_prev} = $self->{column};
7200          $self->{column}++;
7201          $self->{nc}
7202              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7203        } else {
7204          $self->{set_nc}->($self);
7205        }
7206      
7207            redo A;
7208          } elsif ($self->{nc} == 0x003E) { # >
7209            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
7210            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7211            
7212        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7213          $self->{line_prev} = $self->{line};
7214          $self->{column_prev} = $self->{column};
7215          $self->{column}++;
7216          $self->{nc}
7217              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7218        } else {
7219          $self->{set_nc}->($self);
7220        }
7221      
7222            return  ($self->{ct}); # ATTLIST
7223            redo A;
7224          } elsif ($self->{nc} == -1) {
7225            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7226            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7227            
7228        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7229          $self->{line_prev} = $self->{line};
7230          $self->{column_prev} = $self->{column};
7231          $self->{column}++;
7232          $self->{nc}
7233              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7234        } else {
7235          $self->{set_nc}->($self);
7236        }
7237      
7238            return  ($self->{ct});
7239            redo A;
7240          } else {
7241            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
7242            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
7243            ## Reconsume.
7244            redo A;
7245          }
7246        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE) {
7247          if ($is_space->{$self->{nc}}) {
7248            ## XML5: No parse error.
7249            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no default type'); ## TODO: type
7250            $self->{state} = BOGUS_MD_STATE;
7251            ## Reconsume.
7252            redo A;
7253          } elsif ($self->{nc} == 0x0022) { # "
7254            ## XML5: Same as "anything else".
7255            $self->{ca}->{value} = '';
7256            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7257            
7258        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7259          $self->{line_prev} = $self->{line};
7260          $self->{column_prev} = $self->{column};
7261          $self->{column}++;
7262          $self->{nc}
7263              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7264        } else {
7265          $self->{set_nc}->($self);
7266        }
7267      
7268            redo A;
7269          } elsif ($self->{nc} == 0x0027) { # '
7270            ## XML5: Same as "anything else".
7271            $self->{ca}->{value} = '';
7272            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7273            
7274        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7275          $self->{line_prev} = $self->{line};
7276          $self->{column_prev} = $self->{column};
7277          $self->{column}++;
7278          $self->{nc}
7279              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7280        } else {
7281          $self->{set_nc}->($self);
7282        }
7283      
7284            redo A;
7285          } elsif ($self->{nc} == 0x003E) { # >
7286            ## XML5: Same as "anything else".
7287            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
7288            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7289            
7290        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7291          $self->{line_prev} = $self->{line};
7292          $self->{column_prev} = $self->{column};
7293          $self->{column}++;
7294          $self->{nc}
7295              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7296        } else {
7297          $self->{set_nc}->($self);
7298        }
7299      
7300            return  ($self->{ct}); # ATTLIST
7301            redo A;
7302          } elsif ($self->{nc} == -1) {
7303            ## XML5: No parse error.
7304            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7305            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7306            
7307        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7308          $self->{line_prev} = $self->{line};
7309          $self->{column_prev} = $self->{column};
7310          $self->{column}++;
7311          $self->{nc}
7312              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7313        } else {
7314          $self->{set_nc}->($self);
7315        }
7316      
7317            return  ($self->{ct});
7318            redo A;
7319          } else {
7320            $self->{ca}->{default} = chr $self->{nc};
7321            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE;
7322            
7323        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7324          $self->{line_prev} = $self->{line};
7325          $self->{column_prev} = $self->{column};
7326          $self->{column}++;
7327          $self->{nc}
7328              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7329        } else {
7330          $self->{set_nc}->($self);
7331        }
7332      
7333            redo A;
7334          }
7335        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE) {
7336          if ($is_space->{$self->{nc}}) {
7337            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE;
7338            
7339        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7340          $self->{line_prev} = $self->{line};
7341          $self->{column_prev} = $self->{column};
7342          $self->{column}++;
7343          $self->{nc}
7344              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7345        } else {
7346          $self->{set_nc}->($self);
7347        }
7348      
7349            redo A;
7350          } elsif ($self->{nc} == 0x0022) { # "
7351            ## XML5: Same as "anything else".
7352            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7353            $self->{ca}->{value} = '';
7354            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7355            
7356        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7357          $self->{line_prev} = $self->{line};
7358          $self->{column_prev} = $self->{column};
7359          $self->{column}++;
7360          $self->{nc}
7361              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7362        } else {
7363          $self->{set_nc}->($self);
7364        }
7365      
7366            redo A;
7367          } elsif ($self->{nc} == 0x0027) { # '
7368            ## XML5: Same as "anything else".
7369            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7370            $self->{ca}->{value} = '';
7371            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7372            
7373        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7374          $self->{line_prev} = $self->{line};
7375          $self->{column_prev} = $self->{column};
7376          $self->{column}++;
7377          $self->{nc}
7378              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7379        } else {
7380          $self->{set_nc}->($self);
7381        }
7382      
7383            redo A;
7384          } elsif ($self->{nc} == 0x003E) { # >
7385            ## XML5: Same as "anything else".
7386            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7387            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7388            
7389        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7390          $self->{line_prev} = $self->{line};
7391          $self->{column_prev} = $self->{column};
7392          $self->{column}++;
7393          $self->{nc}
7394              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7395        } else {
7396          $self->{set_nc}->($self);
7397        }
7398      
7399            return  ($self->{ct}); # ATTLIST
7400            redo A;
7401          } elsif ($self->{nc} == -1) {
7402            ## XML5: No parse error.
7403            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7404            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7405            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7406            
7407        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7408          $self->{line_prev} = $self->{line};
7409          $self->{column_prev} = $self->{column};
7410          $self->{column}++;
7411          $self->{nc}
7412              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7413        } else {
7414          $self->{set_nc}->($self);
7415        }
7416      
7417            return  ($self->{ct});
7418            redo A;
7419          } else {
7420            $self->{ca}->{default} .= chr $self->{nc};
7421            ## Stay in the state.
7422            
7423        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7424          $self->{line_prev} = $self->{line};
7425          $self->{column_prev} = $self->{column};
7426          $self->{column}++;
7427          $self->{nc}
7428              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7429        } else {
7430          $self->{set_nc}->($self);
7431        }
7432      
7433            redo A;
7434          }
7435        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE) {
7436          if ($is_space->{$self->{nc}}) {
7437            ## Stay in the state.
7438            
7439        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7440          $self->{line_prev} = $self->{line};
7441          $self->{column_prev} = $self->{column};
7442          $self->{column}++;
7443          $self->{nc}
7444              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7445        } else {
7446          $self->{set_nc}->($self);
7447        }
7448      
7449            redo A;
7450          } elsif ($self->{nc} == 0x0022) { # "
7451            $self->{ca}->{value} = '';
7452            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7453            
7454        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7455          $self->{line_prev} = $self->{line};
7456          $self->{column_prev} = $self->{column};
7457          $self->{column}++;
7458          $self->{nc}
7459              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7460        } else {
7461          $self->{set_nc}->($self);
7462        }
7463      
7464            redo A;
7465          } elsif ($self->{nc} == 0x0027) { # '
7466            $self->{ca}->{value} = '';
7467            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7468            
7469        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7470          $self->{line_prev} = $self->{line};
7471          $self->{column_prev} = $self->{column};
7472          $self->{column}++;
7473          $self->{nc}
7474              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7475        } else {
7476          $self->{set_nc}->($self);
7477        }
7478      
7479            redo A;
7480          } elsif ($self->{nc} == 0x003E) { # >
7481            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7482            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7483            
7484        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7485          $self->{line_prev} = $self->{line};
7486          $self->{column_prev} = $self->{column};
7487          $self->{column}++;
7488          $self->{nc}
7489              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7490        } else {
7491          $self->{set_nc}->($self);
7492        }
7493      
7494            return  ($self->{ct}); # ATTLIST
7495            redo A;
7496          } elsif ($self->{nc} == -1) {
7497            ## XML5: No parse error.
7498            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7499            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7500            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7501            
7502        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7503          $self->{line_prev} = $self->{line};
7504          $self->{column_prev} = $self->{column};
7505          $self->{column}++;
7506          $self->{nc}
7507              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7508        } else {
7509          $self->{set_nc}->($self);
7510        }
7511      
7512            return  ($self->{ct});
7513            redo A;
7514          } else {
7515            ## XML5: Not defined yet.
7516            if ($self->{ca}->{default} eq 'FIXED') {
7517              $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
7518            } else {
7519              push @{$self->{ct}->{attrdefs}}, $self->{ca};
7520              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7521            }
7522            ## Reconsume.
7523            redo A;
7524          }
7525        } elsif ($self->{state} == AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE) {
7526          if ($is_space->{$self->{nc}} or
7527              $self->{nc} == -1 or
7528              $self->{nc} == 0x003E) { # >
7529            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7530            ## Reconsume.
7531            redo A;
7532          } else {
7533            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before attr name'); ## TODO: type
7534            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7535            ## Reconsume.
7536            redo A;
7537          }
7538        } elsif ($self->{state} == NDATA_STATE) {
7539          ## ASCII case-insensitive
7540          if ($self->{nc} == [
7541                undef,
7542                0x0044, # D
7543                0x0041, # A
7544                0x0054, # T
7545              ]->[length $self->{kwd}] or
7546              $self->{nc} == [
7547                undef,
7548                0x0064, # d
7549                0x0061, # a
7550                0x0074, # t
7551              ]->[length $self->{kwd}]) {
7552            
7553            ## Stay in the state.
7554            $self->{kwd} .= chr $self->{nc};
7555            
7556        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7557          $self->{line_prev} = $self->{line};
7558          $self->{column_prev} = $self->{column};
7559          $self->{column}++;
7560          $self->{nc}
7561              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7562        } else {
7563          $self->{set_nc}->($self);
7564        }
7565      
7566            redo A;
7567          } elsif ((length $self->{kwd}) == 4 and
7568                   ($self->{nc} == 0x0041 or # A
7569                    $self->{nc} == 0x0061)) { # a
7570            if ($self->{kwd} ne 'NDAT' or $self->{nc} == 0x0061) { # a
7571              
7572              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
7573                              text => 'NDATA',
7574                              line => $self->{line_prev},
7575                              column => $self->{column_prev} - 4);
7576            } else {
7577              
7578            }
7579            $self->{state} = AFTER_NDATA_STATE;
7580            
7581        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7582          $self->{line_prev} = $self->{line};
7583          $self->{column_prev} = $self->{column};
7584          $self->{column}++;
7585          $self->{nc}
7586              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7587        } else {
7588          $self->{set_nc}->($self);
7589        }
7590      
7591            redo A;
7592          } else {
7593            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after literal', ## TODO: type
7594                            line => $self->{line_prev},
7595                            column => $self->{column_prev} + 1
7596                                - length $self->{kwd});
7597            
7598            $self->{state} = BOGUS_MD_STATE;
7599            ## Reconsume.
7600            redo A;
7601          }
7602        } elsif ($self->{state} == AFTER_NDATA_STATE) {
7603          if ($is_space->{$self->{nc}}) {
7604            $self->{state} = BEFORE_NOTATION_NAME_STATE;
7605            
7606        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7607          $self->{line_prev} = $self->{line};
7608          $self->{column_prev} = $self->{column};
7609          $self->{column}++;
7610          $self->{nc}
7611              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7612        } else {
7613          $self->{set_nc}->($self);
7614        }
7615      
7616            redo A;
7617          } elsif ($self->{nc} == 0x003E) { # >
7618            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no notation name'); ## TODO: type
7619            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7620            
7621        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7622          $self->{line_prev} = $self->{line};
7623          $self->{column_prev} = $self->{column};
7624          $self->{column}++;
7625          $self->{nc}
7626              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7627        } else {
7628          $self->{set_nc}->($self);
7629        }
7630      
7631            return  ($self->{ct}); # ENTITY
7632            redo A;
7633          } elsif ($self->{nc} == -1) {
7634            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7635            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7636            
7637        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7638          $self->{line_prev} = $self->{line};
7639          $self->{column_prev} = $self->{column};
7640          $self->{column}++;
7641          $self->{nc}
7642              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7643        } else {
7644          $self->{set_nc}->($self);
7645        }
7646      
7647            return  ($self->{ct}); # ENTITY
7648            redo A;
7649          } else {
7650            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after literal', ## TODO: type
7651                            line => $self->{line_prev},
7652                            column => $self->{column_prev} + 1
7653                                - length $self->{kwd});
7654            $self->{state} = BOGUS_MD_STATE;
7655            ## Reconsume.
7656            redo A;
7657          }
7658        } elsif ($self->{state} == BEFORE_NOTATION_NAME_STATE) {
7659          if ($is_space->{$self->{nc}}) {
7660            ## Stay in the state.
7661            
7662        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7663          $self->{line_prev} = $self->{line};
7664          $self->{column_prev} = $self->{column};
7665          $self->{column}++;
7666          $self->{nc}
7667              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7668        } else {
7669          $self->{set_nc}->($self);
7670        }
7671      
7672            redo A;
7673          } elsif ($self->{nc} == 0x003E) { # >
7674            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no notation name'); ## TODO: type
7675            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7676            
7677        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7678          $self->{line_prev} = $self->{line};
7679          $self->{column_prev} = $self->{column};
7680          $self->{column}++;
7681          $self->{nc}
7682              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7683        } else {
7684          $self->{set_nc}->($self);
7685        }
7686      
7687            return  ($self->{ct}); # ENTITY
7688            redo A;
7689          } elsif ($self->{nc} == -1) {
7690            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7691            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7692            
7693        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7694          $self->{line_prev} = $self->{line};
7695          $self->{column_prev} = $self->{column};
7696          $self->{column}++;
7697          $self->{nc}
7698              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7699        } else {
7700          $self->{set_nc}->($self);
7701        }
7702      
7703            return  ($self->{ct}); # ENTITY
7704            redo A;
7705          } else {
7706            $self->{ct}->{notation} = chr $self->{nc}; # ENTITY
7707            $self->{state} = NOTATION_NAME_STATE;
7708            
7709        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7710          $self->{line_prev} = $self->{line};
7711          $self->{column_prev} = $self->{column};
7712          $self->{column}++;
7713          $self->{nc}
7714              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7715        } else {
7716          $self->{set_nc}->($self);
7717        }
7718      
7719            redo A;
7720          }
7721        } elsif ($self->{state} == NOTATION_NAME_STATE) {
7722          if ($is_space->{$self->{nc}}) {
7723            $self->{state} = AFTER_MD_DEF_STATE;
7724            
7725        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7726          $self->{line_prev} = $self->{line};
7727          $self->{column_prev} = $self->{column};
7728          $self->{column}++;
7729          $self->{nc}
7730              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7731        } else {
7732          $self->{set_nc}->($self);
7733        }
7734      
7735            redo A;
7736          } elsif ($self->{nc} == 0x003E) { # >
7737            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7738            
7739        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7740          $self->{line_prev} = $self->{line};
7741          $self->{column_prev} = $self->{column};
7742          $self->{column}++;
7743          $self->{nc}
7744              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7745        } else {
7746          $self->{set_nc}->($self);
7747        }
7748      
7749            return  ($self->{ct}); # ENTITY
7750            redo A;
7751          } elsif ($self->{nc} == -1) {
7752            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7753            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7754            
7755        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7756          $self->{line_prev} = $self->{line};
7757          $self->{column_prev} = $self->{column};
7758          $self->{column}++;
7759          $self->{nc}
7760              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7761        } else {
7762          $self->{set_nc}->($self);
7763        }
7764      
7765            return  ($self->{ct}); # ENTITY
7766            redo A;
7767          } else {
7768            $self->{ct}->{notation} .= chr $self->{nc}; # ENTITY
7769            ## Stay in the state.
7770            
7771        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7772          $self->{line_prev} = $self->{line};
7773          $self->{column_prev} = $self->{column};
7774          $self->{column}++;
7775          $self->{nc}
7776              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7777        } else {
7778          $self->{set_nc}->($self);
7779        }
7780      
7781            redo A;
7782          }
7783        } elsif ($self->{state} == DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE) {
7784          if ($self->{nc} == 0x0022) { # "
7785            $self->{state} = AFTER_MD_DEF_STATE;
7786            
7787        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7788          $self->{line_prev} = $self->{line};
7789          $self->{column_prev} = $self->{column};
7790          $self->{column}++;
7791          $self->{nc}
7792              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7793        } else {
7794          $self->{set_nc}->($self);
7795        }
7796      
7797            redo A;
7798          } elsif ($self->{nc} == 0x0026) { # &
7799            $self->{prev_state} = $self->{state};
7800            $self->{state} = ENTITY_VALUE_ENTITY_STATE;
7801            $self->{entity_add} = 0x0022; # "
7802            
7803        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7804          $self->{line_prev} = $self->{line};
7805          $self->{column_prev} = $self->{column};
7806          $self->{column}++;
7807          $self->{nc}
7808              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7809        } else {
7810          $self->{set_nc}->($self);
7811        }
7812      
7813            redo A;
7814    ## TODO: %
7815          } elsif ($self->{nc} == -1) {
7816            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed entity value'); ## TODO: type
7817            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7818            ## Reconsume.
7819            return  ($self->{ct}); # ENTITY
7820            redo A;
7821          } else {
7822            $self->{ct}->{value} .= chr $self->{nc}; # ENTITY
7823            
7824        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7825          $self->{line_prev} = $self->{line};
7826          $self->{column_prev} = $self->{column};
7827          $self->{column}++;
7828          $self->{nc}
7829              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7830        } else {
7831          $self->{set_nc}->($self);
7832        }
7833      
7834            redo A;
7835          }
7836        } elsif ($self->{state} == DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE) {
7837          if ($self->{nc} == 0x0027) { # '
7838            $self->{state} = AFTER_MD_DEF_STATE;
7839            
7840        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7841          $self->{line_prev} = $self->{line};
7842          $self->{column_prev} = $self->{column};
7843          $self->{column}++;
7844          $self->{nc}
7845              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7846        } else {
7847          $self->{set_nc}->($self);
7848        }
7849      
7850            redo A;
7851          } elsif ($self->{nc} == 0x0026) { # &
7852            $self->{prev_state} = $self->{state};
7853            $self->{state} = ENTITY_VALUE_ENTITY_STATE;
7854            $self->{entity_add} = 0x0027; # '
7855            
7856        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7857          $self->{line_prev} = $self->{line};
7858          $self->{column_prev} = $self->{column};
7859          $self->{column}++;
7860          $self->{nc}
7861              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7862        } else {
7863          $self->{set_nc}->($self);
7864        }
7865      
7866            redo A;
7867    ## TODO: %
7868          } elsif ($self->{nc} == -1) {
7869            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed entity value'); ## TODO: type
7870            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7871            ## Reconsume.
7872            return  ($self->{ct}); # ENTITY
7873            redo A;
7874          } else {
7875            $self->{ct}->{value} .= chr $self->{nc}; # ENTITY
7876            
7877        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7878          $self->{line_prev} = $self->{line};
7879          $self->{column_prev} = $self->{column};
7880          $self->{column}++;
7881          $self->{nc}
7882              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7883        } else {
7884          $self->{set_nc}->($self);
7885        }
7886      
7887            redo A;
7888          }
7889        } elsif ($self->{state} == ENTITY_VALUE_ENTITY_STATE) {
7890          ## TODO: XMLize
7891    
7892          if ($is_space->{$self->{nc}} or
7893              {
7894                0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
7895                $self->{entity_add} => 1,
7896              }->{$self->{nc}}) {
7897            ## Don't consume
7898            ## No error
7899            ## Return nothing.
7900            #
7901          } elsif ($self->{nc} == 0x0023) { # #
7902            $self->{ca} = $self->{ct};
7903            $self->{state} = ENTITY_HASH_STATE;
7904            $self->{kwd} = '#';
7905            
7906        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7907          $self->{line_prev} = $self->{line};
7908          $self->{column_prev} = $self->{column};
7909          $self->{column}++;
7910          $self->{nc}
7911              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7912        } else {
7913          $self->{set_nc}->($self);
7914        }
7915      
7916            redo A;
7917          } elsif ((0x0041 <= $self->{nc} and
7918                    $self->{nc} <= 0x005A) or # A..Z
7919                   (0x0061 <= $self->{nc} and
7920                    $self->{nc} <= 0x007A)) { # a..z
7921            #
7922          } else {
7923            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero');
7924            ## Return nothing.
7925            #
7926          }
7927    
7928          $self->{ct}->{value} .= '&';
7929          $self->{state} = $self->{prev_state};
7930          ## Reconsume.
7931          redo A;
7932        } elsif ($self->{state} == AFTER_ELEMENT_NAME_STATE) {
7933          if ($is_space->{$self->{nc}}) {
7934            $self->{state} = BEFORE_ELEMENT_CONTENT_STATE;
7935            
7936        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7937          $self->{line_prev} = $self->{line};
7938          $self->{column_prev} = $self->{column};
7939          $self->{column}++;
7940          $self->{nc}
7941              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7942        } else {
7943          $self->{set_nc}->($self);
7944        }
7945      
7946            redo A;
7947          } elsif ($self->{nc} == 0x0028) { # (
7948            $self->{state} = AFTER_CM_GROUP_OPEN_STATE;
7949            $self->{ct}->{content} = ['('];
7950            $self->{group_depth} = 1;
7951            
7952        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7953          $self->{line_prev} = $self->{line};
7954          $self->{column_prev} = $self->{column};
7955          $self->{column}++;
7956          $self->{nc}
7957              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7958        } else {
7959          $self->{set_nc}->($self);
7960        }
7961      
7962            redo A;
7963          } elsif ($self->{nc} == 0x003E) { # >
7964            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md def'); ## TODO: type
7965            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7966            
7967        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7968          $self->{line_prev} = $self->{line};
7969          $self->{column_prev} = $self->{column};
7970          $self->{column}++;
7971          $self->{nc}
7972              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7973        } else {
7974          $self->{set_nc}->($self);
7975        }
7976      
7977            return  ($self->{ct}); # ELEMENT
7978            redo A;
7979          } elsif ($self->{nc} == -1) {
7980            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7981            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7982            
7983        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7984          $self->{line_prev} = $self->{line};
7985          $self->{column_prev} = $self->{column};
7986          $self->{column}++;
7987          $self->{nc}
7988              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7989        } else {
7990          $self->{set_nc}->($self);
7991        }
7992      
7993            return  ($self->{ct}); # ELEMENT
7994            redo A;
7995          } else {
7996            $self->{ct}->{content} = [chr $self->{nc}];
7997            $self->{state} = CONTENT_KEYWORD_STATE;
7998            
7999        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8000          $self->{line_prev} = $self->{line};
8001          $self->{column_prev} = $self->{column};
8002          $self->{column}++;
8003          $self->{nc}
8004              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8005        } else {
8006          $self->{set_nc}->($self);
8007        }
8008      
8009            redo A;
8010          }
8011        } elsif ($self->{state} == CONTENT_KEYWORD_STATE) {
8012          if ($is_space->{$self->{nc}}) {
8013            $self->{state} = AFTER_MD_DEF_STATE;
8014            
8015        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8016          $self->{line_prev} = $self->{line};
8017          $self->{column_prev} = $self->{column};
8018          $self->{column}++;
8019          $self->{nc}
8020              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8021        } else {
8022          $self->{set_nc}->($self);
8023        }
8024      
8025            redo A;
8026          } elsif ($self->{nc} == 0x003E) { # >
8027            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8028            
8029        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8030          $self->{line_prev} = $self->{line};
8031          $self->{column_prev} = $self->{column};
8032          $self->{column}++;
8033          $self->{nc}
8034              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8035        } else {
8036          $self->{set_nc}->($self);
8037        }
8038      
8039            return  ($self->{ct}); # ELEMENT
8040            redo A;
8041          } elsif ($self->{nc} == -1) {
8042            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8043            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8044            
8045        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8046          $self->{line_prev} = $self->{line};
8047          $self->{column_prev} = $self->{column};
8048          $self->{column}++;
8049          $self->{nc}
8050              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8051        } else {
8052          $self->{set_nc}->($self);
8053        }
8054      
8055            return  ($self->{ct}); # ELEMENT
8056            redo A;
8057          } else {
8058            $self->{ct}->{content}->[-1] .= chr $self->{nc}; # ELEMENT
8059            ## Stay in the state.
8060            
8061        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8062          $self->{line_prev} = $self->{line};
8063          $self->{column_prev} = $self->{column};
8064          $self->{column}++;
8065          $self->{nc}
8066              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8067        } else {
8068          $self->{set_nc}->($self);
8069        }
8070      
8071            redo A;
8072          }
8073        } elsif ($self->{state} == AFTER_CM_GROUP_OPEN_STATE) {
8074          if ($is_space->{$self->{nc}}) {
8075            ## Stay in the state.
8076            
8077        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8078          $self->{line_prev} = $self->{line};
8079          $self->{column_prev} = $self->{column};
8080          $self->{column}++;
8081          $self->{nc}
8082              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8083        } else {
8084          $self->{set_nc}->($self);
8085        }
8086      
8087            redo A;
8088          } elsif ($self->{nc} == 0x0028) { # (
8089            $self->{group_depth}++;
8090            push @{$self->{ct}->{content}}, chr $self->{nc};
8091            ## Stay in the state.
8092            
8093        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8094          $self->{line_prev} = $self->{line};
8095          $self->{column_prev} = $self->{column};
8096          $self->{column}++;
8097          $self->{nc}
8098              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8099        } else {
8100          $self->{set_nc}->($self);
8101        }
8102      
8103            redo A;
8104          } elsif ($self->{nc} == 0x007C or # |
8105                   $self->{nc} == 0x002C) { # ,
8106            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty element name'); ## TODO: type
8107            ## Stay in the state.
8108            
8109        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8110          $self->{line_prev} = $self->{line};
8111          $self->{column_prev} = $self->{column};
8112          $self->{column}++;
8113          $self->{nc}
8114              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8115        } else {
8116          $self->{set_nc}->($self);
8117        }
8118      
8119            redo A;
8120          } elsif ($self->{nc} == 0x0029) { # )
8121            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty element name'); ## TODO: type
8122            push @{$self->{ct}->{content}}, chr $self->{nc};
8123            $self->{group_depth}--;
8124            $self->{state} = AFTER_CM_GROUP_CLOSE_STATE;
8125            
8126        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8127          $self->{line_prev} = $self->{line};
8128          $self->{column_prev} = $self->{column};
8129          $self->{column}++;
8130          $self->{nc}
8131              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8132        } else {
8133          $self->{set_nc}->($self);
8134        }
8135      
8136            redo A;
8137          } elsif ($self->{nc} == 0x003E) { # >
8138            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8139            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8140            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8141            
8142        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8143          $self->{line_prev} = $self->{line};
8144          $self->{column_prev} = $self->{column};
8145          $self->{column}++;
8146          $self->{nc}
8147              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8148        } else {
8149          $self->{set_nc}->($self);
8150        }
8151      
8152            return  ($self->{ct}); # ELEMENT
8153            redo A;
8154          } elsif ($self->{nc} == -1) {
8155            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8156            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8157            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8158            
8159        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8160          $self->{line_prev} = $self->{line};
8161          $self->{column_prev} = $self->{column};
8162          $self->{column}++;
8163          $self->{nc}
8164              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8165        } else {
8166          $self->{set_nc}->($self);
8167        }
8168      
8169            return  ($self->{ct}); # ELEMENT
8170            redo A;
8171          } else {
8172            push @{$self->{ct}->{content}}, chr $self->{nc};
8173            $self->{state} = CM_ELEMENT_NAME_STATE;
8174            
8175        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8176          $self->{line_prev} = $self->{line};
8177          $self->{column_prev} = $self->{column};
8178          $self->{column}++;
8179          $self->{nc}
8180              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8181        } else {
8182          $self->{set_nc}->($self);
8183        }
8184      
8185            redo A;
8186          }
8187        } elsif ($self->{state} == CM_ELEMENT_NAME_STATE) {
8188          if ($is_space->{$self->{nc}}) {
8189            $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8190            
8191        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8192          $self->{line_prev} = $self->{line};
8193          $self->{column_prev} = $self->{column};
8194          $self->{column}++;
8195          $self->{nc}
8196              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8197        } else {
8198          $self->{set_nc}->($self);
8199        }
8200      
8201            redo A;
8202          } elsif ($self->{nc} == 0x002A or # *
8203                   $self->{nc} == 0x002B or # +
8204                   $self->{nc} == 0x003F) { # ?
8205            push @{$self->{ct}->{content}}, chr $self->{nc};
8206            $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8207            
8208        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8209          $self->{line_prev} = $self->{line};
8210          $self->{column_prev} = $self->{column};
8211          $self->{column}++;
8212          $self->{nc}
8213              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8214        } else {
8215          $self->{set_nc}->($self);
8216        }
8217      
8218            redo A;
8219          } elsif ($self->{nc} == 0x007C or # |
8220                   $self->{nc} == 0x002C) { # ,
8221            push @{$self->{ct}->{content}}, $self->{nc} == 0x007C ? ' | ' : ', ';
8222            $self->{state} = AFTER_CM_GROUP_OPEN_STATE;
8223            
8224        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8225          $self->{line_prev} = $self->{line};
8226          $self->{column_prev} = $self->{column};
8227          $self->{column}++;
8228          $self->{nc}
8229              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8230        } else {
8231          $self->{set_nc}->($self);
8232        }
8233      
8234            redo A;
8235          } elsif ($self->{nc} == 0x0029) { # )
8236            $self->{group_depth}--;
8237            push @{$self->{ct}->{content}}, chr $self->{nc};
8238            $self->{state} = AFTER_CM_GROUP_CLOSE_STATE;
8239            
8240        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8241          $self->{line_prev} = $self->{line};
8242          $self->{column_prev} = $self->{column};
8243          $self->{column}++;
8244          $self->{nc}
8245              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8246        } else {
8247          $self->{set_nc}->($self);
8248        }
8249      
8250            redo A;
8251          } elsif ($self->{nc} == 0x003E) { # >
8252            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8253            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8254            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8255            
8256        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8257          $self->{line_prev} = $self->{line};
8258          $self->{column_prev} = $self->{column};
8259          $self->{column}++;
8260          $self->{nc}
8261              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8262        } else {
8263          $self->{set_nc}->($self);
8264        }
8265      
8266            return  ($self->{ct}); # ELEMENT
8267            redo A;
8268          } elsif ($self->{nc} == -1) {
8269            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8270            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8271            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8272            
8273        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8274          $self->{line_prev} = $self->{line};
8275          $self->{column_prev} = $self->{column};
8276          $self->{column}++;
8277          $self->{nc}
8278              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8279        } else {
8280          $self->{set_nc}->($self);
8281        }
8282      
8283            return  ($self->{ct}); # ELEMENT
8284            redo A;
8285          } else {
8286            $self->{ct}->{content}->[-1] .= chr $self->{nc};
8287            ## Stay in the state.
8288            
8289        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8290          $self->{line_prev} = $self->{line};
8291          $self->{column_prev} = $self->{column};
8292          $self->{column}++;
8293          $self->{nc}
8294              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8295        } else {
8296          $self->{set_nc}->($self);
8297        }
8298      
8299            redo A;
8300          }
8301        } elsif ($self->{state} == AFTER_CM_ELEMENT_NAME_STATE) {
8302          if ($is_space->{$self->{nc}}) {
8303            ## Stay in the state.
8304            
8305        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8306          $self->{line_prev} = $self->{line};
8307          $self->{column_prev} = $self->{column};
8308          $self->{column}++;
8309          $self->{nc}
8310              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8311        } else {
8312          $self->{set_nc}->($self);
8313        }
8314      
8315            redo A;
8316          } elsif ($self->{nc} == 0x007C or # |
8317                   $self->{nc} == 0x002C) { # ,
8318            push @{$self->{ct}->{content}}, $self->{nc} == 0x007C ? ' | ' : ', ';
8319            $self->{state} = AFTER_CM_GROUP_OPEN_STATE;
8320            
8321        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8322          $self->{line_prev} = $self->{line};
8323          $self->{column_prev} = $self->{column};
8324          $self->{column}++;
8325          $self->{nc}
8326              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8327        } else {
8328          $self->{set_nc}->($self);
8329        }
8330      
8331            redo A;
8332          } elsif ($self->{nc} == 0x0029) { # )
8333            $self->{group_depth}--;
8334            push @{$self->{ct}->{content}}, chr $self->{nc};
8335            $self->{state} = AFTER_CM_GROUP_CLOSE_STATE;
8336            
8337        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8338          $self->{line_prev} = $self->{line};
8339          $self->{column_prev} = $self->{column};
8340          $self->{column}++;
8341          $self->{nc}
8342              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8343        } else {
8344          $self->{set_nc}->($self);
8345        }
8346      
8347            redo A;
8348          } elsif ($self->{nc} == 0x003E) { # >
8349            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8350            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8351            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8352            
8353        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8354          $self->{line_prev} = $self->{line};
8355          $self->{column_prev} = $self->{column};
8356          $self->{column}++;
8357          $self->{nc}
8358              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8359        } else {
8360          $self->{set_nc}->($self);
8361        }
8362      
8363            return  ($self->{ct}); # ELEMENT
8364            redo A;
8365          } elsif ($self->{nc} == -1) {
8366            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8367            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8368            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8369            
8370        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8371          $self->{line_prev} = $self->{line};
8372          $self->{column_prev} = $self->{column};
8373          $self->{column}++;
8374          $self->{nc}
8375              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8376        } else {
8377          $self->{set_nc}->($self);
8378        }
8379      
8380            return  ($self->{ct}); # ELEMENT
8381            redo A;
8382          } else {
8383            $self->{parse_error}->(level => $self->{level}->{must}, type => 'after element name'); ## TODO: type
8384            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8385            $self->{state} = BOGUS_MD_STATE;
8386            
8387        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8388          $self->{line_prev} = $self->{line};
8389          $self->{column_prev} = $self->{column};
8390          $self->{column}++;
8391          $self->{nc}
8392              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8393        } else {
8394          $self->{set_nc}->($self);
8395        }
8396      
8397            redo A;
8398          }
8399        } elsif ($self->{state} == AFTER_CM_GROUP_CLOSE_STATE) {
8400          if ($is_space->{$self->{nc}}) {
8401            if ($self->{group_depth}) {
8402              $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8403            } else {
8404              $self->{state} = AFTER_MD_DEF_STATE;
8405            }
8406            
8407        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8408          $self->{line_prev} = $self->{line};
8409          $self->{column_prev} = $self->{column};
8410          $self->{column}++;
8411          $self->{nc}
8412              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8413        } else {
8414          $self->{set_nc}->($self);
8415        }
8416      
8417            redo A;
8418          } elsif ($self->{nc} == 0x002A or # *
8419                   $self->{nc} == 0x002B or # +
8420                   $self->{nc} == 0x003F) { # ?
8421            push @{$self->{ct}->{content}}, chr $self->{nc};
8422            if ($self->{group_depth}) {
8423              $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8424            } else {
8425              $self->{state} = AFTER_MD_DEF_STATE;
8426            }
8427            
8428        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8429          $self->{line_prev} = $self->{line};
8430          $self->{column_prev} = $self->{column};
8431          $self->{column}++;
8432          $self->{nc}
8433              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8434        } else {
8435          $self->{set_nc}->($self);
8436        }
8437      
8438            redo A;
8439          } elsif ($self->{nc} == 0x0029) { # )
8440            if ($self->{group_depth}) {
8441              $self->{group_depth}--;
8442              push @{$self->{ct}->{content}}, chr $self->{nc};
8443              ## Stay in the state.
8444              
8445        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8446          $self->{line_prev} = $self->{line};
8447          $self->{column_prev} = $self->{column};
8448          $self->{column}++;
8449          $self->{nc}
8450              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8451        } else {
8452          $self->{set_nc}->($self);
8453        }
8454      
8455              redo A;
8456            } else {
8457              $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after md def'); ## TODO: type
8458              $self->{state} = BOGUS_MD_STATE;
8459              ## Reconsume.
8460              redo A;
8461            }
8462          } elsif ($self->{nc} == 0x003E) { # >
8463            if ($self->{group_depth}) {
8464              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8465              push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8466            }
8467            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8468            
8469        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8470          $self->{line_prev} = $self->{line};
8471          $self->{column_prev} = $self->{column};
8472          $self->{column}++;
8473          $self->{nc}
8474              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8475        } else {
8476          $self->{set_nc}->($self);
8477        }
8478      
8479            return  ($self->{ct}); # ELEMENT
8480            redo A;
8481          } elsif ($self->{nc} == -1) {
8482            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8483            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8484            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8485            
8486        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8487          $self->{line_prev} = $self->{line};
8488          $self->{column_prev} = $self->{column};
8489          $self->{column}++;
8490          $self->{nc}
8491              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8492        } else {
8493          $self->{set_nc}->($self);
8494        }
8495      
8496            return  ($self->{ct}); # ELEMENT
8497            redo A;
8498          } else {
8499            if ($self->{group_depth}) {
8500              $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8501            } else {
8502              $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after md def'); ## TODO: type
8503              $self->{state} = BOGUS_MD_STATE;
8504            }
8505            ## Reconsume.
8506            redo A;
8507          }
8508        } elsif ($self->{state} == AFTER_MD_DEF_STATE) {
8509          if ($is_space->{$self->{nc}}) {
8510            ## Stay in the state.
8511            
8512        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8513          $self->{line_prev} = $self->{line};
8514          $self->{column_prev} = $self->{column};
8515          $self->{column}++;
8516          $self->{nc}
8517              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8518        } else {
8519          $self->{set_nc}->($self);
8520        }
8521      
8522            redo A;
8523          } elsif ($self->{nc} == 0x003E) { # >
8524            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8525            
8526        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8527          $self->{line_prev} = $self->{line};
8528          $self->{column_prev} = $self->{column};
8529          $self->{column}++;
8530          $self->{nc}
8531              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8532        } else {
8533          $self->{set_nc}->($self);
8534        }
8535      
8536            return  ($self->{ct}); # ENTITY/ELEMENT
8537            redo A;
8538          } elsif ($self->{nc} == -1) {
8539            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8540            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8541            
8542        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8543          $self->{line_prev} = $self->{line};
8544          $self->{column_prev} = $self->{column};
8545          $self->{column}++;
8546          $self->{nc}
8547              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8548        } else {
8549          $self->{set_nc}->($self);
8550        }
8551      
8552            return  ($self->{ct}); # ENTITY/ELEMENT
8553            redo A;
8554          } else {
8555            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after md def'); ## TODO: type
8556            $self->{state} = BOGUS_MD_STATE;
8557            ## Reconsume.
8558            redo A;
8559          }
8560        } elsif ($self->{state} == BOGUS_MD_STATE) {
8561          if ($self->{nc} == 0x003E) { # >
8562            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8563            
8564        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8565          $self->{line_prev} = $self->{line};
8566          $self->{column_prev} = $self->{column};
8567          $self->{column}++;
8568          $self->{nc}
8569              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8570        } else {
8571          $self->{set_nc}->($self);
8572        }
8573      
8574            return  ($self->{ct}); # ATTLIST/ENTITY/NOTATION
8575            redo A;
8576          } elsif ($self->{nc} == -1) {
8577            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8578            ## Reconsume.
8579            return  ($self->{ct}); # ATTLIST/ENTITY/NOTATION
8580            redo A;
8581          } else {
8582            ## Stay in the state.
8583            
8584        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8585          $self->{line_prev} = $self->{line};
8586          $self->{column_prev} = $self->{column};
8587          $self->{column}++;
8588          $self->{nc}
8589              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8590        } else {
8591          $self->{set_nc}->($self);
8592        }
8593      
8594            redo A;
8595          }
8596      } else {      } else {
8597        die "$0: $self->{state}: Unknown state";        die "$0: $self->{state}: Unknown state";
8598      }      }
# Line 4712  sub _get_next_token ($) { Line 8603  sub _get_next_token ($) {
8603    
8604  1;  1;
8605  ## $Date$  ## $Date$
8606                                    

Legend:
Removed from v.1.11  
changed lines
  Added in v.1.21

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24