/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.9 by wakaba, Wed Oct 15 08:05:47 2008 UTC revision 1.28 by wakaba, Sun Jul 5 04:38:45 2009 UTC
# Line 15  BEGIN { Line 15  BEGIN {
15      CHARACTER_TOKEN      CHARACTER_TOKEN
16      PI_TOKEN      PI_TOKEN
17      ABORT_TOKEN      ABORT_TOKEN
18        END_OF_DOCTYPE_TOKEN
19        ATTLIST_TOKEN
20        ELEMENT_TOKEN
21        GENERAL_ENTITY_TOKEN
22        PARAMETER_ENTITY_TOKEN
23        NOTATION_TOKEN
24    );    );
25        
26    our %EXPORT_TAGS = (    our %EXPORT_TAGS = (
# Line 27  BEGIN { Line 33  BEGIN {
33        CHARACTER_TOKEN        CHARACTER_TOKEN
34        PI_TOKEN        PI_TOKEN
35        ABORT_TOKEN        ABORT_TOKEN
36          END_OF_DOCTYPE_TOKEN
37          ATTLIST_TOKEN
38          ELEMENT_TOKEN
39          GENERAL_ENTITY_TOKEN
40          PARAMETER_ENTITY_TOKEN
41          NOTATION_TOKEN
42      )],      )],
43    );    );
44  }  }
45    
46    ## NOTE: Differences from the XML5 draft are marked as "XML5:".
47    
48  ## Token types  ## Token types
49    
50  sub DOCTYPE_TOKEN () { 1 }  sub DOCTYPE_TOKEN () { 1 } ## XML5: No DOCTYPE token.
51  sub COMMENT_TOKEN () { 2 }  sub COMMENT_TOKEN () { 2 }
52  sub START_TAG_TOKEN () { 3 }  sub START_TAG_TOKEN () { 3 }
53  sub END_TAG_TOKEN () { 4 }  sub END_TAG_TOKEN () { 4 }
54  sub END_OF_FILE_TOKEN () { 5 }  sub END_OF_FILE_TOKEN () { 5 }
55  sub CHARACTER_TOKEN () { 6 }  sub CHARACTER_TOKEN () { 6 }
56  sub PI_TOKEN () { 7 } # XML5  sub PI_TOKEN () { 7 } ## NOTE: XML only.
57  sub ABORT_TOKEN () { 8 } # Not a token actually  sub ABORT_TOKEN () { 8 } ## NOTE: For internal processing.
58    sub END_OF_DOCTYPE_TOKEN () { 9 } ## NOTE: XML only.
59    sub ATTLIST_TOKEN () { 10 } ## NOTE: XML only.
60    sub ELEMENT_TOKEN () { 11 } ## NOTE: XML only.
61    sub GENERAL_ENTITY_TOKEN () { 12 } ## NOTE: XML only.
62    sub PARAMETER_ENTITY_TOKEN () { 13 } ## NOTE: XML only.
63    sub NOTATION_TOKEN () { 14 } ## NOTE: XML only.
64    
65    ## XML5: XML5 has "empty tag token".  In this implementation, it is
66    ## represented as a start tag token with $self->{self_closing} flag
67    ## set to true.
68    
69    ## XML5: XML5 has "short end tag token".  In this implementation, it
70    ## is represented as an end tag token with $token->{tag_name} set
71    ## to an empty string.
72    
73  package Whatpm::HTML;  package Whatpm::HTML;
74    
# Line 114  sub HEXREF_HEX_STATE () { 48 } Line 142  sub HEXREF_HEX_STATE () { 48 }
142  sub ENTITY_NAME_STATE () { 49 }  sub ENTITY_NAME_STATE () { 49 }
143  sub PCDATA_STATE () { 50 } # "data state" in the spec  sub PCDATA_STATE () { 50 } # "data state" in the spec
144    
145  ## XML states  ## XML-only states
146  sub PI_STATE () { 51 }  sub PI_STATE () { 51 }
147  sub PI_TARGET_STATE () { 52 }  sub PI_TARGET_STATE () { 52 }
148  sub PI_TARGET_AFTER_STATE () { 53 }  sub PI_TARGET_AFTER_STATE () { 53 }
149  sub PI_DATA_STATE () { 54 }  sub PI_DATA_STATE () { 54 }
150  sub PI_AFTER_STATE () { 55 }  sub PI_AFTER_STATE () { 55 }
151  sub PI_DATA_AFTER_STATE () { 56 }  sub PI_DATA_AFTER_STATE () { 56 }
152    sub DOCTYPE_INTERNAL_SUBSET_STATE () { 57 }
153    sub DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 58 }
154    sub BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 59 }
155    sub DOCTYPE_TAG_STATE () { 60 }
156    sub DOCTYPE_MARKUP_DECLARATION_OPEN_STATE () { 61 }
157    sub MD_ATTLIST_STATE () { 62 }
158    sub MD_E_STATE () { 63 }
159    sub MD_ELEMENT_STATE () { 64 }
160    sub MD_ENTITY_STATE () { 65 }
161    sub MD_NOTATION_STATE () { 66 }
162    sub DOCTYPE_MD_STATE () { 67 }
163    sub BEFORE_MD_NAME_STATE () { 68 }
164    sub MD_NAME_STATE () { 69 }
165    sub DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE () { 70 }
166    sub DOCTYPE_ATTLIST_NAME_AFTER_STATE () { 71 }
167    sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE () { 72 }
168    sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE () { 73 }
169    sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE () { 74 }
170    sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE () { 75 }
171    sub BEFORE_ALLOWED_TOKEN_STATE () { 76 }
172    sub ALLOWED_TOKEN_STATE () { 77 }
173    sub AFTER_ALLOWED_TOKEN_STATE () { 78 }
174    sub AFTER_ALLOWED_TOKENS_STATE () { 79 }
175    sub BEFORE_ATTR_DEFAULT_STATE () { 80 }
176    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE () { 81 }
177    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE () { 82 }
178    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE () { 83 }
179    sub AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE () { 84 }
180    sub BEFORE_NDATA_STATE () { 85 }
181    sub NDATA_STATE () { 86 }
182    sub AFTER_NDATA_STATE () { 87 }
183    sub BEFORE_NOTATION_NAME_STATE () { 88 }
184    sub NOTATION_NAME_STATE () { 89 }
185    sub DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE () { 90 }
186    sub DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE () { 91 }
187    sub ENTITY_VALUE_ENTITY_STATE () { 92 }
188    sub AFTER_ELEMENT_NAME_STATE () { 93 }
189    sub BEFORE_ELEMENT_CONTENT_STATE () { 94 }
190    sub CONTENT_KEYWORD_STATE () { 95 }
191    sub AFTER_CM_GROUP_OPEN_STATE () { 96 }
192    sub CM_ELEMENT_NAME_STATE () { 97 }
193    sub AFTER_CM_ELEMENT_NAME_STATE () { 98 }
194    sub AFTER_CM_GROUP_CLOSE_STATE () { 99 }
195    sub AFTER_MD_DEF_STATE () { 100 }
196    sub BOGUS_MD_STATE () { 101 }
197    
198  ## Tree constructor state constants (see Whatpm::HTML for the full  ## Tree constructor state constants (see Whatpm::HTML for the full
199  ## list and descriptions)  ## list and descriptions)
# Line 186  sub _initialize_tokenizer ($) { Line 259  sub _initialize_tokenizer ($) {
259    #$self->{is_xml} (if XML)    #$self->{is_xml} (if XML)
260    
261    $self->{state} = DATA_STATE; # MUST    $self->{state} = DATA_STATE; # MUST
262    $self->{s_kwd} = ''; # state keyword    $self->{s_kwd} = ''; # Data state keyword
263      #$self->{kwd} = ''; # State-dependent keyword; initialized when used
264    #$self->{entity__value}; # initialized when used    #$self->{entity__value}; # initialized when used
265    #$self->{entity__match}; # initialized when used    #$self->{entity__match}; # initialized when used
266    $self->{content_model} = PCDATA_CONTENT_MODEL; # be    $self->{content_model} = PCDATA_CONTENT_MODEL; # be
# Line 216  sub _initialize_tokenizer ($) { Line 290  sub _initialize_tokenizer ($) {
290    
291  ## A token has:  ## A token has:
292  ##   ->{type} == DOCTYPE_TOKEN, START_TAG_TOKEN, END_TAG_TOKEN, COMMENT_TOKEN,  ##   ->{type} == DOCTYPE_TOKEN, START_TAG_TOKEN, END_TAG_TOKEN, COMMENT_TOKEN,
293  ##       CHARACTER_TOKEN, or END_OF_FILE_TOKEN  ##       CHARACTER_TOKEN, END_OF_FILE_TOKEN, PI_TOKEN, or ABORT_TOKEN
294  ##   ->{name} (DOCTYPE_TOKEN)  ##   ->{name} (DOCTYPE_TOKEN)
295  ##   ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN)  ##   ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN)
296    ##   ->{target} (PI_TOKEN)
297  ##   ->{pubid} (DOCTYPE_TOKEN)  ##   ->{pubid} (DOCTYPE_TOKEN)
298  ##   ->{sysid} (DOCTYPE_TOKEN)  ##   ->{sysid} (DOCTYPE_TOKEN)
299  ##   ->{quirks} == 1 or 0 (DOCTYPE_TOKEN): "force-quirks" flag  ##   ->{quirks} == 1 or 0 (DOCTYPE_TOKEN): "force-quirks" flag
# Line 226  sub _initialize_tokenizer ($) { Line 301  sub _initialize_tokenizer ($) {
301  ##        ->{name}  ##        ->{name}
302  ##        ->{value}  ##        ->{value}
303  ##        ->{has_reference} == 1 or 0  ##        ->{has_reference} == 1 or 0
304  ##   ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN)  ##        ->{index}: Index of the attribute in a tag.
305    ##   ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN, PI_TOKEN)
306  ##   ->{has_reference} == 1 or 0 (CHARACTER_TOKEN)  ##   ->{has_reference} == 1 or 0 (CHARACTER_TOKEN)
307    ##   ->{last_index} (ELEMENT_TOKEN): Next attribute's index - 1.
308    ##   ->{has_internal_subset} == 1 or 0 (DOCTYPE_TOKEN)
309    
310  ## NOTE: The "self-closing flag" is held as |$self->{self_closing}|.  ## NOTE: The "self-closing flag" is held as |$self->{self_closing}|.
311  ##     |->{self_closing}| is used to save the value of |$self->{self_closing}|  ##     |->{self_closing}| is used to save the value of |$self->{self_closing}|
312  ##     while the token is pushed back to the stack.  ##     while the token is pushed back to the stack.
# Line 247  my $is_space = { Line 326  my $is_space = {
326    0x0009 => 1, # CHARACTER TABULATION (HT)    0x0009 => 1, # CHARACTER TABULATION (HT)
327    0x000A => 1, # LINE FEED (LF)    0x000A => 1, # LINE FEED (LF)
328    #0x000B => 0, # LINE TABULATION (VT)    #0x000B => 0, # LINE TABULATION (VT)
329    0x000C => 1, # FORM FEED (FF)    0x000C => 1, # FORM FEED (FF) ## XML5: Not a space character.
330    #0x000D => 1, # CARRIAGE RETURN (CR)    #0x000D => 1, # CARRIAGE RETURN (CR)
331    0x0020 => 1, # SPACE (SP)    0x0020 => 1, # SPACE (SP)
332  };  };
# Line 507  sub _get_next_token ($) { Line 586  sub _get_next_token ($) {
586        return  ($token);        return  ($token);
587        redo A;        redo A;
588      } elsif ($self->{state} == TAG_OPEN_STATE) {      } elsif ($self->{state} == TAG_OPEN_STATE) {
589          ## XML5: "tag state".
590    
591        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
592          if ($self->{nc} == 0x002F) { # /          if ($self->{nc} == 0x002F) { # /
593                        
# Line 525  sub _get_next_token ($) { Line 606  sub _get_next_token ($) {
606            redo A;            redo A;
607          } elsif ($self->{nc} == 0x0021) { # !          } elsif ($self->{nc} == 0x0021) { # !
608                        
609            $self->{s_kwd} = '<' unless $self->{escape};            $self->{s_kwd} = $self->{escaped} ? '' : '<';
610            #            #
611          } else {          } else {
612                        
613              $self->{s_kwd} = '';
614            #            #
615          }          }
616    
617          ## reconsume          ## reconsume
618          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
         $self->{s_kwd} = '';  
619          return  ({type => CHARACTER_TOKEN, data => '<',          return  ({type => CHARACTER_TOKEN, data => '<',
620                    line => $self->{line_prev},                    line => $self->{line_prev},
621                    column => $self->{column_prev},                    column => $self->{column_prev},
# Line 709  sub _get_next_token ($) { Line 790  sub _get_next_token ($) {
790        ## NOTE: The "close tag open state" in the spec is implemented as        ## NOTE: The "close tag open state" in the spec is implemented as
791        ## |CLOSE_TAG_OPEN_STATE| and |CDATA_RCDATA_CLOSE_TAG_STATE|.        ## |CLOSE_TAG_OPEN_STATE| and |CDATA_RCDATA_CLOSE_TAG_STATE|.
792    
793          ## XML5: "end tag state".
794    
795        my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"        my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"
796        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
797          if (defined $self->{last_stag_name}) {          if (defined $self->{last_stag_name}) {
798            $self->{state} = CDATA_RCDATA_CLOSE_TAG_STATE;            $self->{state} = CDATA_RCDATA_CLOSE_TAG_STATE;
799            $self->{s_kwd} = '';            $self->{kwd} = '';
800            ## Reconsume.            ## Reconsume.
801            redo A;            redo A;
802          } else {          } else {
# Line 770  sub _get_next_token ($) { Line 853  sub _get_next_token ($) {
853        
854          redo A;          redo A;
855        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
856          $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty end tag',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty end tag',
857                          line => $self->{line_prev}, ## "<" in "</>"                          line => $self->{line_prev}, ## "<" in "</>"
858                          column => $self->{column_prev} - 1);                          column => $self->{column_prev} - 1);
859          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
860          $self->{s_kwd} = '';          $self->{s_kwd} = '';
861                    if ($self->{is_xml}) {
862              
863              ## XML5: No parse error.
864              
865              ## NOTE: This parser raises a parse error, since it supports
866              ## XML1, not XML5.
867    
868              ## NOTE: A short end tag token.
869              my $ct = {type => END_TAG_TOKEN,
870                        tag_name => '',
871                        line => $self->{line_prev},
872                        column => $self->{column_prev} - 1,
873                       };
874              
875        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
876          $self->{line_prev} = $self->{line};
877          $self->{column_prev} = $self->{column};
878          $self->{column}++;
879          $self->{nc}
880              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
881        } else {
882          $self->{set_nc}->($self);
883        }
884      
885              return  ($ct);
886            } else {
887              
888              
889      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
890        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
891        $self->{column_prev} = $self->{column};        $self->{column_prev} = $self->{column};
# Line 787  sub _get_next_token ($) { Line 896  sub _get_next_token ($) {
896        $self->{set_nc}->($self);        $self->{set_nc}->($self);
897      }      }
898        
899            }
900          redo A;          redo A;
901        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
902                    
# Line 800  sub _get_next_token ($) { Line 910  sub _get_next_token ($) {
910                   });                   });
911    
912          redo A;          redo A;
913        } else {        } elsif (not $self->{is_xml} or
914                   $is_space->{$self->{nc}}) {
915                    
916          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus end tag');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus end tag',
917                            line => $self->{line_prev}, # "<" of "</"
918                            column => $self->{column_prev} - 1);
919          $self->{state} = BOGUS_COMMENT_STATE;          $self->{state} = BOGUS_COMMENT_STATE;
920          $self->{ct} = {type => COMMENT_TOKEN, data => '',          $self->{ct} = {type => COMMENT_TOKEN, data => '',
921                                    line => $self->{line_prev}, # "<" of "</"                                    line => $self->{line_prev}, # "<" of "</"
# Line 815  sub _get_next_token ($) { Line 928  sub _get_next_token ($) {
928          ## generated from the bogus end tag, as defined in the          ## generated from the bogus end tag, as defined in the
929          ## "bogus comment state" entry.          ## "bogus comment state" entry.
930          redo A;          redo A;
931          } else {
932            ## XML5: "</:" is a parse error.
933            
934            $self->{ct} = {type => END_TAG_TOKEN,
935                           tag_name => chr ($self->{nc}),
936                           line => $l, column => $c};
937            $self->{state} = TAG_NAME_STATE; ## XML5: "end tag name state".
938            
939        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
940          $self->{line_prev} = $self->{line};
941          $self->{column_prev} = $self->{column};
942          $self->{column}++;
943          $self->{nc}
944              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
945        } else {
946          $self->{set_nc}->($self);
947        }
948      
949            redo A;
950        }        }
951      } elsif ($self->{state} == CDATA_RCDATA_CLOSE_TAG_STATE) {      } elsif ($self->{state} == CDATA_RCDATA_CLOSE_TAG_STATE) {
952        my $ch = substr $self->{last_stag_name}, length $self->{s_kwd}, 1;        my $ch = substr $self->{last_stag_name}, length $self->{kwd}, 1;
953        if (length $ch) {        if (length $ch) {
954          my $CH = $ch;          my $CH = $ch;
955          $ch =~ tr/a-z/A-Z/;          $ch =~ tr/a-z/A-Z/;
# Line 825  sub _get_next_token ($) { Line 957  sub _get_next_token ($) {
957          if ($nch eq $ch or $nch eq $CH) {          if ($nch eq $ch or $nch eq $CH) {
958                        
959            ## Stay in the state.            ## Stay in the state.
960            $self->{s_kwd} .= $nch;            $self->{kwd} .= $nch;
961                        
962      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
963        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 844  sub _get_next_token ($) { Line 976  sub _get_next_token ($) {
976            $self->{s_kwd} = '';            $self->{s_kwd} = '';
977            ## Reconsume.            ## Reconsume.
978            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
979                      data => '</' . $self->{s_kwd},                      data => '</' . $self->{kwd},
980                      line => $self->{line_prev},                      line => $self->{line_prev},
981                      column => $self->{column_prev} - 1 - length $self->{s_kwd},                      column => $self->{column_prev} - 1 - length $self->{kwd},
982                     });                     });
983            redo A;            redo A;
984          }          }
# Line 862  sub _get_next_token ($) { Line 994  sub _get_next_token ($) {
994            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
995            $self->{s_kwd} = '';            $self->{s_kwd} = '';
996            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
997                      data => '</' . $self->{s_kwd},                      data => '</' . $self->{kwd},
998                      line => $self->{line_prev},                      line => $self->{line_prev},
999                      column => $self->{column_prev} - 1 - length $self->{s_kwd},                      column => $self->{column_prev} - 1 - length $self->{kwd},
1000                     });                     });
1001            redo A;            redo A;
1002          } else {          } else {
# Line 873  sub _get_next_token ($) { Line 1005  sub _get_next_token ($) {
1005                = {type => END_TAG_TOKEN,                = {type => END_TAG_TOKEN,
1006                   tag_name => $self->{last_stag_name},                   tag_name => $self->{last_stag_name},
1007                   line => $self->{line_prev},                   line => $self->{line_prev},
1008                   column => $self->{column_prev} - 1 - length $self->{s_kwd}};                   column => $self->{column_prev} - 1 - length $self->{kwd}};
1009            $self->{state} = TAG_NAME_STATE;            $self->{state} = TAG_NAME_STATE;
1010            ## Reconsume.            ## Reconsume.
1011            redo A;            redo A;
# Line 1005  sub _get_next_token ($) { Line 1137  sub _get_next_token ($) {
1137          redo A;          redo A;
1138        }        }
1139      } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {      } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {
1140          ## XML5: "Tag attribute name before state".
1141    
1142        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
1143                    
1144          ## Stay in the state          ## Stay in the state
# Line 1117  sub _get_next_token ($) { Line 1251  sub _get_next_token ($) {
1251               0x003D => 1, # =               0x003D => 1, # =
1252              }->{$self->{nc}}) {              }->{$self->{nc}}) {
1253                        
1254              ## XML5: Not a parse error.
1255            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');
1256          } else {          } else {
1257                        
1258              ## XML5: ":" raises a parse error and is ignored.
1259          }          }
1260          $self->{ca}          $self->{ca}
1261              = {name => chr ($self->{nc}),              = {name => chr ($self->{nc}),
# Line 1140  sub _get_next_token ($) { Line 1276  sub _get_next_token ($) {
1276          redo A;          redo A;
1277        }        }
1278      } elsif ($self->{state} == ATTRIBUTE_NAME_STATE) {      } elsif ($self->{state} == ATTRIBUTE_NAME_STATE) {
1279          ## XML5: "Tag attribute name state".
1280    
1281        my $before_leave = sub {        my $before_leave = sub {
1282          if (exists $self->{ct}->{attributes} # start tag or end tag          if (exists $self->{ct}->{attributes} # start tag or end tag
1283              ->{$self->{ca}->{name}}) { # MUST              ->{$self->{ca}->{name}}) { # MUST
# Line 1150  sub _get_next_token ($) { Line 1288  sub _get_next_token ($) {
1288                        
1289            $self->{ct}->{attributes}->{$self->{ca}->{name}}            $self->{ct}->{attributes}->{$self->{ca}->{name}}
1290              = $self->{ca};              = $self->{ca};
1291              $self->{ca}->{index} = ++$self->{ct}->{last_index};
1292          }          }
1293        }; # $before_leave        }; # $before_leave
1294    
# Line 1186  sub _get_next_token ($) { Line 1325  sub _get_next_token ($) {
1325        
1326          redo A;          redo A;
1327        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
1328            if ($self->{is_xml}) {
1329              
1330              ## XML5: Not a parse error.
1331              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1332            } else {
1333              
1334            }
1335    
1336          $before_leave->();          $before_leave->();
1337          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1338                        
# Line 1235  sub _get_next_token ($) { Line 1382  sub _get_next_token ($) {
1382        
1383          redo A;          redo A;
1384        } elsif ($self->{nc} == 0x002F) { # /        } elsif ($self->{nc} == 0x002F) { # /
1385            if ($self->{is_xml}) {
1386              
1387              ## XML5: Not a parse error.
1388              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1389            } else {
1390              
1391            }
1392                    
1393          $before_leave->();          $before_leave->();
1394          $self->{state} = SELF_CLOSING_START_TAG_STATE;          $self->{state} = SELF_CLOSING_START_TAG_STATE;
# Line 1279  sub _get_next_token ($) { Line 1433  sub _get_next_token ($) {
1433          if ($self->{nc} == 0x0022 or # "          if ($self->{nc} == 0x0022 or # "
1434              $self->{nc} == 0x0027) { # '              $self->{nc} == 0x0027) { # '
1435                        
1436              ## XML5: Not a parse error.
1437            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');
1438          } else {          } else {
1439                        
# Line 1299  sub _get_next_token ($) { Line 1454  sub _get_next_token ($) {
1454          redo A;          redo A;
1455        }        }
1456      } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {      } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {
1457          ## XML5: "Tag attribute name after state".
1458          
1459        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
1460                    
1461          ## Stay in the state          ## Stay in the state
# Line 1330  sub _get_next_token ($) { Line 1487  sub _get_next_token ($) {
1487        
1488          redo A;          redo A;
1489        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
1490            if ($self->{is_xml}) {
1491              
1492              ## XML5: Not a parse error.
1493              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1494            } else {
1495              
1496            }
1497    
1498          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1499                        
1500            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
# Line 1383  sub _get_next_token ($) { Line 1548  sub _get_next_token ($) {
1548        
1549          redo A;          redo A;
1550        } elsif ($self->{nc} == 0x002F) { # /        } elsif ($self->{nc} == 0x002F) { # /
1551            if ($self->{is_xml}) {
1552              
1553              ## XML5: Not a parse error.
1554              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1555            } else {
1556              
1557            }
1558                    
1559          $self->{state} = SELF_CLOSING_START_TAG_STATE;          $self->{state} = SELF_CLOSING_START_TAG_STATE;
1560                    
# Line 1422  sub _get_next_token ($) { Line 1594  sub _get_next_token ($) {
1594    
1595          redo A;          redo A;
1596        } else {        } else {
1597            if ($self->{is_xml}) {
1598              
1599              ## XML5: Not a parse error.
1600              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1601            } else {
1602              
1603            }
1604    
1605          if ($self->{nc} == 0x0022 or # "          if ($self->{nc} == 0x0022 or # "
1606              $self->{nc} == 0x0027) { # '              $self->{nc} == 0x0027) { # '
1607                        
1608              ## XML5: Not a parse error.
1609            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');
1610          } else {          } else {
1611                        
# Line 1448  sub _get_next_token ($) { Line 1629  sub _get_next_token ($) {
1629          redo A;                  redo A;        
1630        }        }
1631      } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {      } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {
1632          ## XML5: "Tag attribute value before state".
1633    
1634        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
1635                    
1636          ## Stay in the state          ## Stay in the state
# Line 1557  sub _get_next_token ($) { Line 1740  sub _get_next_token ($) {
1740    
1741          redo A;          redo A;
1742        } else {        } else {
1743          if ($self->{nc} == 0x003D) { # =          if ($self->{nc} == 0x003D or $self->{nc} == 0x003C) { # =, <
1744                        
1745              ## XML5: Not a parse error.
1746            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');
1747            } elsif ($self->{is_xml}) {
1748              
1749              ## XML5: No parse error.
1750              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO
1751          } else {          } else {
1752                        
1753          }          }
# Line 1579  sub _get_next_token ($) { Line 1767  sub _get_next_token ($) {
1767          redo A;          redo A;
1768        }        }
1769      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1770          ## XML5: "Tag attribute value double quoted state" and "DOCTYPE
1771          ## ATTLIST attribute value double quoted state".
1772          
1773        if ($self->{nc} == 0x0022) { # "        if ($self->{nc} == 0x0022) { # "
1774                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1775          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;            
1776              ## XML5: "DOCTYPE ATTLIST name after state".
1777              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1778              $self->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE;
1779            } else {
1780              
1781              ## XML5: "Tag attribute name before state".
1782              $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1783            }
1784                    
1785      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1786        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1596  sub _get_next_token ($) { Line 1795  sub _get_next_token ($) {
1795          redo A;          redo A;
1796        } elsif ($self->{nc} == 0x0026) { # &        } elsif ($self->{nc} == 0x0026) { # &
1797                    
1798            ## XML5: Not defined yet.
1799    
1800          ## NOTE: In the spec, the tokenizer is switched to the          ## NOTE: In the spec, the tokenizer is switched to the
1801          ## "entity in attribute value state".  In this implementation, the          ## "entity in attribute value state".  In this implementation, the
1802          ## tokenizer is switched to the |ENTITY_STATE|, which is an          ## tokenizer is switched to the |ENTITY_STATE|, which is an
# Line 1615  sub _get_next_token ($) { Line 1816  sub _get_next_token ($) {
1816      }      }
1817        
1818          redo A;          redo A;
1819          } elsif ($self->{is_xml} and
1820                   $is_space->{$self->{nc}}) {
1821            
1822            $self->{ca}->{value} .= ' ';
1823            ## Stay in the state.
1824            
1825        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1826          $self->{line_prev} = $self->{line};
1827          $self->{column_prev} = $self->{column};
1828          $self->{column}++;
1829          $self->{nc}
1830              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
1831        } else {
1832          $self->{set_nc}->($self);
1833        }
1834      
1835            redo A;
1836        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
1837          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed attribute value');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed attribute value');
1838          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1839                        
1840            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
1841    
1842              $self->{state} = DATA_STATE;
1843              $self->{s_kwd} = '';
1844              ## reconsume
1845              return  ($self->{ct}); # start tag
1846              redo A;
1847          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1848            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1849            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1629  sub _get_next_token ($) { Line 1853  sub _get_next_token ($) {
1853              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
1854                            
1855            }            }
1856    
1857              $self->{state} = DATA_STATE;
1858              $self->{s_kwd} = '';
1859              ## reconsume
1860              return  ($self->{ct}); # end tag
1861              redo A;
1862            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1863              ## XML5: No parse error above; not defined yet.
1864              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1865              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1866              ## Reconsume.
1867              return  ($self->{ct}); # ATTLIST
1868              redo A;
1869          } else {          } else {
1870            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1871          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
1872        } else {        } else {
1873                    ## XML5 [ATTLIST]: Not defined yet.
1874            if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
1875              
1876              ## XML5: Not a parse error.
1877              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lt in attr value'); ## TODO: type
1878            } else {
1879              
1880            }
1881          $self->{ca}->{value} .= chr ($self->{nc});          $self->{ca}->{value} .= chr ($self->{nc});
1882          $self->{read_until}->($self->{ca}->{value},          $self->{read_until}->($self->{ca}->{value},
1883                                q["&],                                qq["&<\x09\x0C\x20],
1884                                length $self->{ca}->{value});                                length $self->{ca}->{value});
1885    
1886          ## Stay in the state          ## Stay in the state
# Line 1661  sub _get_next_token ($) { Line 1898  sub _get_next_token ($) {
1898          redo A;          redo A;
1899        }        }
1900      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1901          ## XML5: "Tag attribute value single quoted state" and "DOCTYPE
1902          ## ATTLIST attribute value single quoted state".
1903    
1904        if ($self->{nc} == 0x0027) { # '        if ($self->{nc} == 0x0027) { # '
1905                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1906          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;            
1907              ## XML5: "DOCTYPE ATTLIST name after state".
1908              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1909              $self->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE;
1910            } else {
1911              
1912              ## XML5: "Before attribute name state" (sic).
1913              $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1914            }
1915                    
1916      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1917        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1678  sub _get_next_token ($) { Line 1926  sub _get_next_token ($) {
1926          redo A;          redo A;
1927        } elsif ($self->{nc} == 0x0026) { # &        } elsif ($self->{nc} == 0x0026) { # &
1928                    
1929            ## XML5: Not defined yet.
1930    
1931          ## NOTE: In the spec, the tokenizer is switched to the          ## NOTE: In the spec, the tokenizer is switched to the
1932          ## "entity in attribute value state".  In this implementation, the          ## "entity in attribute value state".  In this implementation, the
1933          ## tokenizer is switched to the |ENTITY_STATE|, which is an          ## tokenizer is switched to the |ENTITY_STATE|, which is an
# Line 1697  sub _get_next_token ($) { Line 1947  sub _get_next_token ($) {
1947      }      }
1948        
1949          redo A;          redo A;
1950          } elsif ($self->{is_xml} and
1951                   $is_space->{$self->{nc}}) {
1952            
1953            $self->{ca}->{value} .= ' ';
1954            ## Stay in the state.
1955            
1956        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1957          $self->{line_prev} = $self->{line};
1958          $self->{column_prev} = $self->{column};
1959          $self->{column}++;
1960          $self->{nc}
1961              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
1962        } else {
1963          $self->{set_nc}->($self);
1964        }
1965      
1966            redo A;
1967        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
1968          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed attribute value');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed attribute value');
1969          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1970                        
1971            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
1972    
1973              $self->{state} = DATA_STATE;
1974              $self->{s_kwd} = '';
1975              ## reconsume
1976              return  ($self->{ct}); # start tag
1977              redo A;
1978          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1979            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1980            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1711  sub _get_next_token ($) { Line 1984  sub _get_next_token ($) {
1984              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
1985                            
1986            }            }
1987    
1988              $self->{state} = DATA_STATE;
1989              $self->{s_kwd} = '';
1990              ## reconsume
1991              return  ($self->{ct}); # end tag
1992              redo A;
1993            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1994              ## XML5: No parse error above; not defined yet.
1995              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1996              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1997              ## Reconsume.
1998              return  ($self->{ct}); # ATTLIST
1999              redo A;
2000          } else {          } else {
2001            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
2002          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
2003        } else {        } else {
2004                    ## XML5 [ATTLIST]: Not defined yet.
2005            if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
2006              
2007              ## XML5: Not a parse error.
2008              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lt in attr value'); ## TODO: type
2009            } else {
2010              
2011            }
2012          $self->{ca}->{value} .= chr ($self->{nc});          $self->{ca}->{value} .= chr ($self->{nc});
2013          $self->{read_until}->($self->{ca}->{value},          $self->{read_until}->($self->{ca}->{value},
2014                                q['&],                                qq['&<\x09\x0C\x20],
2015                                length $self->{ca}->{value});                                length $self->{ca}->{value});
2016    
2017          ## Stay in the state          ## Stay in the state
# Line 1743  sub _get_next_token ($) { Line 2029  sub _get_next_token ($) {
2029          redo A;          redo A;
2030        }        }
2031      } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {
2032          ## XML5: "Tag attribute value unquoted state".
2033    
2034        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
2035                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
2036          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;            
2037              push @{$self->{ct}->{attrdefs}}, $self->{ca};
2038              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
2039            } else {
2040              
2041              ## XML5: "Tag attribute name before state".
2042              $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
2043            }
2044                    
2045      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2046        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1760  sub _get_next_token ($) { Line 2055  sub _get_next_token ($) {
2055          redo A;          redo A;
2056        } elsif ($self->{nc} == 0x0026) { # &        } elsif ($self->{nc} == 0x0026) { # &
2057                    
2058    
2059            ## XML5: Not defined yet.
2060    
2061          ## NOTE: In the spec, the tokenizer is switched to the          ## NOTE: In the spec, the tokenizer is switched to the
2062          ## "entity in attribute value state".  In this implementation, the          ## "entity in attribute value state".  In this implementation, the
2063          ## tokenizer is switched to the |ENTITY_STATE|, which is an          ## tokenizer is switched to the |ENTITY_STATE|, which is an
# Line 1783  sub _get_next_token ($) { Line 2081  sub _get_next_token ($) {
2081          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
2082                        
2083            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
2084    
2085              $self->{state} = DATA_STATE;
2086              $self->{s_kwd} = '';
2087              
2088        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2089          $self->{line_prev} = $self->{line};
2090          $self->{column_prev} = $self->{column};
2091          $self->{column}++;
2092          $self->{nc}
2093              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
2094        } else {
2095          $self->{set_nc}->($self);
2096        }
2097      
2098              return  ($self->{ct}); # start tag
2099              redo A;
2100          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
2101            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
2102            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1792  sub _get_next_token ($) { Line 2106  sub _get_next_token ($) {
2106              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
2107                            
2108            }            }
2109          } else {  
2110            die "$0: $self->{ct}->{type}: Unknown token type";            $self->{state} = DATA_STATE;
2111          }            $self->{s_kwd} = '';
2112          $self->{state} = DATA_STATE;            
         $self->{s_kwd} = '';  
           
2113      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2114        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
2115        $self->{column_prev} = $self->{column};        $self->{column_prev} = $self->{column};
# Line 1808  sub _get_next_token ($) { Line 2120  sub _get_next_token ($) {
2120        $self->{set_nc}->($self);        $self->{set_nc}->($self);
2121      }      }
2122        
2123              return  ($self->{ct}); # end tag
2124          return  ($self->{ct}); # start tag or end tag            redo A;
2125            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
2126          redo A;            push @{$self->{ct}->{attrdefs}}, $self->{ca};
2127              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2128              
2129        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2130          $self->{line_prev} = $self->{line};
2131          $self->{column_prev} = $self->{column};
2132          $self->{column}++;
2133          $self->{nc}
2134              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
2135        } else {
2136          $self->{set_nc}->($self);
2137        }
2138      
2139              return  ($self->{ct}); # ATTLIST
2140              redo A;
2141            } else {
2142              die "$0: $self->{ct}->{type}: Unknown token type";
2143            }
2144        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');  
2145          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
2146                        
2147              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');
2148            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
2149    
2150              $self->{state} = DATA_STATE;
2151              $self->{s_kwd} = '';
2152              ## reconsume
2153              return  ($self->{ct}); # start tag
2154              redo A;
2155          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
2156              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');
2157            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
2158            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
2159                            
# Line 1826  sub _get_next_token ($) { Line 2162  sub _get_next_token ($) {
2162              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
2163                            
2164            }            }
2165    
2166              $self->{state} = DATA_STATE;
2167              $self->{s_kwd} = '';
2168              ## reconsume
2169              return  ($self->{ct}); # end tag
2170              redo A;
2171            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
2172              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
2173              push @{$self->{ct}->{attrdefs}}, $self->{ca};
2174              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2175              ## Reconsume.
2176              return  ($self->{ct}); # ATTLIST
2177              redo A;
2178          } else {          } else {
2179            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
2180          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
2181        } else {        } else {
2182          if ({          if ({
2183               0x0022 => 1, # "               0x0022 => 1, # "
2184               0x0027 => 1, # '               0x0027 => 1, # '
2185               0x003D => 1, # =               0x003D => 1, # =
2186                 0x003C => 1, # <
2187              }->{$self->{nc}}) {              }->{$self->{nc}}) {
2188                        
2189              ## XML5: Not a parse error.
2190            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');
2191          } else {          } else {
2192                        
2193          }          }
2194          $self->{ca}->{value} .= chr ($self->{nc});          $self->{ca}->{value} .= chr ($self->{nc});
2195          $self->{read_until}->($self->{ca}->{value},          $self->{read_until}->($self->{ca}->{value},
2196                                q["'=& >],                                qq["'=& \x09\x0C>],
2197                                length $self->{ca}->{value});                                length $self->{ca}->{value});
2198    
2199          ## Stay in the state          ## Stay in the state
# Line 1959  sub _get_next_token ($) { Line 2303  sub _get_next_token ($) {
2303          redo A;          redo A;
2304        }        }
2305      } elsif ($self->{state} == SELF_CLOSING_START_TAG_STATE) {      } elsif ($self->{state} == SELF_CLOSING_START_TAG_STATE) {
2306          ## XML5: "Empty tag state".
2307    
2308        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2309          if ($self->{ct}->{type} == END_TAG_TOKEN) {          if ($self->{ct}->{type} == END_TAG_TOKEN) {
2310                        
# Line 2010  sub _get_next_token ($) { Line 2356  sub _get_next_token ($) {
2356          } else {          } else {
2357            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
2358          }          }
2359            ## XML5: "Tag attribute name before state".
2360          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2361          $self->{s_kwd} = '';          $self->{s_kwd} = '';
2362          ## Reconsume.          ## Reconsume.
# Line 2024  sub _get_next_token ($) { Line 2371  sub _get_next_token ($) {
2371          redo A;          redo A;
2372        }        }
2373      } elsif ($self->{state} == BOGUS_COMMENT_STATE) {      } elsif ($self->{state} == BOGUS_COMMENT_STATE) {
2374        ## (only happen if PCDATA state)        ## XML5: "Bogus comment state" and "DOCTYPE bogus comment state".
2375    
2376        ## NOTE: Unlike spec's "bogus comment state", this implementation        ## NOTE: Unlike spec's "bogus comment state", this implementation
2377        ## consumes characters one-by-one basis.        ## consumes characters one-by-one basis.
2378                
2379        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2380                    if ($self->{in_subset}) {
2381          $self->{state} = DATA_STATE;            
2382          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2383            } else {
2384              
2385              $self->{state} = DATA_STATE;
2386              $self->{s_kwd} = '';
2387            }
2388                    
2389      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2390        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2048  sub _get_next_token ($) { Line 2400  sub _get_next_token ($) {
2400          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
2401          redo A;          redo A;
2402        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
2403                    if ($self->{in_subset}) {
2404          $self->{state} = DATA_STATE;            
2405          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2406            } else {
2407              
2408              $self->{state} = DATA_STATE;
2409              $self->{s_kwd} = '';
2410            }
2411          ## reconsume          ## reconsume
2412    
2413          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2077  sub _get_next_token ($) { Line 2434  sub _get_next_token ($) {
2434          redo A;          redo A;
2435        }        }
2436      } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {      } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {
2437        ## (only happen if PCDATA state)        ## XML5: "Markup declaration state".
2438                
2439        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2440                    
# Line 2099  sub _get_next_token ($) { Line 2456  sub _get_next_token ($) {
2456          ## ASCII case-insensitive.          ## ASCII case-insensitive.
2457                    
2458          $self->{state} = MD_DOCTYPE_STATE;          $self->{state} = MD_DOCTYPE_STATE;
2459          $self->{s_kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
2460                    
2461      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2462        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2118  sub _get_next_token ($) { Line 2475  sub _get_next_token ($) {
2475                 $self->{nc} == 0x005B) { # [                 $self->{nc} == 0x005B) { # [
2476                                                    
2477          $self->{state} = MD_CDATA_STATE;          $self->{state} = MD_CDATA_STATE;
2478          $self->{s_kwd} = '[';          $self->{kwd} = '[';
2479                    
2480      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2481        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2152  sub _get_next_token ($) { Line 2509  sub _get_next_token ($) {
2509                                    line => $self->{line_prev},                                    line => $self->{line_prev},
2510                                    column => $self->{column_prev} - 2,                                    column => $self->{column_prev} - 2,
2511                                   };                                   };
2512          $self->{state} = COMMENT_START_STATE;          $self->{state} = COMMENT_START_STATE; ## XML5: "comment state".
2513                    
2514      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2515        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2188  sub _get_next_token ($) { Line 2545  sub _get_next_token ($) {
2545              0x0054, # T              0x0054, # T
2546              0x0059, # Y              0x0059, # Y
2547              0x0050, # P              0x0050, # P
2548            ]->[length $self->{s_kwd}] or            ]->[length $self->{kwd}] or
2549            $self->{nc} == [            $self->{nc} == [
2550              undef,              undef,
2551              0x006F, # o              0x006F, # o
# Line 2196  sub _get_next_token ($) { Line 2553  sub _get_next_token ($) {
2553              0x0074, # t              0x0074, # t
2554              0x0079, # y              0x0079, # y
2555              0x0070, # p              0x0070, # p
2556            ]->[length $self->{s_kwd}]) {            ]->[length $self->{kwd}]) {
2557                    
2558          ## Stay in the state.          ## Stay in the state.
2559          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
2560                    
2561      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2562        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2212  sub _get_next_token ($) { Line 2569  sub _get_next_token ($) {
2569      }      }
2570        
2571          redo A;          redo A;
2572        } elsif ((length $self->{s_kwd}) == 6 and        } elsif ((length $self->{kwd}) == 6 and
2573                 ($self->{nc} == 0x0045 or # E                 ($self->{nc} == 0x0045 or # E
2574                  $self->{nc} == 0x0065)) { # e                  $self->{nc} == 0x0065)) { # e
2575                    if ($self->{is_xml} and
2576                ($self->{kwd} ne 'DOCTYP' or $self->{nc} == 0x0065)) {
2577              
2578              ## XML5: case-sensitive.
2579              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO
2580                              text => 'DOCTYPE',
2581                              line => $self->{line_prev},
2582                              column => $self->{column_prev} - 5);
2583            } else {
2584              
2585            }
2586          $self->{state} = DOCTYPE_STATE;          $self->{state} = DOCTYPE_STATE;
2587          $self->{ct} = {type => DOCTYPE_TOKEN,          $self->{ct} = {type => DOCTYPE_TOKEN,
2588                                    quirks => 1,                                    quirks => 1,
# Line 2238  sub _get_next_token ($) { Line 2605  sub _get_next_token ($) {
2605                                    
2606          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
2607                          line => $self->{line_prev},                          line => $self->{line_prev},
2608                          column => $self->{column_prev} - 1 - length $self->{s_kwd});                          column => $self->{column_prev} - 1 - length $self->{kwd});
2609          $self->{state} = BOGUS_COMMENT_STATE;          $self->{state} = BOGUS_COMMENT_STATE;
2610          ## Reconsume.          ## Reconsume.
2611          $self->{ct} = {type => COMMENT_TOKEN,          $self->{ct} = {type => COMMENT_TOKEN,
2612                                    data => $self->{s_kwd},                                    data => $self->{kwd},
2613                                    line => $self->{line_prev},                                    line => $self->{line_prev},
2614                                    column => $self->{column_prev} - 1 - length $self->{s_kwd},                                    column => $self->{column_prev} - 1 - length $self->{kwd},
2615                                   };                                   };
2616          redo A;          redo A;
2617        }        }
# Line 2255  sub _get_next_token ($) { Line 2622  sub _get_next_token ($) {
2622              '[CD' => 0x0041, # A              '[CD' => 0x0041, # A
2623              '[CDA' => 0x0054, # T              '[CDA' => 0x0054, # T
2624              '[CDAT' => 0x0041, # A              '[CDAT' => 0x0041, # A
2625            }->{$self->{s_kwd}}) {            }->{$self->{kwd}}) {
2626                    
2627          ## Stay in the state.          ## Stay in the state.
2628          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
2629                    
2630      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2631        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2271  sub _get_next_token ($) { Line 2638  sub _get_next_token ($) {
2638      }      }
2639        
2640          redo A;          redo A;
2641        } elsif ($self->{s_kwd} eq '[CDATA' and        } elsif ($self->{kwd} eq '[CDATA' and
2642                 $self->{nc} == 0x005B) { # [                 $self->{nc} == 0x005B) { # [
2643          if ($self->{is_xml} and          if ($self->{is_xml} and
2644              not $self->{tainted} and              not $self->{tainted} and
# Line 2306  sub _get_next_token ($) { Line 2673  sub _get_next_token ($) {
2673                    
2674          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
2675                          line => $self->{line_prev},                          line => $self->{line_prev},
2676                          column => $self->{column_prev} - 1 - length $self->{s_kwd});                          column => $self->{column_prev} - 1 - length $self->{kwd});
2677          $self->{state} = BOGUS_COMMENT_STATE;          $self->{state} = BOGUS_COMMENT_STATE;
2678          ## Reconsume.          ## Reconsume.
2679          $self->{ct} = {type => COMMENT_TOKEN,          $self->{ct} = {type => COMMENT_TOKEN,
2680                                    data => $self->{s_kwd},                                    data => $self->{kwd},
2681                                    line => $self->{line_prev},                                    line => $self->{line_prev},
2682                                    column => $self->{column_prev} - 1 - length $self->{s_kwd},                                    column => $self->{column_prev} - 1 - length $self->{kwd},
2683                                   };                                   };
2684          redo A;          redo A;
2685        }        }
# Line 2333  sub _get_next_token ($) { Line 2700  sub _get_next_token ($) {
2700        
2701          redo A;          redo A;
2702        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
2703          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');
2704          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2705          $self->{s_kwd} = '';            
2706              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2707            } else {
2708              
2709              $self->{state} = DATA_STATE;
2710              $self->{s_kwd} = '';
2711            }
2712                    
2713      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2714        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2353  sub _get_next_token ($) { Line 2725  sub _get_next_token ($) {
2725    
2726          redo A;          redo A;
2727        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2728          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2729          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2730          $self->{s_kwd} = '';            
2731              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2732            } else {
2733              
2734              $self->{state} = DATA_STATE;
2735              $self->{s_kwd} = '';
2736            }
2737          ## reconsume          ## reconsume
2738    
2739          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2397  sub _get_next_token ($) { Line 2774  sub _get_next_token ($) {
2774        
2775          redo A;          redo A;
2776        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
2777          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');
2778          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2779          $self->{s_kwd} = '';            
2780              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2781            } else {
2782              
2783              $self->{state} = DATA_STATE;
2784              $self->{s_kwd} = '';
2785            }
2786                    
2787      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2788        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2417  sub _get_next_token ($) { Line 2799  sub _get_next_token ($) {
2799    
2800          redo A;          redo A;
2801        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2802          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2803          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2804          $self->{s_kwd} = '';            
2805              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2806            } else {
2807              
2808              $self->{state} = DATA_STATE;
2809              $self->{s_kwd} = '';
2810            }
2811          ## reconsume          ## reconsume
2812    
2813          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2445  sub _get_next_token ($) { Line 2832  sub _get_next_token ($) {
2832          redo A;          redo A;
2833        }        }
2834      } elsif ($self->{state} == COMMENT_STATE) {      } elsif ($self->{state} == COMMENT_STATE) {
2835          ## XML5: "Comment state" and "DOCTYPE comment state".
2836    
2837        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2838                    
2839          $self->{state} = COMMENT_END_DASH_STATE;          $self->{state} = COMMENT_END_DASH_STATE;
# Line 2461  sub _get_next_token ($) { Line 2850  sub _get_next_token ($) {
2850        
2851          redo A;          redo A;
2852        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2853          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2854          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2855          $self->{s_kwd} = '';            
2856              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2857            } else {
2858              
2859              $self->{state} = DATA_STATE;
2860              $self->{s_kwd} = '';
2861            }
2862          ## reconsume          ## reconsume
2863    
2864          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2492  sub _get_next_token ($) { Line 2886  sub _get_next_token ($) {
2886          redo A;          redo A;
2887        }        }
2888      } elsif ($self->{state} == COMMENT_END_DASH_STATE) {      } elsif ($self->{state} == COMMENT_END_DASH_STATE) {
2889          ## XML5: "Comment dash state" and "DOCTYPE comment dash state".
2890    
2891        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2892                    
2893          $self->{state} = COMMENT_END_STATE;          $self->{state} = COMMENT_END_STATE;
# Line 2508  sub _get_next_token ($) { Line 2904  sub _get_next_token ($) {
2904        
2905          redo A;          redo A;
2906        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2907          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2908          $self->{s_kwd} = '';          if ($self->{in_subset}) {
2909          $self->{state} = DATA_STATE;            
2910          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2911            } else {
2912              
2913              $self->{state} = DATA_STATE;
2914              $self->{s_kwd} = '';
2915            }
2916          ## reconsume          ## reconsume
2917    
2918          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2536  sub _get_next_token ($) { Line 2936  sub _get_next_token ($) {
2936          redo A;          redo A;
2937        }        }
2938      } elsif ($self->{state} == COMMENT_END_STATE) {      } elsif ($self->{state} == COMMENT_END_STATE) {
2939          ## XML5: "Comment end state" and "DOCTYPE comment end state".
2940    
2941        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2942                    if ($self->{in_subset}) {
2943          $self->{state} = DATA_STATE;            
2944          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2945            } else {
2946              
2947              $self->{state} = DATA_STATE;
2948              $self->{s_kwd} = '';
2949            }
2950                    
2951      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2952        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2557  sub _get_next_token ($) { Line 2964  sub _get_next_token ($) {
2964          redo A;          redo A;
2965        } elsif ($self->{nc} == 0x002D) { # -        } elsif ($self->{nc} == 0x002D) { # -
2966                    
2967            ## XML5: Not a parse error.
2968          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',
2969                          line => $self->{line_prev},                          line => $self->{line_prev},
2970                          column => $self->{column_prev});                          column => $self->{column_prev});
# Line 2575  sub _get_next_token ($) { Line 2983  sub _get_next_token ($) {
2983        
2984          redo A;          redo A;
2985        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2986          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2987          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2988          $self->{s_kwd} = '';            
2989              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2990            } else {
2991              
2992              $self->{state} = DATA_STATE;
2993              $self->{s_kwd} = '';
2994            }
2995          ## reconsume          ## reconsume
2996    
2997          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2586  sub _get_next_token ($) { Line 2999  sub _get_next_token ($) {
2999          redo A;          redo A;
3000        } else {        } else {
3001                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',  
                         line => $self->{line_prev},  
                         column => $self->{column_prev});  
3002          $self->{ct}->{data} .= '--' . chr ($self->{nc}); # comment          $self->{ct}->{data} .= '--' . chr ($self->{nc}); # comment
3003          $self->{state} = COMMENT_STATE;          $self->{state} = COMMENT_STATE;
3004                    
# Line 2620  sub _get_next_token ($) { Line 3030  sub _get_next_token ($) {
3030      }      }
3031        
3032          redo A;          redo A;
3033          } elsif ($self->{nc} == -1) {
3034            
3035            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3036            $self->{ct}->{quirks} = 1;
3037    
3038            $self->{state} = DATA_STATE;
3039            ## Reconsume.
3040            return  ($self->{ct}); # DOCTYPE (quirks)
3041    
3042            redo A;
3043        } else {        } else {
3044                    
3045            ## XML5: Switch to the bogus comment state.
3046          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before DOCTYPE name');
3047          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
3048          ## reconsume          ## reconsume
3049          redo A;          redo A;
3050        }        }
3051      } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {
3052          ## XML5: "DOCTYPE root name before state".
3053    
3054        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
3055                    
3056          ## Stay in the state          ## Stay in the state
# Line 2645  sub _get_next_token ($) { Line 3068  sub _get_next_token ($) {
3068          redo A;          redo A;
3069        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3070                    
3071            ## XML5: No parse error.
3072          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');
3073          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3074          $self->{s_kwd} = '';          $self->{s_kwd} = '';
# Line 2673  sub _get_next_token ($) { Line 3097  sub _get_next_token ($) {
3097          return  ($self->{ct}); # DOCTYPE (quirks)          return  ($self->{ct}); # DOCTYPE (quirks)
3098    
3099          redo A;          redo A;
3100          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
3101            
3102            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');
3103            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3104            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3105            $self->{in_subset} = 1;
3106            
3107        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3108          $self->{line_prev} = $self->{line};
3109          $self->{column_prev} = $self->{column};
3110          $self->{column}++;
3111          $self->{nc}
3112              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3113        } else {
3114          $self->{set_nc}->($self);
3115        }
3116      
3117            return  ($self->{ct}); # DOCTYPE
3118            redo A;
3119        } else {        } else {
3120                    
3121          $self->{ct}->{name} = chr $self->{nc};          $self->{ct}->{name} = chr $self->{nc};
# Line 2692  sub _get_next_token ($) { Line 3135  sub _get_next_token ($) {
3135          redo A;          redo A;
3136        }        }
3137      } elsif ($self->{state} == DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == DOCTYPE_NAME_STATE) {
3138  ## ISSUE: Redundant "First," in the spec.        ## XML5: "DOCTYPE root name state".
3139    
3140          ## ISSUE: Redundant "First," in the spec.
3141    
3142        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
3143                    
3144          $self->{state} = AFTER_DOCTYPE_NAME_STATE;          $self->{state} = AFTER_DOCTYPE_NAME_STATE;
# Line 2738  sub _get_next_token ($) { Line 3184  sub _get_next_token ($) {
3184          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
3185    
3186          redo A;          redo A;
3187          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
3188            
3189            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3190            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3191            $self->{in_subset} = 1;
3192            
3193        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3194          $self->{line_prev} = $self->{line};
3195          $self->{column_prev} = $self->{column};
3196          $self->{column}++;
3197          $self->{nc}
3198              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3199        } else {
3200          $self->{set_nc}->($self);
3201        }
3202      
3203            return  ($self->{ct}); # DOCTYPE
3204            redo A;
3205        } else {        } else {
3206                    
3207          $self->{ct}->{name}          $self->{ct}->{name}
# Line 2757  sub _get_next_token ($) { Line 3221  sub _get_next_token ($) {
3221          redo A;          redo A;
3222        }        }
3223      } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {
3224          ## XML5: Corresponding to XML5's "DOCTYPE root name after
3225          ## state", but implemented differently.
3226    
3227        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
3228                    
3229          ## Stay in the state          ## Stay in the state
# Line 2773  sub _get_next_token ($) { Line 3240  sub _get_next_token ($) {
3240        
3241          redo A;          redo A;
3242        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3243            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3244              
3245              $self->{state} = DATA_STATE;
3246              $self->{s_kwd} = '';
3247            } else {
3248              
3249              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md def'); ## TODO: type
3250              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3251            }
3252                    
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3253                    
3254      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3255        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2787  sub _get_next_token ($) { Line 3261  sub _get_next_token ($) {
3261        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3262      }      }
3263        
3264            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         return  ($self->{ct}); # DOCTYPE  
   
3265          redo A;          redo A;
3266        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3267            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3268              
3269              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3270              $self->{state} = DATA_STATE;
3271              $self->{s_kwd} = '';
3272              $self->{ct}->{quirks} = 1;
3273            } else {
3274              
3275              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3276              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3277            }
3278                    
3279          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          ## Reconsume.
3280          $self->{state} = DATA_STATE;          return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{s_kwd} = '';  
         ## reconsume  
   
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3281          redo A;          redo A;
3282        } elsif ($self->{nc} == 0x0050 or # P        } elsif ($self->{nc} == 0x0050 or # P
3283                 $self->{nc} == 0x0070) { # p                 $self->{nc} == 0x0070) { # p
3284            
3285          $self->{state} = PUBLIC_STATE;          $self->{state} = PUBLIC_STATE;
3286          $self->{s_kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
3287                    
3288      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3289        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2820  sub _get_next_token ($) { Line 3298  sub _get_next_token ($) {
3298          redo A;          redo A;
3299        } elsif ($self->{nc} == 0x0053 or # S        } elsif ($self->{nc} == 0x0053 or # S
3300                 $self->{nc} == 0x0073) { # s                 $self->{nc} == 0x0073) { # s
3301            
3302          $self->{state} = SYSTEM_STATE;          $self->{state} = SYSTEM_STATE;
3303          $self->{s_kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
3304                    
3305      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3306        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2834  sub _get_next_token ($) { Line 3313  sub _get_next_token ($) {
3313      }      }
3314        
3315          redo A;          redo A;
3316        } else {        } elsif ($self->{nc} == 0x0022 and # "
3317                   ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN or
3318                    $self->{ct}->{type} == PARAMETER_ENTITY_TOKEN)) {
3319                    
3320          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name');          $self->{state} = DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE;
3321          $self->{ct}->{quirks} = 1;          $self->{ct}->{value} = ''; # ENTITY
3322            
3323        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3324          $self->{line_prev} = $self->{line};
3325          $self->{column_prev} = $self->{column};
3326          $self->{column}++;
3327          $self->{nc}
3328              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3329        } else {
3330          $self->{set_nc}->($self);
3331        }
3332      
3333            redo A;
3334          } elsif ($self->{nc} == 0x0027 and # '
3335                   ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN or
3336                    $self->{ct}->{type} == PARAMETER_ENTITY_TOKEN)) {
3337            
3338            $self->{state} = DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE;
3339            $self->{ct}->{value} = ''; # ENTITY
3340            
3341        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3342          $self->{line_prev} = $self->{line};
3343          $self->{column_prev} = $self->{column};
3344          $self->{column}++;
3345          $self->{nc}
3346              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3347        } else {
3348          $self->{set_nc}->($self);
3349        }
3350      
3351            redo A;
3352          } elsif ($self->{is_xml} and
3353                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3354                   $self->{nc} == 0x005B) { # [
3355            
3356            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3357            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3358            $self->{in_subset} = 1;
3359            
3360        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3361          $self->{line_prev} = $self->{line};
3362          $self->{column_prev} = $self->{column};
3363          $self->{column}++;
3364          $self->{nc}
3365              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3366        } else {
3367          $self->{set_nc}->($self);
3368        }
3369      
3370            return  ($self->{ct}); # DOCTYPE
3371            redo A;
3372          } else {
3373            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name'); ## TODO: type
3374    
3375            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3376              
3377              $self->{ct}->{quirks} = 1;
3378              $self->{state} = BOGUS_DOCTYPE_STATE;
3379            } else {
3380              
3381              $self->{state} = BOGUS_MD_STATE;
3382            }
3383    
         $self->{state} = BOGUS_DOCTYPE_STATE;  
3384                    
3385      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3386        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2861  sub _get_next_token ($) { Line 3402  sub _get_next_token ($) {
3402              0x0042, # B              0x0042, # B
3403              0x004C, # L              0x004C, # L
3404              0x0049, # I              0x0049, # I
3405            ]->[length $self->{s_kwd}] or            ]->[length $self->{kwd}] or
3406            $self->{nc} == [            $self->{nc} == [
3407              undef,              undef,
3408              0x0075, # u              0x0075, # u
3409              0x0062, # b              0x0062, # b
3410              0x006C, # l              0x006C, # l
3411              0x0069, # i              0x0069, # i
3412            ]->[length $self->{s_kwd}]) {            ]->[length $self->{kwd}]) {
3413                    
3414          ## Stay in the state.          ## Stay in the state.
3415          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
3416                    
3417      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3418        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2884  sub _get_next_token ($) { Line 3425  sub _get_next_token ($) {
3425      }      }
3426        
3427          redo A;          redo A;
3428        } elsif ((length $self->{s_kwd}) == 5 and        } elsif ((length $self->{kwd}) == 5 and
3429                 ($self->{nc} == 0x0043 or # C                 ($self->{nc} == 0x0043 or # C
3430                  $self->{nc} == 0x0063)) { # c                  $self->{nc} == 0x0063)) { # c
3431                    if ($self->{is_xml} and
3432                ($self->{kwd} ne 'PUBLI' or $self->{nc} == 0x0063)) { # c
3433              
3434              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
3435                              text => 'PUBLIC',
3436                              line => $self->{line_prev},
3437                              column => $self->{column_prev} - 4);
3438            } else {
3439              
3440            }
3441          $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;          $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
3442                    
3443      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 2902  sub _get_next_token ($) { Line 3452  sub _get_next_token ($) {
3452        
3453          redo A;          redo A;
3454        } else {        } else {
3455                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name', ## TODO: type
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name',  
3456                          line => $self->{line_prev},                          line => $self->{line_prev},
3457                          column => $self->{column_prev} + 1 - length $self->{s_kwd});                          column => $self->{column_prev} + 1 - length $self->{kwd});
3458          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3459              
3460          $self->{state} = BOGUS_DOCTYPE_STATE;            $self->{ct}->{quirks} = 1;
3461              $self->{state} = BOGUS_DOCTYPE_STATE;
3462            } else {
3463              
3464              $self->{state} = BOGUS_MD_STATE;
3465            }
3466          ## Reconsume.          ## Reconsume.
3467          redo A;          redo A;
3468        }        }
# Line 2920  sub _get_next_token ($) { Line 3474  sub _get_next_token ($) {
3474              0x0053, # S              0x0053, # S
3475              0x0054, # T              0x0054, # T
3476              0x0045, # E              0x0045, # E
3477            ]->[length $self->{s_kwd}] or            ]->[length $self->{kwd}] or
3478            $self->{nc} == [            $self->{nc} == [
3479              undef,              undef,
3480              0x0079, # y              0x0079, # y
3481              0x0073, # s              0x0073, # s
3482              0x0074, # t              0x0074, # t
3483              0x0065, # e              0x0065, # e
3484            ]->[length $self->{s_kwd}]) {            ]->[length $self->{kwd}]) {
3485                    
3486          ## Stay in the state.          ## Stay in the state.
3487          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
3488                    
3489      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3490        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2943  sub _get_next_token ($) { Line 3497  sub _get_next_token ($) {
3497      }      }
3498        
3499          redo A;          redo A;
3500        } elsif ((length $self->{s_kwd}) == 5 and        } elsif ((length $self->{kwd}) == 5 and
3501                 ($self->{nc} == 0x004D or # M                 ($self->{nc} == 0x004D or # M
3502                  $self->{nc} == 0x006D)) { # m                  $self->{nc} == 0x006D)) { # m
3503                    if ($self->{is_xml} and
3504                ($self->{kwd} ne 'SYSTE' or $self->{nc} == 0x006D)) { # m
3505              
3506              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
3507                              text => 'SYSTEM',
3508                              line => $self->{line_prev},
3509                              column => $self->{column_prev} - 4);
3510            } else {
3511              
3512            }
3513          $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;          $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
3514                    
3515      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 2961  sub _get_next_token ($) { Line 3524  sub _get_next_token ($) {
3524        
3525          redo A;          redo A;
3526        } else {        } else {
3527                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name', ## TODO: type
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name',  
3528                          line => $self->{line_prev},                          line => $self->{line_prev},
3529                          column => $self->{column_prev} + 1 - length $self->{s_kwd});                          column => $self->{column_prev} + 1 - length $self->{kwd});
3530          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3531              
3532          $self->{state} = BOGUS_DOCTYPE_STATE;            $self->{ct}->{quirks} = 1;
3533              $self->{state} = BOGUS_DOCTYPE_STATE;
3534            } else {
3535              
3536              $self->{state} = BOGUS_MD_STATE;
3537            }
3538          ## Reconsume.          ## Reconsume.
3539          redo A;          redo A;
3540        }        }
# Line 3020  sub _get_next_token ($) { Line 3587  sub _get_next_token ($) {
3587        
3588          redo A;          redo A;
3589        } elsif ($self->{nc} eq 0x003E) { # >        } elsif ($self->{nc} eq 0x003E) { # >
           
3590          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');
3591            
3592          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3593          $self->{s_kwd} = '';            
3594              $self->{state} = DATA_STATE;
3595              $self->{s_kwd} = '';
3596              $self->{ct}->{quirks} = 1;
3597            } else {
3598              
3599              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3600            }
3601            
3602                    
3603      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3604        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3036  sub _get_next_token ($) { Line 3610  sub _get_next_token ($) {
3610        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3611      }      }
3612        
3613            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3614          redo A;          redo A;
3615        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3616            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3617              
3618              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3619              $self->{state} = DATA_STATE;
3620              $self->{s_kwd} = '';
3621              $self->{ct}->{quirks} = 1;
3622            } else {
3623              
3624              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3625              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3626            }
3627                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');  
   
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3628          ## reconsume          ## reconsume
   
         $self->{ct}->{quirks} = 1;  
3629          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
3630          redo A;          redo A;
3631        } else {        } elsif ($self->{is_xml} and
3632                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3633                   $self->{nc} == 0x005B) { # [
3634                    
3635            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');
3636            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3637            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3638            $self->{in_subset} = 1;
3639            
3640        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3641          $self->{line_prev} = $self->{line};
3642          $self->{column_prev} = $self->{column};
3643          $self->{column}++;
3644          $self->{nc}
3645              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3646        } else {
3647          $self->{set_nc}->($self);
3648        }
3649      
3650            return  ($self->{ct}); # DOCTYPE
3651            redo A;
3652          } else {
3653          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC');
         $self->{ct}->{quirks} = 1;  
3654    
3655          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3656              
3657              $self->{ct}->{quirks} = 1;
3658              $self->{state} = BOGUS_DOCTYPE_STATE;
3659            } else {
3660              
3661              $self->{state} = BOGUS_MD_STATE;
3662            }
3663    
3664                    
3665      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3666        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3089  sub _get_next_token ($) { Line 3691  sub _get_next_token ($) {
3691        
3692          redo A;          redo A;
3693        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
3694          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3695    
3696          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3697          $self->{s_kwd} = '';            
3698              $self->{state} = DATA_STATE;
3699              $self->{s_kwd} = '';
3700              $self->{ct}->{quirks} = 1;
3701            } else {
3702              
3703              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3704            }
3705    
3706                    
3707      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3708        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3105  sub _get_next_token ($) { Line 3714  sub _get_next_token ($) {
3714        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3715      }      }
3716        
3717            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3718          redo A;          redo A;
3719        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
3720          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3721    
3722          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3723          $self->{s_kwd} = '';            
3724          ## reconsume            $self->{state} = DATA_STATE;
3725              $self->{s_kwd} = '';
3726          $self->{ct}->{quirks} = 1;            $self->{ct}->{quirks} = 1;
3727            } else {
3728              
3729              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3730            }
3731            
3732            ## Reconsume.
3733          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
3734          redo A;          redo A;
3735        } else {        } else {
3736                    
3737          $self->{ct}->{pubid} # DOCTYPE          $self->{ct}->{pubid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
3738          $self->{read_until}->($self->{ct}->{pubid}, q[">],          $self->{read_until}->($self->{ct}->{pubid}, q[">],
3739                                length $self->{ct}->{pubid});                                length $self->{ct}->{pubid});
3740    
# Line 3160  sub _get_next_token ($) { Line 3769  sub _get_next_token ($) {
3769        
3770          redo A;          redo A;
3771        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
3772          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3773    
3774          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3775          $self->{s_kwd} = '';            
3776              $self->{state} = DATA_STATE;
3777              $self->{s_kwd} = '';
3778              $self->{ct}->{quirks} = 1;
3779            } else {
3780              
3781              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3782            }
3783    
3784                    
3785      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3786        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3176  sub _get_next_token ($) { Line 3792  sub _get_next_token ($) {
3792        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3793      }      }
3794        
3795            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3796          redo A;          redo A;
3797        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
3798          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3799    
3800          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3801          $self->{s_kwd} = '';            
3802              $self->{state} = DATA_STATE;
3803              $self->{s_kwd} = '';
3804              $self->{ct}->{quirks} = 1;
3805            } else {
3806              
3807              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3808            }
3809          
3810          ## reconsume          ## reconsume
3811            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3812          redo A;          redo A;
3813        } else {        } else {
3814                    
3815          $self->{ct}->{pubid} # DOCTYPE          $self->{ct}->{pubid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
3816          $self->{read_until}->($self->{ct}->{pubid}, q['>],          $self->{read_until}->($self->{ct}->{pubid}, q['>],
3817                                length $self->{ct}->{pubid});                                length $self->{ct}->{pubid});
3818    
# Line 3232  sub _get_next_token ($) { Line 3848  sub _get_next_token ($) {
3848          redo A;          redo A;
3849        } elsif ($self->{nc} == 0x0022) { # "        } elsif ($self->{nc} == 0x0022) { # "
3850                    
3851          $self->{ct}->{sysid} = ''; # DOCTYPE          $self->{ct}->{sysid} = ''; # DOCTYPE/ENTITY/NOTATION
3852          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
3853                    
3854      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3248  sub _get_next_token ($) { Line 3864  sub _get_next_token ($) {
3864          redo A;          redo A;
3865        } elsif ($self->{nc} == 0x0027) { # '        } elsif ($self->{nc} == 0x0027) { # '
3866                    
3867          $self->{ct}->{sysid} = ''; # DOCTYPE          $self->{ct}->{sysid} = ''; # DOCTYPE/ENTITY/NOTATION
3868          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
3869                    
3870      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3263  sub _get_next_token ($) { Line 3879  sub _get_next_token ($) {
3879        
3880          redo A;          redo A;
3881        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3882            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3883              if ($self->{is_xml}) {
3884                
3885                $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
3886              } else {
3887                
3888              }
3889              $self->{state} = DATA_STATE;
3890              $self->{s_kwd} = '';
3891            } else {
3892              if ($self->{ct}->{type} == NOTATION_TOKEN) {
3893                
3894              } else {
3895                
3896                $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');            
3897              }
3898              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3899            }
3900                    
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3901                    
3902      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3903        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3277  sub _get_next_token ($) { Line 3909  sub _get_next_token ($) {
3909        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3910      }      }
3911        
3912            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         return  ($self->{ct}); # DOCTYPE  
   
3913          redo A;          redo A;
3914        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3915            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3916              
3917              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3918              
3919              $self->{state} = DATA_STATE;
3920              $self->{s_kwd} = '';
3921              $self->{ct}->{quirks} = 1;
3922            } else {
3923              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3924              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3925            }
3926                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');  
   
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3927          ## reconsume          ## reconsume
3928            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
3929          $self->{ct}->{quirks} = 1;          redo A;
3930          } elsif ($self->{is_xml} and
3931                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3932                   $self->{nc} == 0x005B) { # [
3933            
3934            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
3935            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3936            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3937            $self->{in_subset} = 1;
3938            
3939        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3940          $self->{line_prev} = $self->{line};
3941          $self->{column_prev} = $self->{column};
3942          $self->{column}++;
3943          $self->{nc}
3944              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3945        } else {
3946          $self->{set_nc}->($self);
3947        }
3948      
3949          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
3950          redo A;          redo A;
3951        } else {        } else {
           
3952          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC literal');
         $self->{ct}->{quirks} = 1;  
3953    
3954          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3955              
3956              $self->{ct}->{quirks} = 1;
3957              $self->{state} = BOGUS_DOCTYPE_STATE;
3958            } else {
3959              
3960              $self->{state} = BOGUS_MD_STATE;
3961            }
3962    
3963                    
3964      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3965        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3361  sub _get_next_token ($) { Line 4022  sub _get_next_token ($) {
4022        
4023          redo A;          redo A;
4024        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
4025          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
4026                    
4027      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4028        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3377  sub _get_next_token ($) { Line 4035  sub _get_next_token ($) {
4035      }      }
4036        
4037    
4038          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4039          return  ($self->{ct}); # DOCTYPE            
4040              $self->{state} = DATA_STATE;
4041              $self->{s_kwd} = '';
4042              $self->{ct}->{quirks} = 1;
4043            } else {
4044              
4045              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4046            }
4047    
4048            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4049          redo A;          redo A;
4050        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4051            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4052              
4053              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
4054              $self->{state} = DATA_STATE;
4055              $self->{s_kwd} = '';
4056              $self->{ct}->{quirks} = 1;
4057            } else {
4058              
4059              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
4060              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4061            }
4062                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');  
   
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
4063          ## reconsume          ## reconsume
4064            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4065            redo A;
4066          } elsif ($self->{is_xml} and
4067                   $self->{ct}->{type} == DOCTYPE_TOKEN and
4068                   $self->{nc} == 0x005B) { # [
4069            
4070            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
4071    
4072          $self->{ct}->{quirks} = 1;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4073            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
4074            $self->{in_subset} = 1;
4075            
4076        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4077          $self->{line_prev} = $self->{line};
4078          $self->{column_prev} = $self->{column};
4079          $self->{column}++;
4080          $self->{nc}
4081              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4082        } else {
4083          $self->{set_nc}->($self);
4084        }
4085      
4086          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
4087          redo A;          redo A;
4088        } else {        } else {
           
4089          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM');
         $self->{ct}->{quirks} = 1;  
4090    
4091          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4092                        
4093              $self->{ct}->{quirks} = 1;
4094              $self->{state} = BOGUS_DOCTYPE_STATE;
4095            } else {
4096              
4097              $self->{state} = BOGUS_MD_STATE;
4098            }
4099    
4100                    
4101      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4102        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3428  sub _get_next_token ($) { Line 4126  sub _get_next_token ($) {
4126      }      }
4127        
4128          redo A;          redo A;
4129        } elsif ($self->{nc} == 0x003E) { # >        } elsif (not $self->{is_xml} and $self->{nc} == 0x003E) { # >
           
4130          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4131    
4132          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4133          $self->{s_kwd} = '';            
4134              $self->{state} = DATA_STATE;
4135              $self->{s_kwd} = '';
4136              $self->{ct}->{quirks} = 1;
4137            } else {
4138              
4139              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4140            }
4141            
4142                    
4143      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4144        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3445  sub _get_next_token ($) { Line 4150  sub _get_next_token ($) {
4150        $self->{set_nc}->($self);        $self->{set_nc}->($self);
4151      }      }
4152        
4153            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
4154          redo A;          redo A;
4155        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
4156          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4157    
4158          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4159          $self->{s_kwd} = '';            
4160              $self->{state} = DATA_STATE;
4161              $self->{s_kwd} = '';
4162              $self->{ct}->{quirks} = 1;
4163            } else {
4164              
4165              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4166            }
4167            
4168          ## reconsume          ## reconsume
4169            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
4170          redo A;          redo A;
4171        } else {        } else {
4172                    
4173          $self->{ct}->{sysid} # DOCTYPE          $self->{ct}->{sysid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
4174          $self->{read_until}->($self->{ct}->{sysid}, q[">],          $self->{read_until}->($self->{ct}->{sysid}, q[">],
4175                                length $self->{ct}->{sysid});                                length $self->{ct}->{sysid});
4176    
# Line 3499  sub _get_next_token ($) { Line 4204  sub _get_next_token ($) {
4204      }      }
4205        
4206          redo A;          redo A;
4207        } elsif ($self->{nc} == 0x003E) { # >        } elsif (not $self->{is_xml} and $self->{nc} == 0x003E) { # >
4208                    
4209          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4210    
# Line 3522  sub _get_next_token ($) { Line 4227  sub _get_next_token ($) {
4227    
4228          redo A;          redo A;
4229        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
4230          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4231    
4232          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4233          $self->{s_kwd} = '';            
4234          ## reconsume            $self->{state} = DATA_STATE;
4235              $self->{s_kwd} = '';
4236          $self->{ct}->{quirks} = 1;            $self->{ct}->{quirks} = 1;
4237          return  ($self->{ct}); # DOCTYPE          } else {
4238              
4239              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4240            }
4241    
4242            ## reconsume
4243            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4244          redo A;          redo A;
4245        } else {        } else {
4246                    
4247          $self->{ct}->{sysid} # DOCTYPE          $self->{ct}->{sysid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
4248          $self->{read_until}->($self->{ct}->{sysid}, q['>],          $self->{read_until}->($self->{ct}->{sysid}, q['>],
4249                                length $self->{ct}->{sysid});                                length $self->{ct}->{sysid});
4250    
# Line 3556  sub _get_next_token ($) { Line 4264  sub _get_next_token ($) {
4264        }        }
4265      } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {      } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
4266        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
4267                    if ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN) {
4268          ## Stay in the state            
4269              $self->{state} = BEFORE_NDATA_STATE;
4270            } else {
4271              
4272              ## Stay in the state
4273            }
4274                    
4275      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4276        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3571  sub _get_next_token ($) { Line 4284  sub _get_next_token ($) {
4284        
4285          redo A;          redo A;
4286        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
4287            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4288              
4289              $self->{state} = DATA_STATE;
4290              $self->{s_kwd} = '';
4291            } else {
4292              
4293              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4294            }
4295    
4296                    
4297          $self->{state} = DATA_STATE;      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4298          $self->{s_kwd} = '';        $self->{line_prev} = $self->{line};
4299          $self->{column_prev} = $self->{column};
4300          $self->{column}++;
4301          $self->{nc}
4302              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4303        } else {
4304          $self->{set_nc}->($self);
4305        }
4306      
4307            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4308            redo A;
4309          } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
4310                   ($self->{nc} == 0x004E or # N
4311                    $self->{nc} == 0x006E)) { # n
4312            
4313            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before NDATA'); ## TODO: type
4314            $self->{state} = NDATA_STATE;
4315            $self->{kwd} = chr $self->{nc};
4316                    
4317      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4318        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3585  sub _get_next_token ($) { Line 4324  sub _get_next_token ($) {
4324        $self->{set_nc}->($self);        $self->{set_nc}->($self);
4325      }      }
4326        
4327            redo A;
4328          } elsif ($self->{nc} == -1) {
4329            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4330              
4331              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
4332              $self->{state} = DATA_STATE;
4333              $self->{s_kwd} = '';
4334              $self->{ct}->{quirks} = 1;
4335            } else {
4336              
4337              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
4338              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4339            }
4340    
4341            ## reconsume
4342            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4343            redo A;
4344          } elsif ($self->{is_xml} and
4345                   $self->{ct}->{type} == DOCTYPE_TOKEN and
4346                   $self->{nc} == 0x005B) { # [
4347            
4348            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4349            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
4350            $self->{in_subset} = 1;
4351            
4352        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4353          $self->{line_prev} = $self->{line};
4354          $self->{column_prev} = $self->{column};
4355          $self->{column}++;
4356          $self->{nc}
4357              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4358        } else {
4359          $self->{set_nc}->($self);
4360        }
4361      
4362          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
4363            redo A;
4364          } else {
4365            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');
4366    
4367            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4368              
4369              #$self->{ct}->{quirks} = 1;
4370              $self->{state} = BOGUS_DOCTYPE_STATE;
4371            } else {
4372              
4373              $self->{state} = BOGUS_MD_STATE;
4374            }
4375    
4376            
4377        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4378          $self->{line_prev} = $self->{line};
4379          $self->{column_prev} = $self->{column};
4380          $self->{column}++;
4381          $self->{nc}
4382              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4383        } else {
4384          $self->{set_nc}->($self);
4385        }
4386      
4387            redo A;
4388          }
4389        } elsif ($self->{state} == BEFORE_NDATA_STATE) {
4390          if ($is_space->{$self->{nc}}) {
4391            
4392            ## Stay in the state.
4393            
4394        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4395          $self->{line_prev} = $self->{line};
4396          $self->{column_prev} = $self->{column};
4397          $self->{column}++;
4398          $self->{nc}
4399              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4400        } else {
4401          $self->{set_nc}->($self);
4402        }
4403      
4404            redo A;
4405          } elsif ($self->{nc} == 0x003E) { # >
4406            
4407            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4408            
4409        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4410          $self->{line_prev} = $self->{line};
4411          $self->{column_prev} = $self->{column};
4412          $self->{column}++;
4413          $self->{nc}
4414              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4415        } else {
4416          $self->{set_nc}->($self);
4417        }
4418      
4419            return  ($self->{ct}); # ENTITY
4420            redo A;
4421          } elsif ($self->{nc} == 0x004E or # N
4422                   $self->{nc} == 0x006E) { # n
4423            
4424            $self->{state} = NDATA_STATE;
4425            $self->{kwd} = chr $self->{nc};
4426            
4427        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4428          $self->{line_prev} = $self->{line};
4429          $self->{column_prev} = $self->{column};
4430          $self->{column}++;
4431          $self->{nc}
4432              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4433        } else {
4434          $self->{set_nc}->($self);
4435        }
4436      
4437          redo A;          redo A;
4438        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4439                    
4440          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
4441          $self->{state} = DATA_STATE;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
         $self->{s_kwd} = '';  
4442          ## reconsume          ## reconsume
4443            return  ($self->{ct}); # ENTITY
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
4444          redo A;          redo A;
4445        } else {        } else {
4446                    
4447          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');
4448          #$self->{ct}->{quirks} = 1;          $self->{state} = BOGUS_MD_STATE;
   
         $self->{state} = BOGUS_DOCTYPE_STATE;  
4449                    
4450      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4451        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3639  sub _get_next_token ($) { Line 4479  sub _get_next_token ($) {
4479          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
4480    
4481          redo A;          redo A;
4482          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
4483            
4484            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4485            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
4486            $self->{in_subset} = 1;
4487            
4488        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4489          $self->{line_prev} = $self->{line};
4490          $self->{column_prev} = $self->{column};
4491          $self->{column}++;
4492          $self->{nc}
4493              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4494        } else {
4495          $self->{set_nc}->($self);
4496        }
4497      
4498            return  ($self->{ct}); # DOCTYPE
4499            redo A;
4500        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4501                    
4502          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
# Line 3651  sub _get_next_token ($) { Line 4509  sub _get_next_token ($) {
4509        } else {        } else {
4510                    
4511          my $s = '';          my $s = '';
4512          $self->{read_until}->($s, q[>], 0);          $self->{read_until}->($s, q{>[}, 0);
4513    
4514          ## Stay in the state          ## Stay in the state
4515                    
# Line 3671  sub _get_next_token ($) { Line 4529  sub _get_next_token ($) {
4529        ## NOTE: "CDATA section state" in the state is jointly implemented        ## NOTE: "CDATA section state" in the state is jointly implemented
4530        ## by three states, |CDATA_SECTION_STATE|, |CDATA_SECTION_MSE1_STATE|,        ## by three states, |CDATA_SECTION_STATE|, |CDATA_SECTION_MSE1_STATE|,
4531        ## and |CDATA_SECTION_MSE2_STATE|.        ## and |CDATA_SECTION_MSE2_STATE|.
4532    
4533          ## XML5: "CDATA state".
4534                
4535        if ($self->{nc} == 0x005D) { # ]        if ($self->{nc} == 0x005D) { # ]
4536                    
# Line 3697  sub _get_next_token ($) { Line 4557  sub _get_next_token ($) {
4557    
4558          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
4559          $self->{s_kwd} = '';          $self->{s_kwd} = '';
4560                    ## Reconsume.
     if ($self->{char_buffer_pos} < length $self->{char_buffer}) {  
       $self->{line_prev} = $self->{line};  
       $self->{column_prev} = $self->{column};  
       $self->{column}++;  
       $self->{nc}  
           = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);  
     } else {  
       $self->{set_nc}->($self);  
     }  
     
4561          if (length $self->{ct}->{data}) { # character          if (length $self->{ct}->{data}) { # character
4562                        
4563            return  ($self->{ct}); # character            return  ($self->{ct}); # character
# Line 3740  sub _get_next_token ($) { Line 4590  sub _get_next_token ($) {
4590    
4591        ## ISSUE: "text tokens" in spec.        ## ISSUE: "text tokens" in spec.
4592      } elsif ($self->{state} == CDATA_SECTION_MSE1_STATE) {      } elsif ($self->{state} == CDATA_SECTION_MSE1_STATE) {
4593          ## XML5: "CDATA bracket state".
4594    
4595        if ($self->{nc} == 0x005D) { # ]        if ($self->{nc} == 0x005D) { # ]
4596                    
4597          $self->{state} = CDATA_SECTION_MSE2_STATE;          $self->{state} = CDATA_SECTION_MSE2_STATE;
# Line 3757  sub _get_next_token ($) { Line 4609  sub _get_next_token ($) {
4609          redo A;          redo A;
4610        } else {        } else {
4611                    
4612            ## XML5: If EOF, "]" is not appended and changed to the data state.
4613          $self->{ct}->{data} .= ']';          $self->{ct}->{data} .= ']';
4614          $self->{state} = CDATA_SECTION_STATE;          $self->{state} = CDATA_SECTION_STATE; ## XML5: Stay in the state.
4615          ## Reconsume.          ## Reconsume.
4616          redo A;          redo A;
4617        }        }
4618      } elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) {      } elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) {
4619          ## XML5: "CDATA end state".
4620    
4621        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
4622          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
4623          $self->{s_kwd} = '';          $self->{s_kwd} = '';
# Line 3805  sub _get_next_token ($) { Line 4660  sub _get_next_token ($) {
4660                    
4661          $self->{ct}->{data} .= ']]'; # character          $self->{ct}->{data} .= ']]'; # character
4662          $self->{state} = CDATA_SECTION_STATE;          $self->{state} = CDATA_SECTION_STATE;
4663          ## Reconsume.          ## Reconsume. ## XML5: Emit.
4664          redo A;          redo A;
4665        }        }
4666      } elsif ($self->{state} == ENTITY_STATE) {      } elsif ($self->{state} == ENTITY_STATE) {
# Line 3814  sub _get_next_token ($) { Line 4669  sub _get_next_token ($) {
4669              0x003C => 1, 0x0026 => 1, -1 => 1, # <, &              0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
4670              $self->{entity_add} => 1,              $self->{entity_add} => 1,
4671            }->{$self->{nc}}) {            }->{$self->{nc}}) {
4672                    if ($self->{is_xml}) {
4673              
4674              $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero',
4675                              line => $self->{line_prev},
4676                              column => $self->{column_prev}
4677                                  + ($self->{nc} == -1 ? 1 : 0));
4678            } else {
4679              
4680              ## No error
4681            }
4682          ## Don't consume          ## Don't consume
         ## No error  
4683          ## Return nothing.          ## Return nothing.
4684          #          #
4685        } elsif ($self->{nc} == 0x0023) { # #        } elsif ($self->{nc} == 0x0023) { # #
4686                    
4687          $self->{state} = ENTITY_HASH_STATE;          $self->{state} = ENTITY_HASH_STATE;
4688          $self->{s_kwd} = '#';          $self->{kwd} = '#';
4689                    
4690      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4691        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3835  sub _get_next_token ($) { Line 4698  sub _get_next_token ($) {
4698      }      }
4699        
4700          redo A;          redo A;
4701        } elsif ((0x0041 <= $self->{nc} and        } elsif ($self->{is_xml} or
4702                   (0x0041 <= $self->{nc} and
4703                  $self->{nc} <= 0x005A) or # A..Z                  $self->{nc} <= 0x005A) or # A..Z
4704                 (0x0061 <= $self->{nc} and                 (0x0061 <= $self->{nc} and
4705                  $self->{nc} <= 0x007A)) { # a..z                  $self->{nc} <= 0x007A)) { # a..z
4706                    
4707          require Whatpm::_NamedEntityList;          require Whatpm::_NamedEntityList;
4708          $self->{state} = ENTITY_NAME_STATE;          $self->{state} = ENTITY_NAME_STATE;
4709          $self->{s_kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
4710          $self->{entity__value} = $self->{s_kwd};          $self->{entity__value} = $self->{kwd};
4711          $self->{entity__match} = 0;          $self->{entity__match} = 0;
4712                    
4713      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3889  sub _get_next_token ($) { Line 4753  sub _get_next_token ($) {
4753          redo A;          redo A;
4754        }        }
4755      } elsif ($self->{state} == ENTITY_HASH_STATE) {      } elsif ($self->{state} == ENTITY_HASH_STATE) {
4756        if ($self->{nc} == 0x0078 or # x        if ($self->{nc} == 0x0078) { # x
4757            $self->{nc} == 0x0058) { # X          
4758            $self->{state} = HEXREF_X_STATE;
4759            $self->{kwd} .= chr $self->{nc};
4760            
4761        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4762          $self->{line_prev} = $self->{line};
4763          $self->{column_prev} = $self->{column};
4764          $self->{column}++;
4765          $self->{nc}
4766              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4767        } else {
4768          $self->{set_nc}->($self);
4769        }
4770      
4771            redo A;
4772          } elsif ($self->{nc} == 0x0058) { # X
4773                    
4774            if ($self->{is_xml}) {
4775              $self->{parse_error}->(level => $self->{level}->{must}, type => 'uppercase hcro'); ## TODO: type
4776            }
4777          $self->{state} = HEXREF_X_STATE;          $self->{state} = HEXREF_X_STATE;
4778          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
4779                    
4780      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4781        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3910  sub _get_next_token ($) { Line 4792  sub _get_next_token ($) {
4792                 $self->{nc} <= 0x0039) { # 0..9                 $self->{nc} <= 0x0039) { # 0..9
4793                    
4794          $self->{state} = NCR_NUM_STATE;          $self->{state} = NCR_NUM_STATE;
4795          $self->{s_kwd} = $self->{nc} - 0x0030;          $self->{kwd} = $self->{nc} - 0x0030;
4796                    
4797      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4798        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3956  sub _get_next_token ($) { Line 4838  sub _get_next_token ($) {
4838        if (0x0030 <= $self->{nc} and        if (0x0030 <= $self->{nc} and
4839            $self->{nc} <= 0x0039) { # 0..9            $self->{nc} <= 0x0039) { # 0..9
4840                    
4841          $self->{s_kwd} *= 10;          $self->{kwd} *= 10;
4842          $self->{s_kwd} += $self->{nc} - 0x0030;          $self->{kwd} += $self->{nc} - 0x0030;
4843                    
4844          ## Stay in the state.          ## Stay in the state.
4845                    
# Line 3993  sub _get_next_token ($) { Line 4875  sub _get_next_token ($) {
4875          #          #
4876        }        }
4877    
4878        my $code = $self->{s_kwd};        my $code = $self->{kwd};
4879        my $l = $self->{line_prev};        my $l = $self->{line_prev};
4880        my $c = $self->{column_prev};        my $c = $self->{column_prev};
4881        if ($charref_map->{$code}) {        if ((not $self->{is_xml} and $charref_map->{$code}) or
4882              ($self->{is_xml} and 0xD800 <= $code and $code <= 0xDFFF) or
4883              ($self->{is_xml} and $code == 0x0000)) {
4884                    
4885          $self->{parse_error}->(level => $self->{level}->{must}, type => 'invalid character reference',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'invalid character reference',
4886                          text => (sprintf 'U+%04X', $code),                          text => (sprintf 'U+%04X', $code),
# Line 4036  sub _get_next_token ($) { Line 4920  sub _get_next_token ($) {
4920          # 0..9, A..F, a..f          # 0..9, A..F, a..f
4921                    
4922          $self->{state} = HEXREF_HEX_STATE;          $self->{state} = HEXREF_HEX_STATE;
4923          $self->{s_kwd} = 0;          $self->{kwd} = 0;
4924          ## Reconsume.          ## Reconsume.
4925          redo A;          redo A;
4926        } else {        } else {
# Line 4054  sub _get_next_token ($) { Line 4938  sub _get_next_token ($) {
4938            $self->{s_kwd} = '';            $self->{s_kwd} = '';
4939            ## Reconsume.            ## Reconsume.
4940            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
4941                      data => '&' . $self->{s_kwd},                      data => '&' . $self->{kwd},
4942                      line => $self->{line_prev},                      line => $self->{line_prev},
4943                      column => $self->{column_prev} - length $self->{s_kwd},                      column => $self->{column_prev} - length $self->{kwd},
4944                     });                     });
4945            redo A;            redo A;
4946          } else {          } else {
4947                        
4948            $self->{ca}->{value} .= '&' . $self->{s_kwd};            $self->{ca}->{value} .= '&' . $self->{kwd};
4949            $self->{state} = $self->{prev_state};            $self->{state} = $self->{prev_state};
4950            $self->{s_kwd} = '';            $self->{s_kwd} = '';
4951            ## Reconsume.            ## Reconsume.
# Line 4072  sub _get_next_token ($) { Line 4956  sub _get_next_token ($) {
4956        if (0x0030 <= $self->{nc} and $self->{nc} <= 0x0039) {        if (0x0030 <= $self->{nc} and $self->{nc} <= 0x0039) {
4957          # 0..9          # 0..9
4958                    
4959          $self->{s_kwd} *= 0x10;          $self->{kwd} *= 0x10;
4960          $self->{s_kwd} += $self->{nc} - 0x0030;          $self->{kwd} += $self->{nc} - 0x0030;
4961          ## Stay in the state.          ## Stay in the state.
4962                    
4963      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 4090  sub _get_next_token ($) { Line 4974  sub _get_next_token ($) {
4974        } elsif (0x0061 <= $self->{nc} and        } elsif (0x0061 <= $self->{nc} and
4975                 $self->{nc} <= 0x0066) { # a..f                 $self->{nc} <= 0x0066) { # a..f
4976                    
4977          $self->{s_kwd} *= 0x10;          $self->{kwd} *= 0x10;
4978          $self->{s_kwd} += $self->{nc} - 0x0060 + 9;          $self->{kwd} += $self->{nc} - 0x0060 + 9;
4979          ## Stay in the state.          ## Stay in the state.
4980                    
4981      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 4108  sub _get_next_token ($) { Line 4992  sub _get_next_token ($) {
4992        } elsif (0x0041 <= $self->{nc} and        } elsif (0x0041 <= $self->{nc} and
4993                 $self->{nc} <= 0x0046) { # A..F                 $self->{nc} <= 0x0046) { # A..F
4994                    
4995          $self->{s_kwd} *= 0x10;          $self->{kwd} *= 0x10;
4996          $self->{s_kwd} += $self->{nc} - 0x0040 + 9;          $self->{kwd} += $self->{nc} - 0x0040 + 9;
4997          ## Stay in the state.          ## Stay in the state.
4998                    
4999      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 4146  sub _get_next_token ($) { Line 5030  sub _get_next_token ($) {
5030          #          #
5031        }        }
5032    
5033        my $code = $self->{s_kwd};        my $code = $self->{kwd};
5034        my $l = $self->{line_prev};        my $l = $self->{line_prev};
5035        my $c = $self->{column_prev};        my $c = $self->{column_prev};
5036        if ($charref_map->{$code}) {        if ((not $self->{is_xml} and $charref_map->{$code}) or
5037              ($self->{is_xml} and 0xD800 <= $code and $code <= 0xDFFF) or
5038              ($self->{is_xml} and $code == 0x0000)) {
5039                    
5040          $self->{parse_error}->(level => $self->{level}->{must}, type => 'invalid character reference',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'invalid character reference',
5041                          text => (sprintf 'U+%04X', $code),                          text => (sprintf 'U+%04X', $code),
# Line 4183  sub _get_next_token ($) { Line 5069  sub _get_next_token ($) {
5069          redo A;          redo A;
5070        }        }
5071      } elsif ($self->{state} == ENTITY_NAME_STATE) {      } elsif ($self->{state} == ENTITY_NAME_STATE) {
5072        if (length $self->{s_kwd} < 30 and        if ((0x0041 <= $self->{nc} and # a
5073            ## NOTE: Some number greater than the maximum length of entity name             $self->{nc} <= 0x005A) or # x
5074            ((0x0041 <= $self->{nc} and # a            (0x0061 <= $self->{nc} and # a
5075              $self->{nc} <= 0x005A) or # x             $self->{nc} <= 0x007A) or # z
5076             (0x0061 <= $self->{nc} and # a            (0x0030 <= $self->{nc} and # 0
5077              $self->{nc} <= 0x007A) or # z             $self->{nc} <= 0x0039) or # 9
5078             (0x0030 <= $self->{nc} and # 0            $self->{nc} == 0x003B or # ;
5079              $self->{nc} <= 0x0039) or # 9            ($self->{is_xml} and
5080             $self->{nc} == 0x003B)) { # ;             not ($is_space->{$self->{nc}} or
5081                    {
5082                      0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
5083                      $self->{entity_add} => 1,
5084                    }->{$self->{nc}}))) {
5085          our $EntityChar;          our $EntityChar;
5086          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
5087          if (defined $EntityChar->{$self->{s_kwd}}) {          if (defined $EntityChar->{$self->{kwd}} or
5088                $self->{ge}->{$self->{kwd}}) {
5089            if ($self->{nc} == 0x003B) { # ;            if ($self->{nc} == 0x003B) { # ;
5090                            if (defined $self->{ge}->{$self->{kwd}}) {
5091              $self->{entity__value} = $EntityChar->{$self->{s_kwd}};                if ($self->{ge}->{$self->{kwd}}->{only_text}) {
5092                    
5093                    $self->{entity__value} = $self->{ge}->{$self->{kwd}}->{value};
5094                  } else {
5095                    if (defined $self->{ge}->{$self->{kwd}}->{notation}) {
5096                      
5097                      $self->{parse_error}->(level => $self->{level}->{must}, type => 'unparsed entity', ## TODO: type
5098                                      value => $self->{kwd});
5099                    } else {
5100                      
5101                    }
5102                    $self->{entity__value} = '&' . $self->{kwd}; ## TODO: expand
5103                  }
5104                } else {
5105                  if ($self->{is_xml}) {
5106                    
5107                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'entity not declared', ## TODO: type
5108                                    value => $self->{kwd},
5109                                    level => {
5110                                              'amp;' => $self->{level}->{warn},
5111                                              'quot;' => $self->{level}->{warn},
5112                                              'lt;' => $self->{level}->{warn},
5113                                              'gt;' => $self->{level}->{warn},
5114                                              'apos;' => $self->{level}->{warn},
5115                                             }->{$self->{kwd}} ||
5116                                             $self->{level}->{must});
5117                  } else {
5118                    
5119                  }
5120                  $self->{entity__value} = $EntityChar->{$self->{kwd}};
5121                }
5122              $self->{entity__match} = 1;              $self->{entity__match} = 1;
5123                            
5124      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 4213  sub _get_next_token ($) { Line 5134  sub _get_next_token ($) {
5134              #              #
5135            } else {            } else {
5136                            
5137              $self->{entity__value} = $EntityChar->{$self->{s_kwd}};              $self->{entity__value} = $EntityChar->{$self->{kwd}};
5138              $self->{entity__match} = -1;              $self->{entity__match} = -1;
5139              ## Stay in the state.              ## Stay in the state.
5140                            
# Line 4261  sub _get_next_token ($) { Line 5182  sub _get_next_token ($) {
5182          if ($self->{prev_state} != DATA_STATE and # in attribute          if ($self->{prev_state} != DATA_STATE and # in attribute
5183              $self->{entity__match} < -1) {              $self->{entity__match} < -1) {
5184                        
5185            $data = '&' . $self->{s_kwd};            $data = '&' . $self->{kwd};
5186            #            #
5187          } else {          } else {
5188                        
# Line 4273  sub _get_next_token ($) { Line 5194  sub _get_next_token ($) {
5194                    
5195          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero',
5196                          line => $self->{line_prev},                          line => $self->{line_prev},
5197                          column => $self->{column_prev} - length $self->{s_kwd});                          column => $self->{column_prev} - length $self->{kwd});
5198          $data = '&' . $self->{s_kwd};          $data = '&' . $self->{kwd};
5199          #          #
5200        }        }
5201        
# Line 4297  sub _get_next_token ($) { Line 5218  sub _get_next_token ($) {
5218                    data => $data,                    data => $data,
5219                    has_reference => $has_ref,                    has_reference => $has_ref,
5220                    line => $self->{line_prev},                    line => $self->{line_prev},
5221                    column => $self->{column_prev} + 1 - length $self->{s_kwd},                    column => $self->{column_prev} + 1 - length $self->{kwd},
5222                   });                   });
5223          redo A;          redo A;
5224        } else {        } else {
# Line 4313  sub _get_next_token ($) { Line 5234  sub _get_next_token ($) {
5234      ## XML-only states      ## XML-only states
5235    
5236      } elsif ($self->{state} == PI_STATE) {      } elsif ($self->{state} == PI_STATE) {
5237          ## XML5: "Pi state" and "DOCTYPE pi state".
5238    
5239        if ($is_space->{$self->{nc}} or        if ($is_space->{$self->{nc}} or
5240            $self->{nc} == 0x003F or # ? ## XML5: Same as "Anything else"            $self->{nc} == 0x003F or # ?
5241            $self->{nc} == -1) {            $self->{nc} == -1) {
5242            ## XML5: U+003F: "pi state": Same as "Anything else"; "DOCTYPE
5243            ## pi state": Switch to the "DOCTYPE pi after state".  EOF:
5244            ## "DOCTYPE pi state": Parse error, switch to the "data
5245            ## state".
5246          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare pio', ## TODO: type          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare pio', ## TODO: type
5247                          line => $self->{line_prev},                          line => $self->{line_prev},
5248                          column => $self->{column_prev}                          column => $self->{column_prev}
# Line 4330  sub _get_next_token ($) { Line 5257  sub _get_next_token ($) {
5257                        };                        };
5258          redo A;          redo A;
5259        } else {        } else {
5260            ## XML5: "DOCTYPE pi state": Stay in the state.
5261          $self->{ct} = {type => PI_TOKEN,          $self->{ct} = {type => PI_TOKEN,
5262                         target => chr $self->{nc},                         target => chr $self->{nc},
5263                         data => '',                         data => '',
# Line 4367  sub _get_next_token ($) { Line 5295  sub _get_next_token ($) {
5295          redo A;          redo A;
5296        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
5297          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type
5298          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
5299          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5300            } else {
5301              $self->{state} = DATA_STATE;
5302              $self->{s_kwd} = '';
5303            }
5304          ## Reconsume.          ## Reconsume.
5305          return  ($self->{ct}); # pi          return  ($self->{ct}); # pi
5306          redo A;          redo A;
# Line 4439  sub _get_next_token ($) { Line 5371  sub _get_next_token ($) {
5371          redo A;          redo A;
5372        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
5373          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type
5374          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
5375          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state"
5376            } else {
5377              $self->{state} = DATA_STATE;
5378              $self->{s_kwd} = '';
5379            }
5380          ## Reprocess.          ## Reprocess.
5381          return  ($self->{ct}); # pi          return  ($self->{ct}); # pi
5382          redo A;          redo A;
# Line 4464  sub _get_next_token ($) { Line 5400  sub _get_next_token ($) {
5400          redo A;          redo A;
5401        }        }
5402      } elsif ($self->{state} == PI_AFTER_STATE) {      } elsif ($self->{state} == PI_AFTER_STATE) {
5403          ## XML5: Part of "Pi after state".
5404    
5405        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
5406          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
5407          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5408            } else {
5409              $self->{state} = DATA_STATE;
5410              $self->{s_kwd} = '';
5411            }
5412                    
5413      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5414        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 4509  sub _get_next_token ($) { Line 5451  sub _get_next_token ($) {
5451          redo A;          redo A;
5452        }        }
5453      } elsif ($self->{state} == PI_DATA_AFTER_STATE) {      } elsif ($self->{state} == PI_DATA_AFTER_STATE) {
5454        ## XML5: Same as "pi after state" in XML5        ## XML5: Same as "pi after state" and "DOCTYPE pi after state".
5455    
5456        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
5457          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
5458          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5459            } else {
5460              $self->{state} = DATA_STATE;
5461              $self->{s_kwd} = '';
5462            }
5463                    
5464      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5465        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 4547  sub _get_next_token ($) { Line 5494  sub _get_next_token ($) {
5494          ## Reprocess.          ## Reprocess.
5495          redo A;          redo A;
5496        }        }
5497    
5498        } elsif ($self->{state} == DOCTYPE_INTERNAL_SUBSET_STATE) {
5499          if ($self->{nc} == 0x003C) { # <
5500            $self->{state} = DOCTYPE_TAG_STATE;
5501            
5502        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5503          $self->{line_prev} = $self->{line};
5504          $self->{column_prev} = $self->{column};
5505          $self->{column}++;
5506          $self->{nc}
5507              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5508        } else {
5509          $self->{set_nc}->($self);
5510        }
5511      
5512            redo A;
5513          } elsif ($self->{nc} == 0x0025) { # %
5514            ## XML5: Not defined yet.
5515    
5516            ## TODO:
5517    
5518            if (not $self->{stop_processing} and
5519                not $self->{document}->xml_standalone) {
5520              $self->{parse_error}->(level => $self->{level}->{must}, type => 'stop processing', ## TODO: type
5521                              level => $self->{level}->{info});
5522              $self->{stop_processing} = 1;
5523            }
5524    
5525            
5526        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5527          $self->{line_prev} = $self->{line};
5528          $self->{column_prev} = $self->{column};
5529          $self->{column}++;
5530          $self->{nc}
5531              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5532        } else {
5533          $self->{set_nc}->($self);
5534        }
5535      
5536            redo A;
5537          } elsif ($self->{nc} == 0x005D) { # ]
5538            delete $self->{in_subset};
5539            $self->{state} = DOCTYPE_INTERNAL_SUBSET_AFTER_STATE;
5540            
5541        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5542          $self->{line_prev} = $self->{line};
5543          $self->{column_prev} = $self->{column};
5544          $self->{column}++;
5545          $self->{nc}
5546              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5547        } else {
5548          $self->{set_nc}->($self);
5549        }
5550      
5551            redo A;
5552          } elsif ($is_space->{$self->{nc}}) {
5553            ## Stay in the state.
5554            
5555        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5556          $self->{line_prev} = $self->{line};
5557          $self->{column_prev} = $self->{column};
5558          $self->{column}++;
5559          $self->{nc}
5560              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5561        } else {
5562          $self->{set_nc}->($self);
5563        }
5564      
5565            redo A;
5566          } elsif ($self->{nc} == -1) {
5567            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed internal subset'); ## TODO: type
5568            delete $self->{in_subset};
5569            $self->{state} = DATA_STATE;
5570            $self->{s_kwd} = '';
5571            ## Reconsume.
5572            return  ({type => END_OF_DOCTYPE_TOKEN});
5573            redo A;
5574          } else {
5575            unless ($self->{internal_subset_tainted}) {
5576              ## XML5: No parse error.
5577              $self->{parse_error}->(level => $self->{level}->{must}, type => 'string in internal subset');
5578              $self->{internal_subset_tainted} = 1;
5579            }
5580            ## Stay in the state.
5581            
5582        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5583          $self->{line_prev} = $self->{line};
5584          $self->{column_prev} = $self->{column};
5585          $self->{column}++;
5586          $self->{nc}
5587              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5588        } else {
5589          $self->{set_nc}->($self);
5590        }
5591      
5592            redo A;
5593          }
5594        } elsif ($self->{state} == DOCTYPE_INTERNAL_SUBSET_AFTER_STATE) {
5595          if ($self->{nc} == 0x003E) { # >
5596            $self->{state} = DATA_STATE;
5597            $self->{s_kwd} = '';
5598            
5599        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5600          $self->{line_prev} = $self->{line};
5601          $self->{column_prev} = $self->{column};
5602          $self->{column}++;
5603          $self->{nc}
5604              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5605        } else {
5606          $self->{set_nc}->($self);
5607        }
5608      
5609            return  ({type => END_OF_DOCTYPE_TOKEN});
5610            redo A;
5611          } elsif ($self->{nc} == -1) {
5612            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
5613            $self->{state} = DATA_STATE;
5614            $self->{s_kwd} = '';
5615            ## Reconsume.
5616            return  ({type => END_OF_DOCTYPE_TOKEN});
5617            redo A;
5618          } else {
5619            ## XML5: No parse error and stay in the state.
5620            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after internal subset'); ## TODO: type
5621    
5622            $self->{state} = BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE;
5623            
5624        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5625          $self->{line_prev} = $self->{line};
5626          $self->{column_prev} = $self->{column};
5627          $self->{column}++;
5628          $self->{nc}
5629              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5630        } else {
5631          $self->{set_nc}->($self);
5632        }
5633      
5634            redo A;
5635          }
5636        } elsif ($self->{state} == BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE) {
5637          if ($self->{nc} == 0x003E) { # >
5638            $self->{state} = DATA_STATE;
5639            $self->{s_kwd} = '';
5640            
5641        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5642          $self->{line_prev} = $self->{line};
5643          $self->{column_prev} = $self->{column};
5644          $self->{column}++;
5645          $self->{nc}
5646              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5647        } else {
5648          $self->{set_nc}->($self);
5649        }
5650      
5651            return  ({type => END_OF_DOCTYPE_TOKEN});
5652            redo A;
5653          } elsif ($self->{nc} == -1) {
5654            $self->{state} = DATA_STATE;
5655            $self->{s_kwd} = '';
5656            ## Reconsume.
5657            return  ({type => END_OF_DOCTYPE_TOKEN});
5658            redo A;
5659          } else {
5660            ## Stay in the state.
5661            
5662        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5663          $self->{line_prev} = $self->{line};
5664          $self->{column_prev} = $self->{column};
5665          $self->{column}++;
5666          $self->{nc}
5667              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5668        } else {
5669          $self->{set_nc}->($self);
5670        }
5671      
5672            redo A;
5673          }
5674        } elsif ($self->{state} == DOCTYPE_TAG_STATE) {
5675          if ($self->{nc} == 0x0021) { # !
5676            $self->{state} = DOCTYPE_MARKUP_DECLARATION_OPEN_STATE;
5677            
5678        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5679          $self->{line_prev} = $self->{line};
5680          $self->{column_prev} = $self->{column};
5681          $self->{column}++;
5682          $self->{nc}
5683              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5684        } else {
5685          $self->{set_nc}->($self);
5686        }
5687      
5688            redo A;
5689          } elsif ($self->{nc} == 0x003F) { # ?
5690            $self->{state} = PI_STATE;
5691            
5692        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5693          $self->{line_prev} = $self->{line};
5694          $self->{column_prev} = $self->{column};
5695          $self->{column}++;
5696          $self->{nc}
5697              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5698        } else {
5699          $self->{set_nc}->($self);
5700        }
5701      
5702            redo A;
5703          } elsif ($self->{nc} == -1) {
5704            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago');
5705            $self->{state} = DATA_STATE;
5706            $self->{s_kwd} = '';
5707            ## Reconsume.
5708            redo A;
5709          } else {
5710            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago', ## XML5: Not a parse error.
5711                            line => $self->{line_prev},
5712                            column => $self->{column_prev});
5713            $self->{state} = BOGUS_COMMENT_STATE;
5714            $self->{ct} = {type => COMMENT_TOKEN,
5715                           data => '',
5716                          }; ## NOTE: Will be discarded.
5717            
5718        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5719          $self->{line_prev} = $self->{line};
5720          $self->{column_prev} = $self->{column};
5721          $self->{column}++;
5722          $self->{nc}
5723              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5724        } else {
5725          $self->{set_nc}->($self);
5726        }
5727      
5728            redo A;
5729          }
5730        } elsif ($self->{state} == DOCTYPE_MARKUP_DECLARATION_OPEN_STATE) {
5731          ## XML5: "DOCTYPE markup declaration state".
5732          
5733          if ($self->{nc} == 0x002D) { # -
5734            $self->{state} = MD_HYPHEN_STATE;
5735            
5736        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5737          $self->{line_prev} = $self->{line};
5738          $self->{column_prev} = $self->{column};
5739          $self->{column}++;
5740          $self->{nc}
5741              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5742        } else {
5743          $self->{set_nc}->($self);
5744        }
5745      
5746            redo A;
5747          } elsif ($self->{nc} == 0x0045 or # E
5748                   $self->{nc} == 0x0065) { # e
5749            $self->{state} = MD_E_STATE;
5750            $self->{kwd} = chr $self->{nc};
5751            
5752        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5753          $self->{line_prev} = $self->{line};
5754          $self->{column_prev} = $self->{column};
5755          $self->{column}++;
5756          $self->{nc}
5757              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5758        } else {
5759          $self->{set_nc}->($self);
5760        }
5761      
5762            redo A;
5763          } elsif ($self->{nc} == 0x0041 or # A
5764                   $self->{nc} == 0x0061) { # a
5765            $self->{state} = MD_ATTLIST_STATE;
5766            $self->{kwd} = chr $self->{nc};
5767            
5768        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5769          $self->{line_prev} = $self->{line};
5770          $self->{column_prev} = $self->{column};
5771          $self->{column}++;
5772          $self->{nc}
5773              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5774        } else {
5775          $self->{set_nc}->($self);
5776        }
5777      
5778            redo A;
5779          } elsif ($self->{nc} == 0x004E or # N
5780                   $self->{nc} == 0x006E) { # n
5781            $self->{state} = MD_NOTATION_STATE;
5782            $self->{kwd} = chr $self->{nc};
5783            
5784        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5785          $self->{line_prev} = $self->{line};
5786          $self->{column_prev} = $self->{column};
5787          $self->{column}++;
5788          $self->{nc}
5789              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5790        } else {
5791          $self->{set_nc}->($self);
5792        }
5793      
5794            redo A;
5795          } else {
5796            #
5797          }
5798          
5799          ## XML5: No parse error.
5800          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5801                          line => $self->{line_prev},
5802                          column => $self->{column_prev} - 1);
5803          ## Reconsume.
5804          $self->{state} = BOGUS_COMMENT_STATE;
5805          $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded.
5806          redo A;
5807        } elsif ($self->{state} == MD_E_STATE) {
5808          if ($self->{nc} == 0x004E or # N
5809              $self->{nc} == 0x006E) { # n
5810            $self->{state} = MD_ENTITY_STATE;
5811            $self->{kwd} .= chr $self->{nc};
5812            
5813        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5814          $self->{line_prev} = $self->{line};
5815          $self->{column_prev} = $self->{column};
5816          $self->{column}++;
5817          $self->{nc}
5818              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5819        } else {
5820          $self->{set_nc}->($self);
5821        }
5822      
5823            redo A;
5824          } elsif ($self->{nc} == 0x004C or # L
5825                   $self->{nc} == 0x006C) { # l
5826            ## XML5: <!ELEMENT> not supported.
5827            $self->{state} = MD_ELEMENT_STATE;
5828            $self->{kwd} .= chr $self->{nc};
5829            
5830        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5831          $self->{line_prev} = $self->{line};
5832          $self->{column_prev} = $self->{column};
5833          $self->{column}++;
5834          $self->{nc}
5835              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5836        } else {
5837          $self->{set_nc}->($self);
5838        }
5839      
5840            redo A;
5841          } else {
5842            ## XML5: No parse error.
5843            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5844                            line => $self->{line_prev},
5845                            column => $self->{column_prev} - 2
5846                                + 1 * ($self->{nc} == -1));
5847            ## Reconsume.
5848            $self->{state} = BOGUS_COMMENT_STATE;
5849            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5850            redo A;
5851          }
5852        } elsif ($self->{state} == MD_ENTITY_STATE) {
5853          if ($self->{nc} == [
5854                undef,
5855                undef,
5856                0x0054, # T
5857                0x0049, # I
5858                0x0054, # T
5859              ]->[length $self->{kwd}] or
5860              $self->{nc} == [
5861                undef,
5862                undef,
5863                0x0074, # t
5864                0x0069, # i
5865                0x0074, # t
5866              ]->[length $self->{kwd}]) {
5867            ## Stay in the state.
5868            $self->{kwd} .= chr $self->{nc};
5869            
5870        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5871          $self->{line_prev} = $self->{line};
5872          $self->{column_prev} = $self->{column};
5873          $self->{column}++;
5874          $self->{nc}
5875              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5876        } else {
5877          $self->{set_nc}->($self);
5878        }
5879      
5880            redo A;
5881          } elsif ((length $self->{kwd}) == 5 and
5882                   ($self->{nc} == 0x0059 or # Y
5883                    $self->{nc} == 0x0079)) { # y
5884            if ($self->{kwd} ne 'ENTIT' or $self->{nc} == 0x0079) {
5885              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
5886                              text => 'ENTITY',
5887                              line => $self->{line_prev},
5888                              column => $self->{column_prev} - 4);
5889            }
5890            $self->{ct} = {type => GENERAL_ENTITY_TOKEN, name => '',
5891                           line => $self->{line_prev},
5892                           column => $self->{column_prev} - 6};
5893            $self->{state} = DOCTYPE_MD_STATE;
5894            
5895        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5896          $self->{line_prev} = $self->{line};
5897          $self->{column_prev} = $self->{column};
5898          $self->{column}++;
5899          $self->{nc}
5900              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5901        } else {
5902          $self->{set_nc}->($self);
5903        }
5904      
5905            redo A;
5906          } else {
5907            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5908                            line => $self->{line_prev},
5909                            column => $self->{column_prev} - 1
5910                                - (length $self->{kwd})
5911                                + 1 * ($self->{nc} == -1));
5912            $self->{state} = BOGUS_COMMENT_STATE;
5913            ## Reconsume.
5914            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5915            redo A;
5916          }
5917        } elsif ($self->{state} == MD_ELEMENT_STATE) {
5918          if ($self->{nc} == [
5919               undef,
5920               undef,
5921               0x0045, # E
5922               0x004D, # M
5923               0x0045, # E
5924               0x004E, # N
5925              ]->[length $self->{kwd}] or
5926              $self->{nc} == [
5927               undef,
5928               undef,
5929               0x0065, # e
5930               0x006D, # m
5931               0x0065, # e
5932               0x006E, # n
5933              ]->[length $self->{kwd}]) {
5934            ## Stay in the state.
5935            $self->{kwd} .= chr $self->{nc};
5936            
5937        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5938          $self->{line_prev} = $self->{line};
5939          $self->{column_prev} = $self->{column};
5940          $self->{column}++;
5941          $self->{nc}
5942              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5943        } else {
5944          $self->{set_nc}->($self);
5945        }
5946      
5947            redo A;
5948          } elsif ((length $self->{kwd}) == 6 and
5949                   ($self->{nc} == 0x0054 or # T
5950                    $self->{nc} == 0x0074)) { # t
5951            if ($self->{kwd} ne 'ELEMEN' or $self->{nc} == 0x0074) {
5952              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
5953                              text => 'ELEMENT',
5954                              line => $self->{line_prev},
5955                              column => $self->{column_prev} - 5);
5956            }
5957            $self->{ct} = {type => ELEMENT_TOKEN, name => '',
5958                           line => $self->{line_prev},
5959                           column => $self->{column_prev} - 7};
5960            $self->{state} = DOCTYPE_MD_STATE;
5961            
5962        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5963          $self->{line_prev} = $self->{line};
5964          $self->{column_prev} = $self->{column};
5965          $self->{column}++;
5966          $self->{nc}
5967              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5968        } else {
5969          $self->{set_nc}->($self);
5970        }
5971      
5972            redo A;
5973          } else {
5974            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5975                            line => $self->{line_prev},
5976                            column => $self->{column_prev} - 1
5977                                - (length $self->{kwd})
5978                                + 1 * ($self->{nc} == -1));
5979            $self->{state} = BOGUS_COMMENT_STATE;
5980            ## Reconsume.
5981            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5982            redo A;
5983          }
5984        } elsif ($self->{state} == MD_ATTLIST_STATE) {
5985          if ($self->{nc} == [
5986               undef,
5987               0x0054, # T
5988               0x0054, # T
5989               0x004C, # L
5990               0x0049, # I
5991               0x0053, # S
5992              ]->[length $self->{kwd}] or
5993              $self->{nc} == [
5994               undef,
5995               0x0074, # t
5996               0x0074, # t
5997               0x006C, # l
5998               0x0069, # i
5999               0x0073, # s
6000              ]->[length $self->{kwd}]) {
6001            ## Stay in the state.
6002            $self->{kwd} .= chr $self->{nc};
6003            
6004        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6005          $self->{line_prev} = $self->{line};
6006          $self->{column_prev} = $self->{column};
6007          $self->{column}++;
6008          $self->{nc}
6009              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6010        } else {
6011          $self->{set_nc}->($self);
6012        }
6013      
6014            redo A;
6015          } elsif ((length $self->{kwd}) == 6 and
6016                   ($self->{nc} == 0x0054 or # T
6017                    $self->{nc} == 0x0074)) { # t
6018            if ($self->{kwd} ne 'ATTLIS' or $self->{nc} == 0x0074) {
6019              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
6020                              text => 'ATTLIST',
6021                              line => $self->{line_prev},
6022                              column => $self->{column_prev} - 5);
6023            }
6024            $self->{ct} = {type => ATTLIST_TOKEN, name => '',
6025                           attrdefs => [],
6026                           line => $self->{line_prev},
6027                           column => $self->{column_prev} - 7};
6028            $self->{state} = DOCTYPE_MD_STATE;
6029            
6030        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6031          $self->{line_prev} = $self->{line};
6032          $self->{column_prev} = $self->{column};
6033          $self->{column}++;
6034          $self->{nc}
6035              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6036        } else {
6037          $self->{set_nc}->($self);
6038        }
6039      
6040            redo A;
6041          } else {
6042            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
6043                            line => $self->{line_prev},
6044                            column => $self->{column_prev} - 1
6045                                 - (length $self->{kwd})
6046                                 + 1 * ($self->{nc} == -1));
6047            $self->{state} = BOGUS_COMMENT_STATE;
6048            ## Reconsume.
6049            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
6050            redo A;
6051          }
6052        } elsif ($self->{state} == MD_NOTATION_STATE) {
6053          if ($self->{nc} == [
6054               undef,
6055               0x004F, # O
6056               0x0054, # T
6057               0x0041, # A
6058               0x0054, # T
6059               0x0049, # I
6060               0x004F, # O
6061              ]->[length $self->{kwd}] or
6062              $self->{nc} == [
6063               undef,
6064               0x006F, # o
6065               0x0074, # t
6066               0x0061, # a
6067               0x0074, # t
6068               0x0069, # i
6069               0x006F, # o
6070              ]->[length $self->{kwd}]) {
6071            ## Stay in the state.
6072            $self->{kwd} .= chr $self->{nc};
6073            
6074        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6075          $self->{line_prev} = $self->{line};
6076          $self->{column_prev} = $self->{column};
6077          $self->{column}++;
6078          $self->{nc}
6079              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6080        } else {
6081          $self->{set_nc}->($self);
6082        }
6083      
6084            redo A;
6085          } elsif ((length $self->{kwd}) == 7 and
6086                   ($self->{nc} == 0x004E or # N
6087                    $self->{nc} == 0x006E)) { # n
6088            if ($self->{kwd} ne 'NOTATIO' or $self->{nc} == 0x006E) {
6089              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
6090                              text => 'NOTATION',
6091                              line => $self->{line_prev},
6092                              column => $self->{column_prev} - 6);
6093            }
6094            $self->{ct} = {type => NOTATION_TOKEN, name => '',
6095                           line => $self->{line_prev},
6096                           column => $self->{column_prev} - 8};
6097            $self->{state} = DOCTYPE_MD_STATE;
6098            
6099        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6100          $self->{line_prev} = $self->{line};
6101          $self->{column_prev} = $self->{column};
6102          $self->{column}++;
6103          $self->{nc}
6104              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6105        } else {
6106          $self->{set_nc}->($self);
6107        }
6108      
6109            redo A;
6110          } else {
6111            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
6112                            line => $self->{line_prev},
6113                            column => $self->{column_prev} - 1
6114                                - (length $self->{kwd})
6115                                + 1 * ($self->{nc} == -1));
6116            $self->{state} = BOGUS_COMMENT_STATE;
6117            ## Reconsume.
6118            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
6119            redo A;
6120          }
6121        } elsif ($self->{state} == DOCTYPE_MD_STATE) {
6122          ## XML5: "DOCTYPE ENTITY state", "DOCTYPE ATTLIST state", and
6123          ## "DOCTYPE NOTATION state".
6124    
6125          if ($is_space->{$self->{nc}}) {
6126            ## XML5: [NOTATION] Switch to the "DOCTYPE NOTATION identifier state".
6127            $self->{state} = BEFORE_MD_NAME_STATE;
6128            
6129        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6130          $self->{line_prev} = $self->{line};
6131          $self->{column_prev} = $self->{column};
6132          $self->{column}++;
6133          $self->{nc}
6134              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6135        } else {
6136          $self->{set_nc}->($self);
6137        }
6138      
6139            redo A;
6140          } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
6141                   $self->{nc} == 0x0025) { # %
6142            ## XML5: Switch to the "DOCTYPE bogus comment state".
6143            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before md name'); ## TODO: type
6144            $self->{state} = DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE;
6145            
6146        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6147          $self->{line_prev} = $self->{line};
6148          $self->{column_prev} = $self->{column};
6149          $self->{column}++;
6150          $self->{nc}
6151              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6152        } else {
6153          $self->{set_nc}->($self);
6154        }
6155      
6156            redo A;
6157          } elsif ($self->{nc} == -1) {
6158            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6159            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6160            ## Reconsume.
6161            redo A;
6162          } elsif ($self->{nc} == 0x003E) { # >
6163            ## XML5: Switch to the "DOCTYPE bogus comment state".
6164            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md name'); ## TODO: type
6165            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6166            
6167        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6168          $self->{line_prev} = $self->{line};
6169          $self->{column_prev} = $self->{column};
6170          $self->{column}++;
6171          $self->{nc}
6172              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6173        } else {
6174          $self->{set_nc}->($self);
6175        }
6176      
6177            redo A;
6178          } else {
6179            ## XML5: Switch to the "DOCTYPE bogus comment state".
6180            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before md name'); ## TODO: type
6181            $self->{state} = BEFORE_MD_NAME_STATE;
6182            redo A;
6183          }
6184        } elsif ($self->{state} == BEFORE_MD_NAME_STATE) {
6185          ## XML5: "DOCTYPE ENTITY parameter state", "DOCTYPE ENTITY type
6186          ## before state", "DOCTYPE ATTLIST name before state".
6187    
6188          if ($is_space->{$self->{nc}}) {
6189            ## Stay in the state.
6190            
6191        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6192          $self->{line_prev} = $self->{line};
6193          $self->{column_prev} = $self->{column};
6194          $self->{column}++;
6195          $self->{nc}
6196              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6197        } else {
6198          $self->{set_nc}->($self);
6199        }
6200      
6201            redo A;
6202          } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
6203                   $self->{nc} == 0x0025) { # %
6204            $self->{state} = DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE;
6205            
6206        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6207          $self->{line_prev} = $self->{line};
6208          $self->{column_prev} = $self->{column};
6209          $self->{column}++;
6210          $self->{nc}
6211              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6212        } else {
6213          $self->{set_nc}->($self);
6214        }
6215      
6216            redo A;
6217          } elsif ($self->{nc} == 0x003E) { # >
6218            ## XML5: Same as "Anything else".
6219            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md name'); ## TODO: type
6220            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6221            
6222        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6223          $self->{line_prev} = $self->{line};
6224          $self->{column_prev} = $self->{column};
6225          $self->{column}++;
6226          $self->{nc}
6227              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6228        } else {
6229          $self->{set_nc}->($self);
6230        }
6231      
6232            redo A;
6233          } elsif ($self->{nc} == -1) {
6234            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6235            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6236            ## Reconsume.
6237            redo A;
6238          } else {
6239            ## XML5: [ATTLIST] Not defined yet.
6240            $self->{ct}->{name} .= chr $self->{nc};
6241            $self->{state} = MD_NAME_STATE;
6242            
6243        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6244          $self->{line_prev} = $self->{line};
6245          $self->{column_prev} = $self->{column};
6246          $self->{column}++;
6247          $self->{nc}
6248              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6249        } else {
6250          $self->{set_nc}->($self);
6251        }
6252      
6253            redo A;
6254          }
6255        } elsif ($self->{state} == DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE) {
6256          if ($is_space->{$self->{nc}}) {
6257            ## XML5: Switch to the "DOCTYPE ENTITY parameter state".
6258            $self->{ct}->{type} = PARAMETER_ENTITY_TOKEN;
6259            $self->{state} = BEFORE_MD_NAME_STATE;
6260            
6261        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6262          $self->{line_prev} = $self->{line};
6263          $self->{column_prev} = $self->{column};
6264          $self->{column}++;
6265          $self->{nc}
6266              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6267        } else {
6268          $self->{set_nc}->($self);
6269        }
6270      
6271            redo A;
6272          } elsif ($self->{nc} == 0x003E) { # >
6273            ## XML5: Same as "Anything else".
6274            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md name'); ## TODO: type
6275            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6276            
6277        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6278          $self->{line_prev} = $self->{line};
6279          $self->{column_prev} = $self->{column};
6280          $self->{column}++;
6281          $self->{nc}
6282              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6283        } else {
6284          $self->{set_nc}->($self);
6285        }
6286      
6287            redo A;
6288          } elsif ($self->{nc} == -1) {
6289            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md');
6290            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6291            ## Reconsume.
6292            redo A;
6293          } else {
6294            ## XML5: No parse error.
6295            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space after ENTITY percent'); ## TODO: type
6296            $self->{state} = BOGUS_COMMENT_STATE;
6297            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
6298            ## Reconsume.
6299            redo A;
6300          }
6301        } elsif ($self->{state} == MD_NAME_STATE) {
6302          ## XML5: "DOCTYPE ENTITY name state" and "DOCTYPE ATTLIST name state".
6303          
6304          if ($is_space->{$self->{nc}}) {
6305            if ($self->{ct}->{type} == ATTLIST_TOKEN) {
6306              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
6307            } elsif ($self->{ct}->{type} == ELEMENT_TOKEN) {
6308              $self->{state} = AFTER_ELEMENT_NAME_STATE;
6309            } else { # ENTITY/NOTATION
6310              $self->{state} = AFTER_DOCTYPE_NAME_STATE;
6311            }
6312            
6313        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6314          $self->{line_prev} = $self->{line};
6315          $self->{column_prev} = $self->{column};
6316          $self->{column}++;
6317          $self->{nc}
6318              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6319        } else {
6320          $self->{set_nc}->($self);
6321        }
6322      
6323            redo A;
6324          } elsif ($self->{nc} == 0x003E) { # >
6325            if ($self->{ct}->{type} == ATTLIST_TOKEN) {
6326              #
6327            } else {
6328              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md def'); ## TODO: type
6329            }
6330            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6331            
6332        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6333          $self->{line_prev} = $self->{line};
6334          $self->{column_prev} = $self->{column};
6335          $self->{column}++;
6336          $self->{nc}
6337              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6338        } else {
6339          $self->{set_nc}->($self);
6340        }
6341      
6342            return  ($self->{ct}); # ELEMENT/ENTITY/ATTLIST/NOTATION
6343            redo A;
6344          } elsif ($self->{nc} == -1) {
6345            ## XML5: [ATTLIST] No parse error.
6346            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md');
6347            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6348            ## Reconsume.
6349            return  ($self->{ct}); # ELEMENT/ENTITY/ATTLIST/NOTATION
6350            redo A;
6351          } else {
6352            ## XML5: [ATTLIST] Not defined yet.
6353            $self->{ct}->{name} .= chr $self->{nc};
6354            ## Stay in the state.
6355            
6356        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6357          $self->{line_prev} = $self->{line};
6358          $self->{column_prev} = $self->{column};
6359          $self->{column}++;
6360          $self->{nc}
6361              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6362        } else {
6363          $self->{set_nc}->($self);
6364        }
6365      
6366            redo A;
6367          }
6368        } elsif ($self->{state} == DOCTYPE_ATTLIST_NAME_AFTER_STATE) {
6369          if ($is_space->{$self->{nc}}) {
6370            ## Stay in the state.
6371            
6372        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6373          $self->{line_prev} = $self->{line};
6374          $self->{column_prev} = $self->{column};
6375          $self->{column}++;
6376          $self->{nc}
6377              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6378        } else {
6379          $self->{set_nc}->($self);
6380        }
6381      
6382            redo A;
6383          } elsif ($self->{nc} == 0x003E) { # >
6384            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6385            
6386        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6387          $self->{line_prev} = $self->{line};
6388          $self->{column_prev} = $self->{column};
6389          $self->{column}++;
6390          $self->{nc}
6391              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6392        } else {
6393          $self->{set_nc}->($self);
6394        }
6395      
6396            return  ($self->{ct}); # ATTLIST
6397            redo A;
6398          } elsif ($self->{nc} == -1) {
6399            ## XML5: No parse error.
6400            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6401            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6402            return  ($self->{ct});
6403            redo A;
6404          } else {
6405            ## XML5: Not defined yet.
6406            $self->{ca} = {name => chr ($self->{nc}), # attrdef
6407                           tokens => [],
6408                           line => $self->{line}, column => $self->{column}};
6409            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE;
6410            
6411        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6412          $self->{line_prev} = $self->{line};
6413          $self->{column_prev} = $self->{column};
6414          $self->{column}++;
6415          $self->{nc}
6416              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6417        } else {
6418          $self->{set_nc}->($self);
6419        }
6420      
6421            redo A;
6422          }
6423        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE) {
6424          if ($is_space->{$self->{nc}}) {
6425            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE;
6426            
6427        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6428          $self->{line_prev} = $self->{line};
6429          $self->{column_prev} = $self->{column};
6430          $self->{column}++;
6431          $self->{nc}
6432              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6433        } else {
6434          $self->{set_nc}->($self);
6435        }
6436      
6437            redo A;
6438          } elsif ($self->{nc} == 0x003E) { # >
6439            ## XML5: Same as "anything else".
6440            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr type'); ## TODO: type
6441            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6442            
6443        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6444          $self->{line_prev} = $self->{line};
6445          $self->{column_prev} = $self->{column};
6446          $self->{column}++;
6447          $self->{nc}
6448              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6449        } else {
6450          $self->{set_nc}->($self);
6451        }
6452      
6453            return  ($self->{ct}); # ATTLIST
6454            redo A;
6455          } elsif ($self->{nc} == 0x0028) { # (
6456            ## XML5: Same as "anything else".
6457            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before paren'); ## TODO: type
6458            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6459            
6460        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6461          $self->{line_prev} = $self->{line};
6462          $self->{column_prev} = $self->{column};
6463          $self->{column}++;
6464          $self->{nc}
6465              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6466        } else {
6467          $self->{set_nc}->($self);
6468        }
6469      
6470            redo A;
6471          } elsif ($self->{nc} == -1) {
6472            ## XML5: No parse error.
6473            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6474            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6475            
6476        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6477          $self->{line_prev} = $self->{line};
6478          $self->{column_prev} = $self->{column};
6479          $self->{column}++;
6480          $self->{nc}
6481              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6482        } else {
6483          $self->{set_nc}->($self);
6484        }
6485      
6486            return  ($self->{ct}); # ATTLIST
6487            redo A;
6488          } else {
6489            ## XML5: Not defined yet.
6490            $self->{ca}->{name} .= chr $self->{nc};
6491            ## Stay in the state.
6492            
6493        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6494          $self->{line_prev} = $self->{line};
6495          $self->{column_prev} = $self->{column};
6496          $self->{column}++;
6497          $self->{nc}
6498              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6499        } else {
6500          $self->{set_nc}->($self);
6501        }
6502      
6503            redo A;
6504          }
6505        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE) {
6506          if ($is_space->{$self->{nc}}) {
6507            ## Stay in the state.
6508            
6509        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6510          $self->{line_prev} = $self->{line};
6511          $self->{column_prev} = $self->{column};
6512          $self->{column}++;
6513          $self->{nc}
6514              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6515        } else {
6516          $self->{set_nc}->($self);
6517        }
6518      
6519            redo A;
6520          } elsif ($self->{nc} == 0x003E) { # >
6521            ## XML5: Same as "anything else".
6522            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr type'); ## TODO: type
6523            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6524            
6525        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6526          $self->{line_prev} = $self->{line};
6527          $self->{column_prev} = $self->{column};
6528          $self->{column}++;
6529          $self->{nc}
6530              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6531        } else {
6532          $self->{set_nc}->($self);
6533        }
6534      
6535            return  ($self->{ct}); # ATTLIST
6536            redo A;
6537          } elsif ($self->{nc} == 0x0028) { # (
6538            ## XML5: Same as "anything else".
6539            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6540            
6541        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6542          $self->{line_prev} = $self->{line};
6543          $self->{column_prev} = $self->{column};
6544          $self->{column}++;
6545          $self->{nc}
6546              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6547        } else {
6548          $self->{set_nc}->($self);
6549        }
6550      
6551            redo A;
6552          } elsif ($self->{nc} == -1) {
6553            ## XML5: No parse error.
6554            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6555            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6556            
6557        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6558          $self->{line_prev} = $self->{line};
6559          $self->{column_prev} = $self->{column};
6560          $self->{column}++;
6561          $self->{nc}
6562              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6563        } else {
6564          $self->{set_nc}->($self);
6565        }
6566      
6567            return  ($self->{ct});
6568            redo A;
6569          } else {
6570            ## XML5: Not defined yet.
6571            $self->{ca}->{type} = chr $self->{nc};
6572            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE;
6573            
6574        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6575          $self->{line_prev} = $self->{line};
6576          $self->{column_prev} = $self->{column};
6577          $self->{column}++;
6578          $self->{nc}
6579              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6580        } else {
6581          $self->{set_nc}->($self);
6582        }
6583      
6584            redo A;
6585          }
6586        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE) {
6587          if ($is_space->{$self->{nc}}) {
6588            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE;
6589            
6590        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6591          $self->{line_prev} = $self->{line};
6592          $self->{column_prev} = $self->{column};
6593          $self->{column}++;
6594          $self->{nc}
6595              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6596        } else {
6597          $self->{set_nc}->($self);
6598        }
6599      
6600            redo A;
6601          } elsif ($self->{nc} == 0x0023) { # #
6602            ## XML5: Same as "anything else".
6603            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6604            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6605            
6606        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6607          $self->{line_prev} = $self->{line};
6608          $self->{column_prev} = $self->{column};
6609          $self->{column}++;
6610          $self->{nc}
6611              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6612        } else {
6613          $self->{set_nc}->($self);
6614        }
6615      
6616            redo A;
6617          } elsif ($self->{nc} == 0x0022) { # "
6618            ## XML5: Same as "anything else".
6619            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6620            $self->{ca}->{value} = '';
6621            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6622            
6623        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6624          $self->{line_prev} = $self->{line};
6625          $self->{column_prev} = $self->{column};
6626          $self->{column}++;
6627          $self->{nc}
6628              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6629        } else {
6630          $self->{set_nc}->($self);
6631        }
6632      
6633            redo A;
6634          } elsif ($self->{nc} == 0x0027) { # '
6635            ## XML5: Same as "anything else".
6636            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6637            $self->{ca}->{value} = '';
6638            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6639            
6640        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6641          $self->{line_prev} = $self->{line};
6642          $self->{column_prev} = $self->{column};
6643          $self->{column}++;
6644          $self->{nc}
6645              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6646        } else {
6647          $self->{set_nc}->($self);
6648        }
6649      
6650            redo A;
6651          } elsif ($self->{nc} == 0x003E) { # >
6652            ## XML5: Same as "anything else".
6653            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6654            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6655            
6656        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6657          $self->{line_prev} = $self->{line};
6658          $self->{column_prev} = $self->{column};
6659          $self->{column}++;
6660          $self->{nc}
6661              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6662        } else {
6663          $self->{set_nc}->($self);
6664        }
6665      
6666            return  ($self->{ct}); # ATTLIST
6667            redo A;
6668          } elsif ($self->{nc} == 0x0028) { # (
6669            ## XML5: Same as "anything else".
6670            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before paren'); ## TODO: type
6671            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6672            
6673        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6674          $self->{line_prev} = $self->{line};
6675          $self->{column_prev} = $self->{column};
6676          $self->{column}++;
6677          $self->{nc}
6678              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6679        } else {
6680          $self->{set_nc}->($self);
6681        }
6682      
6683            redo A;
6684          } elsif ($self->{nc} == -1) {
6685            ## XML5: No parse error.
6686            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6687            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6688            
6689        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6690          $self->{line_prev} = $self->{line};
6691          $self->{column_prev} = $self->{column};
6692          $self->{column}++;
6693          $self->{nc}
6694              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6695        } else {
6696          $self->{set_nc}->($self);
6697        }
6698      
6699            return  ($self->{ct});
6700            redo A;
6701          } else {
6702            ## XML5: Not defined yet.
6703            $self->{ca}->{type} .= chr $self->{nc};
6704            ## Stay in the state.
6705            
6706        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6707          $self->{line_prev} = $self->{line};
6708          $self->{column_prev} = $self->{column};
6709          $self->{column}++;
6710          $self->{nc}
6711              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6712        } else {
6713          $self->{set_nc}->($self);
6714        }
6715      
6716            redo A;
6717          }
6718        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE) {
6719          if ($is_space->{$self->{nc}}) {
6720            ## Stay in the state.
6721            
6722        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6723          $self->{line_prev} = $self->{line};
6724          $self->{column_prev} = $self->{column};
6725          $self->{column}++;
6726          $self->{nc}
6727              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6728        } else {
6729          $self->{set_nc}->($self);
6730        }
6731      
6732            redo A;
6733          } elsif ($self->{nc} == 0x0028) { # (
6734            ## XML5: Same as "anything else".
6735            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6736            
6737        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6738          $self->{line_prev} = $self->{line};
6739          $self->{column_prev} = $self->{column};
6740          $self->{column}++;
6741          $self->{nc}
6742              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6743        } else {
6744          $self->{set_nc}->($self);
6745        }
6746      
6747            redo A;
6748          } elsif ($self->{nc} == 0x0023) { # #
6749            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6750            
6751        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6752          $self->{line_prev} = $self->{line};
6753          $self->{column_prev} = $self->{column};
6754          $self->{column}++;
6755          $self->{nc}
6756              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6757        } else {
6758          $self->{set_nc}->($self);
6759        }
6760      
6761            redo A;
6762          } elsif ($self->{nc} == 0x0022) { # "
6763            ## XML5: Same as "anything else".
6764            $self->{ca}->{value} = '';
6765            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6766            
6767        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6768          $self->{line_prev} = $self->{line};
6769          $self->{column_prev} = $self->{column};
6770          $self->{column}++;
6771          $self->{nc}
6772              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6773        } else {
6774          $self->{set_nc}->($self);
6775        }
6776      
6777            redo A;
6778          } elsif ($self->{nc} == 0x0027) { # '
6779            ## XML5: Same as "anything else".
6780            $self->{ca}->{value} = '';
6781            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6782            
6783        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6784          $self->{line_prev} = $self->{line};
6785          $self->{column_prev} = $self->{column};
6786          $self->{column}++;
6787          $self->{nc}
6788              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6789        } else {
6790          $self->{set_nc}->($self);
6791        }
6792      
6793            redo A;
6794          } elsif ($self->{nc} == 0x003E) { # >
6795            ## XML5: Same as "anything else".
6796            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6797            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6798            
6799        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6800          $self->{line_prev} = $self->{line};
6801          $self->{column_prev} = $self->{column};
6802          $self->{column}++;
6803          $self->{nc}
6804              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6805        } else {
6806          $self->{set_nc}->($self);
6807        }
6808      
6809            return  ($self->{ct}); # ATTLIST
6810            redo A;
6811          } elsif ($self->{nc} == -1) {
6812            ## XML5: No parse error.
6813            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6814            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6815            
6816        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6817          $self->{line_prev} = $self->{line};
6818          $self->{column_prev} = $self->{column};
6819          $self->{column}++;
6820          $self->{nc}
6821              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6822        } else {
6823          $self->{set_nc}->($self);
6824        }
6825      
6826            return  ($self->{ct});
6827            redo A;
6828          } else {
6829            ## XML5: Switch to the "DOCTYPE bogus comment state".
6830            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
6831            $self->{ca}->{value} = '';
6832            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
6833            ## Reconsume.
6834            redo A;
6835          }
6836        } elsif ($self->{state} == BEFORE_ALLOWED_TOKEN_STATE) {
6837          if ($is_space->{$self->{nc}}) {
6838            ## Stay in the state.
6839            
6840        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6841          $self->{line_prev} = $self->{line};
6842          $self->{column_prev} = $self->{column};
6843          $self->{column}++;
6844          $self->{nc}
6845              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6846        } else {
6847          $self->{set_nc}->($self);
6848        }
6849      
6850            redo A;
6851          } elsif ($self->{nc} == 0x007C) { # |
6852            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty allowed token'); ## TODO: type
6853            ## Stay in the state.
6854            
6855        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6856          $self->{line_prev} = $self->{line};
6857          $self->{column_prev} = $self->{column};
6858          $self->{column}++;
6859          $self->{nc}
6860              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6861        } else {
6862          $self->{set_nc}->($self);
6863        }
6864      
6865            redo A;
6866          } elsif ($self->{nc} == 0x0029) { # )
6867            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty allowed token'); ## TODO: type
6868            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
6869            
6870        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6871          $self->{line_prev} = $self->{line};
6872          $self->{column_prev} = $self->{column};
6873          $self->{column}++;
6874          $self->{nc}
6875              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6876        } else {
6877          $self->{set_nc}->($self);
6878        }
6879      
6880            redo A;
6881          } elsif ($self->{nc} == 0x003E) { # >
6882            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
6883            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6884            
6885        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6886          $self->{line_prev} = $self->{line};
6887          $self->{column_prev} = $self->{column};
6888          $self->{column}++;
6889          $self->{nc}
6890              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6891        } else {
6892          $self->{set_nc}->($self);
6893        }
6894      
6895            return  ($self->{ct}); # ATTLIST
6896            redo A;
6897          } elsif ($self->{nc} == -1) {
6898            ## XML5: No parse error.
6899            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6900            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6901            
6902        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6903          $self->{line_prev} = $self->{line};
6904          $self->{column_prev} = $self->{column};
6905          $self->{column}++;
6906          $self->{nc}
6907              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6908        } else {
6909          $self->{set_nc}->($self);
6910        }
6911      
6912            return  ($self->{ct});
6913            redo A;
6914          } else {
6915            push @{$self->{ca}->{tokens}}, chr $self->{nc};
6916            $self->{state} = ALLOWED_TOKEN_STATE;
6917            
6918        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6919          $self->{line_prev} = $self->{line};
6920          $self->{column_prev} = $self->{column};
6921          $self->{column}++;
6922          $self->{nc}
6923              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6924        } else {
6925          $self->{set_nc}->($self);
6926        }
6927      
6928            redo A;
6929          }
6930        } elsif ($self->{state} == ALLOWED_TOKEN_STATE) {
6931          if ($is_space->{$self->{nc}}) {
6932            $self->{state} = AFTER_ALLOWED_TOKEN_STATE;
6933            
6934        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6935          $self->{line_prev} = $self->{line};
6936          $self->{column_prev} = $self->{column};
6937          $self->{column}++;
6938          $self->{nc}
6939              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6940        } else {
6941          $self->{set_nc}->($self);
6942        }
6943      
6944            redo A;
6945          } elsif ($self->{nc} == 0x007C) { # |
6946            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6947            
6948        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6949          $self->{line_prev} = $self->{line};
6950          $self->{column_prev} = $self->{column};
6951          $self->{column}++;
6952          $self->{nc}
6953              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6954        } else {
6955          $self->{set_nc}->($self);
6956        }
6957      
6958            redo A;
6959          } elsif ($self->{nc} == 0x0029) { # )
6960            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
6961            
6962        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6963          $self->{line_prev} = $self->{line};
6964          $self->{column_prev} = $self->{column};
6965          $self->{column}++;
6966          $self->{nc}
6967              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6968        } else {
6969          $self->{set_nc}->($self);
6970        }
6971      
6972            redo A;
6973          } elsif ($self->{nc} == 0x003E) { # >
6974            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
6975            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6976            
6977        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6978          $self->{line_prev} = $self->{line};
6979          $self->{column_prev} = $self->{column};
6980          $self->{column}++;
6981          $self->{nc}
6982              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6983        } else {
6984          $self->{set_nc}->($self);
6985        }
6986      
6987            return  ($self->{ct}); # ATTLIST
6988            redo A;
6989          } elsif ($self->{nc} == -1) {
6990            ## XML5: No parse error.
6991            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6992            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6993            
6994        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6995          $self->{line_prev} = $self->{line};
6996          $self->{column_prev} = $self->{column};
6997          $self->{column}++;
6998          $self->{nc}
6999              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7000        } else {
7001          $self->{set_nc}->($self);
7002        }
7003      
7004            return  ($self->{ct});
7005            redo A;
7006          } else {
7007            $self->{ca}->{tokens}->[-1] .= chr $self->{nc};
7008            ## Stay in the state.
7009            
7010        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7011          $self->{line_prev} = $self->{line};
7012          $self->{column_prev} = $self->{column};
7013          $self->{column}++;
7014          $self->{nc}
7015              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7016        } else {
7017          $self->{set_nc}->($self);
7018        }
7019      
7020            redo A;
7021          }
7022        } elsif ($self->{state} == AFTER_ALLOWED_TOKEN_STATE) {
7023          if ($is_space->{$self->{nc}}) {
7024            ## Stay in the state.
7025            
7026        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7027          $self->{line_prev} = $self->{line};
7028          $self->{column_prev} = $self->{column};
7029          $self->{column}++;
7030          $self->{nc}
7031              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7032        } else {
7033          $self->{set_nc}->($self);
7034        }
7035      
7036            redo A;
7037          } elsif ($self->{nc} == 0x007C) { # |
7038            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
7039            
7040        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7041          $self->{line_prev} = $self->{line};
7042          $self->{column_prev} = $self->{column};
7043          $self->{column}++;
7044          $self->{nc}
7045              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7046        } else {
7047          $self->{set_nc}->($self);
7048        }
7049      
7050            redo A;
7051          } elsif ($self->{nc} == 0x0029) { # )
7052            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
7053            
7054        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7055          $self->{line_prev} = $self->{line};
7056          $self->{column_prev} = $self->{column};
7057          $self->{column}++;
7058          $self->{nc}
7059              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7060        } else {
7061          $self->{set_nc}->($self);
7062        }
7063      
7064            redo A;
7065          } elsif ($self->{nc} == 0x003E) { # >
7066            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
7067            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7068            
7069        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7070          $self->{line_prev} = $self->{line};
7071          $self->{column_prev} = $self->{column};
7072          $self->{column}++;
7073          $self->{nc}
7074              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7075        } else {
7076          $self->{set_nc}->($self);
7077        }
7078      
7079            return  ($self->{ct}); # ATTLIST
7080            redo A;
7081          } elsif ($self->{nc} == -1) {
7082            ## XML5: No parse error.
7083            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7084            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7085            
7086        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7087          $self->{line_prev} = $self->{line};
7088          $self->{column_prev} = $self->{column};
7089          $self->{column}++;
7090          $self->{nc}
7091              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7092        } else {
7093          $self->{set_nc}->($self);
7094        }
7095      
7096            return  ($self->{ct});
7097            redo A;
7098          } else {
7099            $self->{parse_error}->(level => $self->{level}->{must}, type => 'space in allowed token', ## TODO: type
7100                            line => $self->{line_prev},
7101                            column => $self->{column_prev});
7102            $self->{ca}->{tokens}->[-1] .= ' ' . chr $self->{nc};
7103            $self->{state} = ALLOWED_TOKEN_STATE;
7104            
7105        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7106          $self->{line_prev} = $self->{line};
7107          $self->{column_prev} = $self->{column};
7108          $self->{column}++;
7109          $self->{nc}
7110              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7111        } else {
7112          $self->{set_nc}->($self);
7113        }
7114      
7115            redo A;
7116          }
7117        } elsif ($self->{state} == AFTER_ALLOWED_TOKENS_STATE) {
7118          if ($is_space->{$self->{nc}}) {
7119            $self->{state} = BEFORE_ATTR_DEFAULT_STATE;
7120            
7121        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7122          $self->{line_prev} = $self->{line};
7123          $self->{column_prev} = $self->{column};
7124          $self->{column}++;
7125          $self->{nc}
7126              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7127        } else {
7128          $self->{set_nc}->($self);
7129        }
7130      
7131            redo A;
7132          } elsif ($self->{nc} == 0x0023) { # #
7133            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7134            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
7135            
7136        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7137          $self->{line_prev} = $self->{line};
7138          $self->{column_prev} = $self->{column};
7139          $self->{column}++;
7140          $self->{nc}
7141              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7142        } else {
7143          $self->{set_nc}->($self);
7144        }
7145      
7146            redo A;
7147          } elsif ($self->{nc} == 0x0022) { # "
7148            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7149            $self->{ca}->{value} = '';
7150            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7151            
7152        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7153          $self->{line_prev} = $self->{line};
7154          $self->{column_prev} = $self->{column};
7155          $self->{column}++;
7156          $self->{nc}
7157              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7158        } else {
7159          $self->{set_nc}->($self);
7160        }
7161      
7162            redo A;
7163          } elsif ($self->{nc} == 0x0027) { # '
7164            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7165            $self->{ca}->{value} = '';
7166            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7167            
7168        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7169          $self->{line_prev} = $self->{line};
7170          $self->{column_prev} = $self->{column};
7171          $self->{column}++;
7172          $self->{nc}
7173              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7174        } else {
7175          $self->{set_nc}->($self);
7176        }
7177      
7178            redo A;
7179          } elsif ($self->{nc} == 0x003E) { # >
7180            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
7181            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7182            
7183        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7184          $self->{line_prev} = $self->{line};
7185          $self->{column_prev} = $self->{column};
7186          $self->{column}++;
7187          $self->{nc}
7188              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7189        } else {
7190          $self->{set_nc}->($self);
7191        }
7192      
7193            return  ($self->{ct}); # ATTLIST
7194            redo A;
7195          } elsif ($self->{nc} == -1) {
7196            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7197            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7198            
7199        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7200          $self->{line_prev} = $self->{line};
7201          $self->{column_prev} = $self->{column};
7202          $self->{column}++;
7203          $self->{nc}
7204              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7205        } else {
7206          $self->{set_nc}->($self);
7207        }
7208      
7209            return  ($self->{ct});
7210            redo A;
7211          } else {
7212            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
7213            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
7214            ## Reconsume.
7215            redo A;
7216          }
7217        } elsif ($self->{state} == BEFORE_ATTR_DEFAULT_STATE) {
7218          if ($is_space->{$self->{nc}}) {
7219            ## Stay in the state.
7220            
7221        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7222          $self->{line_prev} = $self->{line};
7223          $self->{column_prev} = $self->{column};
7224          $self->{column}++;
7225          $self->{nc}
7226              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7227        } else {
7228          $self->{set_nc}->($self);
7229        }
7230      
7231            redo A;
7232          } elsif ($self->{nc} == 0x0023) { # #
7233            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
7234            
7235        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7236          $self->{line_prev} = $self->{line};
7237          $self->{column_prev} = $self->{column};
7238          $self->{column}++;
7239          $self->{nc}
7240              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7241        } else {
7242          $self->{set_nc}->($self);
7243        }
7244      
7245            redo A;
7246          } elsif ($self->{nc} == 0x0022) { # "
7247            $self->{ca}->{value} = '';
7248            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7249            
7250        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7251          $self->{line_prev} = $self->{line};
7252          $self->{column_prev} = $self->{column};
7253          $self->{column}++;
7254          $self->{nc}
7255              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7256        } else {
7257          $self->{set_nc}->($self);
7258        }
7259      
7260            redo A;
7261          } elsif ($self->{nc} == 0x0027) { # '
7262            $self->{ca}->{value} = '';
7263            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7264            
7265        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7266          $self->{line_prev} = $self->{line};
7267          $self->{column_prev} = $self->{column};
7268          $self->{column}++;
7269          $self->{nc}
7270              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7271        } else {
7272          $self->{set_nc}->($self);
7273        }
7274      
7275            redo A;
7276          } elsif ($self->{nc} == 0x003E) { # >
7277            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
7278            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7279            
7280        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7281          $self->{line_prev} = $self->{line};
7282          $self->{column_prev} = $self->{column};
7283          $self->{column}++;
7284          $self->{nc}
7285              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7286        } else {
7287          $self->{set_nc}->($self);
7288        }
7289      
7290            return  ($self->{ct}); # ATTLIST
7291            redo A;
7292          } elsif ($self->{nc} == -1) {
7293            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7294            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7295            
7296        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7297          $self->{line_prev} = $self->{line};
7298          $self->{column_prev} = $self->{column};
7299          $self->{column}++;
7300          $self->{nc}
7301              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7302        } else {
7303          $self->{set_nc}->($self);
7304        }
7305      
7306            return  ($self->{ct});
7307            redo A;
7308          } else {
7309            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
7310            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
7311            ## Reconsume.
7312            redo A;
7313          }
7314        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE) {
7315          if ($is_space->{$self->{nc}}) {
7316            ## XML5: No parse error.
7317            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no default type'); ## TODO: type
7318            $self->{state} = BOGUS_MD_STATE;
7319            ## Reconsume.
7320            redo A;
7321          } elsif ($self->{nc} == 0x0022) { # "
7322            ## XML5: Same as "anything else".
7323            $self->{ca}->{value} = '';
7324            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7325            
7326        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7327          $self->{line_prev} = $self->{line};
7328          $self->{column_prev} = $self->{column};
7329          $self->{column}++;
7330          $self->{nc}
7331              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7332        } else {
7333          $self->{set_nc}->($self);
7334        }
7335      
7336            redo A;
7337          } elsif ($self->{nc} == 0x0027) { # '
7338            ## XML5: Same as "anything else".
7339            $self->{ca}->{value} = '';
7340            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7341            
7342        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7343          $self->{line_prev} = $self->{line};
7344          $self->{column_prev} = $self->{column};
7345          $self->{column}++;
7346          $self->{nc}
7347              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7348        } else {
7349          $self->{set_nc}->($self);
7350        }
7351      
7352            redo A;
7353          } elsif ($self->{nc} == 0x003E) { # >
7354            ## XML5: Same as "anything else".
7355            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
7356            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7357            
7358        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7359          $self->{line_prev} = $self->{line};
7360          $self->{column_prev} = $self->{column};
7361          $self->{column}++;
7362          $self->{nc}
7363              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7364        } else {
7365          $self->{set_nc}->($self);
7366        }
7367      
7368            return  ($self->{ct}); # ATTLIST
7369            redo A;
7370          } elsif ($self->{nc} == -1) {
7371            ## XML5: No parse error.
7372            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7373            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7374            
7375        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7376          $self->{line_prev} = $self->{line};
7377          $self->{column_prev} = $self->{column};
7378          $self->{column}++;
7379          $self->{nc}
7380              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7381        } else {
7382          $self->{set_nc}->($self);
7383        }
7384      
7385            return  ($self->{ct});
7386            redo A;
7387          } else {
7388            $self->{ca}->{default} = chr $self->{nc};
7389            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE;
7390            
7391        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7392          $self->{line_prev} = $self->{line};
7393          $self->{column_prev} = $self->{column};
7394          $self->{column}++;
7395          $self->{nc}
7396              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7397        } else {
7398          $self->{set_nc}->($self);
7399        }
7400      
7401            redo A;
7402          }
7403        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE) {
7404          if ($is_space->{$self->{nc}}) {
7405            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE;
7406            
7407        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7408          $self->{line_prev} = $self->{line};
7409          $self->{column_prev} = $self->{column};
7410          $self->{column}++;
7411          $self->{nc}
7412              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7413        } else {
7414          $self->{set_nc}->($self);
7415        }
7416      
7417            redo A;
7418          } elsif ($self->{nc} == 0x0022) { # "
7419            ## XML5: Same as "anything else".
7420            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7421            $self->{ca}->{value} = '';
7422            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7423            
7424        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7425          $self->{line_prev} = $self->{line};
7426          $self->{column_prev} = $self->{column};
7427          $self->{column}++;
7428          $self->{nc}
7429              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7430        } else {
7431          $self->{set_nc}->($self);
7432        }
7433      
7434            redo A;
7435          } elsif ($self->{nc} == 0x0027) { # '
7436            ## XML5: Same as "anything else".
7437            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7438            $self->{ca}->{value} = '';
7439            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7440            
7441        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7442          $self->{line_prev} = $self->{line};
7443          $self->{column_prev} = $self->{column};
7444          $self->{column}++;
7445          $self->{nc}
7446              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7447        } else {
7448          $self->{set_nc}->($self);
7449        }
7450      
7451            redo A;
7452          } elsif ($self->{nc} == 0x003E) { # >
7453            ## XML5: Same as "anything else".
7454            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7455            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7456            
7457        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7458          $self->{line_prev} = $self->{line};
7459          $self->{column_prev} = $self->{column};
7460          $self->{column}++;
7461          $self->{nc}
7462              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7463        } else {
7464          $self->{set_nc}->($self);
7465        }
7466      
7467            return  ($self->{ct}); # ATTLIST
7468            redo A;
7469          } elsif ($self->{nc} == -1) {
7470            ## XML5: No parse error.
7471            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7472            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7473            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7474            
7475        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7476          $self->{line_prev} = $self->{line};
7477          $self->{column_prev} = $self->{column};
7478          $self->{column}++;
7479          $self->{nc}
7480              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7481        } else {
7482          $self->{set_nc}->($self);
7483        }
7484      
7485            return  ($self->{ct});
7486            redo A;
7487          } else {
7488            $self->{ca}->{default} .= chr $self->{nc};
7489            ## Stay in the state.
7490            
7491        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7492          $self->{line_prev} = $self->{line};
7493          $self->{column_prev} = $self->{column};
7494          $self->{column}++;
7495          $self->{nc}
7496              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7497        } else {
7498          $self->{set_nc}->($self);
7499        }
7500      
7501            redo A;
7502          }
7503        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE) {
7504          if ($is_space->{$self->{nc}}) {
7505            ## Stay in the state.
7506            
7507        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7508          $self->{line_prev} = $self->{line};
7509          $self->{column_prev} = $self->{column};
7510          $self->{column}++;
7511          $self->{nc}
7512              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7513        } else {
7514          $self->{set_nc}->($self);
7515        }
7516      
7517            redo A;
7518          } elsif ($self->{nc} == 0x0022) { # "
7519            $self->{ca}->{value} = '';
7520            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7521            
7522        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7523          $self->{line_prev} = $self->{line};
7524          $self->{column_prev} = $self->{column};
7525          $self->{column}++;
7526          $self->{nc}
7527              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7528        } else {
7529          $self->{set_nc}->($self);
7530        }
7531      
7532            redo A;
7533          } elsif ($self->{nc} == 0x0027) { # '
7534            $self->{ca}->{value} = '';
7535            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7536            
7537        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7538          $self->{line_prev} = $self->{line};
7539          $self->{column_prev} = $self->{column};
7540          $self->{column}++;
7541          $self->{nc}
7542              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7543        } else {
7544          $self->{set_nc}->($self);
7545        }
7546      
7547            redo A;
7548          } elsif ($self->{nc} == 0x003E) { # >
7549            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7550            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7551            
7552        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7553          $self->{line_prev} = $self->{line};
7554          $self->{column_prev} = $self->{column};
7555          $self->{column}++;
7556          $self->{nc}
7557              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7558        } else {
7559          $self->{set_nc}->($self);
7560        }
7561      
7562            return  ($self->{ct}); # ATTLIST
7563            redo A;
7564          } elsif ($self->{nc} == -1) {
7565            ## XML5: No parse error.
7566            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7567            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7568            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7569            
7570        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7571          $self->{line_prev} = $self->{line};
7572          $self->{column_prev} = $self->{column};
7573          $self->{column}++;
7574          $self->{nc}
7575              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7576        } else {
7577          $self->{set_nc}->($self);
7578        }
7579      
7580            return  ($self->{ct});
7581            redo A;
7582          } else {
7583            ## XML5: Not defined yet.
7584            if ($self->{ca}->{default} eq 'FIXED') {
7585              $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
7586            } else {
7587              push @{$self->{ct}->{attrdefs}}, $self->{ca};
7588              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7589            }
7590            ## Reconsume.
7591            redo A;
7592          }
7593        } elsif ($self->{state} == AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE) {
7594          if ($is_space->{$self->{nc}} or
7595              $self->{nc} == -1 or
7596              $self->{nc} == 0x003E) { # >
7597            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7598            ## Reconsume.
7599            redo A;
7600          } else {
7601            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before attr name'); ## TODO: type
7602            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7603            ## Reconsume.
7604            redo A;
7605          }
7606        } elsif ($self->{state} == NDATA_STATE) {
7607          ## ASCII case-insensitive
7608          if ($self->{nc} == [
7609                undef,
7610                0x0044, # D
7611                0x0041, # A
7612                0x0054, # T
7613              ]->[length $self->{kwd}] or
7614              $self->{nc} == [
7615                undef,
7616                0x0064, # d
7617                0x0061, # a
7618                0x0074, # t
7619              ]->[length $self->{kwd}]) {
7620            
7621            ## Stay in the state.
7622            $self->{kwd} .= chr $self->{nc};
7623            
7624        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7625          $self->{line_prev} = $self->{line};
7626          $self->{column_prev} = $self->{column};
7627          $self->{column}++;
7628          $self->{nc}
7629              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7630        } else {
7631          $self->{set_nc}->($self);
7632        }
7633      
7634            redo A;
7635          } elsif ((length $self->{kwd}) == 4 and
7636                   ($self->{nc} == 0x0041 or # A
7637                    $self->{nc} == 0x0061)) { # a
7638            if ($self->{kwd} ne 'NDAT' or $self->{nc} == 0x0061) { # a
7639              
7640              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
7641                              text => 'NDATA',
7642                              line => $self->{line_prev},
7643                              column => $self->{column_prev} - 4);
7644            } else {
7645              
7646            }
7647            $self->{state} = AFTER_NDATA_STATE;
7648            
7649        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7650          $self->{line_prev} = $self->{line};
7651          $self->{column_prev} = $self->{column};
7652          $self->{column}++;
7653          $self->{nc}
7654              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7655        } else {
7656          $self->{set_nc}->($self);
7657        }
7658      
7659            redo A;
7660          } else {
7661            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after literal', ## TODO: type
7662                            line => $self->{line_prev},
7663                            column => $self->{column_prev} + 1
7664                                - length $self->{kwd});
7665            
7666            $self->{state} = BOGUS_MD_STATE;
7667            ## Reconsume.
7668            redo A;
7669          }
7670        } elsif ($self->{state} == AFTER_NDATA_STATE) {
7671          if ($is_space->{$self->{nc}}) {
7672            $self->{state} = BEFORE_NOTATION_NAME_STATE;
7673            
7674        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7675          $self->{line_prev} = $self->{line};
7676          $self->{column_prev} = $self->{column};
7677          $self->{column}++;
7678          $self->{nc}
7679              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7680        } else {
7681          $self->{set_nc}->($self);
7682        }
7683      
7684            redo A;
7685          } elsif ($self->{nc} == 0x003E) { # >
7686            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no notation name'); ## TODO: type
7687            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7688            
7689        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7690          $self->{line_prev} = $self->{line};
7691          $self->{column_prev} = $self->{column};
7692          $self->{column}++;
7693          $self->{nc}
7694              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7695        } else {
7696          $self->{set_nc}->($self);
7697        }
7698      
7699            return  ($self->{ct}); # ENTITY
7700            redo A;
7701          } elsif ($self->{nc} == -1) {
7702            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7703            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7704            
7705        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7706          $self->{line_prev} = $self->{line};
7707          $self->{column_prev} = $self->{column};
7708          $self->{column}++;
7709          $self->{nc}
7710              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7711        } else {
7712          $self->{set_nc}->($self);
7713        }
7714      
7715            return  ($self->{ct}); # ENTITY
7716            redo A;
7717          } else {
7718            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after literal', ## TODO: type
7719                            line => $self->{line_prev},
7720                            column => $self->{column_prev} + 1
7721                                - length $self->{kwd});
7722            $self->{state} = BOGUS_MD_STATE;
7723            ## Reconsume.
7724            redo A;
7725          }
7726        } elsif ($self->{state} == BEFORE_NOTATION_NAME_STATE) {
7727          if ($is_space->{$self->{nc}}) {
7728            ## Stay in the state.
7729            
7730        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7731          $self->{line_prev} = $self->{line};
7732          $self->{column_prev} = $self->{column};
7733          $self->{column}++;
7734          $self->{nc}
7735              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7736        } else {
7737          $self->{set_nc}->($self);
7738        }
7739      
7740            redo A;
7741          } elsif ($self->{nc} == 0x003E) { # >
7742            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no notation name'); ## TODO: type
7743            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7744            
7745        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7746          $self->{line_prev} = $self->{line};
7747          $self->{column_prev} = $self->{column};
7748          $self->{column}++;
7749          $self->{nc}
7750              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7751        } else {
7752          $self->{set_nc}->($self);
7753        }
7754      
7755            return  ($self->{ct}); # ENTITY
7756            redo A;
7757          } elsif ($self->{nc} == -1) {
7758            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7759            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7760            
7761        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7762          $self->{line_prev} = $self->{line};
7763          $self->{column_prev} = $self->{column};
7764          $self->{column}++;
7765          $self->{nc}
7766              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7767        } else {
7768          $self->{set_nc}->($self);
7769        }
7770      
7771            return  ($self->{ct}); # ENTITY
7772            redo A;
7773          } else {
7774            $self->{ct}->{notation} = chr $self->{nc}; # ENTITY
7775            $self->{state} = NOTATION_NAME_STATE;
7776            
7777        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7778          $self->{line_prev} = $self->{line};
7779          $self->{column_prev} = $self->{column};
7780          $self->{column}++;
7781          $self->{nc}
7782              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7783        } else {
7784          $self->{set_nc}->($self);
7785        }
7786      
7787            redo A;
7788          }
7789        } elsif ($self->{state} == NOTATION_NAME_STATE) {
7790          if ($is_space->{$self->{nc}}) {
7791            $self->{state} = AFTER_MD_DEF_STATE;
7792            
7793        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7794          $self->{line_prev} = $self->{line};
7795          $self->{column_prev} = $self->{column};
7796          $self->{column}++;
7797          $self->{nc}
7798              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7799        } else {
7800          $self->{set_nc}->($self);
7801        }
7802      
7803            redo A;
7804          } elsif ($self->{nc} == 0x003E) { # >
7805            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7806            
7807        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7808          $self->{line_prev} = $self->{line};
7809          $self->{column_prev} = $self->{column};
7810          $self->{column}++;
7811          $self->{nc}
7812              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7813        } else {
7814          $self->{set_nc}->($self);
7815        }
7816      
7817            return  ($self->{ct}); # ENTITY
7818            redo A;
7819          } elsif ($self->{nc} == -1) {
7820            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7821            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7822            
7823        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7824          $self->{line_prev} = $self->{line};
7825          $self->{column_prev} = $self->{column};
7826          $self->{column}++;
7827          $self->{nc}
7828              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7829        } else {
7830          $self->{set_nc}->($self);
7831        }
7832      
7833            return  ($self->{ct}); # ENTITY
7834            redo A;
7835          } else {
7836            $self->{ct}->{notation} .= chr $self->{nc}; # ENTITY
7837            ## Stay in the state.
7838            
7839        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7840          $self->{line_prev} = $self->{line};
7841          $self->{column_prev} = $self->{column};
7842          $self->{column}++;
7843          $self->{nc}
7844              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7845        } else {
7846          $self->{set_nc}->($self);
7847        }
7848      
7849            redo A;
7850          }
7851        } elsif ($self->{state} == DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE) {
7852          if ($self->{nc} == 0x0022) { # "
7853            $self->{state} = AFTER_MD_DEF_STATE;
7854            
7855        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7856          $self->{line_prev} = $self->{line};
7857          $self->{column_prev} = $self->{column};
7858          $self->{column}++;
7859          $self->{nc}
7860              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7861        } else {
7862          $self->{set_nc}->($self);
7863        }
7864      
7865            redo A;
7866          } elsif ($self->{nc} == 0x0026) { # &
7867            $self->{prev_state} = $self->{state};
7868            $self->{state} = ENTITY_VALUE_ENTITY_STATE;
7869            $self->{entity_add} = 0x0022; # "
7870            
7871        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7872          $self->{line_prev} = $self->{line};
7873          $self->{column_prev} = $self->{column};
7874          $self->{column}++;
7875          $self->{nc}
7876              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7877        } else {
7878          $self->{set_nc}->($self);
7879        }
7880      
7881            redo A;
7882    ## TODO: %
7883          } elsif ($self->{nc} == -1) {
7884            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed entity value'); ## TODO: type
7885            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7886            ## Reconsume.
7887            return  ($self->{ct}); # ENTITY
7888            redo A;
7889          } else {
7890            $self->{ct}->{value} .= chr $self->{nc}; # ENTITY
7891            
7892        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7893          $self->{line_prev} = $self->{line};
7894          $self->{column_prev} = $self->{column};
7895          $self->{column}++;
7896          $self->{nc}
7897              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7898        } else {
7899          $self->{set_nc}->($self);
7900        }
7901      
7902            redo A;
7903          }
7904        } elsif ($self->{state} == DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE) {
7905          if ($self->{nc} == 0x0027) { # '
7906            $self->{state} = AFTER_MD_DEF_STATE;
7907            
7908        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7909          $self->{line_prev} = $self->{line};
7910          $self->{column_prev} = $self->{column};
7911          $self->{column}++;
7912          $self->{nc}
7913              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7914        } else {
7915          $self->{set_nc}->($self);
7916        }
7917      
7918            redo A;
7919          } elsif ($self->{nc} == 0x0026) { # &
7920            $self->{prev_state} = $self->{state};
7921            $self->{state} = ENTITY_VALUE_ENTITY_STATE;
7922            $self->{entity_add} = 0x0027; # '
7923            
7924        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7925          $self->{line_prev} = $self->{line};
7926          $self->{column_prev} = $self->{column};
7927          $self->{column}++;
7928          $self->{nc}
7929              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7930        } else {
7931          $self->{set_nc}->($self);
7932        }
7933      
7934            redo A;
7935    ## TODO: %
7936          } elsif ($self->{nc} == -1) {
7937            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed entity value'); ## TODO: type
7938            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7939            ## Reconsume.
7940            return  ($self->{ct}); # ENTITY
7941            redo A;
7942          } else {
7943            $self->{ct}->{value} .= chr $self->{nc}; # ENTITY
7944            
7945        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7946          $self->{line_prev} = $self->{line};
7947          $self->{column_prev} = $self->{column};
7948          $self->{column}++;
7949          $self->{nc}
7950              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7951        } else {
7952          $self->{set_nc}->($self);
7953        }
7954      
7955            redo A;
7956          }
7957        } elsif ($self->{state} == ENTITY_VALUE_ENTITY_STATE) {
7958          if ($is_space->{$self->{nc}} or
7959              {
7960                0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
7961                $self->{entity_add} => 1,
7962              }->{$self->{nc}}) {
7963            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero',
7964                            line => $self->{line_prev},
7965                            column => $self->{column_prev}
7966                                + ($self->{nc} == -1 ? 1 : 0));
7967            ## Don't consume
7968            ## Return nothing.
7969            #
7970          } elsif ($self->{nc} == 0x0023) { # #
7971            $self->{ca} = $self->{ct};
7972            $self->{state} = ENTITY_HASH_STATE;
7973            $self->{kwd} = '#';
7974            
7975        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7976          $self->{line_prev} = $self->{line};
7977          $self->{column_prev} = $self->{column};
7978          $self->{column}++;
7979          $self->{nc}
7980              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7981        } else {
7982          $self->{set_nc}->($self);
7983        }
7984      
7985            redo A;
7986          } else {
7987            #
7988          }
7989    
7990          $self->{ct}->{value} .= '&';
7991          $self->{state} = $self->{prev_state};
7992          ## Reconsume.
7993          redo A;
7994        } elsif ($self->{state} == AFTER_ELEMENT_NAME_STATE) {
7995          if ($is_space->{$self->{nc}}) {
7996            $self->{state} = BEFORE_ELEMENT_CONTENT_STATE;
7997            
7998        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7999          $self->{line_prev} = $self->{line};
8000          $self->{column_prev} = $self->{column};
8001          $self->{column}++;
8002          $self->{nc}
8003              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8004        } else {
8005          $self->{set_nc}->($self);
8006        }
8007      
8008            redo A;
8009          } elsif ($self->{nc} == 0x0028) { # (
8010            $self->{state} = AFTER_CM_GROUP_OPEN_STATE;
8011            $self->{ct}->{content} = ['('];
8012            $self->{group_depth} = 1;
8013            
8014        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8015          $self->{line_prev} = $self->{line};
8016          $self->{column_prev} = $self->{column};
8017          $self->{column}++;
8018          $self->{nc}
8019              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8020        } else {
8021          $self->{set_nc}->($self);
8022        }
8023      
8024            redo A;
8025          } elsif ($self->{nc} == 0x003E) { # >
8026            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md def'); ## TODO: type
8027            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8028            
8029        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8030          $self->{line_prev} = $self->{line};
8031          $self->{column_prev} = $self->{column};
8032          $self->{column}++;
8033          $self->{nc}
8034              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8035        } else {
8036          $self->{set_nc}->($self);
8037        }
8038      
8039            return  ($self->{ct}); # ELEMENT
8040            redo A;
8041          } elsif ($self->{nc} == -1) {
8042            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8043            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8044            
8045        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8046          $self->{line_prev} = $self->{line};
8047          $self->{column_prev} = $self->{column};
8048          $self->{column}++;
8049          $self->{nc}
8050              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8051        } else {
8052          $self->{set_nc}->($self);
8053        }
8054      
8055            return  ($self->{ct}); # ELEMENT
8056            redo A;
8057          } else {
8058            $self->{ct}->{content} = [chr $self->{nc}];
8059            $self->{state} = CONTENT_KEYWORD_STATE;
8060            
8061        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8062          $self->{line_prev} = $self->{line};
8063          $self->{column_prev} = $self->{column};
8064          $self->{column}++;
8065          $self->{nc}
8066              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8067        } else {
8068          $self->{set_nc}->($self);
8069        }
8070      
8071            redo A;
8072          }
8073        } elsif ($self->{state} == CONTENT_KEYWORD_STATE) {
8074          if ($is_space->{$self->{nc}}) {
8075            $self->{state} = AFTER_MD_DEF_STATE;
8076            
8077        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8078          $self->{line_prev} = $self->{line};
8079          $self->{column_prev} = $self->{column};
8080          $self->{column}++;
8081          $self->{nc}
8082              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8083        } else {
8084          $self->{set_nc}->($self);
8085        }
8086      
8087            redo A;
8088          } elsif ($self->{nc} == 0x003E) { # >
8089            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8090                    
8091        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8092          $self->{line_prev} = $self->{line};
8093          $self->{column_prev} = $self->{column};
8094          $self->{column}++;
8095          $self->{nc}
8096              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8097        } else {
8098          $self->{set_nc}->($self);
8099        }
8100      
8101            return  ($self->{ct}); # ELEMENT
8102            redo A;
8103          } elsif ($self->{nc} == -1) {
8104            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8105            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8106            
8107        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8108          $self->{line_prev} = $self->{line};
8109          $self->{column_prev} = $self->{column};
8110          $self->{column}++;
8111          $self->{nc}
8112              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8113        } else {
8114          $self->{set_nc}->($self);
8115        }
8116      
8117            return  ($self->{ct}); # ELEMENT
8118            redo A;
8119          } else {
8120            $self->{ct}->{content}->[-1] .= chr $self->{nc}; # ELEMENT
8121            ## Stay in the state.
8122            
8123        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8124          $self->{line_prev} = $self->{line};
8125          $self->{column_prev} = $self->{column};
8126          $self->{column}++;
8127          $self->{nc}
8128              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8129        } else {
8130          $self->{set_nc}->($self);
8131        }
8132      
8133            redo A;
8134          }
8135        } elsif ($self->{state} == AFTER_CM_GROUP_OPEN_STATE) {
8136          if ($is_space->{$self->{nc}}) {
8137            ## Stay in the state.
8138            
8139        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8140          $self->{line_prev} = $self->{line};
8141          $self->{column_prev} = $self->{column};
8142          $self->{column}++;
8143          $self->{nc}
8144              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8145        } else {
8146          $self->{set_nc}->($self);
8147        }
8148      
8149            redo A;
8150          } elsif ($self->{nc} == 0x0028) { # (
8151            $self->{group_depth}++;
8152            push @{$self->{ct}->{content}}, chr $self->{nc};
8153            ## Stay in the state.
8154            
8155        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8156          $self->{line_prev} = $self->{line};
8157          $self->{column_prev} = $self->{column};
8158          $self->{column}++;
8159          $self->{nc}
8160              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8161        } else {
8162          $self->{set_nc}->($self);
8163        }
8164      
8165            redo A;
8166          } elsif ($self->{nc} == 0x007C or # |
8167                   $self->{nc} == 0x002C) { # ,
8168            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty element name'); ## TODO: type
8169            ## Stay in the state.
8170            
8171        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8172          $self->{line_prev} = $self->{line};
8173          $self->{column_prev} = $self->{column};
8174          $self->{column}++;
8175          $self->{nc}
8176              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8177        } else {
8178          $self->{set_nc}->($self);
8179        }
8180      
8181            redo A;
8182          } elsif ($self->{nc} == 0x0029) { # )
8183            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty element name'); ## TODO: type
8184            push @{$self->{ct}->{content}}, chr $self->{nc};
8185            $self->{group_depth}--;
8186            $self->{state} = AFTER_CM_GROUP_CLOSE_STATE;
8187            
8188        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8189          $self->{line_prev} = $self->{line};
8190          $self->{column_prev} = $self->{column};
8191          $self->{column}++;
8192          $self->{nc}
8193              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8194        } else {
8195          $self->{set_nc}->($self);
8196        }
8197      
8198            redo A;
8199          } elsif ($self->{nc} == 0x003E) { # >
8200            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8201            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8202            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8203            
8204        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8205          $self->{line_prev} = $self->{line};
8206          $self->{column_prev} = $self->{column};
8207          $self->{column}++;
8208          $self->{nc}
8209              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8210        } else {
8211          $self->{set_nc}->($self);
8212        }
8213      
8214            return  ($self->{ct}); # ELEMENT
8215            redo A;
8216          } elsif ($self->{nc} == -1) {
8217            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8218            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8219            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8220            
8221        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8222          $self->{line_prev} = $self->{line};
8223          $self->{column_prev} = $self->{column};
8224          $self->{column}++;
8225          $self->{nc}
8226              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8227        } else {
8228          $self->{set_nc}->($self);
8229        }
8230      
8231            return  ($self->{ct}); # ELEMENT
8232            redo A;
8233          } else {
8234            push @{$self->{ct}->{content}}, chr $self->{nc};
8235            $self->{state} = CM_ELEMENT_NAME_STATE;
8236            
8237        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8238          $self->{line_prev} = $self->{line};
8239          $self->{column_prev} = $self->{column};
8240          $self->{column}++;
8241          $self->{nc}
8242              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8243        } else {
8244          $self->{set_nc}->($self);
8245        }
8246      
8247            redo A;
8248          }
8249        } elsif ($self->{state} == CM_ELEMENT_NAME_STATE) {
8250          if ($is_space->{$self->{nc}}) {
8251            $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8252            
8253        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8254          $self->{line_prev} = $self->{line};
8255          $self->{column_prev} = $self->{column};
8256          $self->{column}++;
8257          $self->{nc}
8258              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8259        } else {
8260          $self->{set_nc}->($self);
8261        }
8262      
8263            redo A;
8264          } elsif ($self->{nc} == 0x002A or # *
8265                   $self->{nc} == 0x002B or # +
8266                   $self->{nc} == 0x003F) { # ?
8267            push @{$self->{ct}->{content}}, chr $self->{nc};
8268            $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8269            
8270        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8271          $self->{line_prev} = $self->{line};
8272          $self->{column_prev} = $self->{column};
8273          $self->{column}++;
8274          $self->{nc}
8275              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8276        } else {
8277          $self->{set_nc}->($self);
8278        }
8279      
8280            redo A;
8281          } elsif ($self->{nc} == 0x007C or # |
8282                   $self->{nc} == 0x002C) { # ,
8283            push @{$self->{ct}->{content}}, $self->{nc} == 0x007C ? ' | ' : ', ';
8284            $self->{state} = AFTER_CM_GROUP_OPEN_STATE;
8285            
8286        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8287          $self->{line_prev} = $self->{line};
8288          $self->{column_prev} = $self->{column};
8289          $self->{column}++;
8290          $self->{nc}
8291              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8292        } else {
8293          $self->{set_nc}->($self);
8294        }
8295      
8296            redo A;
8297          } elsif ($self->{nc} == 0x0029) { # )
8298            $self->{group_depth}--;
8299            push @{$self->{ct}->{content}}, chr $self->{nc};
8300            $self->{state} = AFTER_CM_GROUP_CLOSE_STATE;
8301            
8302        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8303          $self->{line_prev} = $self->{line};
8304          $self->{column_prev} = $self->{column};
8305          $self->{column}++;
8306          $self->{nc}
8307              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8308        } else {
8309          $self->{set_nc}->($self);
8310        }
8311      
8312            redo A;
8313          } elsif ($self->{nc} == 0x003E) { # >
8314            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8315            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8316            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8317            
8318        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8319          $self->{line_prev} = $self->{line};
8320          $self->{column_prev} = $self->{column};
8321          $self->{column}++;
8322          $self->{nc}
8323              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8324        } else {
8325          $self->{set_nc}->($self);
8326        }
8327      
8328            return  ($self->{ct}); # ELEMENT
8329            redo A;
8330          } elsif ($self->{nc} == -1) {
8331            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8332            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8333            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8334            
8335        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8336          $self->{line_prev} = $self->{line};
8337          $self->{column_prev} = $self->{column};
8338          $self->{column}++;
8339          $self->{nc}
8340              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8341        } else {
8342          $self->{set_nc}->($self);
8343        }
8344      
8345            return  ($self->{ct}); # ELEMENT
8346            redo A;
8347          } else {
8348            $self->{ct}->{content}->[-1] .= chr $self->{nc};
8349            ## Stay in the state.
8350            
8351        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8352          $self->{line_prev} = $self->{line};
8353          $self->{column_prev} = $self->{column};
8354          $self->{column}++;
8355          $self->{nc}
8356              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8357        } else {
8358          $self->{set_nc}->($self);
8359        }
8360      
8361            redo A;
8362          }
8363        } elsif ($self->{state} == AFTER_CM_ELEMENT_NAME_STATE) {
8364          if ($is_space->{$self->{nc}}) {
8365            ## Stay in the state.
8366            
8367        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8368          $self->{line_prev} = $self->{line};
8369          $self->{column_prev} = $self->{column};
8370          $self->{column}++;
8371          $self->{nc}
8372              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8373        } else {
8374          $self->{set_nc}->($self);
8375        }
8376      
8377            redo A;
8378          } elsif ($self->{nc} == 0x007C or # |
8379                   $self->{nc} == 0x002C) { # ,
8380            push @{$self->{ct}->{content}}, $self->{nc} == 0x007C ? ' | ' : ', ';
8381            $self->{state} = AFTER_CM_GROUP_OPEN_STATE;
8382            
8383        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8384          $self->{line_prev} = $self->{line};
8385          $self->{column_prev} = $self->{column};
8386          $self->{column}++;
8387          $self->{nc}
8388              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8389        } else {
8390          $self->{set_nc}->($self);
8391        }
8392      
8393            redo A;
8394          } elsif ($self->{nc} == 0x0029) { # )
8395            $self->{group_depth}--;
8396            push @{$self->{ct}->{content}}, chr $self->{nc};
8397            $self->{state} = AFTER_CM_GROUP_CLOSE_STATE;
8398            
8399        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8400          $self->{line_prev} = $self->{line};
8401          $self->{column_prev} = $self->{column};
8402          $self->{column}++;
8403          $self->{nc}
8404              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8405        } else {
8406          $self->{set_nc}->($self);
8407        }
8408      
8409            redo A;
8410          } elsif ($self->{nc} == 0x003E) { # >
8411            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8412            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8413            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8414            
8415        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8416          $self->{line_prev} = $self->{line};
8417          $self->{column_prev} = $self->{column};
8418          $self->{column}++;
8419          $self->{nc}
8420              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8421        } else {
8422          $self->{set_nc}->($self);
8423        }
8424      
8425            return  ($self->{ct}); # ELEMENT
8426            redo A;
8427          } elsif ($self->{nc} == -1) {
8428            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8429            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8430            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8431            
8432        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8433          $self->{line_prev} = $self->{line};
8434          $self->{column_prev} = $self->{column};
8435          $self->{column}++;
8436          $self->{nc}
8437              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8438        } else {
8439          $self->{set_nc}->($self);
8440        }
8441      
8442            return  ($self->{ct}); # ELEMENT
8443            redo A;
8444          } else {
8445            $self->{parse_error}->(level => $self->{level}->{must}, type => 'after element name'); ## TODO: type
8446            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8447            $self->{state} = BOGUS_MD_STATE;
8448            
8449        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8450          $self->{line_prev} = $self->{line};
8451          $self->{column_prev} = $self->{column};
8452          $self->{column}++;
8453          $self->{nc}
8454              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8455        } else {
8456          $self->{set_nc}->($self);
8457        }
8458      
8459            redo A;
8460          }
8461        } elsif ($self->{state} == AFTER_CM_GROUP_CLOSE_STATE) {
8462          if ($is_space->{$self->{nc}}) {
8463            if ($self->{group_depth}) {
8464              $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8465            } else {
8466              $self->{state} = AFTER_MD_DEF_STATE;
8467            }
8468            
8469        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8470          $self->{line_prev} = $self->{line};
8471          $self->{column_prev} = $self->{column};
8472          $self->{column}++;
8473          $self->{nc}
8474              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8475        } else {
8476          $self->{set_nc}->($self);
8477        }
8478      
8479            redo A;
8480          } elsif ($self->{nc} == 0x002A or # *
8481                   $self->{nc} == 0x002B or # +
8482                   $self->{nc} == 0x003F) { # ?
8483            push @{$self->{ct}->{content}}, chr $self->{nc};
8484            if ($self->{group_depth}) {
8485              $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8486            } else {
8487              $self->{state} = AFTER_MD_DEF_STATE;
8488            }
8489            
8490        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8491          $self->{line_prev} = $self->{line};
8492          $self->{column_prev} = $self->{column};
8493          $self->{column}++;
8494          $self->{nc}
8495              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8496        } else {
8497          $self->{set_nc}->($self);
8498        }
8499      
8500            redo A;
8501          } elsif ($self->{nc} == 0x0029) { # )
8502            if ($self->{group_depth}) {
8503              $self->{group_depth}--;
8504              push @{$self->{ct}->{content}}, chr $self->{nc};
8505              ## Stay in the state.
8506              
8507        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8508          $self->{line_prev} = $self->{line};
8509          $self->{column_prev} = $self->{column};
8510          $self->{column}++;
8511          $self->{nc}
8512              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8513        } else {
8514          $self->{set_nc}->($self);
8515        }
8516      
8517              redo A;
8518            } else {
8519              $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after md def'); ## TODO: type
8520              $self->{state} = BOGUS_MD_STATE;
8521              ## Reconsume.
8522              redo A;
8523            }
8524          } elsif ($self->{nc} == 0x003E) { # >
8525            if ($self->{group_depth}) {
8526              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed cm group'); ## TODO: type
8527              push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8528            }
8529            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8530            
8531        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8532          $self->{line_prev} = $self->{line};
8533          $self->{column_prev} = $self->{column};
8534          $self->{column}++;
8535          $self->{nc}
8536              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8537        } else {
8538          $self->{set_nc}->($self);
8539        }
8540      
8541            return  ($self->{ct}); # ELEMENT
8542            redo A;
8543          } elsif ($self->{nc} == -1) {
8544            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8545            push @{$self->{ct}->{content}}, (')') x $self->{group_depth};
8546            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8547            
8548        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8549          $self->{line_prev} = $self->{line};
8550          $self->{column_prev} = $self->{column};
8551          $self->{column}++;
8552          $self->{nc}
8553              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8554        } else {
8555          $self->{set_nc}->($self);
8556        }
8557      
8558            return  ($self->{ct}); # ELEMENT
8559            redo A;
8560          } else {
8561            if ($self->{group_depth}) {
8562              $self->{state} = AFTER_CM_ELEMENT_NAME_STATE;
8563            } else {
8564              $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after md def'); ## TODO: type
8565              $self->{state} = BOGUS_MD_STATE;
8566            }
8567            ## Reconsume.
8568            redo A;
8569          }
8570        } elsif ($self->{state} == AFTER_MD_DEF_STATE) {
8571          if ($is_space->{$self->{nc}}) {
8572            ## Stay in the state.
8573            
8574        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8575          $self->{line_prev} = $self->{line};
8576          $self->{column_prev} = $self->{column};
8577          $self->{column}++;
8578          $self->{nc}
8579              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8580        } else {
8581          $self->{set_nc}->($self);
8582        }
8583      
8584            redo A;
8585          } elsif ($self->{nc} == 0x003E) { # >
8586            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8587            
8588        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8589          $self->{line_prev} = $self->{line};
8590          $self->{column_prev} = $self->{column};
8591          $self->{column}++;
8592          $self->{nc}
8593              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8594        } else {
8595          $self->{set_nc}->($self);
8596        }
8597      
8598            return  ($self->{ct}); # ENTITY/ELEMENT
8599            redo A;
8600          } elsif ($self->{nc} == -1) {
8601            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
8602            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8603            
8604        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8605          $self->{line_prev} = $self->{line};
8606          $self->{column_prev} = $self->{column};
8607          $self->{column}++;
8608          $self->{nc}
8609              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8610        } else {
8611          $self->{set_nc}->($self);
8612        }
8613      
8614            return  ($self->{ct}); # ENTITY/ELEMENT
8615            redo A;
8616          } else {
8617            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after md def'); ## TODO: type
8618            $self->{state} = BOGUS_MD_STATE;
8619            ## Reconsume.
8620            redo A;
8621          }
8622        } elsif ($self->{state} == BOGUS_MD_STATE) {
8623          if ($self->{nc} == 0x003E) { # >
8624            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8625            
8626        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8627          $self->{line_prev} = $self->{line};
8628          $self->{column_prev} = $self->{column};
8629          $self->{column}++;
8630          $self->{nc}
8631              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8632        } else {
8633          $self->{set_nc}->($self);
8634        }
8635      
8636            return  ($self->{ct}); # ATTLIST/ENTITY/NOTATION
8637            redo A;
8638          } elsif ($self->{nc} == -1) {
8639            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
8640            ## Reconsume.
8641            return  ($self->{ct}); # ATTLIST/ENTITY/NOTATION
8642            redo A;
8643          } else {
8644            ## Stay in the state.
8645            
8646        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
8647          $self->{line_prev} = $self->{line};
8648          $self->{column_prev} = $self->{column};
8649          $self->{column}++;
8650          $self->{nc}
8651              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
8652        } else {
8653          $self->{set_nc}->($self);
8654        }
8655      
8656            redo A;
8657          }
8658      } else {      } else {
8659        die "$0: $self->{state}: Unknown state";        die "$0: $self->{state}: Unknown state";
8660      }      }
# Line 4558  sub _get_next_token ($) { Line 8665  sub _get_next_token ($) {
8665    
8666  1;  1;
8667  ## $Date$  ## $Date$
8668                                    

Legend:
Removed from v.1.9  
changed lines
  Added in v.1.28

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24