/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.9 by wakaba, Wed Oct 15 08:05:47 2008 UTC revision 1.16 by wakaba, Sat Oct 18 11:34:49 2008 UTC
# Line 15  BEGIN { Line 15  BEGIN {
15      CHARACTER_TOKEN      CHARACTER_TOKEN
16      PI_TOKEN      PI_TOKEN
17      ABORT_TOKEN      ABORT_TOKEN
18        END_OF_DOCTYPE_TOKEN
19        ATTLIST_TOKEN
20        ELEMENT_TOKEN
21        GENERAL_ENTITY_TOKEN
22        PARAMETER_ENTITY_TOKEN
23        NOTATION_TOKEN
24    );    );
25        
26    our %EXPORT_TAGS = (    our %EXPORT_TAGS = (
# Line 27  BEGIN { Line 33  BEGIN {
33        CHARACTER_TOKEN        CHARACTER_TOKEN
34        PI_TOKEN        PI_TOKEN
35        ABORT_TOKEN        ABORT_TOKEN
36          END_OF_DOCTYPE_TOKEN
37          ATTLIST_TOKEN
38          ELEMENT_TOKEN
39          GENERAL_ENTITY_TOKEN
40          PARAMETER_ENTITY_TOKEN
41          NOTATION_TOKEN
42      )],      )],
43    );    );
44  }  }
45    
46    ## NOTE: Differences from the XML5 draft are marked as "XML5:".
47    
48  ## Token types  ## Token types
49    
50  sub DOCTYPE_TOKEN () { 1 }  sub DOCTYPE_TOKEN () { 1 } ## XML5: No DOCTYPE token.
51  sub COMMENT_TOKEN () { 2 }  sub COMMENT_TOKEN () { 2 }
52  sub START_TAG_TOKEN () { 3 }  sub START_TAG_TOKEN () { 3 }
53  sub END_TAG_TOKEN () { 4 }  sub END_TAG_TOKEN () { 4 }
54  sub END_OF_FILE_TOKEN () { 5 }  sub END_OF_FILE_TOKEN () { 5 }
55  sub CHARACTER_TOKEN () { 6 }  sub CHARACTER_TOKEN () { 6 }
56  sub PI_TOKEN () { 7 } # XML5  sub PI_TOKEN () { 7 } ## NOTE: XML only.
57  sub ABORT_TOKEN () { 8 } # Not a token actually  sub ABORT_TOKEN () { 8 } ## NOTE: For internal processing.
58    sub END_OF_DOCTYPE_TOKEN () { 9 } ## NOTE: XML only.
59    sub ATTLIST_TOKEN () { 10 } ## NOTE: XML only.
60    sub ELEMENT_TOKEN () { 11 } ## NOTE: XML only.
61    sub GENERAL_ENTITY_TOKEN () { 12 } ## NOTE: XML only.
62    sub PARAMETER_ENTITY_TOKEN () { 13 } ## NOTE: XML only.
63    sub NOTATION_TOKEN () { 14 } ## NOTE: XML only.
64    
65    ## XML5: XML5 has "empty tag token".  In this implementation, it is
66    ## represented as a start tag token with $self->{self_closing} flag
67    ## set to true.
68    
69    ## XML5: XML5 has "short end tag token".  In this implementation, it
70    ## is represented as an end tag token with $token->{tag_name} set
71    ## to an empty string.
72    
73  package Whatpm::HTML;  package Whatpm::HTML;
74    
# Line 114  sub HEXREF_HEX_STATE () { 48 } Line 142  sub HEXREF_HEX_STATE () { 48 }
142  sub ENTITY_NAME_STATE () { 49 }  sub ENTITY_NAME_STATE () { 49 }
143  sub PCDATA_STATE () { 50 } # "data state" in the spec  sub PCDATA_STATE () { 50 } # "data state" in the spec
144    
145  ## XML states  ## XML-only states
146  sub PI_STATE () { 51 }  sub PI_STATE () { 51 }
147  sub PI_TARGET_STATE () { 52 }  sub PI_TARGET_STATE () { 52 }
148  sub PI_TARGET_AFTER_STATE () { 53 }  sub PI_TARGET_AFTER_STATE () { 53 }
149  sub PI_DATA_STATE () { 54 }  sub PI_DATA_STATE () { 54 }
150  sub PI_AFTER_STATE () { 55 }  sub PI_AFTER_STATE () { 55 }
151  sub PI_DATA_AFTER_STATE () { 56 }  sub PI_DATA_AFTER_STATE () { 56 }
152    sub DOCTYPE_INTERNAL_SUBSET_STATE () { 57 }
153    sub DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 58 }
154    sub BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 59 }
155    sub DOCTYPE_TAG_STATE () { 60 }
156    sub DOCTYPE_MARKUP_DECLARATION_OPEN_STATE () { 61 }
157    sub MD_ATTLIST_STATE () { 62 }
158    sub MD_E_STATE () { 63 }
159    sub MD_ELEMENT_STATE () { 64 }
160    sub MD_ENTITY_STATE () { 65 }
161    sub MD_NOTATION_STATE () { 66 }
162    sub DOCTYPE_MD_STATE () { 67 }
163    sub BEFORE_MD_NAME_STATE () { 68 }
164    sub MD_NAME_STATE () { 69 }
165    sub DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE () { 70 }
166    sub DOCTYPE_ATTLIST_NAME_AFTER_STATE () { 71 }
167    sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE () { 72 }
168    sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE () { 73 }
169    sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE () { 74 }
170    sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE () { 75 }
171    sub BEFORE_ALLOWED_TOKEN_STATE () { 76 }
172    sub ALLOWED_TOKEN_STATE () { 77 }
173    sub AFTER_ALLOWED_TOKEN_STATE () { 78 }
174    sub AFTER_ALLOWED_TOKENS_STATE () { 79 }
175    sub BEFORE_ATTR_DEFAULT_STATE () { 80 }
176    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE () { 81 }
177    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE () { 82 }
178    sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE () { 83 }
179    sub AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE () { 84 }
180    sub BOGUS_MD_STATE () { 85 }
181    
182  ## Tree constructor state constants (see Whatpm::HTML for the full  ## Tree constructor state constants (see Whatpm::HTML for the full
183  ## list and descriptions)  ## list and descriptions)
# Line 186  sub _initialize_tokenizer ($) { Line 243  sub _initialize_tokenizer ($) {
243    #$self->{is_xml} (if XML)    #$self->{is_xml} (if XML)
244    
245    $self->{state} = DATA_STATE; # MUST    $self->{state} = DATA_STATE; # MUST
246    $self->{s_kwd} = ''; # state keyword    $self->{s_kwd} = ''; # Data state keyword
247      #$self->{kwd} = ''; # State-dependent keyword; initialized when used
248    #$self->{entity__value}; # initialized when used    #$self->{entity__value}; # initialized when used
249    #$self->{entity__match}; # initialized when used    #$self->{entity__match}; # initialized when used
250    $self->{content_model} = PCDATA_CONTENT_MODEL; # be    $self->{content_model} = PCDATA_CONTENT_MODEL; # be
# Line 216  sub _initialize_tokenizer ($) { Line 274  sub _initialize_tokenizer ($) {
274    
275  ## A token has:  ## A token has:
276  ##   ->{type} == DOCTYPE_TOKEN, START_TAG_TOKEN, END_TAG_TOKEN, COMMENT_TOKEN,  ##   ->{type} == DOCTYPE_TOKEN, START_TAG_TOKEN, END_TAG_TOKEN, COMMENT_TOKEN,
277  ##       CHARACTER_TOKEN, or END_OF_FILE_TOKEN  ##       CHARACTER_TOKEN, END_OF_FILE_TOKEN, PI_TOKEN, or ABORT_TOKEN
278  ##   ->{name} (DOCTYPE_TOKEN)  ##   ->{name} (DOCTYPE_TOKEN)
279  ##   ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN)  ##   ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN)
280    ##   ->{target} (PI_TOKEN)
281  ##   ->{pubid} (DOCTYPE_TOKEN)  ##   ->{pubid} (DOCTYPE_TOKEN)
282  ##   ->{sysid} (DOCTYPE_TOKEN)  ##   ->{sysid} (DOCTYPE_TOKEN)
283  ##   ->{quirks} == 1 or 0 (DOCTYPE_TOKEN): "force-quirks" flag  ##   ->{quirks} == 1 or 0 (DOCTYPE_TOKEN): "force-quirks" flag
# Line 226  sub _initialize_tokenizer ($) { Line 285  sub _initialize_tokenizer ($) {
285  ##        ->{name}  ##        ->{name}
286  ##        ->{value}  ##        ->{value}
287  ##        ->{has_reference} == 1 or 0  ##        ->{has_reference} == 1 or 0
288  ##   ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN)  ##        ->{index}: Index of the attribute in a tag.
289    ##   ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN, PI_TOKEN)
290  ##   ->{has_reference} == 1 or 0 (CHARACTER_TOKEN)  ##   ->{has_reference} == 1 or 0 (CHARACTER_TOKEN)
291    ##   ->{last_index} (ELEMENT_TOKEN): Next attribute's index - 1.
292    ##   ->{has_internal_subset} == 1 or 0 (DOCTYPE_TOKEN)
293    
294  ## NOTE: The "self-closing flag" is held as |$self->{self_closing}|.  ## NOTE: The "self-closing flag" is held as |$self->{self_closing}|.
295  ##     |->{self_closing}| is used to save the value of |$self->{self_closing}|  ##     |->{self_closing}| is used to save the value of |$self->{self_closing}|
296  ##     while the token is pushed back to the stack.  ##     while the token is pushed back to the stack.
# Line 247  my $is_space = { Line 310  my $is_space = {
310    0x0009 => 1, # CHARACTER TABULATION (HT)    0x0009 => 1, # CHARACTER TABULATION (HT)
311    0x000A => 1, # LINE FEED (LF)    0x000A => 1, # LINE FEED (LF)
312    #0x000B => 0, # LINE TABULATION (VT)    #0x000B => 0, # LINE TABULATION (VT)
313    0x000C => 1, # FORM FEED (FF)    0x000C => 1, # FORM FEED (FF) ## XML5: Not a space character.
314    #0x000D => 1, # CARRIAGE RETURN (CR)    #0x000D => 1, # CARRIAGE RETURN (CR)
315    0x0020 => 1, # SPACE (SP)    0x0020 => 1, # SPACE (SP)
316  };  };
# Line 507  sub _get_next_token ($) { Line 570  sub _get_next_token ($) {
570        return  ($token);        return  ($token);
571        redo A;        redo A;
572      } elsif ($self->{state} == TAG_OPEN_STATE) {      } elsif ($self->{state} == TAG_OPEN_STATE) {
573          ## XML5: "tag state".
574    
575        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
576          if ($self->{nc} == 0x002F) { # /          if ($self->{nc} == 0x002F) { # /
577                        
# Line 525  sub _get_next_token ($) { Line 590  sub _get_next_token ($) {
590            redo A;            redo A;
591          } elsif ($self->{nc} == 0x0021) { # !          } elsif ($self->{nc} == 0x0021) { # !
592                        
593            $self->{s_kwd} = '<' unless $self->{escape};            $self->{s_kwd} = $self->{escaped} ? '' : '<';
594            #            #
595          } else {          } else {
596                        
597              $self->{s_kwd} = '';
598            #            #
599          }          }
600    
601          ## reconsume          ## reconsume
602          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
         $self->{s_kwd} = '';  
603          return  ({type => CHARACTER_TOKEN, data => '<',          return  ({type => CHARACTER_TOKEN, data => '<',
604                    line => $self->{line_prev},                    line => $self->{line_prev},
605                    column => $self->{column_prev},                    column => $self->{column_prev},
# Line 709  sub _get_next_token ($) { Line 774  sub _get_next_token ($) {
774        ## NOTE: The "close tag open state" in the spec is implemented as        ## NOTE: The "close tag open state" in the spec is implemented as
775        ## |CLOSE_TAG_OPEN_STATE| and |CDATA_RCDATA_CLOSE_TAG_STATE|.        ## |CLOSE_TAG_OPEN_STATE| and |CDATA_RCDATA_CLOSE_TAG_STATE|.
776    
777          ## XML5: "end tag state".
778    
779        my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"        my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"
780        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
781          if (defined $self->{last_stag_name}) {          if (defined $self->{last_stag_name}) {
782            $self->{state} = CDATA_RCDATA_CLOSE_TAG_STATE;            $self->{state} = CDATA_RCDATA_CLOSE_TAG_STATE;
783            $self->{s_kwd} = '';            $self->{kwd} = '';
784            ## Reconsume.            ## Reconsume.
785            redo A;            redo A;
786          } else {          } else {
# Line 770  sub _get_next_token ($) { Line 837  sub _get_next_token ($) {
837        
838          redo A;          redo A;
839        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
840          $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty end tag',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty end tag',
841                          line => $self->{line_prev}, ## "<" in "</>"                          line => $self->{line_prev}, ## "<" in "</>"
842                          column => $self->{column_prev} - 1);                          column => $self->{column_prev} - 1);
843          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
844          $self->{s_kwd} = '';          $self->{s_kwd} = '';
845                    if ($self->{is_xml}) {
846              
847              ## XML5: No parse error.
848              
849              ## NOTE: This parser raises a parse error, since it supports
850              ## XML1, not XML5.
851    
852              ## NOTE: A short end tag token.
853              my $ct = {type => END_TAG_TOKEN,
854                        tag_name => '',
855                        line => $self->{line_prev},
856                        column => $self->{column_prev} - 1,
857                       };
858              
859      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
860        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
861        $self->{column_prev} = $self->{column};        $self->{column_prev} = $self->{column};
# Line 787  sub _get_next_token ($) { Line 866  sub _get_next_token ($) {
866        $self->{set_nc}->($self);        $self->{set_nc}->($self);
867      }      }
868        
869              return  ($ct);
870            } else {
871              
872              
873        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
874          $self->{line_prev} = $self->{line};
875          $self->{column_prev} = $self->{column};
876          $self->{column}++;
877          $self->{nc}
878              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
879        } else {
880          $self->{set_nc}->($self);
881        }
882      
883            }
884          redo A;          redo A;
885        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
886                    
# Line 800  sub _get_next_token ($) { Line 894  sub _get_next_token ($) {
894                   });                   });
895    
896          redo A;          redo A;
897        } else {        } elsif (not $self->{is_xml} or
898                   $is_space->{$self->{nc}}) {
899                    
900          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus end tag');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus end tag',
901                            line => $self->{line_prev}, # "<" of "</"
902                            column => $self->{column_prev} - 1);
903          $self->{state} = BOGUS_COMMENT_STATE;          $self->{state} = BOGUS_COMMENT_STATE;
904          $self->{ct} = {type => COMMENT_TOKEN, data => '',          $self->{ct} = {type => COMMENT_TOKEN, data => '',
905                                    line => $self->{line_prev}, # "<" of "</"                                    line => $self->{line_prev}, # "<" of "</"
# Line 815  sub _get_next_token ($) { Line 912  sub _get_next_token ($) {
912          ## generated from the bogus end tag, as defined in the          ## generated from the bogus end tag, as defined in the
913          ## "bogus comment state" entry.          ## "bogus comment state" entry.
914          redo A;          redo A;
915          } else {
916            ## XML5: "</:" is a parse error.
917            
918            $self->{ct} = {type => END_TAG_TOKEN,
919                           tag_name => chr ($self->{nc}),
920                           line => $l, column => $c};
921            $self->{state} = TAG_NAME_STATE; ## XML5: "end tag name state".
922            
923        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
924          $self->{line_prev} = $self->{line};
925          $self->{column_prev} = $self->{column};
926          $self->{column}++;
927          $self->{nc}
928              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
929        } else {
930          $self->{set_nc}->($self);
931        }
932      
933            redo A;
934        }        }
935      } elsif ($self->{state} == CDATA_RCDATA_CLOSE_TAG_STATE) {      } elsif ($self->{state} == CDATA_RCDATA_CLOSE_TAG_STATE) {
936        my $ch = substr $self->{last_stag_name}, length $self->{s_kwd}, 1;        my $ch = substr $self->{last_stag_name}, length $self->{kwd}, 1;
937        if (length $ch) {        if (length $ch) {
938          my $CH = $ch;          my $CH = $ch;
939          $ch =~ tr/a-z/A-Z/;          $ch =~ tr/a-z/A-Z/;
# Line 825  sub _get_next_token ($) { Line 941  sub _get_next_token ($) {
941          if ($nch eq $ch or $nch eq $CH) {          if ($nch eq $ch or $nch eq $CH) {
942                        
943            ## Stay in the state.            ## Stay in the state.
944            $self->{s_kwd} .= $nch;            $self->{kwd} .= $nch;
945                        
946      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
947        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 844  sub _get_next_token ($) { Line 960  sub _get_next_token ($) {
960            $self->{s_kwd} = '';            $self->{s_kwd} = '';
961            ## Reconsume.            ## Reconsume.
962            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
963                      data => '</' . $self->{s_kwd},                      data => '</' . $self->{kwd},
964                      line => $self->{line_prev},                      line => $self->{line_prev},
965                      column => $self->{column_prev} - 1 - length $self->{s_kwd},                      column => $self->{column_prev} - 1 - length $self->{kwd},
966                     });                     });
967            redo A;            redo A;
968          }          }
# Line 862  sub _get_next_token ($) { Line 978  sub _get_next_token ($) {
978            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
979            $self->{s_kwd} = '';            $self->{s_kwd} = '';
980            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
981                      data => '</' . $self->{s_kwd},                      data => '</' . $self->{kwd},
982                      line => $self->{line_prev},                      line => $self->{line_prev},
983                      column => $self->{column_prev} - 1 - length $self->{s_kwd},                      column => $self->{column_prev} - 1 - length $self->{kwd},
984                     });                     });
985            redo A;            redo A;
986          } else {          } else {
# Line 873  sub _get_next_token ($) { Line 989  sub _get_next_token ($) {
989                = {type => END_TAG_TOKEN,                = {type => END_TAG_TOKEN,
990                   tag_name => $self->{last_stag_name},                   tag_name => $self->{last_stag_name},
991                   line => $self->{line_prev},                   line => $self->{line_prev},
992                   column => $self->{column_prev} - 1 - length $self->{s_kwd}};                   column => $self->{column_prev} - 1 - length $self->{kwd}};
993            $self->{state} = TAG_NAME_STATE;            $self->{state} = TAG_NAME_STATE;
994            ## Reconsume.            ## Reconsume.
995            redo A;            redo A;
# Line 1005  sub _get_next_token ($) { Line 1121  sub _get_next_token ($) {
1121          redo A;          redo A;
1122        }        }
1123      } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {      } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {
1124          ## XML5: "Tag attribute name before state".
1125    
1126        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
1127                    
1128          ## Stay in the state          ## Stay in the state
# Line 1117  sub _get_next_token ($) { Line 1235  sub _get_next_token ($) {
1235               0x003D => 1, # =               0x003D => 1, # =
1236              }->{$self->{nc}}) {              }->{$self->{nc}}) {
1237                        
1238              ## XML5: Not a parse error.
1239            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');
1240          } else {          } else {
1241                        
1242              ## XML5: ":" raises a parse error and is ignored.
1243          }          }
1244          $self->{ca}          $self->{ca}
1245              = {name => chr ($self->{nc}),              = {name => chr ($self->{nc}),
# Line 1140  sub _get_next_token ($) { Line 1260  sub _get_next_token ($) {
1260          redo A;          redo A;
1261        }        }
1262      } elsif ($self->{state} == ATTRIBUTE_NAME_STATE) {      } elsif ($self->{state} == ATTRIBUTE_NAME_STATE) {
1263          ## XML5: "Tag attribute name state".
1264    
1265        my $before_leave = sub {        my $before_leave = sub {
1266          if (exists $self->{ct}->{attributes} # start tag or end tag          if (exists $self->{ct}->{attributes} # start tag or end tag
1267              ->{$self->{ca}->{name}}) { # MUST              ->{$self->{ca}->{name}}) { # MUST
# Line 1150  sub _get_next_token ($) { Line 1272  sub _get_next_token ($) {
1272                        
1273            $self->{ct}->{attributes}->{$self->{ca}->{name}}            $self->{ct}->{attributes}->{$self->{ca}->{name}}
1274              = $self->{ca};              = $self->{ca};
1275              $self->{ca}->{index} = ++$self->{ct}->{last_index};
1276          }          }
1277        }; # $before_leave        }; # $before_leave
1278    
# Line 1186  sub _get_next_token ($) { Line 1309  sub _get_next_token ($) {
1309        
1310          redo A;          redo A;
1311        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
1312            if ($self->{is_xml}) {
1313              
1314              ## XML5: Not a parse error.
1315              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1316            } else {
1317              
1318            }
1319    
1320          $before_leave->();          $before_leave->();
1321          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1322                        
# Line 1235  sub _get_next_token ($) { Line 1366  sub _get_next_token ($) {
1366        
1367          redo A;          redo A;
1368        } elsif ($self->{nc} == 0x002F) { # /        } elsif ($self->{nc} == 0x002F) { # /
1369            if ($self->{is_xml}) {
1370              
1371              ## XML5: Not a parse error.
1372              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1373            } else {
1374              
1375            }
1376                    
1377          $before_leave->();          $before_leave->();
1378          $self->{state} = SELF_CLOSING_START_TAG_STATE;          $self->{state} = SELF_CLOSING_START_TAG_STATE;
# Line 1279  sub _get_next_token ($) { Line 1417  sub _get_next_token ($) {
1417          if ($self->{nc} == 0x0022 or # "          if ($self->{nc} == 0x0022 or # "
1418              $self->{nc} == 0x0027) { # '              $self->{nc} == 0x0027) { # '
1419                        
1420              ## XML5: Not a parse error.
1421            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');
1422          } else {          } else {
1423                        
# Line 1299  sub _get_next_token ($) { Line 1438  sub _get_next_token ($) {
1438          redo A;          redo A;
1439        }        }
1440      } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {      } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {
1441          ## XML5: "Tag attribute name after state".
1442          
1443        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
1444                    
1445          ## Stay in the state          ## Stay in the state
# Line 1330  sub _get_next_token ($) { Line 1471  sub _get_next_token ($) {
1471        
1472          redo A;          redo A;
1473        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
1474            if ($self->{is_xml}) {
1475              
1476              ## XML5: Not a parse error.
1477              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1478            } else {
1479              
1480            }
1481    
1482          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1483                        
1484            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
# Line 1383  sub _get_next_token ($) { Line 1532  sub _get_next_token ($) {
1532        
1533          redo A;          redo A;
1534        } elsif ($self->{nc} == 0x002F) { # /        } elsif ($self->{nc} == 0x002F) { # /
1535            if ($self->{is_xml}) {
1536              
1537              ## XML5: Not a parse error.
1538              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1539            } else {
1540              
1541            }
1542                    
1543          $self->{state} = SELF_CLOSING_START_TAG_STATE;          $self->{state} = SELF_CLOSING_START_TAG_STATE;
1544                    
# Line 1422  sub _get_next_token ($) { Line 1578  sub _get_next_token ($) {
1578    
1579          redo A;          redo A;
1580        } else {        } else {
1581            if ($self->{is_xml}) {
1582              
1583              ## XML5: Not a parse error.
1584              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1585            } else {
1586              
1587            }
1588    
1589          if ($self->{nc} == 0x0022 or # "          if ($self->{nc} == 0x0022 or # "
1590              $self->{nc} == 0x0027) { # '              $self->{nc} == 0x0027) { # '
1591                        
1592              ## XML5: Not a parse error.
1593            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');
1594          } else {          } else {
1595                        
# Line 1448  sub _get_next_token ($) { Line 1613  sub _get_next_token ($) {
1613          redo A;                  redo A;        
1614        }        }
1615      } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {      } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {
1616          ## XML5: "Tag attribute value before state".
1617    
1618        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
1619                    
1620          ## Stay in the state          ## Stay in the state
# Line 1559  sub _get_next_token ($) { Line 1726  sub _get_next_token ($) {
1726        } else {        } else {
1727          if ($self->{nc} == 0x003D) { # =          if ($self->{nc} == 0x003D) { # =
1728                        
1729              ## XML5: Not a parse error.
1730            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');
1731            } elsif ($self->{is_xml}) {
1732              
1733              ## XML5: No parse error.
1734              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO
1735          } else {          } else {
1736                        
1737          }          }
# Line 1579  sub _get_next_token ($) { Line 1751  sub _get_next_token ($) {
1751          redo A;          redo A;
1752        }        }
1753      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1754          ## XML5: "Tag attribute value double quoted state" and "DOCTYPE
1755          ## ATTLIST attribute value double quoted state".
1756          
1757        if ($self->{nc} == 0x0022) { # "        if ($self->{nc} == 0x0022) { # "
1758                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1759          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;            
1760              ## XML5: "DOCTYPE ATTLIST name after state".
1761              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1762              $self->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE;
1763            } else {
1764              
1765              ## XML5: "Tag attribute name before state".
1766              $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1767            }
1768                    
1769      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1770        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1596  sub _get_next_token ($) { Line 1779  sub _get_next_token ($) {
1779          redo A;          redo A;
1780        } elsif ($self->{nc} == 0x0026) { # &        } elsif ($self->{nc} == 0x0026) { # &
1781                    
1782            ## XML5: Not defined yet.
1783    
1784          ## NOTE: In the spec, the tokenizer is switched to the          ## NOTE: In the spec, the tokenizer is switched to the
1785          ## "entity in attribute value state".  In this implementation, the          ## "entity in attribute value state".  In this implementation, the
1786          ## tokenizer is switched to the |ENTITY_STATE|, which is an          ## tokenizer is switched to the |ENTITY_STATE|, which is an
# Line 1620  sub _get_next_token ($) { Line 1805  sub _get_next_token ($) {
1805          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1806                        
1807            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
1808    
1809              $self->{state} = DATA_STATE;
1810              $self->{s_kwd} = '';
1811              ## reconsume
1812              return  ($self->{ct}); # start tag
1813              redo A;
1814          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1815            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1816            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1629  sub _get_next_token ($) { Line 1820  sub _get_next_token ($) {
1820              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
1821                            
1822            }            }
1823    
1824              $self->{state} = DATA_STATE;
1825              $self->{s_kwd} = '';
1826              ## reconsume
1827              return  ($self->{ct}); # end tag
1828              redo A;
1829            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1830              ## XML5: No parse error above; not defined yet.
1831              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1832              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1833              ## Reconsume.
1834              return  ($self->{ct}); # ATTLIST
1835              redo A;
1836          } else {          } else {
1837            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1838          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
1839        } else {        } else {
1840                    ## XML5 [ATTLIST]: Not defined yet.
1841            if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
1842              
1843              ## XML5: Not a parse error.
1844              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lt in attr value'); ## TODO: type
1845            } else {
1846              
1847            }
1848          $self->{ca}->{value} .= chr ($self->{nc});          $self->{ca}->{value} .= chr ($self->{nc});
1849          $self->{read_until}->($self->{ca}->{value},          $self->{read_until}->($self->{ca}->{value},
1850                                q["&],                                q["&<],
1851                                length $self->{ca}->{value});                                length $self->{ca}->{value});
1852    
1853          ## Stay in the state          ## Stay in the state
# Line 1661  sub _get_next_token ($) { Line 1865  sub _get_next_token ($) {
1865          redo A;          redo A;
1866        }        }
1867      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1868          ## XML5: "Tag attribute value single quoted state" and "DOCTYPE
1869          ## ATTLIST attribute value single quoted state".
1870    
1871        if ($self->{nc} == 0x0027) { # '        if ($self->{nc} == 0x0027) { # '
1872                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1873          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;            
1874              ## XML5: "DOCTYPE ATTLIST name after state".
1875              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1876              $self->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE;
1877            } else {
1878              
1879              ## XML5: "Before attribute name state" (sic).
1880              $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1881            }
1882                    
1883      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1884        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1678  sub _get_next_token ($) { Line 1893  sub _get_next_token ($) {
1893          redo A;          redo A;
1894        } elsif ($self->{nc} == 0x0026) { # &        } elsif ($self->{nc} == 0x0026) { # &
1895                    
1896            ## XML5: Not defined yet.
1897    
1898          ## NOTE: In the spec, the tokenizer is switched to the          ## NOTE: In the spec, the tokenizer is switched to the
1899          ## "entity in attribute value state".  In this implementation, the          ## "entity in attribute value state".  In this implementation, the
1900          ## tokenizer is switched to the |ENTITY_STATE|, which is an          ## tokenizer is switched to the |ENTITY_STATE|, which is an
# Line 1702  sub _get_next_token ($) { Line 1919  sub _get_next_token ($) {
1919          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1920                        
1921            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
1922    
1923              $self->{state} = DATA_STATE;
1924              $self->{s_kwd} = '';
1925              ## reconsume
1926              return  ($self->{ct}); # start tag
1927              redo A;
1928          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1929            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1930            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1711  sub _get_next_token ($) { Line 1934  sub _get_next_token ($) {
1934              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
1935                            
1936            }            }
1937    
1938              $self->{state} = DATA_STATE;
1939              $self->{s_kwd} = '';
1940              ## reconsume
1941              return  ($self->{ct}); # end tag
1942              redo A;
1943            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1944              ## XML5: No parse error above; not defined yet.
1945              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1946              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1947              ## Reconsume.
1948              return  ($self->{ct}); # ATTLIST
1949              redo A;
1950          } else {          } else {
1951            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1952          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
1953        } else {        } else {
1954                    ## XML5 [ATTLIST]: Not defined yet.
1955            if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
1956              
1957              ## XML5: Not a parse error.
1958              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lt in attr value'); ## TODO: type
1959            } else {
1960              
1961            }
1962          $self->{ca}->{value} .= chr ($self->{nc});          $self->{ca}->{value} .= chr ($self->{nc});
1963          $self->{read_until}->($self->{ca}->{value},          $self->{read_until}->($self->{ca}->{value},
1964                                q['&],                                q['&<],
1965                                length $self->{ca}->{value});                                length $self->{ca}->{value});
1966    
1967          ## Stay in the state          ## Stay in the state
# Line 1743  sub _get_next_token ($) { Line 1979  sub _get_next_token ($) {
1979          redo A;          redo A;
1980        }        }
1981      } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {
1982          ## XML5: "Tag attribute value unquoted state".
1983    
1984        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
1985                    if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1986          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;            
1987              push @{$self->{ct}->{attrdefs}}, $self->{ca};
1988              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
1989            } else {
1990              
1991              ## XML5: "Tag attribute name before state".
1992              $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1993            }
1994                    
1995      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
1996        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 1760  sub _get_next_token ($) { Line 2005  sub _get_next_token ($) {
2005          redo A;          redo A;
2006        } elsif ($self->{nc} == 0x0026) { # &        } elsif ($self->{nc} == 0x0026) { # &
2007                    
2008    
2009            ## XML5: Not defined yet.
2010    
2011          ## NOTE: In the spec, the tokenizer is switched to the          ## NOTE: In the spec, the tokenizer is switched to the
2012          ## "entity in attribute value state".  In this implementation, the          ## "entity in attribute value state".  In this implementation, the
2013          ## tokenizer is switched to the |ENTITY_STATE|, which is an          ## tokenizer is switched to the |ENTITY_STATE|, which is an
# Line 1783  sub _get_next_token ($) { Line 2031  sub _get_next_token ($) {
2031          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
2032                        
2033            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
2034    
2035              $self->{state} = DATA_STATE;
2036              $self->{s_kwd} = '';
2037              
2038        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2039          $self->{line_prev} = $self->{line};
2040          $self->{column_prev} = $self->{column};
2041          $self->{column}++;
2042          $self->{nc}
2043              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
2044        } else {
2045          $self->{set_nc}->($self);
2046        }
2047      
2048              return  ($self->{ct}); # start tag
2049              redo A;
2050          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
2051            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
2052            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
# Line 1792  sub _get_next_token ($) { Line 2056  sub _get_next_token ($) {
2056              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
2057                            
2058            }            }
2059          } else {  
2060            die "$0: $self->{ct}->{type}: Unknown token type";            $self->{state} = DATA_STATE;
2061          }            $self->{s_kwd} = '';
2062          $self->{state} = DATA_STATE;            
         $self->{s_kwd} = '';  
           
2063      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2064        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
2065        $self->{column_prev} = $self->{column};        $self->{column_prev} = $self->{column};
# Line 1808  sub _get_next_token ($) { Line 2070  sub _get_next_token ($) {
2070        $self->{set_nc}->($self);        $self->{set_nc}->($self);
2071      }      }
2072        
2073              return  ($self->{ct}); # end tag
2074          return  ($self->{ct}); # start tag or end tag            redo A;
2075            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
2076          redo A;            push @{$self->{ct}->{attrdefs}}, $self->{ca};
2077              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2078              
2079        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2080          $self->{line_prev} = $self->{line};
2081          $self->{column_prev} = $self->{column};
2082          $self->{column}++;
2083          $self->{nc}
2084              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
2085        } else {
2086          $self->{set_nc}->($self);
2087        }
2088      
2089              return  ($self->{ct}); # ATTLIST
2090              redo A;
2091            } else {
2092              die "$0: $self->{ct}->{type}: Unknown token type";
2093            }
2094        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');  
2095          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
2096                        
2097              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');
2098            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
2099    
2100              $self->{state} = DATA_STATE;
2101              $self->{s_kwd} = '';
2102              ## reconsume
2103              return  ($self->{ct}); # start tag
2104              redo A;
2105          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {          } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
2106              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed tag');
2107            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
2108            if ($self->{ct}->{attributes}) {            if ($self->{ct}->{attributes}) {
2109                            
# Line 1826  sub _get_next_token ($) { Line 2112  sub _get_next_token ($) {
2112              ## NOTE: This state should never be reached.              ## NOTE: This state should never be reached.
2113                            
2114            }            }
2115    
2116              $self->{state} = DATA_STATE;
2117              $self->{s_kwd} = '';
2118              ## reconsume
2119              return  ($self->{ct}); # end tag
2120              redo A;
2121            } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
2122              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
2123              push @{$self->{ct}->{attrdefs}}, $self->{ca};
2124              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2125              ## Reconsume.
2126              return  ($self->{ct}); # ATTLIST
2127              redo A;
2128          } else {          } else {
2129            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
2130          }          }
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
         ## reconsume  
   
         return  ($self->{ct}); # start tag or end tag  
   
         redo A;  
2131        } else {        } else {
2132          if ({          if ({
2133               0x0022 => 1, # "               0x0022 => 1, # "
# Line 1843  sub _get_next_token ($) { Line 2135  sub _get_next_token ($) {
2135               0x003D => 1, # =               0x003D => 1, # =
2136              }->{$self->{nc}}) {              }->{$self->{nc}}) {
2137                        
2138              ## XML5: Not a parse error.
2139            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');
2140          } else {          } else {
2141                        
# Line 1959  sub _get_next_token ($) { Line 2252  sub _get_next_token ($) {
2252          redo A;          redo A;
2253        }        }
2254      } elsif ($self->{state} == SELF_CLOSING_START_TAG_STATE) {      } elsif ($self->{state} == SELF_CLOSING_START_TAG_STATE) {
2255          ## XML5: "Empty tag state".
2256    
2257        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2258          if ($self->{ct}->{type} == END_TAG_TOKEN) {          if ($self->{ct}->{type} == END_TAG_TOKEN) {
2259                        
# Line 2010  sub _get_next_token ($) { Line 2305  sub _get_next_token ($) {
2305          } else {          } else {
2306            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
2307          }          }
2308            ## XML5: "Tag attribute name before state".
2309          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2310          $self->{s_kwd} = '';          $self->{s_kwd} = '';
2311          ## Reconsume.          ## Reconsume.
# Line 2024  sub _get_next_token ($) { Line 2320  sub _get_next_token ($) {
2320          redo A;          redo A;
2321        }        }
2322      } elsif ($self->{state} == BOGUS_COMMENT_STATE) {      } elsif ($self->{state} == BOGUS_COMMENT_STATE) {
2323        ## (only happen if PCDATA state)        ## XML5: "Bogus comment state" and "DOCTYPE bogus comment state".
2324    
2325        ## NOTE: Unlike spec's "bogus comment state", this implementation        ## NOTE: Unlike spec's "bogus comment state", this implementation
2326        ## consumes characters one-by-one basis.        ## consumes characters one-by-one basis.
2327                
2328        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2329                    if ($self->{in_subset}) {
2330          $self->{state} = DATA_STATE;            
2331          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2332            } else {
2333              
2334              $self->{state} = DATA_STATE;
2335              $self->{s_kwd} = '';
2336            }
2337                    
2338      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2339        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2048  sub _get_next_token ($) { Line 2349  sub _get_next_token ($) {
2349          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
2350          redo A;          redo A;
2351        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
2352                    if ($self->{in_subset}) {
2353          $self->{state} = DATA_STATE;            
2354          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2355            } else {
2356              
2357              $self->{state} = DATA_STATE;
2358              $self->{s_kwd} = '';
2359            }
2360          ## reconsume          ## reconsume
2361    
2362          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2077  sub _get_next_token ($) { Line 2383  sub _get_next_token ($) {
2383          redo A;          redo A;
2384        }        }
2385      } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {      } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {
2386        ## (only happen if PCDATA state)        ## XML5: "Markup declaration state".
2387                
2388        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2389                    
# Line 2099  sub _get_next_token ($) { Line 2405  sub _get_next_token ($) {
2405          ## ASCII case-insensitive.          ## ASCII case-insensitive.
2406                    
2407          $self->{state} = MD_DOCTYPE_STATE;          $self->{state} = MD_DOCTYPE_STATE;
2408          $self->{s_kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
2409                    
2410      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2411        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2118  sub _get_next_token ($) { Line 2424  sub _get_next_token ($) {
2424                 $self->{nc} == 0x005B) { # [                 $self->{nc} == 0x005B) { # [
2425                                                    
2426          $self->{state} = MD_CDATA_STATE;          $self->{state} = MD_CDATA_STATE;
2427          $self->{s_kwd} = '[';          $self->{kwd} = '[';
2428                    
2429      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2430        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2152  sub _get_next_token ($) { Line 2458  sub _get_next_token ($) {
2458                                    line => $self->{line_prev},                                    line => $self->{line_prev},
2459                                    column => $self->{column_prev} - 2,                                    column => $self->{column_prev} - 2,
2460                                   };                                   };
2461          $self->{state} = COMMENT_START_STATE;          $self->{state} = COMMENT_START_STATE; ## XML5: "comment state".
2462                    
2463      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2464        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2188  sub _get_next_token ($) { Line 2494  sub _get_next_token ($) {
2494              0x0054, # T              0x0054, # T
2495              0x0059, # Y              0x0059, # Y
2496              0x0050, # P              0x0050, # P
2497            ]->[length $self->{s_kwd}] or            ]->[length $self->{kwd}] or
2498            $self->{nc} == [            $self->{nc} == [
2499              undef,              undef,
2500              0x006F, # o              0x006F, # o
# Line 2196  sub _get_next_token ($) { Line 2502  sub _get_next_token ($) {
2502              0x0074, # t              0x0074, # t
2503              0x0079, # y              0x0079, # y
2504              0x0070, # p              0x0070, # p
2505            ]->[length $self->{s_kwd}]) {            ]->[length $self->{kwd}]) {
2506                    
2507          ## Stay in the state.          ## Stay in the state.
2508          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
2509                    
2510      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2511        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2212  sub _get_next_token ($) { Line 2518  sub _get_next_token ($) {
2518      }      }
2519        
2520          redo A;          redo A;
2521        } elsif ((length $self->{s_kwd}) == 6 and        } elsif ((length $self->{kwd}) == 6 and
2522                 ($self->{nc} == 0x0045 or # E                 ($self->{nc} == 0x0045 or # E
2523                  $self->{nc} == 0x0065)) { # e                  $self->{nc} == 0x0065)) { # e
2524                    if ($self->{is_xml} and
2525                ($self->{kwd} ne 'DOCTYP' or $self->{nc} == 0x0065)) {
2526              
2527              ## XML5: case-sensitive.
2528              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO
2529                              text => 'DOCTYPE',
2530                              line => $self->{line_prev},
2531                              column => $self->{column_prev} - 5);
2532            } else {
2533              
2534            }
2535          $self->{state} = DOCTYPE_STATE;          $self->{state} = DOCTYPE_STATE;
2536          $self->{ct} = {type => DOCTYPE_TOKEN,          $self->{ct} = {type => DOCTYPE_TOKEN,
2537                                    quirks => 1,                                    quirks => 1,
# Line 2238  sub _get_next_token ($) { Line 2554  sub _get_next_token ($) {
2554                                    
2555          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
2556                          line => $self->{line_prev},                          line => $self->{line_prev},
2557                          column => $self->{column_prev} - 1 - length $self->{s_kwd});                          column => $self->{column_prev} - 1 - length $self->{kwd});
2558          $self->{state} = BOGUS_COMMENT_STATE;          $self->{state} = BOGUS_COMMENT_STATE;
2559          ## Reconsume.          ## Reconsume.
2560          $self->{ct} = {type => COMMENT_TOKEN,          $self->{ct} = {type => COMMENT_TOKEN,
2561                                    data => $self->{s_kwd},                                    data => $self->{kwd},
2562                                    line => $self->{line_prev},                                    line => $self->{line_prev},
2563                                    column => $self->{column_prev} - 1 - length $self->{s_kwd},                                    column => $self->{column_prev} - 1 - length $self->{kwd},
2564                                   };                                   };
2565          redo A;          redo A;
2566        }        }
# Line 2255  sub _get_next_token ($) { Line 2571  sub _get_next_token ($) {
2571              '[CD' => 0x0041, # A              '[CD' => 0x0041, # A
2572              '[CDA' => 0x0054, # T              '[CDA' => 0x0054, # T
2573              '[CDAT' => 0x0041, # A              '[CDAT' => 0x0041, # A
2574            }->{$self->{s_kwd}}) {            }->{$self->{kwd}}) {
2575                    
2576          ## Stay in the state.          ## Stay in the state.
2577          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
2578                    
2579      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2580        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2271  sub _get_next_token ($) { Line 2587  sub _get_next_token ($) {
2587      }      }
2588        
2589          redo A;          redo A;
2590        } elsif ($self->{s_kwd} eq '[CDATA' and        } elsif ($self->{kwd} eq '[CDATA' and
2591                 $self->{nc} == 0x005B) { # [                 $self->{nc} == 0x005B) { # [
2592          if ($self->{is_xml} and          if ($self->{is_xml} and
2593              not $self->{tainted} and              not $self->{tainted} and
# Line 2306  sub _get_next_token ($) { Line 2622  sub _get_next_token ($) {
2622                    
2623          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
2624                          line => $self->{line_prev},                          line => $self->{line_prev},
2625                          column => $self->{column_prev} - 1 - length $self->{s_kwd});                          column => $self->{column_prev} - 1 - length $self->{kwd});
2626          $self->{state} = BOGUS_COMMENT_STATE;          $self->{state} = BOGUS_COMMENT_STATE;
2627          ## Reconsume.          ## Reconsume.
2628          $self->{ct} = {type => COMMENT_TOKEN,          $self->{ct} = {type => COMMENT_TOKEN,
2629                                    data => $self->{s_kwd},                                    data => $self->{kwd},
2630                                    line => $self->{line_prev},                                    line => $self->{line_prev},
2631                                    column => $self->{column_prev} - 1 - length $self->{s_kwd},                                    column => $self->{column_prev} - 1 - length $self->{kwd},
2632                                   };                                   };
2633          redo A;          redo A;
2634        }        }
# Line 2333  sub _get_next_token ($) { Line 2649  sub _get_next_token ($) {
2649        
2650          redo A;          redo A;
2651        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
2652          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');
2653          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2654          $self->{s_kwd} = '';            
2655              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2656            } else {
2657              
2658              $self->{state} = DATA_STATE;
2659              $self->{s_kwd} = '';
2660            }
2661                    
2662      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2663        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2353  sub _get_next_token ($) { Line 2674  sub _get_next_token ($) {
2674    
2675          redo A;          redo A;
2676        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2677          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2678          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2679          $self->{s_kwd} = '';            
2680              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2681            } else {
2682              
2683              $self->{state} = DATA_STATE;
2684              $self->{s_kwd} = '';
2685            }
2686          ## reconsume          ## reconsume
2687    
2688          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2397  sub _get_next_token ($) { Line 2723  sub _get_next_token ($) {
2723        
2724          redo A;          redo A;
2725        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
2726          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');
2727          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2728          $self->{s_kwd} = '';            
2729              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2730            } else {
2731              
2732              $self->{state} = DATA_STATE;
2733              $self->{s_kwd} = '';
2734            }
2735                    
2736      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2737        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2417  sub _get_next_token ($) { Line 2748  sub _get_next_token ($) {
2748    
2749          redo A;          redo A;
2750        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2751          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2752          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2753          $self->{s_kwd} = '';            
2754              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2755            } else {
2756              
2757              $self->{state} = DATA_STATE;
2758              $self->{s_kwd} = '';
2759            }
2760          ## reconsume          ## reconsume
2761    
2762          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2445  sub _get_next_token ($) { Line 2781  sub _get_next_token ($) {
2781          redo A;          redo A;
2782        }        }
2783      } elsif ($self->{state} == COMMENT_STATE) {      } elsif ($self->{state} == COMMENT_STATE) {
2784          ## XML5: "Comment state" and "DOCTYPE comment state".
2785    
2786        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2787                    
2788          $self->{state} = COMMENT_END_DASH_STATE;          $self->{state} = COMMENT_END_DASH_STATE;
# Line 2461  sub _get_next_token ($) { Line 2799  sub _get_next_token ($) {
2799        
2800          redo A;          redo A;
2801        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2802          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2803          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2804          $self->{s_kwd} = '';            
2805              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2806            } else {
2807              
2808              $self->{state} = DATA_STATE;
2809              $self->{s_kwd} = '';
2810            }
2811          ## reconsume          ## reconsume
2812    
2813          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2492  sub _get_next_token ($) { Line 2835  sub _get_next_token ($) {
2835          redo A;          redo A;
2836        }        }
2837      } elsif ($self->{state} == COMMENT_END_DASH_STATE) {      } elsif ($self->{state} == COMMENT_END_DASH_STATE) {
2838          ## XML5: "Comment dash state" and "DOCTYPE comment dash state".
2839    
2840        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2841                    
2842          $self->{state} = COMMENT_END_STATE;          $self->{state} = COMMENT_END_STATE;
# Line 2508  sub _get_next_token ($) { Line 2853  sub _get_next_token ($) {
2853        
2854          redo A;          redo A;
2855        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2856          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2857          $self->{s_kwd} = '';          if ($self->{in_subset}) {
2858          $self->{state} = DATA_STATE;            
2859          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2860            } else {
2861              
2862              $self->{state} = DATA_STATE;
2863              $self->{s_kwd} = '';
2864            }
2865          ## reconsume          ## reconsume
2866    
2867          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2536  sub _get_next_token ($) { Line 2885  sub _get_next_token ($) {
2885          redo A;          redo A;
2886        }        }
2887      } elsif ($self->{state} == COMMENT_END_STATE) {      } elsif ($self->{state} == COMMENT_END_STATE) {
2888          ## XML5: "Comment end state" and "DOCTYPE comment end state".
2889    
2890        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2891                    if ($self->{in_subset}) {
2892          $self->{state} = DATA_STATE;            
2893          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2894            } else {
2895              
2896              $self->{state} = DATA_STATE;
2897              $self->{s_kwd} = '';
2898            }
2899                    
2900      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2901        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2557  sub _get_next_token ($) { Line 2913  sub _get_next_token ($) {
2913          redo A;          redo A;
2914        } elsif ($self->{nc} == 0x002D) { # -        } elsif ($self->{nc} == 0x002D) { # -
2915                    
2916            ## XML5: Not a parse error.
2917          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',
2918                          line => $self->{line_prev},                          line => $self->{line_prev},
2919                          column => $self->{column_prev});                          column => $self->{column_prev});
# Line 2575  sub _get_next_token ($) { Line 2932  sub _get_next_token ($) {
2932        
2933          redo A;          redo A;
2934        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2935          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2936          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2937          $self->{s_kwd} = '';            
2938              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2939            } else {
2940              
2941              $self->{state} = DATA_STATE;
2942              $self->{s_kwd} = '';
2943            }
2944          ## reconsume          ## reconsume
2945    
2946          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2586  sub _get_next_token ($) { Line 2948  sub _get_next_token ($) {
2948          redo A;          redo A;
2949        } else {        } else {
2950                    
2951            ## XML5: Not a parse error.
2952          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',
2953                          line => $self->{line_prev},                          line => $self->{line_prev},
2954                          column => $self->{column_prev});                          column => $self->{column_prev});
# Line 2622  sub _get_next_token ($) { Line 2985  sub _get_next_token ($) {
2985          redo A;          redo A;
2986        } else {        } else {
2987                    
2988            ## XML5: Unless EOF, switch to the bogus comment state.
2989          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before DOCTYPE name');
2990          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
2991          ## reconsume          ## reconsume
2992          redo A;          redo A;
2993        }        }
2994      } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {
2995          ## XML5: "DOCTYPE root name before state".
2996    
2997        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
2998                    
2999          ## Stay in the state          ## Stay in the state
# Line 2645  sub _get_next_token ($) { Line 3011  sub _get_next_token ($) {
3011          redo A;          redo A;
3012        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3013                    
3014            ## XML5: No parse error.
3015          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');
3016          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3017          $self->{s_kwd} = '';          $self->{s_kwd} = '';
# Line 2673  sub _get_next_token ($) { Line 3040  sub _get_next_token ($) {
3040          return  ($self->{ct}); # DOCTYPE (quirks)          return  ($self->{ct}); # DOCTYPE (quirks)
3041    
3042          redo A;          redo A;
3043          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
3044            
3045            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');
3046            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3047            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3048            $self->{in_subset} = 1;
3049            
3050        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3051          $self->{line_prev} = $self->{line};
3052          $self->{column_prev} = $self->{column};
3053          $self->{column}++;
3054          $self->{nc}
3055              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3056        } else {
3057          $self->{set_nc}->($self);
3058        }
3059      
3060            return  ($self->{ct}); # DOCTYPE
3061            redo A;
3062        } else {        } else {
3063                    
3064          $self->{ct}->{name} = chr $self->{nc};          $self->{ct}->{name} = chr $self->{nc};
# Line 2692  sub _get_next_token ($) { Line 3078  sub _get_next_token ($) {
3078          redo A;          redo A;
3079        }        }
3080      } elsif ($self->{state} == DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == DOCTYPE_NAME_STATE) {
3081  ## ISSUE: Redundant "First," in the spec.        ## XML5: "DOCTYPE root name state".
3082    
3083          ## ISSUE: Redundant "First," in the spec.
3084    
3085        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
3086                    
3087          $self->{state} = AFTER_DOCTYPE_NAME_STATE;          $self->{state} = AFTER_DOCTYPE_NAME_STATE;
# Line 2738  sub _get_next_token ($) { Line 3127  sub _get_next_token ($) {
3127          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
3128    
3129          redo A;          redo A;
3130          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
3131            
3132            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3133            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3134            $self->{in_subset} = 1;
3135            
3136        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3137          $self->{line_prev} = $self->{line};
3138          $self->{column_prev} = $self->{column};
3139          $self->{column}++;
3140          $self->{nc}
3141              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3142        } else {
3143          $self->{set_nc}->($self);
3144        }
3145      
3146            return  ($self->{ct}); # DOCTYPE
3147            redo A;
3148        } else {        } else {
3149                    
3150          $self->{ct}->{name}          $self->{ct}->{name}
# Line 2757  sub _get_next_token ($) { Line 3164  sub _get_next_token ($) {
3164          redo A;          redo A;
3165        }        }
3166      } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {
3167          ## XML5: Corresponding to XML5's "DOCTYPE root name after
3168          ## state", but implemented differently.
3169    
3170        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
3171                    
3172          ## Stay in the state          ## Stay in the state
# Line 2773  sub _get_next_token ($) { Line 3183  sub _get_next_token ($) {
3183        
3184          redo A;          redo A;
3185        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3186            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3187              
3188              $self->{state} = DATA_STATE;
3189              $self->{s_kwd} = '';
3190            } else {
3191              
3192              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md def'); ## TODO: type
3193              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3194            }
3195                    
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3196                    
3197      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3198        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2787  sub _get_next_token ($) { Line 3204  sub _get_next_token ($) {
3204        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3205      }      }
3206        
3207            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         return  ($self->{ct}); # DOCTYPE  
   
3208          redo A;          redo A;
3209        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3210            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3211              
3212              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3213              $self->{state} = DATA_STATE;
3214              $self->{s_kwd} = '';
3215              $self->{ct}->{quirks} = 1;
3216            } else {
3217              
3218              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3219              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3220            }
3221                    
3222          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');          ## Reconsume.
3223          $self->{state} = DATA_STATE;          return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{s_kwd} = '';  
         ## reconsume  
   
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3224          redo A;          redo A;
3225        } elsif ($self->{nc} == 0x0050 or # P        } elsif ($self->{nc} == 0x0050 or # P
3226                 $self->{nc} == 0x0070) { # p                 $self->{nc} == 0x0070) { # p
3227            
3228          $self->{state} = PUBLIC_STATE;          $self->{state} = PUBLIC_STATE;
3229          $self->{s_kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
3230                    
3231      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3232        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2820  sub _get_next_token ($) { Line 3241  sub _get_next_token ($) {
3241          redo A;          redo A;
3242        } elsif ($self->{nc} == 0x0053 or # S        } elsif ($self->{nc} == 0x0053 or # S
3243                 $self->{nc} == 0x0073) { # s                 $self->{nc} == 0x0073) { # s
3244            
3245          $self->{state} = SYSTEM_STATE;          $self->{state} = SYSTEM_STATE;
3246          $self->{s_kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
3247                    
3248      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3249        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2834  sub _get_next_token ($) { Line 3256  sub _get_next_token ($) {
3256      }      }
3257        
3258          redo A;          redo A;
3259        } else {  ## TODO: " and ' for ENTITY
3260          } elsif ($self->{is_xml} and
3261                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3262                   $self->{nc} == 0x005B) { # [
3263                    
3264          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name');          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3265          $self->{ct}->{quirks} = 1;          $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3266            $self->{in_subset} = 1;
3267            
3268        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3269          $self->{line_prev} = $self->{line};
3270          $self->{column_prev} = $self->{column};
3271          $self->{column}++;
3272          $self->{nc}
3273              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3274        } else {
3275          $self->{set_nc}->($self);
3276        }
3277      
3278            return  ($self->{ct}); # DOCTYPE
3279            redo A;
3280          } else {
3281            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name'); ## TODO: type
3282    
3283            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3284              
3285              $self->{ct}->{quirks} = 1;
3286              $self->{state} = BOGUS_DOCTYPE_STATE;
3287            } else {
3288              
3289              $self->{state} = BOGUS_MD_STATE;
3290            }
3291    
         $self->{state} = BOGUS_DOCTYPE_STATE;  
3292                    
3293      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3294        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2861  sub _get_next_token ($) { Line 3310  sub _get_next_token ($) {
3310              0x0042, # B              0x0042, # B
3311              0x004C, # L              0x004C, # L
3312              0x0049, # I              0x0049, # I
3313            ]->[length $self->{s_kwd}] or            ]->[length $self->{kwd}] or
3314            $self->{nc} == [            $self->{nc} == [
3315              undef,              undef,
3316              0x0075, # u              0x0075, # u
3317              0x0062, # b              0x0062, # b
3318              0x006C, # l              0x006C, # l
3319              0x0069, # i              0x0069, # i
3320            ]->[length $self->{s_kwd}]) {            ]->[length $self->{kwd}]) {
3321                    
3322          ## Stay in the state.          ## Stay in the state.
3323          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
3324                    
3325      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3326        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2884  sub _get_next_token ($) { Line 3333  sub _get_next_token ($) {
3333      }      }
3334        
3335          redo A;          redo A;
3336        } elsif ((length $self->{s_kwd}) == 5 and        } elsif ((length $self->{kwd}) == 5 and
3337                 ($self->{nc} == 0x0043 or # C                 ($self->{nc} == 0x0043 or # C
3338                  $self->{nc} == 0x0063)) { # c                  $self->{nc} == 0x0063)) { # c
3339                    if ($self->{is_xml} and
3340                ($self->{kwd} ne 'PUBLI' or $self->{nc} == 0x0063)) { # c
3341              
3342              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
3343                              text => 'PUBLIC',
3344                              line => $self->{line_prev},
3345                              column => $self->{column_prev} - 4);
3346            } else {
3347              
3348            }
3349          $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;          $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
3350                    
3351      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 2902  sub _get_next_token ($) { Line 3360  sub _get_next_token ($) {
3360        
3361          redo A;          redo A;
3362        } else {        } else {
3363                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name', ## TODO: type
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name',  
3364                          line => $self->{line_prev},                          line => $self->{line_prev},
3365                          column => $self->{column_prev} + 1 - length $self->{s_kwd});                          column => $self->{column_prev} + 1 - length $self->{kwd});
3366          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3367              
3368          $self->{state} = BOGUS_DOCTYPE_STATE;            $self->{ct}->{quirks} = 1;
3369              $self->{state} = BOGUS_DOCTYPE_STATE;
3370            } else {
3371              
3372              $self->{state} = BOGUS_MD_STATE;
3373            }
3374          ## Reconsume.          ## Reconsume.
3375          redo A;          redo A;
3376        }        }
# Line 2920  sub _get_next_token ($) { Line 3382  sub _get_next_token ($) {
3382              0x0053, # S              0x0053, # S
3383              0x0054, # T              0x0054, # T
3384              0x0045, # E              0x0045, # E
3385            ]->[length $self->{s_kwd}] or            ]->[length $self->{kwd}] or
3386            $self->{nc} == [            $self->{nc} == [
3387              undef,              undef,
3388              0x0079, # y              0x0079, # y
3389              0x0073, # s              0x0073, # s
3390              0x0074, # t              0x0074, # t
3391              0x0065, # e              0x0065, # e
3392            ]->[length $self->{s_kwd}]) {            ]->[length $self->{kwd}]) {
3393                    
3394          ## Stay in the state.          ## Stay in the state.
3395          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
3396                    
3397      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3398        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2943  sub _get_next_token ($) { Line 3405  sub _get_next_token ($) {
3405      }      }
3406        
3407          redo A;          redo A;
3408        } elsif ((length $self->{s_kwd}) == 5 and        } elsif ((length $self->{kwd}) == 5 and
3409                 ($self->{nc} == 0x004D or # M                 ($self->{nc} == 0x004D or # M
3410                  $self->{nc} == 0x006D)) { # m                  $self->{nc} == 0x006D)) { # m
3411                    if ($self->{is_xml} and
3412                ($self->{kwd} ne 'SYSTE' or $self->{nc} == 0x006D)) { # m
3413              
3414              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
3415                              text => 'SYSTEM',
3416                              line => $self->{line_prev},
3417                              column => $self->{column_prev} - 4);
3418            } else {
3419              
3420            }
3421          $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;          $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
3422                    
3423      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 2961  sub _get_next_token ($) { Line 3432  sub _get_next_token ($) {
3432        
3433          redo A;          redo A;
3434        } else {        } else {
3435                    $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name', ## TODO: type
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name',  
3436                          line => $self->{line_prev},                          line => $self->{line_prev},
3437                          column => $self->{column_prev} + 1 - length $self->{s_kwd});                          column => $self->{column_prev} + 1 - length $self->{kwd});
3438          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3439              
3440          $self->{state} = BOGUS_DOCTYPE_STATE;            $self->{ct}->{quirks} = 1;
3441              $self->{state} = BOGUS_DOCTYPE_STATE;
3442            } else {
3443              
3444              $self->{state} = BOGUS_MD_STATE;
3445            }
3446          ## Reconsume.          ## Reconsume.
3447          redo A;          redo A;
3448        }        }
# Line 3020  sub _get_next_token ($) { Line 3495  sub _get_next_token ($) {
3495        
3496          redo A;          redo A;
3497        } elsif ($self->{nc} eq 0x003E) { # >        } elsif ($self->{nc} eq 0x003E) { # >
           
3498          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');
3499            
3500          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3501          $self->{s_kwd} = '';            
3502              $self->{state} = DATA_STATE;
3503              $self->{s_kwd} = '';
3504              $self->{ct}->{quirks} = 1;
3505            } else {
3506              
3507              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3508            }
3509            
3510                    
3511      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3512        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3036  sub _get_next_token ($) { Line 3518  sub _get_next_token ($) {
3518        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3519      }      }
3520        
3521            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3522          redo A;          redo A;
3523        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3524            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3525              
3526              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3527              $self->{state} = DATA_STATE;
3528              $self->{s_kwd} = '';
3529              $self->{ct}->{quirks} = 1;
3530            } else {
3531              
3532              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3533              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3534            }
3535                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');  
   
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3536          ## reconsume          ## reconsume
   
         $self->{ct}->{quirks} = 1;  
3537          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
3538          redo A;          redo A;
3539        } else {        } elsif ($self->{is_xml} and
3540                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3541                   $self->{nc} == 0x005B) { # [
3542            
3543            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');
3544            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3545            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3546            $self->{in_subset} = 1;
3547                    
3548        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3549          $self->{line_prev} = $self->{line};
3550          $self->{column_prev} = $self->{column};
3551          $self->{column}++;
3552          $self->{nc}
3553              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3554        } else {
3555          $self->{set_nc}->($self);
3556        }
3557      
3558            return  ($self->{ct}); # DOCTYPE
3559            redo A;
3560          } else {
3561          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC');
         $self->{ct}->{quirks} = 1;  
3562    
3563          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3564              
3565              $self->{ct}->{quirks} = 1;
3566              $self->{state} = BOGUS_DOCTYPE_STATE;
3567            } else {
3568              
3569              $self->{state} = BOGUS_MD_STATE;
3570            }
3571    
3572                    
3573      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3574        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3089  sub _get_next_token ($) { Line 3599  sub _get_next_token ($) {
3599        
3600          redo A;          redo A;
3601        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
3602          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3603    
3604          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3605          $self->{s_kwd} = '';            
3606              $self->{state} = DATA_STATE;
3607              $self->{s_kwd} = '';
3608              $self->{ct}->{quirks} = 1;
3609            } else {
3610              
3611              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3612            }
3613    
3614                    
3615      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3616        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3105  sub _get_next_token ($) { Line 3622  sub _get_next_token ($) {
3622        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3623      }      }
3624        
3625            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3626          redo A;          redo A;
3627        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
3628          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3629    
3630          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3631          $self->{s_kwd} = '';            
3632          ## reconsume            $self->{state} = DATA_STATE;
3633              $self->{s_kwd} = '';
3634          $self->{ct}->{quirks} = 1;            $self->{ct}->{quirks} = 1;
3635            } else {
3636              
3637              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3638            }
3639            
3640            ## Reconsume.
3641          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
3642          redo A;          redo A;
3643        } else {        } else {
3644                    
3645          $self->{ct}->{pubid} # DOCTYPE          $self->{ct}->{pubid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
3646          $self->{read_until}->($self->{ct}->{pubid}, q[">],          $self->{read_until}->($self->{ct}->{pubid}, q[">],
3647                                length $self->{ct}->{pubid});                                length $self->{ct}->{pubid});
3648    
# Line 3160  sub _get_next_token ($) { Line 3677  sub _get_next_token ($) {
3677        
3678          redo A;          redo A;
3679        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
3680          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3681    
3682          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3683          $self->{s_kwd} = '';            
3684              $self->{state} = DATA_STATE;
3685              $self->{s_kwd} = '';
3686              $self->{ct}->{quirks} = 1;
3687            } else {
3688              
3689              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3690            }
3691    
3692                    
3693      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3694        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3176  sub _get_next_token ($) { Line 3700  sub _get_next_token ($) {
3700        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3701      }      }
3702        
3703            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3704          redo A;          redo A;
3705        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
3706          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed PUBLIC literal');
3707    
3708          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3709          $self->{s_kwd} = '';            
3710              $self->{state} = DATA_STATE;
3711              $self->{s_kwd} = '';
3712              $self->{ct}->{quirks} = 1;
3713            } else {
3714              
3715              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3716            }
3717          
3718          ## reconsume          ## reconsume
3719            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
3720          redo A;          redo A;
3721        } else {        } else {
3722                    
3723          $self->{ct}->{pubid} # DOCTYPE          $self->{ct}->{pubid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
3724          $self->{read_until}->($self->{ct}->{pubid}, q['>],          $self->{read_until}->($self->{ct}->{pubid}, q['>],
3725                                length $self->{ct}->{pubid});                                length $self->{ct}->{pubid});
3726    
# Line 3232  sub _get_next_token ($) { Line 3756  sub _get_next_token ($) {
3756          redo A;          redo A;
3757        } elsif ($self->{nc} == 0x0022) { # "        } elsif ($self->{nc} == 0x0022) { # "
3758                    
3759          $self->{ct}->{sysid} = ''; # DOCTYPE          $self->{ct}->{sysid} = ''; # DOCTYPE/ENTITY/NOTATION
3760          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
3761                    
3762      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3248  sub _get_next_token ($) { Line 3772  sub _get_next_token ($) {
3772          redo A;          redo A;
3773        } elsif ($self->{nc} == 0x0027) { # '        } elsif ($self->{nc} == 0x0027) { # '
3774                    
3775          $self->{ct}->{sysid} = ''; # DOCTYPE          $self->{ct}->{sysid} = ''; # DOCTYPE/ENTITY/NOTATION
3776          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;          $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
3777                    
3778      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3263  sub _get_next_token ($) { Line 3787  sub _get_next_token ($) {
3787        
3788          redo A;          redo A;
3789        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3790            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3791              if ($self->{is_xml}) {
3792                
3793                $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
3794              } else {
3795                
3796              }
3797              $self->{state} = DATA_STATE;
3798              $self->{s_kwd} = '';
3799            } else {
3800              if ($self->{ct}->{type} == NOTATION_TOKEN) {
3801                
3802              } else {
3803                
3804                $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');            
3805              }
3806              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3807            }
3808                    
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3809                    
3810      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3811        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3277  sub _get_next_token ($) { Line 3817  sub _get_next_token ($) {
3817        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3818      }      }
3819        
3820            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         return  ($self->{ct}); # DOCTYPE  
   
3821          redo A;          redo A;
3822        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3823            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3824              
3825              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3826              
3827              $self->{state} = DATA_STATE;
3828              $self->{s_kwd} = '';
3829              $self->{ct}->{quirks} = 1;
3830            } else {
3831              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3832              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3833            }
3834                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');  
   
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3835          ## reconsume          ## reconsume
3836            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
3837          $self->{ct}->{quirks} = 1;          redo A;
3838          } elsif ($self->{is_xml} and
3839                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3840                   $self->{nc} == 0x005B) { # [
3841            
3842            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
3843            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3844            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3845            $self->{in_subset} = 1;
3846            
3847        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3848          $self->{line_prev} = $self->{line};
3849          $self->{column_prev} = $self->{column};
3850          $self->{column}++;
3851          $self->{nc}
3852              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3853        } else {
3854          $self->{set_nc}->($self);
3855        }
3856      
3857          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
3858          redo A;          redo A;
3859        } else {        } else {
           
3860          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC literal');
         $self->{ct}->{quirks} = 1;  
3861    
3862          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3863              
3864              $self->{ct}->{quirks} = 1;
3865              $self->{state} = BOGUS_DOCTYPE_STATE;
3866            } else {
3867              
3868              $self->{state} = BOGUS_MD_STATE;
3869            }
3870    
3871                    
3872      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3873        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3361  sub _get_next_token ($) { Line 3930  sub _get_next_token ($) {
3930        
3931          redo A;          redo A;
3932        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
3933          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3934                    
3935      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3936        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3377  sub _get_next_token ($) { Line 3943  sub _get_next_token ($) {
3943      }      }
3944        
3945    
3946          $self->{ct}->{quirks} = 1;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3947          return  ($self->{ct}); # DOCTYPE            
3948              $self->{state} = DATA_STATE;
3949              $self->{s_kwd} = '';
3950              $self->{ct}->{quirks} = 1;
3951            } else {
3952              
3953              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3954            }
3955    
3956            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
3957          redo A;          redo A;
3958        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3959            if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
3960              
3961              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
3962              $self->{state} = DATA_STATE;
3963              $self->{s_kwd} = '';
3964              $self->{ct}->{quirks} = 1;
3965            } else {
3966              
3967              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
3968              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3969            }
3970                    
         $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');  
   
         $self->{state} = DATA_STATE;  
         $self->{s_kwd} = '';  
3971          ## reconsume          ## reconsume
3972            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
3973            redo A;
3974          } elsif ($self->{is_xml} and
3975                   $self->{ct}->{type} == DOCTYPE_TOKEN and
3976                   $self->{nc} == 0x005B) { # [
3977            
3978            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
3979    
3980          $self->{ct}->{quirks} = 1;          $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3981            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3982            $self->{in_subset} = 1;
3983            
3984        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3985          $self->{line_prev} = $self->{line};
3986          $self->{column_prev} = $self->{column};
3987          $self->{column}++;
3988          $self->{nc}
3989              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3990        } else {
3991          $self->{set_nc}->($self);
3992        }
3993      
3994          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
3995          redo A;          redo A;
3996        } else {        } else {
           
3997          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM');
         $self->{ct}->{quirks} = 1;  
3998    
3999          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4000                        
4001              $self->{ct}->{quirks} = 1;
4002              $self->{state} = BOGUS_DOCTYPE_STATE;
4003            } else {
4004              
4005              $self->{state} = BOGUS_MD_STATE;
4006            }
4007    
4008                    
4009      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4010        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3428  sub _get_next_token ($) { Line 4034  sub _get_next_token ($) {
4034      }      }
4035        
4036          redo A;          redo A;
4037        } elsif ($self->{nc} == 0x003E) { # >        } elsif (not $self->{is_xml} and $self->{nc} == 0x003E) { # >
           
4038          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4039    
4040          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4041          $self->{s_kwd} = '';            
4042              $self->{state} = DATA_STATE;
4043              $self->{s_kwd} = '';
4044              $self->{ct}->{quirks} = 1;
4045            } else {
4046              
4047              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4048            }
4049            
4050                    
4051      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4052        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3445  sub _get_next_token ($) { Line 4058  sub _get_next_token ($) {
4058        $self->{set_nc}->($self);        $self->{set_nc}->($self);
4059      }      }
4060        
4061            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
4062          redo A;          redo A;
4063        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
4064          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4065    
4066          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4067          $self->{s_kwd} = '';            
4068              $self->{state} = DATA_STATE;
4069              $self->{s_kwd} = '';
4070              $self->{ct}->{quirks} = 1;
4071            } else {
4072              
4073              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4074            }
4075            
4076          ## reconsume          ## reconsume
4077            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         $self->{ct}->{quirks} = 1;  
         return  ($self->{ct}); # DOCTYPE  
   
4078          redo A;          redo A;
4079        } else {        } else {
4080                    
4081          $self->{ct}->{sysid} # DOCTYPE          $self->{ct}->{sysid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
4082          $self->{read_until}->($self->{ct}->{sysid}, q[">],          $self->{read_until}->($self->{ct}->{sysid}, q[">],
4083                                length $self->{ct}->{sysid});                                length $self->{ct}->{sysid});
4084    
# Line 3499  sub _get_next_token ($) { Line 4112  sub _get_next_token ($) {
4112      }      }
4113        
4114          redo A;          redo A;
4115        } elsif ($self->{nc} == 0x003E) { # >        } elsif (not $self->{is_xml} and $self->{nc} == 0x003E) { # >
4116                    
4117          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4118    
# Line 3522  sub _get_next_token ($) { Line 4135  sub _get_next_token ($) {
4135    
4136          redo A;          redo A;
4137        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
4138          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
4139    
4140          $self->{state} = DATA_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4141          $self->{s_kwd} = '';            
4142          ## reconsume            $self->{state} = DATA_STATE;
4143              $self->{s_kwd} = '';
4144          $self->{ct}->{quirks} = 1;            $self->{ct}->{quirks} = 1;
4145          return  ($self->{ct}); # DOCTYPE          } else {
4146              
4147              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4148            }
4149    
4150            ## reconsume
4151            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4152          redo A;          redo A;
4153        } else {        } else {
4154                    
4155          $self->{ct}->{sysid} # DOCTYPE          $self->{ct}->{sysid} .= chr $self->{nc}; # DOCTYPE/ENTITY/NOTATION
             .= chr $self->{nc};  
4156          $self->{read_until}->($self->{ct}->{sysid}, q['>],          $self->{read_until}->($self->{ct}->{sysid}, q['>],
4157                                length $self->{ct}->{sysid});                                length $self->{ct}->{sysid});
4158    
# Line 3571  sub _get_next_token ($) { Line 4187  sub _get_next_token ($) {
4187        
4188          redo A;          redo A;
4189        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
4190                    if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4191          $self->{state} = DATA_STATE;            
4192          $self->{s_kwd} = '';            $self->{state} = DATA_STATE;
4193              $self->{s_kwd} = '';
4194            } else {
4195              
4196              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4197            }
4198    
4199                    
4200      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4201        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3585  sub _get_next_token ($) { Line 4207  sub _get_next_token ($) {
4207        $self->{set_nc}->($self);        $self->{set_nc}->($self);
4208      }      }
4209        
4210            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
         return  ($self->{ct}); # DOCTYPE  
   
4211          redo A;          redo A;
4212    ## TODO: "NDATA"
4213        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4214                    if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4215          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');            
4216          $self->{state} = DATA_STATE;            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
4217          $self->{s_kwd} = '';            $self->{state} = DATA_STATE;
4218          ## reconsume            $self->{s_kwd} = '';
4219              $self->{ct}->{quirks} = 1;
4220            } else {
4221              
4222              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
4223              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4224            }
4225    
4226          $self->{ct}->{quirks} = 1;          ## reconsume
4227            return  ($self->{ct}); # DOCTYPE/ENTITY/NOTATION
4228            redo A;
4229          } elsif ($self->{is_xml} and
4230                   $self->{ct}->{type} == DOCTYPE_TOKEN and
4231                   $self->{nc} == 0x005B) { # [
4232            
4233            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4234            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
4235            $self->{in_subset} = 1;
4236            
4237        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4238          $self->{line_prev} = $self->{line};
4239          $self->{column_prev} = $self->{column};
4240          $self->{column}++;
4241          $self->{nc}
4242              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4243        } else {
4244          $self->{set_nc}->($self);
4245        }
4246      
4247          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
   
4248          redo A;          redo A;
4249        } else {        } else {
           
4250          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');
         #$self->{ct}->{quirks} = 1;  
4251    
4252          $self->{state} = BOGUS_DOCTYPE_STATE;          if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
4253              
4254              #$self->{ct}->{quirks} = 1;
4255              $self->{state} = BOGUS_DOCTYPE_STATE;
4256            } else {
4257              
4258              $self->{state} = BOGUS_MD_STATE;
4259            }
4260    
4261                    
4262      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4263        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3639  sub _get_next_token ($) { Line 4291  sub _get_next_token ($) {
4291          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
4292    
4293          redo A;          redo A;
4294          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
4295            
4296            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4297            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
4298            $self->{in_subset} = 1;
4299            
4300        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4301          $self->{line_prev} = $self->{line};
4302          $self->{column_prev} = $self->{column};
4303          $self->{column}++;
4304          $self->{nc}
4305              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4306        } else {
4307          $self->{set_nc}->($self);
4308        }
4309      
4310            return  ($self->{ct}); # DOCTYPE
4311            redo A;
4312        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4313                    
4314          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
# Line 3651  sub _get_next_token ($) { Line 4321  sub _get_next_token ($) {
4321        } else {        } else {
4322                    
4323          my $s = '';          my $s = '';
4324          $self->{read_until}->($s, q[>], 0);          $self->{read_until}->($s, q{>[}, 0);
4325    
4326          ## Stay in the state          ## Stay in the state
4327                    
# Line 3671  sub _get_next_token ($) { Line 4341  sub _get_next_token ($) {
4341        ## NOTE: "CDATA section state" in the state is jointly implemented        ## NOTE: "CDATA section state" in the state is jointly implemented
4342        ## by three states, |CDATA_SECTION_STATE|, |CDATA_SECTION_MSE1_STATE|,        ## by three states, |CDATA_SECTION_STATE|, |CDATA_SECTION_MSE1_STATE|,
4343        ## and |CDATA_SECTION_MSE2_STATE|.        ## and |CDATA_SECTION_MSE2_STATE|.
4344    
4345          ## XML5: "CDATA state".
4346                
4347        if ($self->{nc} == 0x005D) { # ]        if ($self->{nc} == 0x005D) { # ]
4348                    
# Line 3697  sub _get_next_token ($) { Line 4369  sub _get_next_token ($) {
4369    
4370          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
4371          $self->{s_kwd} = '';          $self->{s_kwd} = '';
4372                    ## Reconsume.
     if ($self->{char_buffer_pos} < length $self->{char_buffer}) {  
       $self->{line_prev} = $self->{line};  
       $self->{column_prev} = $self->{column};  
       $self->{column}++;  
       $self->{nc}  
           = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);  
     } else {  
       $self->{set_nc}->($self);  
     }  
     
4373          if (length $self->{ct}->{data}) { # character          if (length $self->{ct}->{data}) { # character
4374                        
4375            return  ($self->{ct}); # character            return  ($self->{ct}); # character
# Line 3740  sub _get_next_token ($) { Line 4402  sub _get_next_token ($) {
4402    
4403        ## ISSUE: "text tokens" in spec.        ## ISSUE: "text tokens" in spec.
4404      } elsif ($self->{state} == CDATA_SECTION_MSE1_STATE) {      } elsif ($self->{state} == CDATA_SECTION_MSE1_STATE) {
4405          ## XML5: "CDATA bracket state".
4406    
4407        if ($self->{nc} == 0x005D) { # ]        if ($self->{nc} == 0x005D) { # ]
4408                    
4409          $self->{state} = CDATA_SECTION_MSE2_STATE;          $self->{state} = CDATA_SECTION_MSE2_STATE;
# Line 3757  sub _get_next_token ($) { Line 4421  sub _get_next_token ($) {
4421          redo A;          redo A;
4422        } else {        } else {
4423                    
4424            ## XML5: If EOF, "]" is not appended and changed to the data state.
4425          $self->{ct}->{data} .= ']';          $self->{ct}->{data} .= ']';
4426          $self->{state} = CDATA_SECTION_STATE;          $self->{state} = CDATA_SECTION_STATE; ## XML5: Stay in the state.
4427          ## Reconsume.          ## Reconsume.
4428          redo A;          redo A;
4429        }        }
4430      } elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) {      } elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) {
4431          ## XML5: "CDATA end state".
4432    
4433        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
4434          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
4435          $self->{s_kwd} = '';          $self->{s_kwd} = '';
# Line 3805  sub _get_next_token ($) { Line 4472  sub _get_next_token ($) {
4472                    
4473          $self->{ct}->{data} .= ']]'; # character          $self->{ct}->{data} .= ']]'; # character
4474          $self->{state} = CDATA_SECTION_STATE;          $self->{state} = CDATA_SECTION_STATE;
4475          ## Reconsume.          ## Reconsume. ## XML5: Emit.
4476          redo A;          redo A;
4477        }        }
4478      } elsif ($self->{state} == ENTITY_STATE) {      } elsif ($self->{state} == ENTITY_STATE) {
# Line 3822  sub _get_next_token ($) { Line 4489  sub _get_next_token ($) {
4489        } elsif ($self->{nc} == 0x0023) { # #        } elsif ($self->{nc} == 0x0023) { # #
4490                    
4491          $self->{state} = ENTITY_HASH_STATE;          $self->{state} = ENTITY_HASH_STATE;
4492          $self->{s_kwd} = '#';          $self->{kwd} = '#';
4493                    
4494      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4495        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3842  sub _get_next_token ($) { Line 4509  sub _get_next_token ($) {
4509                    
4510          require Whatpm::_NamedEntityList;          require Whatpm::_NamedEntityList;
4511          $self->{state} = ENTITY_NAME_STATE;          $self->{state} = ENTITY_NAME_STATE;
4512          $self->{s_kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
4513          $self->{entity__value} = $self->{s_kwd};          $self->{entity__value} = $self->{kwd};
4514          $self->{entity__match} = 0;          $self->{entity__match} = 0;
4515                    
4516      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3893  sub _get_next_token ($) { Line 4560  sub _get_next_token ($) {
4560            $self->{nc} == 0x0058) { # X            $self->{nc} == 0x0058) { # X
4561                    
4562          $self->{state} = HEXREF_X_STATE;          $self->{state} = HEXREF_X_STATE;
4563          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
4564                    
4565      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4566        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3910  sub _get_next_token ($) { Line 4577  sub _get_next_token ($) {
4577                 $self->{nc} <= 0x0039) { # 0..9                 $self->{nc} <= 0x0039) { # 0..9
4578                    
4579          $self->{state} = NCR_NUM_STATE;          $self->{state} = NCR_NUM_STATE;
4580          $self->{s_kwd} = $self->{nc} - 0x0030;          $self->{kwd} = $self->{nc} - 0x0030;
4581                    
4582      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4583        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3956  sub _get_next_token ($) { Line 4623  sub _get_next_token ($) {
4623        if (0x0030 <= $self->{nc} and        if (0x0030 <= $self->{nc} and
4624            $self->{nc} <= 0x0039) { # 0..9            $self->{nc} <= 0x0039) { # 0..9
4625                    
4626          $self->{s_kwd} *= 10;          $self->{kwd} *= 10;
4627          $self->{s_kwd} += $self->{nc} - 0x0030;          $self->{kwd} += $self->{nc} - 0x0030;
4628                    
4629          ## Stay in the state.          ## Stay in the state.
4630                    
# Line 3993  sub _get_next_token ($) { Line 4660  sub _get_next_token ($) {
4660          #          #
4661        }        }
4662    
4663        my $code = $self->{s_kwd};        my $code = $self->{kwd};
4664        my $l = $self->{line_prev};        my $l = $self->{line_prev};
4665        my $c = $self->{column_prev};        my $c = $self->{column_prev};
4666        if ($charref_map->{$code}) {        if ($charref_map->{$code}) {
# Line 4036  sub _get_next_token ($) { Line 4703  sub _get_next_token ($) {
4703          # 0..9, A..F, a..f          # 0..9, A..F, a..f
4704                    
4705          $self->{state} = HEXREF_HEX_STATE;          $self->{state} = HEXREF_HEX_STATE;
4706          $self->{s_kwd} = 0;          $self->{kwd} = 0;
4707          ## Reconsume.          ## Reconsume.
4708          redo A;          redo A;
4709        } else {        } else {
# Line 4054  sub _get_next_token ($) { Line 4721  sub _get_next_token ($) {
4721            $self->{s_kwd} = '';            $self->{s_kwd} = '';
4722            ## Reconsume.            ## Reconsume.
4723            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
4724                      data => '&' . $self->{s_kwd},                      data => '&' . $self->{kwd},
4725                      line => $self->{line_prev},                      line => $self->{line_prev},
4726                      column => $self->{column_prev} - length $self->{s_kwd},                      column => $self->{column_prev} - length $self->{kwd},
4727                     });                     });
4728            redo A;            redo A;
4729          } else {          } else {
4730                        
4731            $self->{ca}->{value} .= '&' . $self->{s_kwd};            $self->{ca}->{value} .= '&' . $self->{kwd};
4732            $self->{state} = $self->{prev_state};            $self->{state} = $self->{prev_state};
4733            $self->{s_kwd} = '';            $self->{s_kwd} = '';
4734            ## Reconsume.            ## Reconsume.
# Line 4072  sub _get_next_token ($) { Line 4739  sub _get_next_token ($) {
4739        if (0x0030 <= $self->{nc} and $self->{nc} <= 0x0039) {        if (0x0030 <= $self->{nc} and $self->{nc} <= 0x0039) {
4740          # 0..9          # 0..9
4741                    
4742          $self->{s_kwd} *= 0x10;          $self->{kwd} *= 0x10;
4743          $self->{s_kwd} += $self->{nc} - 0x0030;          $self->{kwd} += $self->{nc} - 0x0030;
4744          ## Stay in the state.          ## Stay in the state.
4745                    
4746      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 4090  sub _get_next_token ($) { Line 4757  sub _get_next_token ($) {
4757        } elsif (0x0061 <= $self->{nc} and        } elsif (0x0061 <= $self->{nc} and
4758                 $self->{nc} <= 0x0066) { # a..f                 $self->{nc} <= 0x0066) { # a..f
4759                    
4760          $self->{s_kwd} *= 0x10;          $self->{kwd} *= 0x10;
4761          $self->{s_kwd} += $self->{nc} - 0x0060 + 9;          $self->{kwd} += $self->{nc} - 0x0060 + 9;
4762          ## Stay in the state.          ## Stay in the state.
4763                    
4764      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 4108  sub _get_next_token ($) { Line 4775  sub _get_next_token ($) {
4775        } elsif (0x0041 <= $self->{nc} and        } elsif (0x0041 <= $self->{nc} and
4776                 $self->{nc} <= 0x0046) { # A..F                 $self->{nc} <= 0x0046) { # A..F
4777                    
4778          $self->{s_kwd} *= 0x10;          $self->{kwd} *= 0x10;
4779          $self->{s_kwd} += $self->{nc} - 0x0040 + 9;          $self->{kwd} += $self->{nc} - 0x0040 + 9;
4780          ## Stay in the state.          ## Stay in the state.
4781                    
4782      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 4146  sub _get_next_token ($) { Line 4813  sub _get_next_token ($) {
4813          #          #
4814        }        }
4815    
4816        my $code = $self->{s_kwd};        my $code = $self->{kwd};
4817        my $l = $self->{line_prev};        my $l = $self->{line_prev};
4818        my $c = $self->{column_prev};        my $c = $self->{column_prev};
4819        if ($charref_map->{$code}) {        if ($charref_map->{$code}) {
# Line 4183  sub _get_next_token ($) { Line 4850  sub _get_next_token ($) {
4850          redo A;          redo A;
4851        }        }
4852      } elsif ($self->{state} == ENTITY_NAME_STATE) {      } elsif ($self->{state} == ENTITY_NAME_STATE) {
4853        if (length $self->{s_kwd} < 30 and        if (length $self->{kwd} < 30 and
4854            ## NOTE: Some number greater than the maximum length of entity name            ## NOTE: Some number greater than the maximum length of entity name
4855            ((0x0041 <= $self->{nc} and # a            ((0x0041 <= $self->{nc} and # a
4856              $self->{nc} <= 0x005A) or # x              $self->{nc} <= 0x005A) or # x
# Line 4193  sub _get_next_token ($) { Line 4860  sub _get_next_token ($) {
4860              $self->{nc} <= 0x0039) or # 9              $self->{nc} <= 0x0039) or # 9
4861             $self->{nc} == 0x003B)) { # ;             $self->{nc} == 0x003B)) { # ;
4862          our $EntityChar;          our $EntityChar;
4863          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
4864          if (defined $EntityChar->{$self->{s_kwd}}) {          if (defined $EntityChar->{$self->{kwd}}) {
4865            if ($self->{nc} == 0x003B) { # ;            if ($self->{nc} == 0x003B) { # ;
4866                            
4867              $self->{entity__value} = $EntityChar->{$self->{s_kwd}};              $self->{entity__value} = $EntityChar->{$self->{kwd}};
4868              $self->{entity__match} = 1;              $self->{entity__match} = 1;
4869                            
4870      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 4213  sub _get_next_token ($) { Line 4880  sub _get_next_token ($) {
4880              #              #
4881            } else {            } else {
4882                            
4883              $self->{entity__value} = $EntityChar->{$self->{s_kwd}};              $self->{entity__value} = $EntityChar->{$self->{kwd}};
4884              $self->{entity__match} = -1;              $self->{entity__match} = -1;
4885              ## Stay in the state.              ## Stay in the state.
4886                            
# Line 4261  sub _get_next_token ($) { Line 4928  sub _get_next_token ($) {
4928          if ($self->{prev_state} != DATA_STATE and # in attribute          if ($self->{prev_state} != DATA_STATE and # in attribute
4929              $self->{entity__match} < -1) {              $self->{entity__match} < -1) {
4930                        
4931            $data = '&' . $self->{s_kwd};            $data = '&' . $self->{kwd};
4932            #            #
4933          } else {          } else {
4934                        
# Line 4273  sub _get_next_token ($) { Line 4940  sub _get_next_token ($) {
4940                    
4941          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero',
4942                          line => $self->{line_prev},                          line => $self->{line_prev},
4943                          column => $self->{column_prev} - length $self->{s_kwd});                          column => $self->{column_prev} - length $self->{kwd});
4944          $data = '&' . $self->{s_kwd};          $data = '&' . $self->{kwd};
4945          #          #
4946        }        }
4947        
# Line 4297  sub _get_next_token ($) { Line 4964  sub _get_next_token ($) {
4964                    data => $data,                    data => $data,
4965                    has_reference => $has_ref,                    has_reference => $has_ref,
4966                    line => $self->{line_prev},                    line => $self->{line_prev},
4967                    column => $self->{column_prev} + 1 - length $self->{s_kwd},                    column => $self->{column_prev} + 1 - length $self->{kwd},
4968                   });                   });
4969          redo A;          redo A;
4970        } else {        } else {
# Line 4313  sub _get_next_token ($) { Line 4980  sub _get_next_token ($) {
4980      ## XML-only states      ## XML-only states
4981    
4982      } elsif ($self->{state} == PI_STATE) {      } elsif ($self->{state} == PI_STATE) {
4983          ## XML5: "Pi state" and "DOCTYPE pi state".
4984    
4985        if ($is_space->{$self->{nc}} or        if ($is_space->{$self->{nc}} or
4986            $self->{nc} == 0x003F or # ? ## XML5: Same as "Anything else"            $self->{nc} == 0x003F or # ?
4987            $self->{nc} == -1) {            $self->{nc} == -1) {
4988            ## XML5: U+003F: "pi state": Same as "Anything else"; "DOCTYPE
4989            ## pi state": Switch to the "DOCTYPE pi after state".  EOF:
4990            ## "DOCTYPE pi state": Parse error, switch to the "data
4991            ## state".
4992          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare pio', ## TODO: type          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare pio', ## TODO: type
4993                          line => $self->{line_prev},                          line => $self->{line_prev},
4994                          column => $self->{column_prev}                          column => $self->{column_prev}
# Line 4330  sub _get_next_token ($) { Line 5003  sub _get_next_token ($) {
5003                        };                        };
5004          redo A;          redo A;
5005        } else {        } else {
5006            ## XML5: "DOCTYPE pi state": Stay in the state.
5007          $self->{ct} = {type => PI_TOKEN,          $self->{ct} = {type => PI_TOKEN,
5008                         target => chr $self->{nc},                         target => chr $self->{nc},
5009                         data => '',                         data => '',
# Line 4367  sub _get_next_token ($) { Line 5041  sub _get_next_token ($) {
5041          redo A;          redo A;
5042        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
5043          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type
5044          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
5045          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5046            } else {
5047              $self->{state} = DATA_STATE;
5048              $self->{s_kwd} = '';
5049            }
5050          ## Reconsume.          ## Reconsume.
5051          return  ($self->{ct}); # pi          return  ($self->{ct}); # pi
5052          redo A;          redo A;
# Line 4439  sub _get_next_token ($) { Line 5117  sub _get_next_token ($) {
5117          redo A;          redo A;
5118        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
5119          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type
5120          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
5121          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state"
5122            } else {
5123              $self->{state} = DATA_STATE;
5124              $self->{s_kwd} = '';
5125            }
5126          ## Reprocess.          ## Reprocess.
5127          return  ($self->{ct}); # pi          return  ($self->{ct}); # pi
5128          redo A;          redo A;
# Line 4464  sub _get_next_token ($) { Line 5146  sub _get_next_token ($) {
5146          redo A;          redo A;
5147        }        }
5148      } elsif ($self->{state} == PI_AFTER_STATE) {      } elsif ($self->{state} == PI_AFTER_STATE) {
5149          ## XML5: Part of "Pi after state".
5150    
5151        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
5152          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
5153          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5154            } else {
5155              $self->{state} = DATA_STATE;
5156              $self->{s_kwd} = '';
5157            }
5158                    
5159      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5160        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 4509  sub _get_next_token ($) { Line 5197  sub _get_next_token ($) {
5197          redo A;          redo A;
5198        }        }
5199      } elsif ($self->{state} == PI_DATA_AFTER_STATE) {      } elsif ($self->{state} == PI_DATA_AFTER_STATE) {
5200        ## XML5: Same as "pi after state" in XML5        ## XML5: Same as "pi after state" and "DOCTYPE pi after state".
5201    
5202        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
5203          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
5204          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5205            } else {
5206              $self->{state} = DATA_STATE;
5207              $self->{s_kwd} = '';
5208            }
5209                    
5210      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5211        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 4547  sub _get_next_token ($) { Line 5240  sub _get_next_token ($) {
5240          ## Reprocess.          ## Reprocess.
5241          redo A;          redo A;
5242        }        }
5243    
5244        } elsif ($self->{state} == DOCTYPE_INTERNAL_SUBSET_STATE) {
5245          if ($self->{nc} == 0x003C) { # <
5246            $self->{state} = DOCTYPE_TAG_STATE;
5247            
5248        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5249          $self->{line_prev} = $self->{line};
5250          $self->{column_prev} = $self->{column};
5251          $self->{column}++;
5252          $self->{nc}
5253              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5254        } else {
5255          $self->{set_nc}->($self);
5256        }
5257      
5258            redo A;
5259          } elsif ($self->{nc} == 0x0025) { # %
5260            ## XML5: Not defined yet.
5261    
5262            ## TODO:
5263            
5264        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5265          $self->{line_prev} = $self->{line};
5266          $self->{column_prev} = $self->{column};
5267          $self->{column}++;
5268          $self->{nc}
5269              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5270        } else {
5271          $self->{set_nc}->($self);
5272        }
5273      
5274            redo A;
5275          } elsif ($self->{nc} == 0x005D) { # ]
5276            delete $self->{in_subset};
5277            $self->{state} = DOCTYPE_INTERNAL_SUBSET_AFTER_STATE;
5278            
5279        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5280          $self->{line_prev} = $self->{line};
5281          $self->{column_prev} = $self->{column};
5282          $self->{column}++;
5283          $self->{nc}
5284              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5285        } else {
5286          $self->{set_nc}->($self);
5287        }
5288      
5289            redo A;
5290          } elsif ($is_space->{$self->{nc}}) {
5291            ## Stay in the state.
5292            
5293        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5294          $self->{line_prev} = $self->{line};
5295          $self->{column_prev} = $self->{column};
5296          $self->{column}++;
5297          $self->{nc}
5298              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5299        } else {
5300          $self->{set_nc}->($self);
5301        }
5302      
5303            redo A;
5304          } elsif ($self->{nc} == -1) {
5305            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed internal subset'); ## TODO: type
5306            delete $self->{in_subset};
5307            $self->{state} = DATA_STATE;
5308            $self->{s_kwd} = '';
5309            ## Reconsume.
5310            return  ({type => END_OF_DOCTYPE_TOKEN});
5311            redo A;
5312          } else {
5313            unless ($self->{internal_subset_tainted}) {
5314              ## XML5: No parse error.
5315              $self->{parse_error}->(level => $self->{level}->{must}, type => 'string in internal subset');
5316              $self->{internal_subset_tainted} = 1;
5317            }
5318            ## Stay in the state.
5319            
5320        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5321          $self->{line_prev} = $self->{line};
5322          $self->{column_prev} = $self->{column};
5323          $self->{column}++;
5324          $self->{nc}
5325              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5326        } else {
5327          $self->{set_nc}->($self);
5328        }
5329      
5330            redo A;
5331          }
5332        } elsif ($self->{state} == DOCTYPE_INTERNAL_SUBSET_AFTER_STATE) {
5333          if ($self->{nc} == 0x003E) { # >
5334            $self->{state} = DATA_STATE;
5335            $self->{s_kwd} = '';
5336            
5337        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5338          $self->{line_prev} = $self->{line};
5339          $self->{column_prev} = $self->{column};
5340          $self->{column}++;
5341          $self->{nc}
5342              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5343        } else {
5344          $self->{set_nc}->($self);
5345        }
5346      
5347            return  ({type => END_OF_DOCTYPE_TOKEN});
5348            redo A;
5349          } elsif ($self->{nc} == -1) {
5350            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
5351            $self->{state} = DATA_STATE;
5352            $self->{s_kwd} = '';
5353            ## Reconsume.
5354            return  ({type => END_OF_DOCTYPE_TOKEN});
5355            redo A;
5356          } else {
5357            ## XML5: No parse error and stay in the state.
5358            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after internal subset'); ## TODO: type
5359    
5360            $self->{state} = BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE;
5361            
5362        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5363          $self->{line_prev} = $self->{line};
5364          $self->{column_prev} = $self->{column};
5365          $self->{column}++;
5366          $self->{nc}
5367              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5368        } else {
5369          $self->{set_nc}->($self);
5370        }
5371      
5372            redo A;
5373          }
5374        } elsif ($self->{state} == BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE) {
5375          if ($self->{nc} == 0x003E) { # >
5376            $self->{state} = DATA_STATE;
5377            $self->{s_kwd} = '';
5378            
5379        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5380          $self->{line_prev} = $self->{line};
5381          $self->{column_prev} = $self->{column};
5382          $self->{column}++;
5383          $self->{nc}
5384              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5385        } else {
5386          $self->{set_nc}->($self);
5387        }
5388      
5389            return  ({type => END_OF_DOCTYPE_TOKEN});
5390            redo A;
5391          } elsif ($self->{nc} == -1) {
5392            $self->{state} = DATA_STATE;
5393            $self->{s_kwd} = '';
5394            ## Reconsume.
5395            return  ({type => END_OF_DOCTYPE_TOKEN});
5396            redo A;
5397          } else {
5398            ## Stay in the state.
5399            
5400        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5401          $self->{line_prev} = $self->{line};
5402          $self->{column_prev} = $self->{column};
5403          $self->{column}++;
5404          $self->{nc}
5405              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5406        } else {
5407          $self->{set_nc}->($self);
5408        }
5409      
5410            redo A;
5411          }
5412        } elsif ($self->{state} == DOCTYPE_TAG_STATE) {
5413          if ($self->{nc} == 0x0021) { # !
5414            $self->{state} = DOCTYPE_MARKUP_DECLARATION_OPEN_STATE;
5415            
5416        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5417          $self->{line_prev} = $self->{line};
5418          $self->{column_prev} = $self->{column};
5419          $self->{column}++;
5420          $self->{nc}
5421              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5422        } else {
5423          $self->{set_nc}->($self);
5424        }
5425      
5426            redo A;
5427          } elsif ($self->{nc} == 0x003F) { # ?
5428            $self->{state} = PI_STATE;
5429            
5430        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5431          $self->{line_prev} = $self->{line};
5432          $self->{column_prev} = $self->{column};
5433          $self->{column}++;
5434          $self->{nc}
5435              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5436        } else {
5437          $self->{set_nc}->($self);
5438        }
5439      
5440            redo A;
5441          } elsif ($self->{nc} == -1) {
5442            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago');
5443            $self->{state} = DATA_STATE;
5444            $self->{s_kwd} = '';
5445            ## Reconsume.
5446            redo A;
5447          } else {
5448            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago', ## XML5: Not a parse error.
5449                            line => $self->{line_prev},
5450                            column => $self->{column_prev});
5451            $self->{state} = BOGUS_COMMENT_STATE;
5452            $self->{ct} = {type => COMMENT_TOKEN,
5453                           data => '',
5454                          }; ## NOTE: Will be discarded.
5455            
5456        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5457          $self->{line_prev} = $self->{line};
5458          $self->{column_prev} = $self->{column};
5459          $self->{column}++;
5460          $self->{nc}
5461              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5462        } else {
5463          $self->{set_nc}->($self);
5464        }
5465      
5466            redo A;
5467          }
5468        } elsif ($self->{state} == DOCTYPE_MARKUP_DECLARATION_OPEN_STATE) {
5469          ## XML5: "DOCTYPE markup declaration state".
5470          
5471          if ($self->{nc} == 0x002D) { # -
5472            $self->{state} = MD_HYPHEN_STATE;
5473            
5474        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5475          $self->{line_prev} = $self->{line};
5476          $self->{column_prev} = $self->{column};
5477          $self->{column}++;
5478          $self->{nc}
5479              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5480        } else {
5481          $self->{set_nc}->($self);
5482        }
5483      
5484            redo A;
5485          } elsif ($self->{nc} == 0x0045) { # E
5486            $self->{state} = MD_E_STATE;
5487            $self->{kwd} = chr $self->{nc};
5488            
5489        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5490          $self->{line_prev} = $self->{line};
5491          $self->{column_prev} = $self->{column};
5492          $self->{column}++;
5493          $self->{nc}
5494              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5495        } else {
5496          $self->{set_nc}->($self);
5497        }
5498      
5499            redo A;
5500          } elsif ($self->{nc} == 0x0041) { # A
5501            $self->{state} = MD_ATTLIST_STATE;
5502            $self->{kwd} = chr $self->{nc};
5503            
5504        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5505          $self->{line_prev} = $self->{line};
5506          $self->{column_prev} = $self->{column};
5507          $self->{column}++;
5508          $self->{nc}
5509              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5510        } else {
5511          $self->{set_nc}->($self);
5512        }
5513      
5514            redo A;
5515          } elsif ($self->{nc} == 0x004E) { # N
5516            $self->{state} = MD_NOTATION_STATE;
5517            $self->{kwd} = chr $self->{nc};
5518            
5519        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5520          $self->{line_prev} = $self->{line};
5521          $self->{column_prev} = $self->{column};
5522          $self->{column}++;
5523          $self->{nc}
5524              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5525        } else {
5526          $self->{set_nc}->($self);
5527        }
5528      
5529            redo A;
5530          } else {
5531            #
5532          }
5533          
5534          ## XML5: No parse error.
5535          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5536                          line => $self->{line_prev},
5537                          column => $self->{column_prev} - 1);
5538          ## Reconsume.
5539          $self->{state} = BOGUS_COMMENT_STATE;
5540          $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded.
5541          redo A;
5542        } elsif ($self->{state} == MD_E_STATE) {
5543          if ($self->{nc} == 0x004E) { # N
5544            $self->{state} = MD_ENTITY_STATE;
5545            $self->{kwd} .= chr $self->{nc};
5546            
5547        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5548          $self->{line_prev} = $self->{line};
5549          $self->{column_prev} = $self->{column};
5550          $self->{column}++;
5551          $self->{nc}
5552              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5553        } else {
5554          $self->{set_nc}->($self);
5555        }
5556      
5557            redo A;
5558          } elsif ($self->{nc} == 0x004C) { # L
5559            ## XML5: <!ELEMENT> not supported.
5560            $self->{state} = MD_ELEMENT_STATE;
5561            $self->{kwd} .= chr $self->{nc};
5562            
5563        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5564          $self->{line_prev} = $self->{line};
5565          $self->{column_prev} = $self->{column};
5566          $self->{column}++;
5567          $self->{nc}
5568              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5569        } else {
5570          $self->{set_nc}->($self);
5571        }
5572      
5573            redo A;
5574          } else {
5575            ## XML5: No parse error.
5576            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5577                            line => $self->{line_prev},
5578                            column => $self->{column_prev} - 2
5579                                + 1 * ($self->{nc} == -1));
5580            ## Reconsume.
5581            $self->{state} = BOGUS_COMMENT_STATE;
5582            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5583            redo A;
5584          }
5585        } elsif ($self->{state} == MD_ENTITY_STATE) {
5586          if ($self->{nc} == {
5587                'EN' => 0x0054, # T
5588                'ENT' => 0x0049, # I
5589                'ENTI' => 0x0054, # T
5590              }->{$self->{kwd}}) {
5591            ## Stay in the state.
5592            $self->{kwd} .= chr $self->{nc};
5593            
5594        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5595          $self->{line_prev} = $self->{line};
5596          $self->{column_prev} = $self->{column};
5597          $self->{column}++;
5598          $self->{nc}
5599              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5600        } else {
5601          $self->{set_nc}->($self);
5602        }
5603      
5604            redo A;
5605          } elsif ($self->{kwd} eq 'ENTIT' and
5606                   $self->{nc} == 0x0059) { # Y
5607            $self->{ct} = {type => GENERAL_ENTITY_TOKEN, name => '', text => '',
5608                           line => $self->{line_prev},
5609                           column => $self->{column_prev} - 6};
5610            $self->{state} = DOCTYPE_MD_STATE;
5611            
5612        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5613          $self->{line_prev} = $self->{line};
5614          $self->{column_prev} = $self->{column};
5615          $self->{column}++;
5616          $self->{nc}
5617              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5618        } else {
5619          $self->{set_nc}->($self);
5620        }
5621      
5622            redo A;
5623          } else {
5624            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5625                            line => $self->{line_prev},
5626                            column => $self->{column_prev} - 1
5627                                - (length $self->{kwd})
5628                                + 1 * ($self->{nc} == -1));
5629            $self->{state} = BOGUS_COMMENT_STATE;
5630            ## Reconsume.
5631            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5632            redo A;
5633          }
5634        } elsif ($self->{state} == MD_ELEMENT_STATE) {
5635          if ($self->{nc} == {
5636                'EL' => 0x0045, # E
5637                'ELE' => 0x004D, # M
5638                'ELEM' => 0x0045, # E
5639                'ELEME' => 0x004E, # N
5640              }->{$self->{kwd}}) {
5641            ## Stay in the state.
5642            $self->{kwd} .= chr $self->{nc};
5643            
5644        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5645          $self->{line_prev} = $self->{line};
5646          $self->{column_prev} = $self->{column};
5647          $self->{column}++;
5648          $self->{nc}
5649              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5650        } else {
5651          $self->{set_nc}->($self);
5652        }
5653      
5654            redo A;
5655          } elsif ($self->{kwd} eq 'ELEMEN' and
5656                   $self->{nc} == 0x0054) { # T
5657            $self->{ct} = {type => ELEMENT_TOKEN, name => '',
5658                           line => $self->{line_prev},
5659                           column => $self->{column_prev} - 6};
5660            $self->{state} = DOCTYPE_MD_STATE;
5661            
5662        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5663          $self->{line_prev} = $self->{line};
5664          $self->{column_prev} = $self->{column};
5665          $self->{column}++;
5666          $self->{nc}
5667              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5668        } else {
5669          $self->{set_nc}->($self);
5670        }
5671      
5672            redo A;
5673          } else {
5674            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5675                            line => $self->{line_prev},
5676                            column => $self->{column_prev} - 1
5677                                - (length $self->{kwd})
5678                                + 1 * ($self->{nc} == -1));
5679            $self->{state} = BOGUS_COMMENT_STATE;
5680            ## Reconsume.
5681            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5682            redo A;
5683          }
5684        } elsif ($self->{state} == MD_ATTLIST_STATE) {
5685          if ($self->{nc} == {
5686                'A' => 0x0054, # T
5687                'AT' => 0x0054, # T
5688                'ATT' => 0x004C, # L
5689                'ATTL' => 0x0049, # I
5690                'ATTLI' => 0x0053, # S
5691              }->{$self->{kwd}}) {
5692            ## Stay in the state.
5693            $self->{kwd} .= chr $self->{nc};
5694            
5695        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5696          $self->{line_prev} = $self->{line};
5697          $self->{column_prev} = $self->{column};
5698          $self->{column}++;
5699          $self->{nc}
5700              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5701        } else {
5702          $self->{set_nc}->($self);
5703        }
5704      
5705            redo A;
5706          } elsif ($self->{kwd} eq 'ATTLIS' and
5707                   $self->{nc} == 0x0054) { # T
5708            $self->{ct} = {type => ATTLIST_TOKEN, name => '',
5709                           attrdefs => [],
5710                           line => $self->{line_prev},
5711                           column => $self->{column_prev} - 6};
5712            $self->{state} = DOCTYPE_MD_STATE;
5713            
5714        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5715          $self->{line_prev} = $self->{line};
5716          $self->{column_prev} = $self->{column};
5717          $self->{column}++;
5718          $self->{nc}
5719              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5720        } else {
5721          $self->{set_nc}->($self);
5722        }
5723      
5724            redo A;
5725          } else {
5726            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5727                            line => $self->{line_prev},
5728                            column => $self->{column_prev} - 1
5729                                 - (length $self->{kwd})
5730                                 + 1 * ($self->{nc} == -1));
5731            $self->{state} = BOGUS_COMMENT_STATE;
5732            ## Reconsume.
5733            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5734            redo A;
5735          }
5736        } elsif ($self->{state} == MD_NOTATION_STATE) {
5737          if ($self->{nc} == {
5738                'N' => 0x004F, # O
5739                'NO' => 0x0054, # T
5740                'NOT' => 0x0041, # A
5741                'NOTA' => 0x0054, # T
5742                'NOTAT' => 0x0049, # I
5743                'NOTATI' => 0x004F, # O
5744              }->{$self->{kwd}}) {
5745            ## Stay in the state.
5746            $self->{kwd} .= chr $self->{nc};
5747            
5748        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5749          $self->{line_prev} = $self->{line};
5750          $self->{column_prev} = $self->{column};
5751          $self->{column}++;
5752          $self->{nc}
5753              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5754        } else {
5755          $self->{set_nc}->($self);
5756        }
5757      
5758            redo A;
5759          } elsif ($self->{kwd} eq 'NOTATIO' and
5760                   $self->{nc} == 0x004E) { # N
5761            $self->{ct} = {type => NOTATION_TOKEN, name => '',
5762                           line => $self->{line_prev},
5763                           column => $self->{column_prev} - 6};
5764            $self->{state} = DOCTYPE_MD_STATE;
5765            
5766        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5767          $self->{line_prev} = $self->{line};
5768          $self->{column_prev} = $self->{column};
5769          $self->{column}++;
5770          $self->{nc}
5771              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5772        } else {
5773          $self->{set_nc}->($self);
5774        }
5775      
5776            redo A;
5777          } else {
5778            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5779                            line => $self->{line_prev},
5780                            column => $self->{column_prev} - 1
5781                                - (length $self->{kwd})
5782                                + 1 * ($self->{nc} == -1));
5783            $self->{state} = BOGUS_COMMENT_STATE;
5784            ## Reconsume.
5785            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5786            redo A;
5787          }
5788        } elsif ($self->{state} == DOCTYPE_MD_STATE) {
5789          ## XML5: "DOCTYPE ENTITY state", "DOCTYPE ATTLIST state", and
5790          ## "DOCTYPE NOTATION state".
5791    
5792          if ($is_space->{$self->{nc}}) {
5793            ## XML5: [NOTATION] Switch to the "DOCTYPE NOTATION identifier state".
5794            $self->{state} = BEFORE_MD_NAME_STATE;
5795            
5796        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5797          $self->{line_prev} = $self->{line};
5798          $self->{column_prev} = $self->{column};
5799          $self->{column}++;
5800          $self->{nc}
5801              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5802        } else {
5803          $self->{set_nc}->($self);
5804        }
5805      
5806            redo A;
5807          } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
5808                   $self->{nc} == 0x0025) { # %
5809            ## XML5: Switch to the "DOCTYPE bogus comment state".
5810            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before md name'); ## TODO: type
5811            $self->{state} = DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE;
5812            
5813        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5814          $self->{line_prev} = $self->{line};
5815          $self->{column_prev} = $self->{column};
5816          $self->{column}++;
5817          $self->{nc}
5818              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5819        } else {
5820          $self->{set_nc}->($self);
5821        }
5822      
5823            redo A;
5824          } elsif ($self->{nc} == -1) {
5825            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
5826            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
5827            ## Reconsume.
5828            redo A;
5829          } elsif ($self->{nc} == 0x003E) { # >
5830            ## XML5: Switch to the "DOCTYPE bogus comment state".
5831            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md name'); ## TODO: type
5832            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5833            
5834        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5835          $self->{line_prev} = $self->{line};
5836          $self->{column_prev} = $self->{column};
5837          $self->{column}++;
5838          $self->{nc}
5839              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5840        } else {
5841          $self->{set_nc}->($self);
5842        }
5843      
5844            redo A;
5845          } else {
5846            ## XML5: Switch to the "DOCTYPE bogus comment state".
5847            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before md name'); ## TODO: type
5848            $self->{state} = BEFORE_MD_NAME_STATE;
5849            redo A;
5850          }
5851        } elsif ($self->{state} == BEFORE_MD_NAME_STATE) {
5852          ## XML5: "DOCTYPE ENTITY parameter state", "DOCTYPE ENTITY type
5853          ## before state", "DOCTYPE ATTLIST name before state".
5854    
5855          if ($is_space->{$self->{nc}}) {
5856            ## Stay in the state.
5857            
5858        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5859          $self->{line_prev} = $self->{line};
5860          $self->{column_prev} = $self->{column};
5861          $self->{column}++;
5862          $self->{nc}
5863              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5864        } else {
5865          $self->{set_nc}->($self);
5866        }
5867      
5868            redo A;
5869          } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
5870                   $self->{nc} == 0x0025) { # %
5871            $self->{state} = DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE;
5872            
5873        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5874          $self->{line_prev} = $self->{line};
5875          $self->{column_prev} = $self->{column};
5876          $self->{column}++;
5877          $self->{nc}
5878              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5879        } else {
5880          $self->{set_nc}->($self);
5881        }
5882      
5883            redo A;
5884          } elsif ($self->{nc} == 0x003E) { # >
5885            ## XML5: Same as "Anything else".
5886            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md name'); ## TODO: type
5887            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5888            
5889        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5890          $self->{line_prev} = $self->{line};
5891          $self->{column_prev} = $self->{column};
5892          $self->{column}++;
5893          $self->{nc}
5894              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5895        } else {
5896          $self->{set_nc}->($self);
5897        }
5898      
5899            redo A;
5900          } elsif ($self->{nc} == -1) {
5901            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
5902            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
5903            ## Reconsume.
5904            redo A;
5905          } else {
5906            ## XML5: [ATTLIST] Not defined yet.
5907            $self->{ct}->{name} .= chr $self->{nc};
5908            $self->{state} = MD_NAME_STATE;
5909            
5910        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5911          $self->{line_prev} = $self->{line};
5912          $self->{column_prev} = $self->{column};
5913          $self->{column}++;
5914          $self->{nc}
5915              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5916        } else {
5917          $self->{set_nc}->($self);
5918        }
5919      
5920            redo A;
5921          }
5922        } elsif ($self->{state} == DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE) {
5923          if ($is_space->{$self->{nc}}) {
5924            ## XML5: Switch to the "DOCTYPE ENTITY parameter state".
5925            $self->{ct}->{type} = PARAMETER_ENTITY_TOKEN;
5926            $self->{state} = BEFORE_MD_NAME_STATE;
5927            
5928        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5929          $self->{line_prev} = $self->{line};
5930          $self->{column_prev} = $self->{column};
5931          $self->{column}++;
5932          $self->{nc}
5933              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5934        } else {
5935          $self->{set_nc}->($self);
5936        }
5937      
5938            redo A;
5939          } elsif ($self->{nc} == 0x003E) { # >
5940            ## XML5: Same as "Anything else".
5941            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md name'); ## TODO: type
5942            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5943            
5944        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5945          $self->{line_prev} = $self->{line};
5946          $self->{column_prev} = $self->{column};
5947          $self->{column}++;
5948          $self->{nc}
5949              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5950        } else {
5951          $self->{set_nc}->($self);
5952        }
5953      
5954            redo A;
5955          } elsif ($self->{nc} == -1) {
5956            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md');
5957            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
5958            ## Reconsume.
5959            redo A;
5960          } else {
5961            ## XML5: No parse error.
5962            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space after ENTITY percent'); ## TODO: type
5963            $self->{state} = BOGUS_COMMENT_STATE;
5964            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5965            ## Reconsume.
5966            redo A;
5967          }
5968        } elsif ($self->{state} == MD_NAME_STATE) {
5969          ## XML5: "DOCTYPE ENTITY name state" and "DOCTYPE ATTLIST name state".
5970          
5971          if ($is_space->{$self->{nc}}) {
5972            if ($self->{ct}->{type} == ATTLIST_TOKEN) {
5973              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
5974            } elsif ($self->{ct}->{type} == ELEMENT_TOKEN) {
5975              ## TODO: ...
5976              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
5977            } else { # ENTITY/NOTATION
5978              $self->{state} = AFTER_DOCTYPE_NAME_STATE;
5979            }
5980            
5981        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5982          $self->{line_prev} = $self->{line};
5983          $self->{column_prev} = $self->{column};
5984          $self->{column}++;
5985          $self->{nc}
5986              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5987        } else {
5988          $self->{set_nc}->($self);
5989        }
5990      
5991            redo A;
5992          } elsif ($self->{nc} == 0x003E) { # >
5993            if ($self->{ct}->{type} == ATTLIST_TOKEN) {
5994              #
5995            } else {
5996              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md def'); ## TODO: type
5997            }
5998            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5999            
6000        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6001          $self->{line_prev} = $self->{line};
6002          $self->{column_prev} = $self->{column};
6003          $self->{column}++;
6004          $self->{nc}
6005              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6006        } else {
6007          $self->{set_nc}->($self);
6008        }
6009      
6010            return  ($self->{ct}); # ELEMENT/ENTITY/ATTLIST/NOTATION
6011            redo A;
6012          } elsif ($self->{nc} == -1) {
6013            ## XML5: [ATTLIST] No parse error.
6014            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md');
6015            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6016            ## Reconsume.
6017            return  ($self->{ct}); # ELEMENT/ENTITY/ATTLIST/NOTATION
6018            redo A;
6019          } else {
6020            ## XML5: [ATTLIST] Not defined yet.
6021            $self->{ct}->{name} .= chr $self->{nc};
6022            ## Stay in the state.
6023            
6024        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6025          $self->{line_prev} = $self->{line};
6026          $self->{column_prev} = $self->{column};
6027          $self->{column}++;
6028          $self->{nc}
6029              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6030        } else {
6031          $self->{set_nc}->($self);
6032        }
6033      
6034            redo A;
6035          }
6036        } elsif ($self->{state} == DOCTYPE_ATTLIST_NAME_AFTER_STATE) {
6037          if ($is_space->{$self->{nc}}) {
6038            ## Stay in the state.
6039                    
6040        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6041          $self->{line_prev} = $self->{line};
6042          $self->{column_prev} = $self->{column};
6043          $self->{column}++;
6044          $self->{nc}
6045              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6046        } else {
6047          $self->{set_nc}->($self);
6048        }
6049      
6050            redo A;
6051          } elsif ($self->{nc} == 0x003E) { # >
6052            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6053            
6054        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6055          $self->{line_prev} = $self->{line};
6056          $self->{column_prev} = $self->{column};
6057          $self->{column}++;
6058          $self->{nc}
6059              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6060        } else {
6061          $self->{set_nc}->($self);
6062        }
6063      
6064            return  ($self->{ct}); # ATTLIST
6065            redo A;
6066          } elsif ($self->{nc} == -1) {
6067            ## XML5: No parse error.
6068            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6069            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6070            return  ($self->{ct});
6071            redo A;
6072          } else {
6073            ## XML5: Not defined yet.
6074            $self->{ca} = {name => chr ($self->{nc}), # attrdef
6075                           tokens => [],
6076                           line => $self->{line}, column => $self->{column}};
6077            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE;
6078            
6079        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6080          $self->{line_prev} = $self->{line};
6081          $self->{column_prev} = $self->{column};
6082          $self->{column}++;
6083          $self->{nc}
6084              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6085        } else {
6086          $self->{set_nc}->($self);
6087        }
6088      
6089            redo A;
6090          }
6091        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE) {
6092          if ($is_space->{$self->{nc}}) {
6093            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE;
6094            
6095        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6096          $self->{line_prev} = $self->{line};
6097          $self->{column_prev} = $self->{column};
6098          $self->{column}++;
6099          $self->{nc}
6100              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6101        } else {
6102          $self->{set_nc}->($self);
6103        }
6104      
6105            redo A;
6106          } elsif ($self->{nc} == 0x003E) { # >
6107            ## XML5: Same as "anything else".
6108            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr type'); ## TODO: type
6109            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6110            
6111        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6112          $self->{line_prev} = $self->{line};
6113          $self->{column_prev} = $self->{column};
6114          $self->{column}++;
6115          $self->{nc}
6116              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6117        } else {
6118          $self->{set_nc}->($self);
6119        }
6120      
6121            return  ($self->{ct}); # ATTLIST
6122            redo A;
6123          } elsif ($self->{nc} == 0x0028) { # (
6124            ## XML5: Same as "anything else".
6125            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before paren'); ## TODO: type
6126            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6127            
6128        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6129          $self->{line_prev} = $self->{line};
6130          $self->{column_prev} = $self->{column};
6131          $self->{column}++;
6132          $self->{nc}
6133              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6134        } else {
6135          $self->{set_nc}->($self);
6136        }
6137      
6138            redo A;
6139          } elsif ($self->{nc} == -1) {
6140            ## XML5: No parse error.
6141            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6142            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6143            
6144        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6145          $self->{line_prev} = $self->{line};
6146          $self->{column_prev} = $self->{column};
6147          $self->{column}++;
6148          $self->{nc}
6149              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6150        } else {
6151          $self->{set_nc}->($self);
6152        }
6153      
6154            return  ($self->{ct}); # ATTLIST
6155            redo A;
6156          } else {
6157            ## XML5: Not defined yet.
6158            $self->{ca}->{name} .= chr $self->{nc};
6159            ## Stay in the state.
6160            
6161        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6162          $self->{line_prev} = $self->{line};
6163          $self->{column_prev} = $self->{column};
6164          $self->{column}++;
6165          $self->{nc}
6166              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6167        } else {
6168          $self->{set_nc}->($self);
6169        }
6170      
6171            redo A;
6172          }
6173        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE) {
6174          if ($is_space->{$self->{nc}}) {
6175            ## Stay in the state.
6176            
6177        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6178          $self->{line_prev} = $self->{line};
6179          $self->{column_prev} = $self->{column};
6180          $self->{column}++;
6181          $self->{nc}
6182              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6183        } else {
6184          $self->{set_nc}->($self);
6185        }
6186      
6187            redo A;
6188          } elsif ($self->{nc} == 0x003E) { # >
6189            ## XML5: Same as "anything else".
6190            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr type'); ## TODO: type
6191            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6192            
6193        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6194          $self->{line_prev} = $self->{line};
6195          $self->{column_prev} = $self->{column};
6196          $self->{column}++;
6197          $self->{nc}
6198              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6199        } else {
6200          $self->{set_nc}->($self);
6201        }
6202      
6203            return  ($self->{ct}); # ATTLIST
6204            redo A;
6205          } elsif ($self->{nc} == 0x0028) { # (
6206            ## XML5: Same as "anything else".
6207            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6208            
6209        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6210          $self->{line_prev} = $self->{line};
6211          $self->{column_prev} = $self->{column};
6212          $self->{column}++;
6213          $self->{nc}
6214              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6215        } else {
6216          $self->{set_nc}->($self);
6217        }
6218      
6219            redo A;
6220          } elsif ($self->{nc} == -1) {
6221            ## XML5: No parse error.
6222            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6223            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6224            
6225        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6226          $self->{line_prev} = $self->{line};
6227          $self->{column_prev} = $self->{column};
6228          $self->{column}++;
6229          $self->{nc}
6230              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6231        } else {
6232          $self->{set_nc}->($self);
6233        }
6234      
6235            return  ($self->{ct});
6236            redo A;
6237          } else {
6238            ## XML5: Not defined yet.
6239            $self->{ca}->{type} = chr $self->{nc};
6240            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE;
6241            
6242        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6243          $self->{line_prev} = $self->{line};
6244          $self->{column_prev} = $self->{column};
6245          $self->{column}++;
6246          $self->{nc}
6247              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6248        } else {
6249          $self->{set_nc}->($self);
6250        }
6251      
6252            redo A;
6253          }
6254        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE) {
6255          if ($is_space->{$self->{nc}}) {
6256            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE;
6257            
6258        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6259          $self->{line_prev} = $self->{line};
6260          $self->{column_prev} = $self->{column};
6261          $self->{column}++;
6262          $self->{nc}
6263              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6264        } else {
6265          $self->{set_nc}->($self);
6266        }
6267      
6268            redo A;
6269          } elsif ($self->{nc} == 0x0023) { # #
6270            ## XML5: Same as "anything else".
6271            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6272            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6273            
6274        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6275          $self->{line_prev} = $self->{line};
6276          $self->{column_prev} = $self->{column};
6277          $self->{column}++;
6278          $self->{nc}
6279              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6280        } else {
6281          $self->{set_nc}->($self);
6282        }
6283      
6284            redo A;
6285          } elsif ($self->{nc} == 0x0022) { # "
6286            ## XML5: Same as "anything else".
6287            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6288            $self->{ca}->{value} = '';
6289            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6290            
6291        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6292          $self->{line_prev} = $self->{line};
6293          $self->{column_prev} = $self->{column};
6294          $self->{column}++;
6295          $self->{nc}
6296              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6297        } else {
6298          $self->{set_nc}->($self);
6299        }
6300      
6301            redo A;
6302          } elsif ($self->{nc} == 0x0027) { # '
6303            ## XML5: Same as "anything else".
6304            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6305            $self->{ca}->{value} = '';
6306            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6307            
6308        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6309          $self->{line_prev} = $self->{line};
6310          $self->{column_prev} = $self->{column};
6311          $self->{column}++;
6312          $self->{nc}
6313              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6314        } else {
6315          $self->{set_nc}->($self);
6316        }
6317      
6318            redo A;
6319          } elsif ($self->{nc} == 0x003E) { # >
6320            ## XML5: Same as "anything else".
6321            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6322            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6323            
6324        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6325          $self->{line_prev} = $self->{line};
6326          $self->{column_prev} = $self->{column};
6327          $self->{column}++;
6328          $self->{nc}
6329              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6330        } else {
6331          $self->{set_nc}->($self);
6332        }
6333      
6334            return  ($self->{ct}); # ATTLIST
6335            redo A;
6336          } elsif ($self->{nc} == 0x0028) { # (
6337            ## XML5: Same as "anything else".
6338            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before paren'); ## TODO: type
6339            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6340            
6341        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6342          $self->{line_prev} = $self->{line};
6343          $self->{column_prev} = $self->{column};
6344          $self->{column}++;
6345          $self->{nc}
6346              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6347        } else {
6348          $self->{set_nc}->($self);
6349        }
6350      
6351            redo A;
6352          } elsif ($self->{nc} == -1) {
6353            ## XML5: No parse error.
6354            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6355            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6356            
6357        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6358          $self->{line_prev} = $self->{line};
6359          $self->{column_prev} = $self->{column};
6360          $self->{column}++;
6361          $self->{nc}
6362              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6363        } else {
6364          $self->{set_nc}->($self);
6365        }
6366      
6367            return  ($self->{ct});
6368            redo A;
6369          } else {
6370            ## XML5: Not defined yet.
6371            $self->{ca}->{type} .= chr $self->{nc};
6372            ## Stay in the state.
6373            
6374        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6375          $self->{line_prev} = $self->{line};
6376          $self->{column_prev} = $self->{column};
6377          $self->{column}++;
6378          $self->{nc}
6379              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6380        } else {
6381          $self->{set_nc}->($self);
6382        }
6383      
6384            redo A;
6385          }
6386        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE) {
6387          if ($is_space->{$self->{nc}}) {
6388            ## Stay in the state.
6389            
6390        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6391          $self->{line_prev} = $self->{line};
6392          $self->{column_prev} = $self->{column};
6393          $self->{column}++;
6394          $self->{nc}
6395              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6396        } else {
6397          $self->{set_nc}->($self);
6398        }
6399      
6400            redo A;
6401          } elsif ($self->{nc} == 0x0028) { # (
6402            ## XML5: Same as "anything else".
6403            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6404            
6405        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6406          $self->{line_prev} = $self->{line};
6407          $self->{column_prev} = $self->{column};
6408          $self->{column}++;
6409          $self->{nc}
6410              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6411        } else {
6412          $self->{set_nc}->($self);
6413        }
6414      
6415            redo A;
6416          } elsif ($self->{nc} == 0x0023) { # #
6417            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6418            
6419        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6420          $self->{line_prev} = $self->{line};
6421          $self->{column_prev} = $self->{column};
6422          $self->{column}++;
6423          $self->{nc}
6424              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6425        } else {
6426          $self->{set_nc}->($self);
6427        }
6428      
6429            redo A;
6430          } elsif ($self->{nc} == 0x0022) { # "
6431            ## XML5: Same as "anything else".
6432            $self->{ca}->{value} = '';
6433            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6434            
6435        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6436          $self->{line_prev} = $self->{line};
6437          $self->{column_prev} = $self->{column};
6438          $self->{column}++;
6439          $self->{nc}
6440              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6441        } else {
6442          $self->{set_nc}->($self);
6443        }
6444      
6445            redo A;
6446          } elsif ($self->{nc} == 0x0027) { # '
6447            ## XML5: Same as "anything else".
6448            $self->{ca}->{value} = '';
6449            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6450            
6451        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6452          $self->{line_prev} = $self->{line};
6453          $self->{column_prev} = $self->{column};
6454          $self->{column}++;
6455          $self->{nc}
6456              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6457        } else {
6458          $self->{set_nc}->($self);
6459        }
6460      
6461            redo A;
6462          } elsif ($self->{nc} == 0x003E) { # >
6463            ## XML5: Same as "anything else".
6464            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6465            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6466            
6467        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6468          $self->{line_prev} = $self->{line};
6469          $self->{column_prev} = $self->{column};
6470          $self->{column}++;
6471          $self->{nc}
6472              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6473        } else {
6474          $self->{set_nc}->($self);
6475        }
6476      
6477            return  ($self->{ct}); # ATTLIST
6478            redo A;
6479          } elsif ($self->{nc} == -1) {
6480            ## XML5: No parse error.
6481            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6482            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6483            
6484        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6485          $self->{line_prev} = $self->{line};
6486          $self->{column_prev} = $self->{column};
6487          $self->{column}++;
6488          $self->{nc}
6489              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6490        } else {
6491          $self->{set_nc}->($self);
6492        }
6493      
6494            return  ($self->{ct});
6495            redo A;
6496          } else {
6497            ## XML5: Switch to the "DOCTYPE bogus comment state".
6498            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
6499            $self->{ca}->{value} = '';
6500            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
6501            ## Reconsume.
6502            redo A;
6503          }
6504        } elsif ($self->{state} == BEFORE_ALLOWED_TOKEN_STATE) {
6505          if ($is_space->{$self->{nc}}) {
6506            ## Stay in the state.
6507            
6508        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6509          $self->{line_prev} = $self->{line};
6510          $self->{column_prev} = $self->{column};
6511          $self->{column}++;
6512          $self->{nc}
6513              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6514        } else {
6515          $self->{set_nc}->($self);
6516        }
6517      
6518            redo A;
6519          } elsif ($self->{nc} == 0x007C) { # |
6520            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty allowed token'); ## TODO: type
6521            ## Stay in the state.
6522            
6523        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6524          $self->{line_prev} = $self->{line};
6525          $self->{column_prev} = $self->{column};
6526          $self->{column}++;
6527          $self->{nc}
6528              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6529        } else {
6530          $self->{set_nc}->($self);
6531        }
6532      
6533            redo A;
6534          } elsif ($self->{nc} == 0x0029) { # )
6535            $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty allowed token'); ## TODO: type
6536            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
6537            
6538        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6539          $self->{line_prev} = $self->{line};
6540          $self->{column_prev} = $self->{column};
6541          $self->{column}++;
6542          $self->{nc}
6543              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6544        } else {
6545          $self->{set_nc}->($self);
6546        }
6547      
6548            redo A;
6549          } elsif ($self->{nc} == 0x003E) { # >
6550            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
6551            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6552            
6553        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6554          $self->{line_prev} = $self->{line};
6555          $self->{column_prev} = $self->{column};
6556          $self->{column}++;
6557          $self->{nc}
6558              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6559        } else {
6560          $self->{set_nc}->($self);
6561        }
6562      
6563            return  ($self->{ct}); # ATTLIST
6564            redo A;
6565          } elsif ($self->{nc} == -1) {
6566            ## XML5: No parse error.
6567            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6568            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6569            
6570        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6571          $self->{line_prev} = $self->{line};
6572          $self->{column_prev} = $self->{column};
6573          $self->{column}++;
6574          $self->{nc}
6575              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6576        } else {
6577          $self->{set_nc}->($self);
6578        }
6579      
6580            return  ($self->{ct});
6581            redo A;
6582          } else {
6583            push @{$self->{ca}->{tokens}}, chr $self->{nc};
6584            $self->{state} = ALLOWED_TOKEN_STATE;
6585            
6586        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6587          $self->{line_prev} = $self->{line};
6588          $self->{column_prev} = $self->{column};
6589          $self->{column}++;
6590          $self->{nc}
6591              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6592        } else {
6593          $self->{set_nc}->($self);
6594        }
6595      
6596            redo A;
6597          }
6598        } elsif ($self->{state} == ALLOWED_TOKEN_STATE) {
6599          if ($is_space->{$self->{nc}}) {
6600            $self->{state} = AFTER_ALLOWED_TOKEN_STATE;
6601            
6602        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6603          $self->{line_prev} = $self->{line};
6604          $self->{column_prev} = $self->{column};
6605          $self->{column}++;
6606          $self->{nc}
6607              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6608        } else {
6609          $self->{set_nc}->($self);
6610        }
6611      
6612            redo A;
6613          } elsif ($self->{nc} == 0x007C) { # |
6614            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6615            
6616        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6617          $self->{line_prev} = $self->{line};
6618          $self->{column_prev} = $self->{column};
6619          $self->{column}++;
6620          $self->{nc}
6621              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6622        } else {
6623          $self->{set_nc}->($self);
6624        }
6625      
6626            redo A;
6627          } elsif ($self->{nc} == 0x0029) { # )
6628            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
6629            
6630        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6631          $self->{line_prev} = $self->{line};
6632          $self->{column_prev} = $self->{column};
6633          $self->{column}++;
6634          $self->{nc}
6635              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6636        } else {
6637          $self->{set_nc}->($self);
6638        }
6639      
6640            redo A;
6641          } elsif ($self->{nc} == 0x003E) { # >
6642            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
6643            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6644            
6645        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6646          $self->{line_prev} = $self->{line};
6647          $self->{column_prev} = $self->{column};
6648          $self->{column}++;
6649          $self->{nc}
6650              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6651        } else {
6652          $self->{set_nc}->($self);
6653        }
6654      
6655            return  ($self->{ct}); # ATTLIST
6656            redo A;
6657          } elsif ($self->{nc} == -1) {
6658            ## XML5: No parse error.
6659            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6660            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6661            
6662        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6663          $self->{line_prev} = $self->{line};
6664          $self->{column_prev} = $self->{column};
6665          $self->{column}++;
6666          $self->{nc}
6667              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6668        } else {
6669          $self->{set_nc}->($self);
6670        }
6671      
6672            return  ($self->{ct});
6673            redo A;
6674          } else {
6675            $self->{ca}->{tokens}->[-1] .= chr $self->{nc};
6676            ## Stay in the state.
6677            
6678        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6679          $self->{line_prev} = $self->{line};
6680          $self->{column_prev} = $self->{column};
6681          $self->{column}++;
6682          $self->{nc}
6683              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6684        } else {
6685          $self->{set_nc}->($self);
6686        }
6687      
6688            redo A;
6689          }
6690        } elsif ($self->{state} == AFTER_ALLOWED_TOKEN_STATE) {
6691          if ($is_space->{$self->{nc}}) {
6692            ## Stay in the state.
6693            
6694        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6695          $self->{line_prev} = $self->{line};
6696          $self->{column_prev} = $self->{column};
6697          $self->{column}++;
6698          $self->{nc}
6699              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6700        } else {
6701          $self->{set_nc}->($self);
6702        }
6703      
6704            redo A;
6705          } elsif ($self->{nc} == 0x007C) { # |
6706            $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
6707            
6708        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6709          $self->{line_prev} = $self->{line};
6710          $self->{column_prev} = $self->{column};
6711          $self->{column}++;
6712          $self->{nc}
6713              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6714        } else {
6715          $self->{set_nc}->($self);
6716        }
6717      
6718            redo A;
6719          } elsif ($self->{nc} == 0x0029) { # )
6720            $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
6721            
6722        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6723          $self->{line_prev} = $self->{line};
6724          $self->{column_prev} = $self->{column};
6725          $self->{column}++;
6726          $self->{nc}
6727              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6728        } else {
6729          $self->{set_nc}->($self);
6730        }
6731      
6732            redo A;
6733          } elsif ($self->{nc} == 0x003E) { # >
6734            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed allowed tokens'); ## TODO: type
6735            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6736            
6737        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6738          $self->{line_prev} = $self->{line};
6739          $self->{column_prev} = $self->{column};
6740          $self->{column}++;
6741          $self->{nc}
6742              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6743        } else {
6744          $self->{set_nc}->($self);
6745        }
6746      
6747            return  ($self->{ct}); # ATTLIST
6748            redo A;
6749          } elsif ($self->{nc} == -1) {
6750            ## XML5: No parse error.
6751            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6752            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
6753            
6754        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6755          $self->{line_prev} = $self->{line};
6756          $self->{column_prev} = $self->{column};
6757          $self->{column}++;
6758          $self->{nc}
6759              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6760        } else {
6761          $self->{set_nc}->($self);
6762        }
6763      
6764            return  ($self->{ct});
6765            redo A;
6766          } else {
6767            $self->{parse_error}->(level => $self->{level}->{must}, type => 'space in allowed token', ## TODO: type
6768                            line => $self->{line_prev},
6769                            column => $self->{column_prev});
6770            $self->{ca}->{tokens}->[-1] .= ' ' . chr $self->{nc};
6771            $self->{state} = ALLOWED_TOKEN_STATE;
6772            
6773        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6774          $self->{line_prev} = $self->{line};
6775          $self->{column_prev} = $self->{column};
6776          $self->{column}++;
6777          $self->{nc}
6778              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6779        } else {
6780          $self->{set_nc}->($self);
6781        }
6782      
6783            redo A;
6784          }
6785        } elsif ($self->{state} == AFTER_ALLOWED_TOKENS_STATE) {
6786          if ($is_space->{$self->{nc}}) {
6787            $self->{state} = BEFORE_ATTR_DEFAULT_STATE;
6788            
6789        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6790          $self->{line_prev} = $self->{line};
6791          $self->{column_prev} = $self->{column};
6792          $self->{column}++;
6793          $self->{nc}
6794              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6795        } else {
6796          $self->{set_nc}->($self);
6797        }
6798      
6799            redo A;
6800          } elsif ($self->{nc} == 0x0023) { # #
6801            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6802            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6803            
6804        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6805          $self->{line_prev} = $self->{line};
6806          $self->{column_prev} = $self->{column};
6807          $self->{column}++;
6808          $self->{nc}
6809              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6810        } else {
6811          $self->{set_nc}->($self);
6812        }
6813      
6814            redo A;
6815          } elsif ($self->{nc} == 0x0022) { # "
6816            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6817            $self->{ca}->{value} = '';
6818            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6819            
6820        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6821          $self->{line_prev} = $self->{line};
6822          $self->{column_prev} = $self->{column};
6823          $self->{column}++;
6824          $self->{nc}
6825              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6826        } else {
6827          $self->{set_nc}->($self);
6828        }
6829      
6830            redo A;
6831          } elsif ($self->{nc} == 0x0027) { # '
6832            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
6833            $self->{ca}->{value} = '';
6834            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6835            
6836        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6837          $self->{line_prev} = $self->{line};
6838          $self->{column_prev} = $self->{column};
6839          $self->{column}++;
6840          $self->{nc}
6841              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6842        } else {
6843          $self->{set_nc}->($self);
6844        }
6845      
6846            redo A;
6847          } elsif ($self->{nc} == 0x003E) { # >
6848            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6849            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6850            
6851        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6852          $self->{line_prev} = $self->{line};
6853          $self->{column_prev} = $self->{column};
6854          $self->{column}++;
6855          $self->{nc}
6856              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6857        } else {
6858          $self->{set_nc}->($self);
6859        }
6860      
6861            return  ($self->{ct}); # ATTLIST
6862            redo A;
6863          } elsif ($self->{nc} == -1) {
6864            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6865            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6866            
6867        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6868          $self->{line_prev} = $self->{line};
6869          $self->{column_prev} = $self->{column};
6870          $self->{column}++;
6871          $self->{nc}
6872              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6873        } else {
6874          $self->{set_nc}->($self);
6875        }
6876      
6877            return  ($self->{ct});
6878            redo A;
6879          } else {
6880            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
6881            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
6882            ## Reconsume.
6883            redo A;
6884          }
6885        } elsif ($self->{state} == BEFORE_ATTR_DEFAULT_STATE) {
6886          if ($is_space->{$self->{nc}}) {
6887            ## Stay in the state.
6888            
6889        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6890          $self->{line_prev} = $self->{line};
6891          $self->{column_prev} = $self->{column};
6892          $self->{column}++;
6893          $self->{nc}
6894              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6895        } else {
6896          $self->{set_nc}->($self);
6897        }
6898      
6899            redo A;
6900          } elsif ($self->{nc} == 0x0023) { # #
6901            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
6902            
6903        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6904          $self->{line_prev} = $self->{line};
6905          $self->{column_prev} = $self->{column};
6906          $self->{column}++;
6907          $self->{nc}
6908              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6909        } else {
6910          $self->{set_nc}->($self);
6911        }
6912      
6913            redo A;
6914          } elsif ($self->{nc} == 0x0022) { # "
6915            $self->{ca}->{value} = '';
6916            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6917            
6918        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6919          $self->{line_prev} = $self->{line};
6920          $self->{column_prev} = $self->{column};
6921          $self->{column}++;
6922          $self->{nc}
6923              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6924        } else {
6925          $self->{set_nc}->($self);
6926        }
6927      
6928            redo A;
6929          } elsif ($self->{nc} == 0x0027) { # '
6930            $self->{ca}->{value} = '';
6931            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
6932            
6933        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6934          $self->{line_prev} = $self->{line};
6935          $self->{column_prev} = $self->{column};
6936          $self->{column}++;
6937          $self->{nc}
6938              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6939        } else {
6940          $self->{set_nc}->($self);
6941        }
6942      
6943            redo A;
6944          } elsif ($self->{nc} == 0x003E) { # >
6945            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
6946            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6947            
6948        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6949          $self->{line_prev} = $self->{line};
6950          $self->{column_prev} = $self->{column};
6951          $self->{column}++;
6952          $self->{nc}
6953              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6954        } else {
6955          $self->{set_nc}->($self);
6956        }
6957      
6958            return  ($self->{ct}); # ATTLIST
6959            redo A;
6960          } elsif ($self->{nc} == -1) {
6961            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
6962            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
6963            
6964        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6965          $self->{line_prev} = $self->{line};
6966          $self->{column_prev} = $self->{column};
6967          $self->{column}++;
6968          $self->{nc}
6969              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
6970        } else {
6971          $self->{set_nc}->($self);
6972        }
6973      
6974            return  ($self->{ct});
6975            redo A;
6976          } else {
6977            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO: type
6978            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
6979            ## Reconsume.
6980            redo A;
6981          }
6982        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE) {
6983          if ($is_space->{$self->{nc}}) {
6984            ## XML5: No parse error.
6985            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no default type'); ## TODO: type
6986            $self->{state} = BOGUS_MD_STATE;
6987            ## Reconsume.
6988            redo A;
6989          } elsif ($self->{nc} == 0x0022) { # "
6990            ## XML5: Same as "anything else".
6991            $self->{ca}->{value} = '';
6992            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
6993            
6994        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
6995          $self->{line_prev} = $self->{line};
6996          $self->{column_prev} = $self->{column};
6997          $self->{column}++;
6998          $self->{nc}
6999              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7000        } else {
7001          $self->{set_nc}->($self);
7002        }
7003      
7004            redo A;
7005          } elsif ($self->{nc} == 0x0027) { # '
7006            ## XML5: Same as "anything else".
7007            $self->{ca}->{value} = '';
7008            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7009            
7010        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7011          $self->{line_prev} = $self->{line};
7012          $self->{column_prev} = $self->{column};
7013          $self->{column}++;
7014          $self->{nc}
7015              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7016        } else {
7017          $self->{set_nc}->($self);
7018        }
7019      
7020            redo A;
7021          } elsif ($self->{nc} == 0x003E) { # >
7022            ## XML5: Same as "anything else".
7023            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr default'); ## TODO: type
7024            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7025            
7026        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7027          $self->{line_prev} = $self->{line};
7028          $self->{column_prev} = $self->{column};
7029          $self->{column}++;
7030          $self->{nc}
7031              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7032        } else {
7033          $self->{set_nc}->($self);
7034        }
7035      
7036            return  ($self->{ct}); # ATTLIST
7037            redo A;
7038          } elsif ($self->{nc} == -1) {
7039            ## XML5: No parse error.
7040            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7041            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7042            
7043        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7044          $self->{line_prev} = $self->{line};
7045          $self->{column_prev} = $self->{column};
7046          $self->{column}++;
7047          $self->{nc}
7048              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7049        } else {
7050          $self->{set_nc}->($self);
7051        }
7052      
7053            return  ($self->{ct});
7054            redo A;
7055          } else {
7056            $self->{ca}->{default} = chr $self->{nc};
7057            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE;
7058            
7059        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7060          $self->{line_prev} = $self->{line};
7061          $self->{column_prev} = $self->{column};
7062          $self->{column}++;
7063          $self->{nc}
7064              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7065        } else {
7066          $self->{set_nc}->($self);
7067        }
7068      
7069            redo A;
7070          }
7071        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE) {
7072          if ($is_space->{$self->{nc}}) {
7073            $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE;
7074            
7075        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7076          $self->{line_prev} = $self->{line};
7077          $self->{column_prev} = $self->{column};
7078          $self->{column}++;
7079          $self->{nc}
7080              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7081        } else {
7082          $self->{set_nc}->($self);
7083        }
7084      
7085            redo A;
7086          } elsif ($self->{nc} == 0x0022) { # "
7087            ## XML5: Same as "anything else".
7088            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7089            $self->{ca}->{value} = '';
7090            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7091            
7092        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7093          $self->{line_prev} = $self->{line};
7094          $self->{column_prev} = $self->{column};
7095          $self->{column}++;
7096          $self->{nc}
7097              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7098        } else {
7099          $self->{set_nc}->($self);
7100        }
7101      
7102            redo A;
7103          } elsif ($self->{nc} == 0x0027) { # '
7104            ## XML5: Same as "anything else".
7105            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before default value'); ## TODO: type
7106            $self->{ca}->{value} = '';
7107            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7108            
7109        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7110          $self->{line_prev} = $self->{line};
7111          $self->{column_prev} = $self->{column};
7112          $self->{column}++;
7113          $self->{nc}
7114              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7115        } else {
7116          $self->{set_nc}->($self);
7117        }
7118      
7119            redo A;
7120          } elsif ($self->{nc} == 0x003E) { # >
7121            ## XML5: Same as "anything else".
7122            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7123            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7124            
7125        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7126          $self->{line_prev} = $self->{line};
7127          $self->{column_prev} = $self->{column};
7128          $self->{column}++;
7129          $self->{nc}
7130              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7131        } else {
7132          $self->{set_nc}->($self);
7133        }
7134      
7135            return  ($self->{ct}); # ATTLIST
7136            redo A;
7137          } elsif ($self->{nc} == -1) {
7138            ## XML5: No parse error.
7139            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7140            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7141            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7142            
7143        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7144          $self->{line_prev} = $self->{line};
7145          $self->{column_prev} = $self->{column};
7146          $self->{column}++;
7147          $self->{nc}
7148              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7149        } else {
7150          $self->{set_nc}->($self);
7151        }
7152      
7153            return  ($self->{ct});
7154            redo A;
7155          } else {
7156            $self->{ca}->{default} .= chr $self->{nc};
7157            ## Stay in the state.
7158            
7159        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7160          $self->{line_prev} = $self->{line};
7161          $self->{column_prev} = $self->{column};
7162          $self->{column}++;
7163          $self->{nc}
7164              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7165        } else {
7166          $self->{set_nc}->($self);
7167        }
7168      
7169            redo A;
7170          }
7171        } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE) {
7172          if ($is_space->{$self->{nc}}) {
7173            ## Stay in the state.
7174            
7175        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7176          $self->{line_prev} = $self->{line};
7177          $self->{column_prev} = $self->{column};
7178          $self->{column}++;
7179          $self->{nc}
7180              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7181        } else {
7182          $self->{set_nc}->($self);
7183        }
7184      
7185            redo A;
7186          } elsif ($self->{nc} == 0x0022) { # "
7187            $self->{ca}->{value} = '';
7188            $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
7189            
7190        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7191          $self->{line_prev} = $self->{line};
7192          $self->{column_prev} = $self->{column};
7193          $self->{column}++;
7194          $self->{nc}
7195              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7196        } else {
7197          $self->{set_nc}->($self);
7198        }
7199      
7200            redo A;
7201          } elsif ($self->{nc} == 0x0027) { # '
7202            $self->{ca}->{value} = '';
7203            $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
7204            
7205        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7206          $self->{line_prev} = $self->{line};
7207          $self->{column_prev} = $self->{column};
7208          $self->{column}++;
7209          $self->{nc}
7210              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7211        } else {
7212          $self->{set_nc}->($self);
7213        }
7214      
7215            redo A;
7216          } elsif ($self->{nc} == 0x003E) { # >
7217            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7218            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7219            
7220        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7221          $self->{line_prev} = $self->{line};
7222          $self->{column_prev} = $self->{column};
7223          $self->{column}++;
7224          $self->{nc}
7225              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7226        } else {
7227          $self->{set_nc}->($self);
7228        }
7229      
7230            return  ($self->{ct}); # ATTLIST
7231            redo A;
7232          } elsif ($self->{nc} == -1) {
7233            ## XML5: No parse error.
7234            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
7235            push @{$self->{ct}->{attrdefs}}, $self->{ca};
7236            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
7237            
7238        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7239          $self->{line_prev} = $self->{line};
7240          $self->{column_prev} = $self->{column};
7241          $self->{column}++;
7242          $self->{nc}
7243              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7244        } else {
7245          $self->{set_nc}->($self);
7246        }
7247      
7248            return  ($self->{ct});
7249            redo A;
7250          } else {
7251            ## XML5: Not defined yet.
7252            if ($self->{ca}->{default} eq 'FIXED') {
7253              $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
7254            } else {
7255              push @{$self->{ct}->{attrdefs}}, $self->{ca};
7256              $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7257            }
7258            ## Reconsume.
7259            redo A;
7260          }
7261        } elsif ($self->{state} == AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE) {
7262          if ($is_space->{$self->{nc}} or
7263              $self->{nc} == -1 or
7264              $self->{nc} == 0x003E) { # >
7265            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7266            ## Reconsume.
7267            redo A;
7268          } else {
7269            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before attr name'); ## TODO: type
7270            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
7271            ## Reconsume.
7272            redo A;
7273          }
7274    
7275        } elsif ($self->{state} == BOGUS_MD_STATE) {
7276          if ($self->{nc} == 0x003E) { # >
7277            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7278            
7279        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7280          $self->{line_prev} = $self->{line};
7281          $self->{column_prev} = $self->{column};
7282          $self->{column}++;
7283          $self->{nc}
7284              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7285        } else {
7286          $self->{set_nc}->($self);
7287        }
7288      
7289            return  ($self->{ct}); # ATTLIST/ENTITY/NOTATION
7290            redo A;
7291          } elsif ($self->{nc} == -1) {
7292            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
7293            ## Reconsume.
7294            return  ($self->{ct}); # ATTLIST/ENTITY/NOTATION
7295            redo A;
7296          } else {
7297            ## Stay in the state.
7298            
7299        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
7300          $self->{line_prev} = $self->{line};
7301          $self->{column_prev} = $self->{column};
7302          $self->{column}++;
7303          $self->{nc}
7304              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
7305        } else {
7306          $self->{set_nc}->($self);
7307        }
7308      
7309            redo A;
7310          }
7311      } else {      } else {
7312        die "$0: $self->{state}: Unknown state";        die "$0: $self->{state}: Unknown state";
7313      }      }
# Line 4558  sub _get_next_token ($) { Line 7318  sub _get_next_token ($) {
7318    
7319  1;  1;
7320  ## $Date$  ## $Date$
7321                                    

Legend:
Removed from v.1.9  
changed lines
  Added in v.1.16

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24