/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.5 by wakaba, Tue Oct 14 14:38:59 2008 UTC revision 1.14 by wakaba, Fri Oct 17 07:14:29 2008 UTC
# Line 15  BEGIN { Line 15  BEGIN {
15      CHARACTER_TOKEN      CHARACTER_TOKEN
16      PI_TOKEN      PI_TOKEN
17      ABORT_TOKEN      ABORT_TOKEN
18        END_OF_DOCTYPE_TOKEN
19        ATTLIST_TOKEN
20        ELEMENT_TOKEN
21        GENERAL_ENTITY_TOKEN
22        PARAMETER_ENTITY_TOKEN
23        NOTATION_TOKEN
24    );    );
25        
26    our %EXPORT_TAGS = (    our %EXPORT_TAGS = (
# Line 27  BEGIN { Line 33  BEGIN {
33        CHARACTER_TOKEN        CHARACTER_TOKEN
34        PI_TOKEN        PI_TOKEN
35        ABORT_TOKEN        ABORT_TOKEN
36          END_OF_DOCTYPE_TOKEN
37          ATTLIST_TOKEN
38          ELEMENT_TOKEN
39          GENERAL_ENTITY_TOKEN
40          PARAMETER_ENTITY_TOKEN
41          NOTATION_TOKEN
42      )],      )],
43    );    );
44  }  }
45    
46    ## NOTE: Differences from the XML5 draft are marked as "XML5:".
47    
48  ## Token types  ## Token types
49    
50  sub DOCTYPE_TOKEN () { 1 }  sub DOCTYPE_TOKEN () { 1 } ## XML5: No DOCTYPE token.
51  sub COMMENT_TOKEN () { 2 }  sub COMMENT_TOKEN () { 2 }
52  sub START_TAG_TOKEN () { 3 }  sub START_TAG_TOKEN () { 3 }
53  sub END_TAG_TOKEN () { 4 }  sub END_TAG_TOKEN () { 4 }
54  sub END_OF_FILE_TOKEN () { 5 }  sub END_OF_FILE_TOKEN () { 5 }
55  sub CHARACTER_TOKEN () { 6 }  sub CHARACTER_TOKEN () { 6 }
56  sub PI_TOKEN () { 7 } # XML5  sub PI_TOKEN () { 7 } ## NOTE: XML only.
57  sub ABORT_TOKEN () { 8 } # Not a token actually  sub ABORT_TOKEN () { 8 } ## NOTE: For internal processing.
58    sub END_OF_DOCTYPE_TOKEN () { 9 } ## NOTE: XML only.
59    sub ATTLIST_TOKEN () { 10 } ## NOTE: XML only.
60    sub ELEMENT_TOKEN () { 11 } ## NOTE: XML only.
61    sub GENERAL_ENTITY_TOKEN () { 12 } ## NOTE: XML only.
62    sub PARAMETER_ENTITY_TOKEN () { 13 } ## NOTE: XML only.
63    sub NOTATION_TOKEN () { 14 } ## NOTE: XML only.
64    
65    ## XML5: XML5 has "empty tag token".  In this implementation, it is
66    ## represented as a start tag token with $self->{self_closing} flag
67    ## set to true.
68    
69    ## XML5: XML5 has "short end tag token".  In this implementation, it
70    ## is represented as an end tag token with $token->{tag_name} set
71    ## to an empty string.
72    
73  package Whatpm::HTML;  package Whatpm::HTML;
74    
# Line 114  sub HEXREF_HEX_STATE () { 48 } Line 142  sub HEXREF_HEX_STATE () { 48 }
142  sub ENTITY_NAME_STATE () { 49 }  sub ENTITY_NAME_STATE () { 49 }
143  sub PCDATA_STATE () { 50 } # "data state" in the spec  sub PCDATA_STATE () { 50 } # "data state" in the spec
144    
145    ## XML-only states
146    sub PI_STATE () { 51 }
147    sub PI_TARGET_STATE () { 52 }
148    sub PI_TARGET_AFTER_STATE () { 53 }
149    sub PI_DATA_STATE () { 54 }
150    sub PI_AFTER_STATE () { 55 }
151    sub PI_DATA_AFTER_STATE () { 56 }
152    sub DOCTYPE_INTERNAL_SUBSET_STATE () { 57 }
153    sub DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 58 }
154    sub BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 59 }
155    sub DOCTYPE_TAG_STATE () { 60 }
156    sub DOCTYPE_MARKUP_DECLARATION_OPEN_STATE () { 61 }
157    sub MD_ATTLIST_STATE () { 62 }
158    sub MD_E_STATE () { 63 }
159    sub MD_ELEMENT_STATE () { 64 }
160    sub MD_ENTITY_STATE () { 65 }
161    sub MD_NOTATION_STATE () { 66 }
162    sub DOCTYPE_MD_STATE () { 67 }
163    sub BEFORE_MD_NAME_STATE () { 68 }
164    sub MD_NAME_STATE () { 69 }
165    sub DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE () { 70 }
166    sub DOCTYPE_ATTLIST_NAME_AFTER_STATE () { 71 }
167    
168  ## Tree constructor state constants (see Whatpm::HTML for the full  ## Tree constructor state constants (see Whatpm::HTML for the full
169  ## list and descriptions)  ## list and descriptions)
170    
# Line 178  sub _initialize_tokenizer ($) { Line 229  sub _initialize_tokenizer ($) {
229    #$self->{is_xml} (if XML)    #$self->{is_xml} (if XML)
230    
231    $self->{state} = DATA_STATE; # MUST    $self->{state} = DATA_STATE; # MUST
232    $self->{s_kwd} = ''; # state keyword    $self->{s_kwd} = ''; # Data state keyword
233      #$self->{kwd} = ''; # State-dependent keyword; initialized when used
234    #$self->{entity__value}; # initialized when used    #$self->{entity__value}; # initialized when used
235    #$self->{entity__match}; # initialized when used    #$self->{entity__match}; # initialized when used
236    $self->{content_model} = PCDATA_CONTENT_MODEL; # be    $self->{content_model} = PCDATA_CONTENT_MODEL; # be
# Line 208  sub _initialize_tokenizer ($) { Line 260  sub _initialize_tokenizer ($) {
260    
261  ## A token has:  ## A token has:
262  ##   ->{type} == DOCTYPE_TOKEN, START_TAG_TOKEN, END_TAG_TOKEN, COMMENT_TOKEN,  ##   ->{type} == DOCTYPE_TOKEN, START_TAG_TOKEN, END_TAG_TOKEN, COMMENT_TOKEN,
263  ##       CHARACTER_TOKEN, or END_OF_FILE_TOKEN  ##       CHARACTER_TOKEN, END_OF_FILE_TOKEN, PI_TOKEN, or ABORT_TOKEN
264  ##   ->{name} (DOCTYPE_TOKEN)  ##   ->{name} (DOCTYPE_TOKEN)
265  ##   ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN)  ##   ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN)
266    ##   ->{target} (PI_TOKEN)
267  ##   ->{pubid} (DOCTYPE_TOKEN)  ##   ->{pubid} (DOCTYPE_TOKEN)
268  ##   ->{sysid} (DOCTYPE_TOKEN)  ##   ->{sysid} (DOCTYPE_TOKEN)
269  ##   ->{quirks} == 1 or 0 (DOCTYPE_TOKEN): "force-quirks" flag  ##   ->{quirks} == 1 or 0 (DOCTYPE_TOKEN): "force-quirks" flag
# Line 218  sub _initialize_tokenizer ($) { Line 271  sub _initialize_tokenizer ($) {
271  ##        ->{name}  ##        ->{name}
272  ##        ->{value}  ##        ->{value}
273  ##        ->{has_reference} == 1 or 0  ##        ->{has_reference} == 1 or 0
274  ##   ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN)  ##        ->{index}: Index of the attribute in a tag.
275    ##   ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN, PI_TOKEN)
276    ##   ->{has_reference} == 1 or 0 (CHARACTER_TOKEN)
277    ##   ->{last_index} (ELEMENT_TOKEN): Next attribute's index - 1.
278    ##   ->{has_internal_subset} == 1 or 0 (DOCTYPE_TOKEN)
279    
280  ## NOTE: The "self-closing flag" is held as |$self->{self_closing}|.  ## NOTE: The "self-closing flag" is held as |$self->{self_closing}|.
281  ##     |->{self_closing}| is used to save the value of |$self->{self_closing}|  ##     |->{self_closing}| is used to save the value of |$self->{self_closing}|
282  ##     while the token is pushed back to the stack.  ##     while the token is pushed back to the stack.
# Line 238  my $is_space = { Line 296  my $is_space = {
296    0x0009 => 1, # CHARACTER TABULATION (HT)    0x0009 => 1, # CHARACTER TABULATION (HT)
297    0x000A => 1, # LINE FEED (LF)    0x000A => 1, # LINE FEED (LF)
298    #0x000B => 0, # LINE TABULATION (VT)    #0x000B => 0, # LINE TABULATION (VT)
299    0x000C => 1, # FORM FEED (FF)    0x000C => 1, # FORM FEED (FF) ## XML5: Not a space character.
300    #0x000D => 1, # CARRIAGE RETURN (CR)    #0x000D => 1, # CARRIAGE RETURN (CR)
301    0x0020 => 1, # SPACE (SP)    0x0020 => 1, # SPACE (SP)
302  };  };
# Line 498  sub _get_next_token ($) { Line 556  sub _get_next_token ($) {
556        return  ($token);        return  ($token);
557        redo A;        redo A;
558      } elsif ($self->{state} == TAG_OPEN_STATE) {      } elsif ($self->{state} == TAG_OPEN_STATE) {
559          ## XML5: "tag state".
560    
561        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
562          if ($self->{nc} == 0x002F) { # /          if ($self->{nc} == 0x002F) { # /
563                        
# Line 516  sub _get_next_token ($) { Line 576  sub _get_next_token ($) {
576            redo A;            redo A;
577          } elsif ($self->{nc} == 0x0021) { # !          } elsif ($self->{nc} == 0x0021) { # !
578                        
579            $self->{s_kwd} = '<' unless $self->{escape};            $self->{s_kwd} = $self->{escaped} ? '' : '<';
580            #            #
581          } else {          } else {
582                        
583              $self->{s_kwd} = '';
584            #            #
585          }          }
586    
587          ## reconsume          ## reconsume
588          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
         $self->{s_kwd} = '';  
589          return  ({type => CHARACTER_TOKEN, data => '<',          return  ({type => CHARACTER_TOKEN, data => '<',
590                    line => $self->{line_prev},                    line => $self->{line_prev},
591                    column => $self->{column_prev},                    column => $self->{column_prev},
# Line 629  sub _get_next_token ($) { Line 689  sub _get_next_token ($) {
689    
690            redo A;            redo A;
691          } elsif ($self->{nc} == 0x003F) { # ?          } elsif ($self->{nc} == 0x003F) { # ?
692                        if ($self->{is_xml}) {
693            $self->{parse_error}->(level => $self->{level}->{must}, type => 'pio',              
694                            line => $self->{line_prev},              $self->{state} = PI_STATE;
695                            column => $self->{column_prev});              
696            $self->{state} = BOGUS_COMMENT_STATE;      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
697            $self->{ct} = {type => COMMENT_TOKEN, data => '',        $self->{line_prev} = $self->{line};
698                                      line => $self->{line_prev},        $self->{column_prev} = $self->{column};
699                                      column => $self->{column_prev},        $self->{column}++;
700                                     };        $self->{nc}
701            ## $self->{nc} is intentionally left as is            = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
702            redo A;      } else {
703          } else {        $self->{set_nc}->($self);
704        }
705      
706                redo A;
707              } else {
708                
709                $self->{parse_error}->(level => $self->{level}->{must}, type => 'pio',
710                                line => $self->{line_prev},
711                                column => $self->{column_prev});
712                $self->{state} = BOGUS_COMMENT_STATE;
713                $self->{ct} = {type => COMMENT_TOKEN, data => '',
714                               line => $self->{line_prev},
715                               column => $self->{column_prev},
716                              };
717                ## $self->{nc} is intentionally left as is
718                redo A;
719              }
720            } elsif (not $self->{is_xml} or $is_space->{$self->{nc}}) {
721                        
722            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago',            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago',
723                            line => $self->{line_prev},                            line => $self->{line_prev},
# Line 655  sub _get_next_token ($) { Line 732  sub _get_next_token ($) {
732                     });                     });
733    
734            redo A;            redo A;
735            } else {
736              ## XML5: "<:" is a parse error.
737              
738              $self->{ct} = {type => START_TAG_TOKEN,
739                                        tag_name => chr ($self->{nc}),
740                                        line => $self->{line_prev},
741                                        column => $self->{column_prev}};
742              $self->{state} = TAG_NAME_STATE;
743              
744        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
745          $self->{line_prev} = $self->{line};
746          $self->{column_prev} = $self->{column};
747          $self->{column}++;
748          $self->{nc}
749              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
750        } else {
751          $self->{set_nc}->($self);
752        }
753      
754              redo A;
755          }          }
756        } else {        } else {
757          die "$0: $self->{content_model} in tag open";          die "$0: $self->{content_model} in tag open";
# Line 663  sub _get_next_token ($) { Line 760  sub _get_next_token ($) {
760        ## NOTE: The "close tag open state" in the spec is implemented as        ## NOTE: The "close tag open state" in the spec is implemented as
761        ## |CLOSE_TAG_OPEN_STATE| and |CDATA_RCDATA_CLOSE_TAG_STATE|.        ## |CLOSE_TAG_OPEN_STATE| and |CDATA_RCDATA_CLOSE_TAG_STATE|.
762    
763          ## XML5: "end tag state".
764    
765        my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"        my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"
766        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
767          if (defined $self->{last_stag_name}) {          if (defined $self->{last_stag_name}) {
768            $self->{state} = CDATA_RCDATA_CLOSE_TAG_STATE;            $self->{state} = CDATA_RCDATA_CLOSE_TAG_STATE;
769            $self->{s_kwd} = '';            $self->{kwd} = '';
770            ## Reconsume.            ## Reconsume.
771            redo A;            redo A;
772          } else {          } else {
# Line 724  sub _get_next_token ($) { Line 823  sub _get_next_token ($) {
823        
824          redo A;          redo A;
825        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
826          $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty end tag',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty end tag',
827                          line => $self->{line_prev}, ## "<" in "</>"                          line => $self->{line_prev}, ## "<" in "</>"
828                          column => $self->{column_prev} - 1);                          column => $self->{column_prev} - 1);
829          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
830          $self->{s_kwd} = '';          $self->{s_kwd} = '';
831                    if ($self->{is_xml}) {
832              
833              ## XML5: No parse error.
834              
835              ## NOTE: This parser raises a parse error, since it supports
836              ## XML1, not XML5.
837    
838              ## NOTE: A short end tag token.
839              my $ct = {type => END_TAG_TOKEN,
840                        tag_name => '',
841                        line => $self->{line_prev},
842                        column => $self->{column_prev} - 1,
843                       };
844              
845      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
846        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
847        $self->{column_prev} = $self->{column};        $self->{column_prev} = $self->{column};
# Line 741  sub _get_next_token ($) { Line 852  sub _get_next_token ($) {
852        $self->{set_nc}->($self);        $self->{set_nc}->($self);
853      }      }
854        
855              return  ($ct);
856            } else {
857              
858              
859        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
860          $self->{line_prev} = $self->{line};
861          $self->{column_prev} = $self->{column};
862          $self->{column}++;
863          $self->{nc}
864              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
865        } else {
866          $self->{set_nc}->($self);
867        }
868      
869            }
870          redo A;          redo A;
871        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
872                    
# Line 754  sub _get_next_token ($) { Line 880  sub _get_next_token ($) {
880                   });                   });
881    
882          redo A;          redo A;
883        } else {        } elsif (not $self->{is_xml} or
884                   $is_space->{$self->{nc}}) {
885                    
886          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus end tag');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus end tag',
887                            line => $self->{line_prev}, # "<" of "</"
888                            column => $self->{column_prev} - 1);
889          $self->{state} = BOGUS_COMMENT_STATE;          $self->{state} = BOGUS_COMMENT_STATE;
890          $self->{ct} = {type => COMMENT_TOKEN, data => '',          $self->{ct} = {type => COMMENT_TOKEN, data => '',
891                                    line => $self->{line_prev}, # "<" of "</"                                    line => $self->{line_prev}, # "<" of "</"
# Line 769  sub _get_next_token ($) { Line 898  sub _get_next_token ($) {
898          ## generated from the bogus end tag, as defined in the          ## generated from the bogus end tag, as defined in the
899          ## "bogus comment state" entry.          ## "bogus comment state" entry.
900          redo A;          redo A;
901          } else {
902            ## XML5: "</:" is a parse error.
903            
904            $self->{ct} = {type => END_TAG_TOKEN,
905                           tag_name => chr ($self->{nc}),
906                           line => $l, column => $c};
907            $self->{state} = TAG_NAME_STATE; ## XML5: "end tag name state".
908            
909        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
910          $self->{line_prev} = $self->{line};
911          $self->{column_prev} = $self->{column};
912          $self->{column}++;
913          $self->{nc}
914              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
915        } else {
916          $self->{set_nc}->($self);
917        }
918      
919            redo A;
920        }        }
921      } elsif ($self->{state} == CDATA_RCDATA_CLOSE_TAG_STATE) {      } elsif ($self->{state} == CDATA_RCDATA_CLOSE_TAG_STATE) {
922        my $ch = substr $self->{last_stag_name}, length $self->{s_kwd}, 1;        my $ch = substr $self->{last_stag_name}, length $self->{kwd}, 1;
923        if (length $ch) {        if (length $ch) {
924          my $CH = $ch;          my $CH = $ch;
925          $ch =~ tr/a-z/A-Z/;          $ch =~ tr/a-z/A-Z/;
# Line 779  sub _get_next_token ($) { Line 927  sub _get_next_token ($) {
927          if ($nch eq $ch or $nch eq $CH) {          if ($nch eq $ch or $nch eq $CH) {
928                        
929            ## Stay in the state.            ## Stay in the state.
930            $self->{s_kwd} .= $nch;            $self->{kwd} .= $nch;
931                        
932      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
933        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 798  sub _get_next_token ($) { Line 946  sub _get_next_token ($) {
946            $self->{s_kwd} = '';            $self->{s_kwd} = '';
947            ## Reconsume.            ## Reconsume.
948            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
949                      data => '</' . $self->{s_kwd},                      data => '</' . $self->{kwd},
950                      line => $self->{line_prev},                      line => $self->{line_prev},
951                      column => $self->{column_prev} - 1 - length $self->{s_kwd},                      column => $self->{column_prev} - 1 - length $self->{kwd},
952                     });                     });
953            redo A;            redo A;
954          }          }
# Line 816  sub _get_next_token ($) { Line 964  sub _get_next_token ($) {
964            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
965            $self->{s_kwd} = '';            $self->{s_kwd} = '';
966            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
967                      data => '</' . $self->{s_kwd},                      data => '</' . $self->{kwd},
968                      line => $self->{line_prev},                      line => $self->{line_prev},
969                      column => $self->{column_prev} - 1 - length $self->{s_kwd},                      column => $self->{column_prev} - 1 - length $self->{kwd},
970                     });                     });
971            redo A;            redo A;
972          } else {          } else {
# Line 827  sub _get_next_token ($) { Line 975  sub _get_next_token ($) {
975                = {type => END_TAG_TOKEN,                = {type => END_TAG_TOKEN,
976                   tag_name => $self->{last_stag_name},                   tag_name => $self->{last_stag_name},
977                   line => $self->{line_prev},                   line => $self->{line_prev},
978                   column => $self->{column_prev} - 1 - length $self->{s_kwd}};                   column => $self->{column_prev} - 1 - length $self->{kwd}};
979            $self->{state} = TAG_NAME_STATE;            $self->{state} = TAG_NAME_STATE;
980            ## Reconsume.            ## Reconsume.
981            redo A;            redo A;
# Line 959  sub _get_next_token ($) { Line 1107  sub _get_next_token ($) {
1107          redo A;          redo A;
1108        }        }
1109      } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {      } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {
1110          ## XML5: "Tag attribute name before state".
1111    
1112        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
1113                    
1114          ## Stay in the state          ## Stay in the state
# Line 1071  sub _get_next_token ($) { Line 1221  sub _get_next_token ($) {
1221               0x003D => 1, # =               0x003D => 1, # =
1222              }->{$self->{nc}}) {              }->{$self->{nc}}) {
1223                        
1224              ## XML5: Not a parse error.
1225            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');
1226          } else {          } else {
1227                        
1228              ## XML5: ":" raises a parse error and is ignored.
1229          }          }
1230          $self->{ca}          $self->{ca}
1231              = {name => chr ($self->{nc}),              = {name => chr ($self->{nc}),
# Line 1094  sub _get_next_token ($) { Line 1246  sub _get_next_token ($) {
1246          redo A;          redo A;
1247        }        }
1248      } elsif ($self->{state} == ATTRIBUTE_NAME_STATE) {      } elsif ($self->{state} == ATTRIBUTE_NAME_STATE) {
1249          ## XML5: "Tag attribute name state".
1250    
1251        my $before_leave = sub {        my $before_leave = sub {
1252          if (exists $self->{ct}->{attributes} # start tag or end tag          if (exists $self->{ct}->{attributes} # start tag or end tag
1253              ->{$self->{ca}->{name}}) { # MUST              ->{$self->{ca}->{name}}) { # MUST
# Line 1104  sub _get_next_token ($) { Line 1258  sub _get_next_token ($) {
1258                        
1259            $self->{ct}->{attributes}->{$self->{ca}->{name}}            $self->{ct}->{attributes}->{$self->{ca}->{name}}
1260              = $self->{ca};              = $self->{ca};
1261              $self->{ca}->{index} = ++$self->{ct}->{last_index};
1262          }          }
1263        }; # $before_leave        }; # $before_leave
1264    
# Line 1140  sub _get_next_token ($) { Line 1295  sub _get_next_token ($) {
1295        
1296          redo A;          redo A;
1297        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
1298            if ($self->{is_xml}) {
1299              
1300              ## XML5: Not a parse error.
1301              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1302            } else {
1303              
1304            }
1305    
1306          $before_leave->();          $before_leave->();
1307          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1308                        
# Line 1189  sub _get_next_token ($) { Line 1352  sub _get_next_token ($) {
1352        
1353          redo A;          redo A;
1354        } elsif ($self->{nc} == 0x002F) { # /        } elsif ($self->{nc} == 0x002F) { # /
1355            if ($self->{is_xml}) {
1356              
1357              ## XML5: Not a parse error.
1358              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1359            } else {
1360              
1361            }
1362                    
1363          $before_leave->();          $before_leave->();
1364          $self->{state} = SELF_CLOSING_START_TAG_STATE;          $self->{state} = SELF_CLOSING_START_TAG_STATE;
# Line 1233  sub _get_next_token ($) { Line 1403  sub _get_next_token ($) {
1403          if ($self->{nc} == 0x0022 or # "          if ($self->{nc} == 0x0022 or # "
1404              $self->{nc} == 0x0027) { # '              $self->{nc} == 0x0027) { # '
1405                        
1406              ## XML5: Not a parse error.
1407            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');
1408          } else {          } else {
1409                        
# Line 1253  sub _get_next_token ($) { Line 1424  sub _get_next_token ($) {
1424          redo A;          redo A;
1425        }        }
1426      } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {      } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {
1427          ## XML5: "Tag attribute name after state".
1428          
1429        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
1430                    
1431          ## Stay in the state          ## Stay in the state
# Line 1284  sub _get_next_token ($) { Line 1457  sub _get_next_token ($) {
1457        
1458          redo A;          redo A;
1459        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
1460            if ($self->{is_xml}) {
1461              
1462              ## XML5: Not a parse error.
1463              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1464            } else {
1465              
1466            }
1467    
1468          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1469                        
1470            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
# Line 1337  sub _get_next_token ($) { Line 1518  sub _get_next_token ($) {
1518        
1519          redo A;          redo A;
1520        } elsif ($self->{nc} == 0x002F) { # /        } elsif ($self->{nc} == 0x002F) { # /
1521            if ($self->{is_xml}) {
1522              
1523              ## XML5: Not a parse error.
1524              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1525            } else {
1526              
1527            }
1528                    
1529          $self->{state} = SELF_CLOSING_START_TAG_STATE;          $self->{state} = SELF_CLOSING_START_TAG_STATE;
1530                    
# Line 1376  sub _get_next_token ($) { Line 1564  sub _get_next_token ($) {
1564    
1565          redo A;          redo A;
1566        } else {        } else {
1567            if ($self->{is_xml}) {
1568              
1569              ## XML5: Not a parse error.
1570              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1571            } else {
1572              
1573            }
1574    
1575          if ($self->{nc} == 0x0022 or # "          if ($self->{nc} == 0x0022 or # "
1576              $self->{nc} == 0x0027) { # '              $self->{nc} == 0x0027) { # '
1577                        
1578              ## XML5: Not a parse error.
1579            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');
1580          } else {          } else {
1581                        
# Line 1402  sub _get_next_token ($) { Line 1599  sub _get_next_token ($) {
1599          redo A;                  redo A;        
1600        }        }
1601      } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {      } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {
1602          ## XML5: "Tag attribute value before state".
1603    
1604        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
1605                    
1606          ## Stay in the state          ## Stay in the state
# Line 1513  sub _get_next_token ($) { Line 1712  sub _get_next_token ($) {
1712        } else {        } else {
1713          if ($self->{nc} == 0x003D) { # =          if ($self->{nc} == 0x003D) { # =
1714                        
1715              ## XML5: Not a parse error.
1716            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');
1717            } elsif ($self->{is_xml}) {
1718              
1719              ## XML5: No parse error.
1720              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO
1721          } else {          } else {
1722                        
1723          }          }
# Line 1533  sub _get_next_token ($) { Line 1737  sub _get_next_token ($) {
1737          redo A;          redo A;
1738        }        }
1739      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1740          ## XML5: "Tag attribute value double quoted state".
1741          
1742        if ($self->{nc} == 0x0022) { # "        if ($self->{nc} == 0x0022) { # "
1743                    
1744            ## XML5: "Tag attribute name before state".
1745          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1746                    
1747      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 1550  sub _get_next_token ($) { Line 1757  sub _get_next_token ($) {
1757          redo A;          redo A;
1758        } elsif ($self->{nc} == 0x0026) { # &        } elsif ($self->{nc} == 0x0026) { # &
1759                    
1760            ## XML5: Not defined yet.
1761    
1762          ## NOTE: In the spec, the tokenizer is switched to the          ## NOTE: In the spec, the tokenizer is switched to the
1763          ## "entity in attribute value state".  In this implementation, the          ## "entity in attribute value state".  In this implementation, the
1764          ## tokenizer is switched to the |ENTITY_STATE|, which is an          ## tokenizer is switched to the |ENTITY_STATE|, which is an
# Line 1594  sub _get_next_token ($) { Line 1803  sub _get_next_token ($) {
1803    
1804          redo A;          redo A;
1805        } else {        } else {
1806                    if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
1807              
1808              ## XML5: Not a parse error.
1809              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lt in attr value'); ## TODO: type
1810            } else {
1811              
1812            }
1813          $self->{ca}->{value} .= chr ($self->{nc});          $self->{ca}->{value} .= chr ($self->{nc});
1814          $self->{read_until}->($self->{ca}->{value},          $self->{read_until}->($self->{ca}->{value},
1815                                q["&],                                q["&<],
1816                                length $self->{ca}->{value});                                length $self->{ca}->{value});
1817    
1818          ## Stay in the state          ## Stay in the state
# Line 1615  sub _get_next_token ($) { Line 1830  sub _get_next_token ($) {
1830          redo A;          redo A;
1831        }        }
1832      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1833          ## XML5: "Tag attribute value single quoted state".
1834    
1835        if ($self->{nc} == 0x0027) { # '        if ($self->{nc} == 0x0027) { # '
1836                    
1837            ## XML5: "Before attribute name state" (sic).
1838          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1839                    
1840      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 1632  sub _get_next_token ($) { Line 1850  sub _get_next_token ($) {
1850          redo A;          redo A;
1851        } elsif ($self->{nc} == 0x0026) { # &        } elsif ($self->{nc} == 0x0026) { # &
1852                    
1853            ## XML5: Not defined yet.
1854    
1855          ## NOTE: In the spec, the tokenizer is switched to the          ## NOTE: In the spec, the tokenizer is switched to the
1856          ## "entity in attribute value state".  In this implementation, the          ## "entity in attribute value state".  In this implementation, the
1857          ## tokenizer is switched to the |ENTITY_STATE|, which is an          ## tokenizer is switched to the |ENTITY_STATE|, which is an
# Line 1676  sub _get_next_token ($) { Line 1896  sub _get_next_token ($) {
1896    
1897          redo A;          redo A;
1898        } else {        } else {
1899                    if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
1900              
1901              ## XML5: Not a parse error.
1902              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lt in attr value'); ## TODO: type
1903            } else {
1904              
1905            }
1906          $self->{ca}->{value} .= chr ($self->{nc});          $self->{ca}->{value} .= chr ($self->{nc});
1907          $self->{read_until}->($self->{ca}->{value},          $self->{read_until}->($self->{ca}->{value},
1908                                q['&],                                q['&<],
1909                                length $self->{ca}->{value});                                length $self->{ca}->{value});
1910    
1911          ## Stay in the state          ## Stay in the state
# Line 1697  sub _get_next_token ($) { Line 1923  sub _get_next_token ($) {
1923          redo A;          redo A;
1924        }        }
1925      } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {
1926          ## XML5: "Tag attribute value unquoted state".
1927    
1928        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
1929                    
1930            ## XML5: "Tag attribute name before state".
1931          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1932                    
1933      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 1714  sub _get_next_token ($) { Line 1943  sub _get_next_token ($) {
1943          redo A;          redo A;
1944        } elsif ($self->{nc} == 0x0026) { # &        } elsif ($self->{nc} == 0x0026) { # &
1945                    
1946    
1947            ## XML5: Not defined yet.
1948    
1949          ## NOTE: In the spec, the tokenizer is switched to the          ## NOTE: In the spec, the tokenizer is switched to the
1950          ## "entity in attribute value state".  In this implementation, the          ## "entity in attribute value state".  In this implementation, the
1951          ## tokenizer is switched to the |ENTITY_STATE|, which is an          ## tokenizer is switched to the |ENTITY_STATE|, which is an
# Line 1797  sub _get_next_token ($) { Line 2029  sub _get_next_token ($) {
2029               0x003D => 1, # =               0x003D => 1, # =
2030              }->{$self->{nc}}) {              }->{$self->{nc}}) {
2031                        
2032              ## XML5: Not a parse error.
2033            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');
2034          } else {          } else {
2035                        
# Line 1913  sub _get_next_token ($) { Line 2146  sub _get_next_token ($) {
2146          redo A;          redo A;
2147        }        }
2148      } elsif ($self->{state} == SELF_CLOSING_START_TAG_STATE) {      } elsif ($self->{state} == SELF_CLOSING_START_TAG_STATE) {
2149          ## XML5: "Empty tag state".
2150    
2151        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2152          if ($self->{ct}->{type} == END_TAG_TOKEN) {          if ($self->{ct}->{type} == END_TAG_TOKEN) {
2153                        
# Line 1964  sub _get_next_token ($) { Line 2199  sub _get_next_token ($) {
2199          } else {          } else {
2200            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
2201          }          }
2202            ## XML5: "Tag attribute name before state".
2203          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2204          $self->{s_kwd} = '';          $self->{s_kwd} = '';
2205          ## Reconsume.          ## Reconsume.
# Line 1978  sub _get_next_token ($) { Line 2214  sub _get_next_token ($) {
2214          redo A;          redo A;
2215        }        }
2216      } elsif ($self->{state} == BOGUS_COMMENT_STATE) {      } elsif ($self->{state} == BOGUS_COMMENT_STATE) {
2217        ## (only happen if PCDATA state)        ## XML5: "Bogus comment state" and "DOCTYPE bogus comment state".
2218    
2219        ## NOTE: Unlike spec's "bogus comment state", this implementation        ## NOTE: Unlike spec's "bogus comment state", this implementation
2220        ## consumes characters one-by-one basis.        ## consumes characters one-by-one basis.
2221                
2222        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2223                    if ($self->{in_subset}) {
2224          $self->{state} = DATA_STATE;            
2225          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2226            } else {
2227              
2228              $self->{state} = DATA_STATE;
2229              $self->{s_kwd} = '';
2230            }
2231                    
2232      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2233        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2002  sub _get_next_token ($) { Line 2243  sub _get_next_token ($) {
2243          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
2244          redo A;          redo A;
2245        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
2246                    if ($self->{in_subset}) {
2247          $self->{state} = DATA_STATE;            
2248          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2249            } else {
2250              
2251              $self->{state} = DATA_STATE;
2252              $self->{s_kwd} = '';
2253            }
2254          ## reconsume          ## reconsume
2255    
2256          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2031  sub _get_next_token ($) { Line 2277  sub _get_next_token ($) {
2277          redo A;          redo A;
2278        }        }
2279      } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {      } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {
2280        ## (only happen if PCDATA state)        ## XML5: "Markup declaration state".
2281                
2282        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2283                    
# Line 2053  sub _get_next_token ($) { Line 2299  sub _get_next_token ($) {
2299          ## ASCII case-insensitive.          ## ASCII case-insensitive.
2300                    
2301          $self->{state} = MD_DOCTYPE_STATE;          $self->{state} = MD_DOCTYPE_STATE;
2302          $self->{s_kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
2303                    
2304      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2305        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2072  sub _get_next_token ($) { Line 2318  sub _get_next_token ($) {
2318                 $self->{nc} == 0x005B) { # [                 $self->{nc} == 0x005B) { # [
2319                                                    
2320          $self->{state} = MD_CDATA_STATE;          $self->{state} = MD_CDATA_STATE;
2321          $self->{s_kwd} = '[';          $self->{kwd} = '[';
2322                    
2323      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2324        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2106  sub _get_next_token ($) { Line 2352  sub _get_next_token ($) {
2352                                    line => $self->{line_prev},                                    line => $self->{line_prev},
2353                                    column => $self->{column_prev} - 2,                                    column => $self->{column_prev} - 2,
2354                                   };                                   };
2355          $self->{state} = COMMENT_START_STATE;          $self->{state} = COMMENT_START_STATE; ## XML5: "comment state".
2356                    
2357      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2358        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2142  sub _get_next_token ($) { Line 2388  sub _get_next_token ($) {
2388              0x0054, # T              0x0054, # T
2389              0x0059, # Y              0x0059, # Y
2390              0x0050, # P              0x0050, # P
2391            ]->[length $self->{s_kwd}] or            ]->[length $self->{kwd}] or
2392            $self->{nc} == [            $self->{nc} == [
2393              undef,              undef,
2394              0x006F, # o              0x006F, # o
# Line 2150  sub _get_next_token ($) { Line 2396  sub _get_next_token ($) {
2396              0x0074, # t              0x0074, # t
2397              0x0079, # y              0x0079, # y
2398              0x0070, # p              0x0070, # p
2399            ]->[length $self->{s_kwd}]) {            ]->[length $self->{kwd}]) {
2400                    
2401          ## Stay in the state.          ## Stay in the state.
2402          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
2403                    
2404      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2405        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2166  sub _get_next_token ($) { Line 2412  sub _get_next_token ($) {
2412      }      }
2413        
2414          redo A;          redo A;
2415        } elsif ((length $self->{s_kwd}) == 6 and        } elsif ((length $self->{kwd}) == 6 and
2416                 ($self->{nc} == 0x0045 or # E                 ($self->{nc} == 0x0045 or # E
2417                  $self->{nc} == 0x0065)) { # e                  $self->{nc} == 0x0065)) { # e
2418                    if ($self->{is_xml} and
2419                ($self->{kwd} ne 'DOCTYP' or $self->{nc} == 0x0065)) {
2420              
2421              ## XML5: case-sensitive.
2422              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO
2423                              text => 'DOCTYPE',
2424                              line => $self->{line_prev},
2425                              column => $self->{column_prev} - 5);
2426            } else {
2427              
2428            }
2429          $self->{state} = DOCTYPE_STATE;          $self->{state} = DOCTYPE_STATE;
2430          $self->{ct} = {type => DOCTYPE_TOKEN,          $self->{ct} = {type => DOCTYPE_TOKEN,
2431                                    quirks => 1,                                    quirks => 1,
# Line 2192  sub _get_next_token ($) { Line 2448  sub _get_next_token ($) {
2448                                    
2449          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
2450                          line => $self->{line_prev},                          line => $self->{line_prev},
2451                          column => $self->{column_prev} - 1 - length $self->{s_kwd});                          column => $self->{column_prev} - 1 - length $self->{kwd});
2452          $self->{state} = BOGUS_COMMENT_STATE;          $self->{state} = BOGUS_COMMENT_STATE;
2453          ## Reconsume.          ## Reconsume.
2454          $self->{ct} = {type => COMMENT_TOKEN,          $self->{ct} = {type => COMMENT_TOKEN,
2455                                    data => $self->{s_kwd},                                    data => $self->{kwd},
2456                                    line => $self->{line_prev},                                    line => $self->{line_prev},
2457                                    column => $self->{column_prev} - 1 - length $self->{s_kwd},                                    column => $self->{column_prev} - 1 - length $self->{kwd},
2458                                   };                                   };
2459          redo A;          redo A;
2460        }        }
# Line 2209  sub _get_next_token ($) { Line 2465  sub _get_next_token ($) {
2465              '[CD' => 0x0041, # A              '[CD' => 0x0041, # A
2466              '[CDA' => 0x0054, # T              '[CDA' => 0x0054, # T
2467              '[CDAT' => 0x0041, # A              '[CDAT' => 0x0041, # A
2468            }->{$self->{s_kwd}}) {            }->{$self->{kwd}}) {
2469                    
2470          ## Stay in the state.          ## Stay in the state.
2471          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
2472                    
2473      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2474        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2225  sub _get_next_token ($) { Line 2481  sub _get_next_token ($) {
2481      }      }
2482        
2483          redo A;          redo A;
2484        } elsif ($self->{s_kwd} eq '[CDATA' and        } elsif ($self->{kwd} eq '[CDATA' and
2485                 $self->{nc} == 0x005B) { # [                 $self->{nc} == 0x005B) { # [
2486                    if ($self->{is_xml} and
2487                not $self->{tainted} and
2488                @{$self->{open_elements} or []} == 0) {
2489              
2490              $self->{parse_error}->(level => $self->{level}->{must}, type => 'cdata outside of root element',
2491                              line => $self->{line_prev},
2492                              column => $self->{column_prev} - 7);
2493              $self->{tainted} = 1;
2494            } else {
2495              
2496            }
2497    
2498          $self->{ct} = {type => CHARACTER_TOKEN,          $self->{ct} = {type => CHARACTER_TOKEN,
2499                                    data => '',                                    data => '',
2500                                    line => $self->{line_prev},                                    line => $self->{line_prev},
# Line 2249  sub _get_next_token ($) { Line 2516  sub _get_next_token ($) {
2516                    
2517          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
2518                          line => $self->{line_prev},                          line => $self->{line_prev},
2519                          column => $self->{column_prev} - 1 - length $self->{s_kwd});                          column => $self->{column_prev} - 1 - length $self->{kwd});
2520          $self->{state} = BOGUS_COMMENT_STATE;          $self->{state} = BOGUS_COMMENT_STATE;
2521          ## Reconsume.          ## Reconsume.
2522          $self->{ct} = {type => COMMENT_TOKEN,          $self->{ct} = {type => COMMENT_TOKEN,
2523                                    data => $self->{s_kwd},                                    data => $self->{kwd},
2524                                    line => $self->{line_prev},                                    line => $self->{line_prev},
2525                                    column => $self->{column_prev} - 1 - length $self->{s_kwd},                                    column => $self->{column_prev} - 1 - length $self->{kwd},
2526                                   };                                   };
2527          redo A;          redo A;
2528        }        }
# Line 2276  sub _get_next_token ($) { Line 2543  sub _get_next_token ($) {
2543        
2544          redo A;          redo A;
2545        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
2546          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');
2547          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2548          $self->{s_kwd} = '';            
2549              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2550            } else {
2551              
2552              $self->{state} = DATA_STATE;
2553              $self->{s_kwd} = '';
2554            }
2555                    
2556      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2557        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2296  sub _get_next_token ($) { Line 2568  sub _get_next_token ($) {
2568    
2569          redo A;          redo A;
2570        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2571          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2572          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2573          $self->{s_kwd} = '';            
2574              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2575            } else {
2576              
2577              $self->{state} = DATA_STATE;
2578              $self->{s_kwd} = '';
2579            }
2580          ## reconsume          ## reconsume
2581    
2582          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2340  sub _get_next_token ($) { Line 2617  sub _get_next_token ($) {
2617        
2618          redo A;          redo A;
2619        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
2620          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment');
2621          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2622          $self->{s_kwd} = '';            
2623              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2624            } else {
2625              
2626              $self->{state} = DATA_STATE;
2627              $self->{s_kwd} = '';
2628            }
2629                    
2630      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2631        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2360  sub _get_next_token ($) { Line 2642  sub _get_next_token ($) {
2642    
2643          redo A;          redo A;
2644        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2645          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2646          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2647          $self->{s_kwd} = '';            
2648              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2649            } else {
2650              
2651              $self->{state} = DATA_STATE;
2652              $self->{s_kwd} = '';
2653            }
2654          ## reconsume          ## reconsume
2655    
2656          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2388  sub _get_next_token ($) { Line 2675  sub _get_next_token ($) {
2675          redo A;          redo A;
2676        }        }
2677      } elsif ($self->{state} == COMMENT_STATE) {      } elsif ($self->{state} == COMMENT_STATE) {
2678          ## XML5: "Comment state" and "DOCTYPE comment state".
2679    
2680        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2681                    
2682          $self->{state} = COMMENT_END_DASH_STATE;          $self->{state} = COMMENT_END_DASH_STATE;
# Line 2404  sub _get_next_token ($) { Line 2693  sub _get_next_token ($) {
2693        
2694          redo A;          redo A;
2695        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2696          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2697          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2698          $self->{s_kwd} = '';            
2699              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2700            } else {
2701              
2702              $self->{state} = DATA_STATE;
2703              $self->{s_kwd} = '';
2704            }
2705          ## reconsume          ## reconsume
2706    
2707          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2435  sub _get_next_token ($) { Line 2729  sub _get_next_token ($) {
2729          redo A;          redo A;
2730        }        }
2731      } elsif ($self->{state} == COMMENT_END_DASH_STATE) {      } elsif ($self->{state} == COMMENT_END_DASH_STATE) {
2732          ## XML5: "Comment dash state" and "DOCTYPE comment dash state".
2733    
2734        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2735                    
2736          $self->{state} = COMMENT_END_STATE;          $self->{state} = COMMENT_END_STATE;
# Line 2451  sub _get_next_token ($) { Line 2747  sub _get_next_token ($) {
2747        
2748          redo A;          redo A;
2749        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2750          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2751          $self->{s_kwd} = '';          if ($self->{in_subset}) {
2752          $self->{state} = DATA_STATE;            
2753          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2754            } else {
2755              
2756              $self->{state} = DATA_STATE;
2757              $self->{s_kwd} = '';
2758            }
2759          ## reconsume          ## reconsume
2760    
2761          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2479  sub _get_next_token ($) { Line 2779  sub _get_next_token ($) {
2779          redo A;          redo A;
2780        }        }
2781      } elsif ($self->{state} == COMMENT_END_STATE) {      } elsif ($self->{state} == COMMENT_END_STATE) {
2782          ## XML5: "Comment end state" and "DOCTYPE comment end state".
2783    
2784        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2785                    if ($self->{in_subset}) {
2786          $self->{state} = DATA_STATE;            
2787          $self->{s_kwd} = '';            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2788            } else {
2789              
2790              $self->{state} = DATA_STATE;
2791              $self->{s_kwd} = '';
2792            }
2793                    
2794      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2795        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2500  sub _get_next_token ($) { Line 2807  sub _get_next_token ($) {
2807          redo A;          redo A;
2808        } elsif ($self->{nc} == 0x002D) { # -        } elsif ($self->{nc} == 0x002D) { # -
2809                    
2810            ## XML5: Not a parse error.
2811          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',
2812                          line => $self->{line_prev},                          line => $self->{line_prev},
2813                          column => $self->{column_prev});                          column => $self->{column_prev});
# Line 2518  sub _get_next_token ($) { Line 2826  sub _get_next_token ($) {
2826        
2827          redo A;          redo A;
2828        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
           
2829          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed comment');
2830          $self->{state} = DATA_STATE;          if ($self->{in_subset}) {
2831          $self->{s_kwd} = '';            
2832              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2833            } else {
2834              
2835              $self->{state} = DATA_STATE;
2836              $self->{s_kwd} = '';
2837            }
2838          ## reconsume          ## reconsume
2839    
2840          return  ($self->{ct}); # comment          return  ($self->{ct}); # comment
# Line 2529  sub _get_next_token ($) { Line 2842  sub _get_next_token ($) {
2842          redo A;          redo A;
2843        } else {        } else {
2844                    
2845            ## XML5: Not a parse error.
2846          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',
2847                          line => $self->{line_prev},                          line => $self->{line_prev},
2848                          column => $self->{column_prev});                          column => $self->{column_prev});
# Line 2565  sub _get_next_token ($) { Line 2879  sub _get_next_token ($) {
2879          redo A;          redo A;
2880        } else {        } else {
2881                    
2882            ## XML5: Unless EOF, switch to the bogus comment state.
2883          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before DOCTYPE name');
2884          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
2885          ## reconsume          ## reconsume
2886          redo A;          redo A;
2887        }        }
2888      } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {
2889          ## XML5: "DOCTYPE root name before state".
2890    
2891        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
2892                    
2893          ## Stay in the state          ## Stay in the state
# Line 2588  sub _get_next_token ($) { Line 2905  sub _get_next_token ($) {
2905          redo A;          redo A;
2906        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
2907                    
2908            ## XML5: No parse error.
2909          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');
2910          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2911          $self->{s_kwd} = '';          $self->{s_kwd} = '';
# Line 2616  sub _get_next_token ($) { Line 2934  sub _get_next_token ($) {
2934          return  ($self->{ct}); # DOCTYPE (quirks)          return  ($self->{ct}); # DOCTYPE (quirks)
2935    
2936          redo A;          redo A;
2937          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
2938            
2939            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name');
2940            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2941            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
2942            $self->{in_subset} = 1;
2943            
2944        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2945          $self->{line_prev} = $self->{line};
2946          $self->{column_prev} = $self->{column};
2947          $self->{column}++;
2948          $self->{nc}
2949              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
2950        } else {
2951          $self->{set_nc}->($self);
2952        }
2953      
2954            return  ($self->{ct}); # DOCTYPE
2955            redo A;
2956        } else {        } else {
2957                    
2958          $self->{ct}->{name} = chr $self->{nc};          $self->{ct}->{name} = chr $self->{nc};
# Line 2635  sub _get_next_token ($) { Line 2972  sub _get_next_token ($) {
2972          redo A;          redo A;
2973        }        }
2974      } elsif ($self->{state} == DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == DOCTYPE_NAME_STATE) {
2975  ## ISSUE: Redundant "First," in the spec.        ## XML5: "DOCTYPE root name state".
2976    
2977          ## ISSUE: Redundant "First," in the spec.
2978    
2979        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
2980                    
2981          $self->{state} = AFTER_DOCTYPE_NAME_STATE;          $self->{state} = AFTER_DOCTYPE_NAME_STATE;
# Line 2681  sub _get_next_token ($) { Line 3021  sub _get_next_token ($) {
3021          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
3022    
3023          redo A;          redo A;
3024          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
3025            
3026            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3027            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3028            $self->{in_subset} = 1;
3029            
3030        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3031          $self->{line_prev} = $self->{line};
3032          $self->{column_prev} = $self->{column};
3033          $self->{column}++;
3034          $self->{nc}
3035              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3036        } else {
3037          $self->{set_nc}->($self);
3038        }
3039      
3040            return  ($self->{ct}); # DOCTYPE
3041            redo A;
3042        } else {        } else {
3043                    
3044          $self->{ct}->{name}          $self->{ct}->{name}
# Line 2700  sub _get_next_token ($) { Line 3058  sub _get_next_token ($) {
3058          redo A;          redo A;
3059        }        }
3060      } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {
3061          ## XML5: Corresponding to XML5's "DOCTYPE root name after
3062          ## state", but implemented differently.
3063    
3064        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
3065                    
3066          ## Stay in the state          ## Stay in the state
# Line 2747  sub _get_next_token ($) { Line 3108  sub _get_next_token ($) {
3108          redo A;          redo A;
3109        } elsif ($self->{nc} == 0x0050 or # P        } elsif ($self->{nc} == 0x0050 or # P
3110                 $self->{nc} == 0x0070) { # p                 $self->{nc} == 0x0070) { # p
3111            
3112          $self->{state} = PUBLIC_STATE;          $self->{state} = PUBLIC_STATE;
3113          $self->{s_kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
3114                    
3115      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3116        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2763  sub _get_next_token ($) { Line 3125  sub _get_next_token ($) {
3125          redo A;          redo A;
3126        } elsif ($self->{nc} == 0x0053 or # S        } elsif ($self->{nc} == 0x0053 or # S
3127                 $self->{nc} == 0x0073) { # s                 $self->{nc} == 0x0073) { # s
3128            
3129          $self->{state} = SYSTEM_STATE;          $self->{state} = SYSTEM_STATE;
3130          $self->{s_kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
3131            
3132        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3133          $self->{line_prev} = $self->{line};
3134          $self->{column_prev} = $self->{column};
3135          $self->{column}++;
3136          $self->{nc}
3137              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3138        } else {
3139          $self->{set_nc}->($self);
3140        }
3141      
3142            redo A;
3143          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
3144            
3145            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3146            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3147            $self->{in_subset} = 1;
3148                    
3149      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3150        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2776  sub _get_next_token ($) { Line 3156  sub _get_next_token ($) {
3156        $self->{set_nc}->($self);        $self->{set_nc}->($self);
3157      }      }
3158        
3159            return  ($self->{ct}); # DOCTYPE
3160          redo A;          redo A;
3161        } else {        } else {
3162                    
# Line 2804  sub _get_next_token ($) { Line 3185  sub _get_next_token ($) {
3185              0x0042, # B              0x0042, # B
3186              0x004C, # L              0x004C, # L
3187              0x0049, # I              0x0049, # I
3188            ]->[length $self->{s_kwd}] or            ]->[length $self->{kwd}] or
3189            $self->{nc} == [            $self->{nc} == [
3190              undef,              undef,
3191              0x0075, # u              0x0075, # u
3192              0x0062, # b              0x0062, # b
3193              0x006C, # l              0x006C, # l
3194              0x0069, # i              0x0069, # i
3195            ]->[length $self->{s_kwd}]) {            ]->[length $self->{kwd}]) {
3196                    
3197          ## Stay in the state.          ## Stay in the state.
3198          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
3199                    
3200      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3201        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2827  sub _get_next_token ($) { Line 3208  sub _get_next_token ($) {
3208      }      }
3209        
3210          redo A;          redo A;
3211        } elsif ((length $self->{s_kwd}) == 5 and        } elsif ((length $self->{kwd}) == 5 and
3212                 ($self->{nc} == 0x0043 or # C                 ($self->{nc} == 0x0043 or # C
3213                  $self->{nc} == 0x0063)) { # c                  $self->{nc} == 0x0063)) { # c
3214                    if ($self->{is_xml} and
3215                ($self->{kwd} ne 'PUBLI' or $self->{nc} == 0x0063)) { # c
3216              
3217              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
3218                              text => 'PUBLIC',
3219                              line => $self->{line_prev},
3220                              column => $self->{column_prev} - 4);
3221            } else {
3222              
3223            }
3224          $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;          $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
3225                    
3226      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 2848  sub _get_next_token ($) { Line 3238  sub _get_next_token ($) {
3238                    
3239          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name',
3240                          line => $self->{line_prev},                          line => $self->{line_prev},
3241                          column => $self->{column_prev} + 1 - length $self->{s_kwd});                          column => $self->{column_prev} + 1 - length $self->{kwd});
3242          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
3243    
3244          $self->{state} = BOGUS_DOCTYPE_STATE;          $self->{state} = BOGUS_DOCTYPE_STATE;
# Line 2863  sub _get_next_token ($) { Line 3253  sub _get_next_token ($) {
3253              0x0053, # S              0x0053, # S
3254              0x0054, # T              0x0054, # T
3255              0x0045, # E              0x0045, # E
3256            ]->[length $self->{s_kwd}] or            ]->[length $self->{kwd}] or
3257            $self->{nc} == [            $self->{nc} == [
3258              undef,              undef,
3259              0x0079, # y              0x0079, # y
3260              0x0073, # s              0x0073, # s
3261              0x0074, # t              0x0074, # t
3262              0x0065, # e              0x0065, # e
3263            ]->[length $self->{s_kwd}]) {            ]->[length $self->{kwd}]) {
3264                    
3265          ## Stay in the state.          ## Stay in the state.
3266          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
3267                    
3268      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3269        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2886  sub _get_next_token ($) { Line 3276  sub _get_next_token ($) {
3276      }      }
3277        
3278          redo A;          redo A;
3279        } elsif ((length $self->{s_kwd}) == 5 and        } elsif ((length $self->{kwd}) == 5 and
3280                 ($self->{nc} == 0x004D or # M                 ($self->{nc} == 0x004D or # M
3281                  $self->{nc} == 0x006D)) { # m                  $self->{nc} == 0x006D)) { # m
3282                    if ($self->{is_xml} and
3283                ($self->{kwd} ne 'SYSTE' or $self->{nc} == 0x006D)) { # m
3284              
3285              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO: type
3286                              text => 'SYSTEM',
3287                              line => $self->{line_prev},
3288                              column => $self->{column_prev} - 4);
3289            } else {
3290              
3291            }
3292          $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;          $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
3293                    
3294      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 2907  sub _get_next_token ($) { Line 3306  sub _get_next_token ($) {
3306                    
3307          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after DOCTYPE name',
3308                          line => $self->{line_prev},                          line => $self->{line_prev},
3309                          column => $self->{column_prev} + 1 - length $self->{s_kwd});                          column => $self->{column_prev} + 1 - length $self->{kwd});
3310          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
3311    
3312          $self->{state} = BOGUS_DOCTYPE_STATE;          $self->{state} = BOGUS_DOCTYPE_STATE;
# Line 2996  sub _get_next_token ($) { Line 3395  sub _get_next_token ($) {
3395          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
3396    
3397          redo A;          redo A;
3398          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
3399            
3400            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no PUBLIC literal');
3401            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3402            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3403            $self->{in_subset} = 1;
3404            
3405        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3406          $self->{line_prev} = $self->{line};
3407          $self->{column_prev} = $self->{column};
3408          $self->{column}++;
3409          $self->{nc}
3410              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3411        } else {
3412          $self->{set_nc}->($self);
3413        }
3414      
3415            return  ($self->{ct}); # DOCTYPE
3416            redo A;
3417        } else {        } else {
3418                    
3419          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC');
# Line 3206  sub _get_next_token ($) { Line 3624  sub _get_next_token ($) {
3624        
3625          redo A;          redo A;
3626        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
3627                    if ($self->{is_xml}) {
3628              
3629              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
3630            } else {
3631              
3632            }
3633          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3634          $self->{s_kwd} = '';          $self->{s_kwd} = '';
3635                    
# Line 3236  sub _get_next_token ($) { Line 3659  sub _get_next_token ($) {
3659          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
3660    
3661          redo A;          redo A;
3662          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
3663            
3664            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
3665            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3666            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3667            $self->{in_subset} = 1;
3668            
3669        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3670          $self->{line_prev} = $self->{line};
3671          $self->{column_prev} = $self->{column};
3672          $self->{column}++;
3673          $self->{nc}
3674              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3675        } else {
3676          $self->{set_nc}->($self);
3677        }
3678      
3679            return  ($self->{ct}); # DOCTYPE
3680            redo A;
3681        } else {        } else {
3682                    
3683          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after PUBLIC literal');
# Line 3336  sub _get_next_token ($) { Line 3778  sub _get_next_token ($) {
3778          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
3779    
3780          redo A;          redo A;
3781          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
3782            
3783            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no SYSTEM literal');
3784    
3785            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3786            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
3787            $self->{in_subset} = 1;
3788            
3789        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
3790          $self->{line_prev} = $self->{line};
3791          $self->{column_prev} = $self->{column};
3792          $self->{column}++;
3793          $self->{nc}
3794              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
3795        } else {
3796          $self->{set_nc}->($self);
3797        }
3798      
3799            return  ($self->{ct}); # DOCTYPE
3800            redo A;
3801        } else {        } else {
3802                    
3803          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM');
# Line 3371  sub _get_next_token ($) { Line 3833  sub _get_next_token ($) {
3833      }      }
3834        
3835          redo A;          redo A;
3836        } elsif ($self->{nc} == 0x003E) { # >        } elsif (not $self->{is_xml} and $self->{nc} == 0x003E) { # >
3837                    
3838          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
3839    
# Line 3442  sub _get_next_token ($) { Line 3904  sub _get_next_token ($) {
3904      }      }
3905        
3906          redo A;          redo A;
3907        } elsif ($self->{nc} == 0x003E) { # >        } elsif (not $self->{is_xml} and $self->{nc} == 0x003E) { # >
3908                    
3909          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed SYSTEM literal');
3910    
# Line 3543  sub _get_next_token ($) { Line 4005  sub _get_next_token ($) {
4005          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
4006    
4007          redo A;          redo A;
4008          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
4009            
4010            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4011            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
4012            $self->{in_subset} = 1;
4013            
4014        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4015          $self->{line_prev} = $self->{line};
4016          $self->{column_prev} = $self->{column};
4017          $self->{column}++;
4018          $self->{nc}
4019              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4020        } else {
4021          $self->{set_nc}->($self);
4022        }
4023      
4024            return  ($self->{ct}); # DOCTYPE
4025            redo A;
4026        } else {        } else {
4027                    
4028          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after SYSTEM literal');
# Line 3582  sub _get_next_token ($) { Line 4062  sub _get_next_token ($) {
4062          return  ($self->{ct}); # DOCTYPE          return  ($self->{ct}); # DOCTYPE
4063    
4064          redo A;          redo A;
4065          } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
4066            
4067            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4068            $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
4069            $self->{in_subset} = 1;
4070            
4071        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4072          $self->{line_prev} = $self->{line};
4073          $self->{column_prev} = $self->{column};
4074          $self->{column}++;
4075          $self->{nc}
4076              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4077        } else {
4078          $self->{set_nc}->($self);
4079        }
4080      
4081            return  ($self->{ct}); # DOCTYPE
4082            redo A;
4083        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4084                    
4085          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
# Line 3594  sub _get_next_token ($) { Line 4092  sub _get_next_token ($) {
4092        } else {        } else {
4093                    
4094          my $s = '';          my $s = '';
4095          $self->{read_until}->($s, q[>], 0);          $self->{read_until}->($s, q{>[}, 0);
4096    
4097          ## Stay in the state          ## Stay in the state
4098                    
# Line 3614  sub _get_next_token ($) { Line 4112  sub _get_next_token ($) {
4112        ## NOTE: "CDATA section state" in the state is jointly implemented        ## NOTE: "CDATA section state" in the state is jointly implemented
4113        ## by three states, |CDATA_SECTION_STATE|, |CDATA_SECTION_MSE1_STATE|,        ## by three states, |CDATA_SECTION_STATE|, |CDATA_SECTION_MSE1_STATE|,
4114        ## and |CDATA_SECTION_MSE2_STATE|.        ## and |CDATA_SECTION_MSE2_STATE|.
4115    
4116          ## XML5: "CDATA state".
4117                
4118        if ($self->{nc} == 0x005D) { # ]        if ($self->{nc} == 0x005D) { # ]
4119                    
# Line 3631  sub _get_next_token ($) { Line 4131  sub _get_next_token ($) {
4131        
4132          redo A;          redo A;
4133        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
4134            if ($self->{is_xml}) {
4135              
4136              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no mse'); ## TODO: type
4137            } else {
4138              
4139            }
4140    
4141          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
4142          $self->{s_kwd} = '';          $self->{s_kwd} = '';
4143                    ## Reconsume.
     if ($self->{char_buffer_pos} < length $self->{char_buffer}) {  
       $self->{line_prev} = $self->{line};  
       $self->{column_prev} = $self->{column};  
       $self->{column}++;  
       $self->{nc}  
           = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);  
     } else {  
       $self->{set_nc}->($self);  
     }  
     
4144          if (length $self->{ct}->{data}) { # character          if (length $self->{ct}->{data}) { # character
4145                        
4146            return  ($self->{ct}); # character            return  ($self->{ct}); # character
# Line 3676  sub _get_next_token ($) { Line 4173  sub _get_next_token ($) {
4173    
4174        ## ISSUE: "text tokens" in spec.        ## ISSUE: "text tokens" in spec.
4175      } elsif ($self->{state} == CDATA_SECTION_MSE1_STATE) {      } elsif ($self->{state} == CDATA_SECTION_MSE1_STATE) {
4176          ## XML5: "CDATA bracket state".
4177    
4178        if ($self->{nc} == 0x005D) { # ]        if ($self->{nc} == 0x005D) { # ]
4179                    
4180          $self->{state} = CDATA_SECTION_MSE2_STATE;          $self->{state} = CDATA_SECTION_MSE2_STATE;
# Line 3693  sub _get_next_token ($) { Line 4192  sub _get_next_token ($) {
4192          redo A;          redo A;
4193        } else {        } else {
4194                    
4195            ## XML5: If EOF, "]" is not appended and changed to the data state.
4196          $self->{ct}->{data} .= ']';          $self->{ct}->{data} .= ']';
4197          $self->{state} = CDATA_SECTION_STATE;          $self->{state} = CDATA_SECTION_STATE; ## XML5: Stay in the state.
4198          ## Reconsume.          ## Reconsume.
4199          redo A;          redo A;
4200        }        }
4201      } elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) {      } elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) {
4202          ## XML5: "CDATA end state".
4203    
4204        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
4205          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
4206          $self->{s_kwd} = '';          $self->{s_kwd} = '';
# Line 3741  sub _get_next_token ($) { Line 4243  sub _get_next_token ($) {
4243                    
4244          $self->{ct}->{data} .= ']]'; # character          $self->{ct}->{data} .= ']]'; # character
4245          $self->{state} = CDATA_SECTION_STATE;          $self->{state} = CDATA_SECTION_STATE;
4246          ## Reconsume.          ## Reconsume. ## XML5: Emit.
4247          redo A;          redo A;
4248        }        }
4249      } elsif ($self->{state} == ENTITY_STATE) {      } elsif ($self->{state} == ENTITY_STATE) {
# Line 3758  sub _get_next_token ($) { Line 4260  sub _get_next_token ($) {
4260        } elsif ($self->{nc} == 0x0023) { # #        } elsif ($self->{nc} == 0x0023) { # #
4261                    
4262          $self->{state} = ENTITY_HASH_STATE;          $self->{state} = ENTITY_HASH_STATE;
4263          $self->{s_kwd} = '#';          $self->{kwd} = '#';
4264                    
4265      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4266        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3778  sub _get_next_token ($) { Line 4280  sub _get_next_token ($) {
4280                    
4281          require Whatpm::_NamedEntityList;          require Whatpm::_NamedEntityList;
4282          $self->{state} = ENTITY_NAME_STATE;          $self->{state} = ENTITY_NAME_STATE;
4283          $self->{s_kwd} = chr $self->{nc};          $self->{kwd} = chr $self->{nc};
4284          $self->{entity__value} = $self->{s_kwd};          $self->{entity__value} = $self->{kwd};
4285          $self->{entity__match} = 0;          $self->{entity__match} = 0;
4286                    
4287      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 3829  sub _get_next_token ($) { Line 4331  sub _get_next_token ($) {
4331            $self->{nc} == 0x0058) { # X            $self->{nc} == 0x0058) { # X
4332                    
4333          $self->{state} = HEXREF_X_STATE;          $self->{state} = HEXREF_X_STATE;
4334          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
4335                    
4336      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4337        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3846  sub _get_next_token ($) { Line 4348  sub _get_next_token ($) {
4348                 $self->{nc} <= 0x0039) { # 0..9                 $self->{nc} <= 0x0039) { # 0..9
4349                    
4350          $self->{state} = NCR_NUM_STATE;          $self->{state} = NCR_NUM_STATE;
4351          $self->{s_kwd} = $self->{nc} - 0x0030;          $self->{kwd} = $self->{nc} - 0x0030;
4352                    
4353      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4354        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 3892  sub _get_next_token ($) { Line 4394  sub _get_next_token ($) {
4394        if (0x0030 <= $self->{nc} and        if (0x0030 <= $self->{nc} and
4395            $self->{nc} <= 0x0039) { # 0..9            $self->{nc} <= 0x0039) { # 0..9
4396                    
4397          $self->{s_kwd} *= 10;          $self->{kwd} *= 10;
4398          $self->{s_kwd} += $self->{nc} - 0x0030;          $self->{kwd} += $self->{nc} - 0x0030;
4399                    
4400          ## Stay in the state.          ## Stay in the state.
4401                    
# Line 3929  sub _get_next_token ($) { Line 4431  sub _get_next_token ($) {
4431          #          #
4432        }        }
4433    
4434        my $code = $self->{s_kwd};        my $code = $self->{kwd};
4435        my $l = $self->{line_prev};        my $l = $self->{line_prev};
4436        my $c = $self->{column_prev};        my $c = $self->{column_prev};
4437        if ($charref_map->{$code}) {        if ($charref_map->{$code}) {
# Line 3952  sub _get_next_token ($) { Line 4454  sub _get_next_token ($) {
4454          $self->{s_kwd} = '';          $self->{s_kwd} = '';
4455          ## Reconsume.          ## Reconsume.
4456          return  ({type => CHARACTER_TOKEN, data => chr $code,          return  ({type => CHARACTER_TOKEN, data => chr $code,
4457                      has_reference => 1,
4458                    line => $l, column => $c,                    line => $l, column => $c,
4459                   });                   });
4460          redo A;          redo A;
# Line 3971  sub _get_next_token ($) { Line 4474  sub _get_next_token ($) {
4474          # 0..9, A..F, a..f          # 0..9, A..F, a..f
4475                    
4476          $self->{state} = HEXREF_HEX_STATE;          $self->{state} = HEXREF_HEX_STATE;
4477          $self->{s_kwd} = 0;          $self->{kwd} = 0;
4478          ## Reconsume.          ## Reconsume.
4479          redo A;          redo A;
4480        } else {        } else {
# Line 3989  sub _get_next_token ($) { Line 4492  sub _get_next_token ($) {
4492            $self->{s_kwd} = '';            $self->{s_kwd} = '';
4493            ## Reconsume.            ## Reconsume.
4494            return  ({type => CHARACTER_TOKEN,            return  ({type => CHARACTER_TOKEN,
4495                      data => '&' . $self->{s_kwd},                      data => '&' . $self->{kwd},
4496                      line => $self->{line_prev},                      line => $self->{line_prev},
4497                      column => $self->{column_prev} - length $self->{s_kwd},                      column => $self->{column_prev} - length $self->{kwd},
4498                     });                     });
4499            redo A;            redo A;
4500          } else {          } else {
4501                        
4502            $self->{ca}->{value} .= '&' . $self->{s_kwd};            $self->{ca}->{value} .= '&' . $self->{kwd};
4503            $self->{state} = $self->{prev_state};            $self->{state} = $self->{prev_state};
4504            $self->{s_kwd} = '';            $self->{s_kwd} = '';
4505            ## Reconsume.            ## Reconsume.
# Line 4007  sub _get_next_token ($) { Line 4510  sub _get_next_token ($) {
4510        if (0x0030 <= $self->{nc} and $self->{nc} <= 0x0039) {        if (0x0030 <= $self->{nc} and $self->{nc} <= 0x0039) {
4511          # 0..9          # 0..9
4512                    
4513          $self->{s_kwd} *= 0x10;          $self->{kwd} *= 0x10;
4514          $self->{s_kwd} += $self->{nc} - 0x0030;          $self->{kwd} += $self->{nc} - 0x0030;
4515          ## Stay in the state.          ## Stay in the state.
4516                    
4517      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 4025  sub _get_next_token ($) { Line 4528  sub _get_next_token ($) {
4528        } elsif (0x0061 <= $self->{nc} and        } elsif (0x0061 <= $self->{nc} and
4529                 $self->{nc} <= 0x0066) { # a..f                 $self->{nc} <= 0x0066) { # a..f
4530                    
4531          $self->{s_kwd} *= 0x10;          $self->{kwd} *= 0x10;
4532          $self->{s_kwd} += $self->{nc} - 0x0060 + 9;          $self->{kwd} += $self->{nc} - 0x0060 + 9;
4533          ## Stay in the state.          ## Stay in the state.
4534                    
4535      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 4043  sub _get_next_token ($) { Line 4546  sub _get_next_token ($) {
4546        } elsif (0x0041 <= $self->{nc} and        } elsif (0x0041 <= $self->{nc} and
4547                 $self->{nc} <= 0x0046) { # A..F                 $self->{nc} <= 0x0046) { # A..F
4548                    
4549          $self->{s_kwd} *= 0x10;          $self->{kwd} *= 0x10;
4550          $self->{s_kwd} += $self->{nc} - 0x0040 + 9;          $self->{kwd} += $self->{nc} - 0x0040 + 9;
4551          ## Stay in the state.          ## Stay in the state.
4552                    
4553      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 4081  sub _get_next_token ($) { Line 4584  sub _get_next_token ($) {
4584          #          #
4585        }        }
4586    
4587        my $code = $self->{s_kwd};        my $code = $self->{kwd};
4588        my $l = $self->{line_prev};        my $l = $self->{line_prev};
4589        my $c = $self->{column_prev};        my $c = $self->{column_prev};
4590        if ($charref_map->{$code}) {        if ($charref_map->{$code}) {
# Line 4104  sub _get_next_token ($) { Line 4607  sub _get_next_token ($) {
4607          $self->{s_kwd} = '';          $self->{s_kwd} = '';
4608          ## Reconsume.          ## Reconsume.
4609          return  ({type => CHARACTER_TOKEN, data => chr $code,          return  ({type => CHARACTER_TOKEN, data => chr $code,
4610                      has_reference => 1,
4611                    line => $l, column => $c,                    line => $l, column => $c,
4612                   });                   });
4613          redo A;          redo A;
# Line 4117  sub _get_next_token ($) { Line 4621  sub _get_next_token ($) {
4621          redo A;          redo A;
4622        }        }
4623      } elsif ($self->{state} == ENTITY_NAME_STATE) {      } elsif ($self->{state} == ENTITY_NAME_STATE) {
4624        if (length $self->{s_kwd} < 30 and        if (length $self->{kwd} < 30 and
4625            ## NOTE: Some number greater than the maximum length of entity name            ## NOTE: Some number greater than the maximum length of entity name
4626            ((0x0041 <= $self->{nc} and # A            ((0x0041 <= $self->{nc} and # A
4627              $self->{nc} <= 0x005A) or # Z              $self->{nc} <= 0x005A) or # Z
# Line 4127  sub _get_next_token ($) { Line 4631  sub _get_next_token ($) {
4631              $self->{nc} <= 0x0039) or # 9              $self->{nc} <= 0x0039) or # 9
4632             $self->{nc} == 0x003B)) { # ;             $self->{nc} == 0x003B)) { # ;
4633          our $EntityChar;          our $EntityChar;
4634          $self->{s_kwd} .= chr $self->{nc};          $self->{kwd} .= chr $self->{nc};
4635          if (defined $EntityChar->{$self->{s_kwd}}) {          if (defined $EntityChar->{$self->{kwd}}) {
4636            if ($self->{nc} == 0x003B) { # ;            if ($self->{nc} == 0x003B) { # ;
4637                            
4638              $self->{entity__value} = $EntityChar->{$self->{s_kwd}};              $self->{entity__value} = $EntityChar->{$self->{kwd}};
4639              $self->{entity__match} = 1;              $self->{entity__match} = 1;
4640                            
4641      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 4147  sub _get_next_token ($) { Line 4651  sub _get_next_token ($) {
4651              #              #
4652            } else {            } else {
4653                            
4654              $self->{entity__value} = $EntityChar->{$self->{s_kwd}};              $self->{entity__value} = $EntityChar->{$self->{kwd}};
4655              $self->{entity__match} = -1;              $self->{entity__match} = -1;
4656              ## Stay in the state.              ## Stay in the state.
4657                            
# Line 4195  sub _get_next_token ($) { Line 4699  sub _get_next_token ($) {
4699          if ($self->{prev_state} != DATA_STATE and # in attribute          if ($self->{prev_state} != DATA_STATE and # in attribute
4700              $self->{entity__match} < -1) {              $self->{entity__match} < -1) {
4701                        
4702            $data = '&' . $self->{s_kwd};            $data = '&' . $self->{kwd};
4703            #            #
4704          } else {          } else {
4705                        
# Line 4207  sub _get_next_token ($) { Line 4711  sub _get_next_token ($) {
4711                    
4712          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare ero',
4713                          line => $self->{line_prev},                          line => $self->{line_prev},
4714                          column => $self->{column_prev} - length $self->{s_kwd});                          column => $self->{column_prev} - length $self->{kwd});
4715          $data = '&' . $self->{s_kwd};          $data = '&' . $self->{kwd};
4716          #          #
4717        }        }
4718        
# Line 4229  sub _get_next_token ($) { Line 4733  sub _get_next_token ($) {
4733          ## Reconsume.          ## Reconsume.
4734          return  ({type => CHARACTER_TOKEN,          return  ({type => CHARACTER_TOKEN,
4735                    data => $data,                    data => $data,
4736                      has_reference => $has_ref,
4737                    line => $self->{line_prev},                    line => $self->{line_prev},
4738                    column => $self->{column_prev} + 1 - length $self->{s_kwd},                    column => $self->{column_prev} + 1 - length $self->{kwd},
4739                   });                   });
4740          redo A;          redo A;
4741        } else {        } else {
# Line 4242  sub _get_next_token ($) { Line 4747  sub _get_next_token ($) {
4747          ## Reconsume.          ## Reconsume.
4748          redo A;          redo A;
4749        }        }
4750    
4751        ## XML-only states
4752    
4753        } elsif ($self->{state} == PI_STATE) {
4754          ## XML5: "Pi state" and "DOCTYPE pi state".
4755    
4756          if ($is_space->{$self->{nc}} or
4757              $self->{nc} == 0x003F or # ?
4758              $self->{nc} == -1) {
4759            ## XML5: U+003F: "pi state": Same as "Anything else"; "DOCTYPE
4760            ## pi state": Switch to the "DOCTYPE pi after state".  EOF:
4761            ## "DOCTYPE pi state": Parse error, switch to the "data
4762            ## state".
4763            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare pio', ## TODO: type
4764                            line => $self->{line_prev},
4765                            column => $self->{column_prev}
4766                                - 1 * ($self->{nc} != -1));
4767            $self->{state} = BOGUS_COMMENT_STATE;
4768            ## Reconsume.
4769            $self->{ct} = {type => COMMENT_TOKEN,
4770                           data => '?',
4771                           line => $self->{line_prev},
4772                           column => $self->{column_prev}
4773                               - 1 * ($self->{nc} != -1),
4774                          };
4775            redo A;
4776          } else {
4777            ## XML5: "DOCTYPE pi state": Stay in the state.
4778            $self->{ct} = {type => PI_TOKEN,
4779                           target => chr $self->{nc},
4780                           data => '',
4781                           line => $self->{line_prev},
4782                           column => $self->{column_prev} - 1,
4783                          };
4784            $self->{state} = PI_TARGET_STATE;
4785            
4786        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4787          $self->{line_prev} = $self->{line};
4788          $self->{column_prev} = $self->{column};
4789          $self->{column}++;
4790          $self->{nc}
4791              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4792        } else {
4793          $self->{set_nc}->($self);
4794        }
4795      
4796            redo A;
4797          }
4798        } elsif ($self->{state} == PI_TARGET_STATE) {
4799          if ($is_space->{$self->{nc}}) {
4800            $self->{state} = PI_TARGET_AFTER_STATE;
4801            
4802        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4803          $self->{line_prev} = $self->{line};
4804          $self->{column_prev} = $self->{column};
4805          $self->{column}++;
4806          $self->{nc}
4807              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4808        } else {
4809          $self->{set_nc}->($self);
4810        }
4811      
4812            redo A;
4813          } elsif ($self->{nc} == -1) {
4814            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type
4815            if ($self->{in_subset}) {
4816              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4817            } else {
4818              $self->{state} = DATA_STATE;
4819              $self->{s_kwd} = '';
4820            }
4821            ## Reconsume.
4822            return  ($self->{ct}); # pi
4823            redo A;
4824          } elsif ($self->{nc} == 0x003F) { # ?
4825            $self->{state} = PI_AFTER_STATE;
4826            
4827        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4828          $self->{line_prev} = $self->{line};
4829          $self->{column_prev} = $self->{column};
4830          $self->{column}++;
4831          $self->{nc}
4832              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4833        } else {
4834          $self->{set_nc}->($self);
4835        }
4836      
4837            redo A;
4838          } else {
4839            ## XML5: typo ("tag name" -> "target")
4840            $self->{ct}->{target} .= chr $self->{nc}; # pi
4841            
4842        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4843          $self->{line_prev} = $self->{line};
4844          $self->{column_prev} = $self->{column};
4845          $self->{column}++;
4846          $self->{nc}
4847              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4848        } else {
4849          $self->{set_nc}->($self);
4850        }
4851      
4852            redo A;
4853          }
4854        } elsif ($self->{state} == PI_TARGET_AFTER_STATE) {
4855          if ($is_space->{$self->{nc}}) {
4856            ## Stay in the state.
4857            
4858        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4859          $self->{line_prev} = $self->{line};
4860          $self->{column_prev} = $self->{column};
4861          $self->{column}++;
4862          $self->{nc}
4863              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4864        } else {
4865          $self->{set_nc}->($self);
4866        }
4867      
4868            redo A;
4869          } else {
4870            $self->{state} = PI_DATA_STATE;
4871            ## Reprocess.
4872            redo A;
4873          }
4874        } elsif ($self->{state} == PI_DATA_STATE) {
4875          if ($self->{nc} == 0x003F) { # ?
4876            $self->{state} = PI_DATA_AFTER_STATE;
4877            
4878        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4879          $self->{line_prev} = $self->{line};
4880          $self->{column_prev} = $self->{column};
4881          $self->{column}++;
4882          $self->{nc}
4883              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4884        } else {
4885          $self->{set_nc}->($self);
4886        }
4887      
4888            redo A;
4889          } elsif ($self->{nc} == -1) {
4890            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type
4891            if ($self->{in_subset}) {
4892              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state"
4893            } else {
4894              $self->{state} = DATA_STATE;
4895              $self->{s_kwd} = '';
4896            }
4897            ## Reprocess.
4898            return  ($self->{ct}); # pi
4899            redo A;
4900          } else {
4901            $self->{ct}->{data} .= chr $self->{nc}; # pi
4902            $self->{read_until}->($self->{ct}->{data}, q[?],
4903                                  length $self->{ct}->{data});
4904            ## Stay in the state.
4905            
4906        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4907          $self->{line_prev} = $self->{line};
4908          $self->{column_prev} = $self->{column};
4909          $self->{column}++;
4910          $self->{nc}
4911              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4912        } else {
4913          $self->{set_nc}->($self);
4914        }
4915      
4916            ## Reprocess.
4917            redo A;
4918          }
4919        } elsif ($self->{state} == PI_AFTER_STATE) {
4920          ## XML5: Part of "Pi after state".
4921    
4922          if ($self->{nc} == 0x003E) { # >
4923            if ($self->{in_subset}) {
4924              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4925            } else {
4926              $self->{state} = DATA_STATE;
4927              $self->{s_kwd} = '';
4928            }
4929            
4930        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4931          $self->{line_prev} = $self->{line};
4932          $self->{column_prev} = $self->{column};
4933          $self->{column}++;
4934          $self->{nc}
4935              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4936        } else {
4937          $self->{set_nc}->($self);
4938        }
4939      
4940            return  ($self->{ct}); # pi
4941            redo A;
4942          } elsif ($self->{nc} == 0x003F) { # ?
4943            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no s after target', ## TODO: type
4944                            line => $self->{line_prev},
4945                            column => $self->{column_prev}); ## XML5: no error
4946            $self->{ct}->{data} .= '?';
4947            $self->{state} = PI_DATA_AFTER_STATE;
4948            
4949        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4950          $self->{line_prev} = $self->{line};
4951          $self->{column_prev} = $self->{column};
4952          $self->{column}++;
4953          $self->{nc}
4954              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4955        } else {
4956          $self->{set_nc}->($self);
4957        }
4958      
4959            redo A;
4960          } else {
4961            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no s after target', ## TODO: type
4962                            line => $self->{line_prev},
4963                            column => $self->{column_prev}
4964                                + 1 * ($self->{nc} == -1)); ## XML5: no error
4965            $self->{ct}->{data} .= '?'; ## XML5: not appended
4966            $self->{state} = PI_DATA_STATE;
4967            ## Reprocess.
4968            redo A;
4969          }
4970        } elsif ($self->{state} == PI_DATA_AFTER_STATE) {
4971          ## XML5: Same as "pi after state" and "DOCTYPE pi after state".
4972    
4973          if ($self->{nc} == 0x003E) { # >
4974            if ($self->{in_subset}) {
4975              $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4976            } else {
4977              $self->{state} = DATA_STATE;
4978              $self->{s_kwd} = '';
4979            }
4980            
4981        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4982          $self->{line_prev} = $self->{line};
4983          $self->{column_prev} = $self->{column};
4984          $self->{column}++;
4985          $self->{nc}
4986              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4987        } else {
4988          $self->{set_nc}->($self);
4989        }
4990      
4991            return  ($self->{ct}); # pi
4992            redo A;
4993          } elsif ($self->{nc} == 0x003F) { # ?
4994            $self->{ct}->{data} .= '?';
4995            ## Stay in the state.
4996            
4997        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4998          $self->{line_prev} = $self->{line};
4999          $self->{column_prev} = $self->{column};
5000          $self->{column}++;
5001          $self->{nc}
5002              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5003        } else {
5004          $self->{set_nc}->($self);
5005        }
5006      
5007            redo A;
5008          } else {
5009            $self->{ct}->{data} .= '?'; ## XML5: not appended
5010            $self->{state} = PI_DATA_STATE;
5011            ## Reprocess.
5012            redo A;
5013          }
5014    
5015        } elsif ($self->{state} == DOCTYPE_INTERNAL_SUBSET_STATE) {
5016          if ($self->{nc} == 0x003C) { # <
5017            $self->{state} = DOCTYPE_TAG_STATE;
5018            
5019        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5020          $self->{line_prev} = $self->{line};
5021          $self->{column_prev} = $self->{column};
5022          $self->{column}++;
5023          $self->{nc}
5024              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5025        } else {
5026          $self->{set_nc}->($self);
5027        }
5028      
5029            redo A;
5030          } elsif ($self->{nc} == 0x0025) { # %
5031            ## XML5: Not defined yet.
5032    
5033            ## TODO:
5034            
5035        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5036          $self->{line_prev} = $self->{line};
5037          $self->{column_prev} = $self->{column};
5038          $self->{column}++;
5039          $self->{nc}
5040              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5041        } else {
5042          $self->{set_nc}->($self);
5043        }
5044      
5045            redo A;
5046          } elsif ($self->{nc} == 0x005D) { # ]
5047            delete $self->{in_subset};
5048            $self->{state} = DOCTYPE_INTERNAL_SUBSET_AFTER_STATE;
5049            
5050        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5051          $self->{line_prev} = $self->{line};
5052          $self->{column_prev} = $self->{column};
5053          $self->{column}++;
5054          $self->{nc}
5055              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5056        } else {
5057          $self->{set_nc}->($self);
5058        }
5059      
5060            redo A;
5061          } elsif ($is_space->{$self->{nc}}) {
5062            ## Stay in the state.
5063            
5064        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5065          $self->{line_prev} = $self->{line};
5066          $self->{column_prev} = $self->{column};
5067          $self->{column}++;
5068          $self->{nc}
5069              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5070        } else {
5071          $self->{set_nc}->($self);
5072        }
5073      
5074            redo A;
5075          } elsif ($self->{nc} == -1) {
5076            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed internal subset'); ## TODO: type
5077            delete $self->{in_subset};
5078            $self->{state} = DATA_STATE;
5079            $self->{s_kwd} = '';
5080            ## Reconsume.
5081            return  ({type => END_OF_DOCTYPE_TOKEN});
5082            redo A;
5083          } else {
5084            unless ($self->{internal_subset_tainted}) {
5085              ## XML5: No parse error.
5086              $self->{parse_error}->(level => $self->{level}->{must}, type => 'string in internal subset');
5087              $self->{internal_subset_tainted} = 1;
5088            }
5089            ## Stay in the state.
5090            
5091        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5092          $self->{line_prev} = $self->{line};
5093          $self->{column_prev} = $self->{column};
5094          $self->{column}++;
5095          $self->{nc}
5096              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5097        } else {
5098          $self->{set_nc}->($self);
5099        }
5100      
5101            redo A;
5102          }
5103        } elsif ($self->{state} == DOCTYPE_INTERNAL_SUBSET_AFTER_STATE) {
5104          if ($self->{nc} == 0x003E) { # >
5105            $self->{state} = DATA_STATE;
5106            $self->{s_kwd} = '';
5107            
5108        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5109          $self->{line_prev} = $self->{line};
5110          $self->{column_prev} = $self->{column};
5111          $self->{column}++;
5112          $self->{nc}
5113              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5114        } else {
5115          $self->{set_nc}->($self);
5116        }
5117      
5118            return  ({type => END_OF_DOCTYPE_TOKEN});
5119            redo A;
5120          } elsif ($self->{nc} == -1) {
5121            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE');
5122            $self->{state} = DATA_STATE;
5123            $self->{s_kwd} = '';
5124            ## Reconsume.
5125            return  ({type => END_OF_DOCTYPE_TOKEN});
5126            redo A;
5127          } else {
5128            ## XML5: No parse error and stay in the state.
5129            $self->{parse_error}->(level => $self->{level}->{must}, type => 'string after internal subset'); ## TODO: type
5130    
5131            $self->{state} = BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE;
5132            
5133        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5134          $self->{line_prev} = $self->{line};
5135          $self->{column_prev} = $self->{column};
5136          $self->{column}++;
5137          $self->{nc}
5138              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5139        } else {
5140          $self->{set_nc}->($self);
5141        }
5142      
5143            redo A;
5144          }
5145        } elsif ($self->{state} == BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE) {
5146          if ($self->{nc} == 0x003E) { # >
5147            $self->{state} = DATA_STATE;
5148            $self->{s_kwd} = '';
5149            
5150        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5151          $self->{line_prev} = $self->{line};
5152          $self->{column_prev} = $self->{column};
5153          $self->{column}++;
5154          $self->{nc}
5155              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5156        } else {
5157          $self->{set_nc}->($self);
5158        }
5159      
5160            return  ({type => END_OF_DOCTYPE_TOKEN});
5161            redo A;
5162          } elsif ($self->{nc} == -1) {
5163            $self->{state} = DATA_STATE;
5164            $self->{s_kwd} = '';
5165            ## Reconsume.
5166            return  ({type => END_OF_DOCTYPE_TOKEN});
5167            redo A;
5168          } else {
5169            ## Stay in the state.
5170            
5171        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5172          $self->{line_prev} = $self->{line};
5173          $self->{column_prev} = $self->{column};
5174          $self->{column}++;
5175          $self->{nc}
5176              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5177        } else {
5178          $self->{set_nc}->($self);
5179        }
5180      
5181            redo A;
5182          }
5183        } elsif ($self->{state} == DOCTYPE_TAG_STATE) {
5184          if ($self->{nc} == 0x0021) { # !
5185            $self->{state} = DOCTYPE_MARKUP_DECLARATION_OPEN_STATE;
5186            
5187        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5188          $self->{line_prev} = $self->{line};
5189          $self->{column_prev} = $self->{column};
5190          $self->{column}++;
5191          $self->{nc}
5192              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5193        } else {
5194          $self->{set_nc}->($self);
5195        }
5196      
5197            redo A;
5198          } elsif ($self->{nc} == 0x003F) { # ?
5199            $self->{state} = PI_STATE;
5200            
5201        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5202          $self->{line_prev} = $self->{line};
5203          $self->{column_prev} = $self->{column};
5204          $self->{column}++;
5205          $self->{nc}
5206              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5207        } else {
5208          $self->{set_nc}->($self);
5209        }
5210      
5211            redo A;
5212          } elsif ($self->{nc} == -1) {
5213            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago');
5214            $self->{state} = DATA_STATE;
5215            $self->{s_kwd} = '';
5216            ## Reconsume.
5217            redo A;
5218          } else {
5219            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago', ## XML5: Not a parse error.
5220                            line => $self->{line_prev},
5221                            column => $self->{column_prev});
5222            $self->{state} = BOGUS_COMMENT_STATE;
5223            $self->{ct} = {type => COMMENT_TOKEN,
5224                           data => '',
5225                          }; ## NOTE: Will be discarded.
5226            
5227        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5228          $self->{line_prev} = $self->{line};
5229          $self->{column_prev} = $self->{column};
5230          $self->{column}++;
5231          $self->{nc}
5232              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5233        } else {
5234          $self->{set_nc}->($self);
5235        }
5236      
5237            redo A;
5238          }
5239        } elsif ($self->{state} == DOCTYPE_MARKUP_DECLARATION_OPEN_STATE) {
5240          ## XML5: "DOCTYPE markup declaration state".
5241          
5242          if ($self->{nc} == 0x002D) { # -
5243            $self->{state} = MD_HYPHEN_STATE;
5244            
5245        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5246          $self->{line_prev} = $self->{line};
5247          $self->{column_prev} = $self->{column};
5248          $self->{column}++;
5249          $self->{nc}
5250              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5251        } else {
5252          $self->{set_nc}->($self);
5253        }
5254      
5255            redo A;
5256          } elsif ($self->{nc} == 0x0045) { # E
5257            $self->{state} = MD_E_STATE;
5258            $self->{kwd} = chr $self->{nc};
5259            
5260        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5261          $self->{line_prev} = $self->{line};
5262          $self->{column_prev} = $self->{column};
5263          $self->{column}++;
5264          $self->{nc}
5265              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5266        } else {
5267          $self->{set_nc}->($self);
5268        }
5269      
5270            redo A;
5271          } elsif ($self->{nc} == 0x0041) { # A
5272            $self->{state} = MD_ATTLIST_STATE;
5273            $self->{kwd} = chr $self->{nc};
5274            
5275        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5276          $self->{line_prev} = $self->{line};
5277          $self->{column_prev} = $self->{column};
5278          $self->{column}++;
5279          $self->{nc}
5280              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5281        } else {
5282          $self->{set_nc}->($self);
5283        }
5284      
5285            redo A;
5286          } elsif ($self->{nc} == 0x004E) { # N
5287            $self->{state} = MD_NOTATION_STATE;
5288            $self->{kwd} = chr $self->{nc};
5289            
5290        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5291          $self->{line_prev} = $self->{line};
5292          $self->{column_prev} = $self->{column};
5293          $self->{column}++;
5294          $self->{nc}
5295              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5296        } else {
5297          $self->{set_nc}->($self);
5298        }
5299      
5300            redo A;
5301          } else {
5302            #
5303          }
5304          
5305          ## XML5: No parse error.
5306          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5307                          line => $self->{line_prev},
5308                          column => $self->{column_prev} - 1);
5309          ## Reconsume.
5310          $self->{state} = BOGUS_COMMENT_STATE;
5311          $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded.
5312          redo A;
5313        } elsif ($self->{state} == MD_E_STATE) {
5314          if ($self->{nc} == 0x004E) { # N
5315            $self->{state} = MD_ENTITY_STATE;
5316            $self->{kwd} .= chr $self->{nc};
5317            
5318        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5319          $self->{line_prev} = $self->{line};
5320          $self->{column_prev} = $self->{column};
5321          $self->{column}++;
5322          $self->{nc}
5323              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5324        } else {
5325          $self->{set_nc}->($self);
5326        }
5327      
5328            redo A;
5329          } elsif ($self->{nc} == 0x004C) { # L
5330            ## XML5: <!ELEMENT> not supported.
5331            $self->{state} = MD_ELEMENT_STATE;
5332            $self->{kwd} .= chr $self->{nc};
5333            
5334        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5335          $self->{line_prev} = $self->{line};
5336          $self->{column_prev} = $self->{column};
5337          $self->{column}++;
5338          $self->{nc}
5339              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5340        } else {
5341          $self->{set_nc}->($self);
5342        }
5343      
5344            redo A;
5345          } else {
5346            ## XML5: No parse error.
5347            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5348                            line => $self->{line_prev},
5349                            column => $self->{column_prev} - 2
5350                                + 1 * ($self->{nc} == -1));
5351            ## Reconsume.
5352            $self->{state} = BOGUS_COMMENT_STATE;
5353            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5354            redo A;
5355          }
5356        } elsif ($self->{state} == MD_ENTITY_STATE) {
5357          if ($self->{nc} == {
5358                'EN' => 0x0054, # T
5359                'ENT' => 0x0049, # I
5360                'ENTI' => 0x0054, # T
5361              }->{$self->{kwd}}) {
5362            ## Stay in the state.
5363            $self->{kwd} .= chr $self->{nc};
5364            
5365        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5366          $self->{line_prev} = $self->{line};
5367          $self->{column_prev} = $self->{column};
5368          $self->{column}++;
5369          $self->{nc}
5370              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5371        } else {
5372          $self->{set_nc}->($self);
5373        }
5374      
5375            redo A;
5376          } elsif ($self->{kwd} eq 'ENTIT' and
5377                   $self->{nc} == 0x0059) { # Y
5378            $self->{ct} = {type => GENERAL_ENTITY_TOKEN, name => '', text => '',
5379                           line => $self->{line_prev},
5380                           column => $self->{column_prev} - 6};
5381            $self->{state} = DOCTYPE_MD_STATE;
5382            
5383        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5384          $self->{line_prev} = $self->{line};
5385          $self->{column_prev} = $self->{column};
5386          $self->{column}++;
5387          $self->{nc}
5388              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5389        } else {
5390          $self->{set_nc}->($self);
5391        }
5392      
5393            redo A;
5394          } else {
5395            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5396                            line => $self->{line_prev},
5397                            column => $self->{column_prev} - 1
5398                                - (length $self->{kwd})
5399                                + 1 * ($self->{nc} == -1));
5400            $self->{state} = BOGUS_COMMENT_STATE;
5401            ## Reconsume.
5402            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5403            redo A;
5404          }
5405        } elsif ($self->{state} == MD_ELEMENT_STATE) {
5406          if ($self->{nc} == {
5407                'EL' => 0x0045, # E
5408                'ELE' => 0x004D, # M
5409                'ELEM' => 0x0045, # E
5410                'ELEME' => 0x004E, # N
5411              }->{$self->{kwd}}) {
5412            ## Stay in the state.
5413            $self->{kwd} .= chr $self->{nc};
5414            
5415        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5416          $self->{line_prev} = $self->{line};
5417          $self->{column_prev} = $self->{column};
5418          $self->{column}++;
5419          $self->{nc}
5420              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5421        } else {
5422          $self->{set_nc}->($self);
5423        }
5424      
5425            redo A;
5426          } elsif ($self->{kwd} eq 'ELEMEN' and
5427                   $self->{nc} == 0x0054) { # T
5428            $self->{ct} = {type => ELEMENT_TOKEN, name => '',
5429                           line => $self->{line_prev},
5430                           column => $self->{column_prev} - 6};
5431            $self->{state} = DOCTYPE_MD_STATE;
5432            
5433        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5434          $self->{line_prev} = $self->{line};
5435          $self->{column_prev} = $self->{column};
5436          $self->{column}++;
5437          $self->{nc}
5438              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5439        } else {
5440          $self->{set_nc}->($self);
5441        }
5442      
5443            redo A;
5444          } else {
5445            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5446                            line => $self->{line_prev},
5447                            column => $self->{column_prev} - 1
5448                                - (length $self->{kwd})
5449                                + 1 * ($self->{nc} == -1));
5450            $self->{state} = BOGUS_COMMENT_STATE;
5451            ## Reconsume.
5452            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5453            redo A;
5454          }
5455        } elsif ($self->{state} == MD_ATTLIST_STATE) {
5456          if ($self->{nc} == {
5457                'A' => 0x0054, # T
5458                'AT' => 0x0054, # T
5459                'ATT' => 0x004C, # L
5460                'ATTL' => 0x0049, # I
5461                'ATTLI' => 0x0053, # S
5462              }->{$self->{kwd}}) {
5463            ## Stay in the state.
5464            $self->{kwd} .= chr $self->{nc};
5465            
5466        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5467          $self->{line_prev} = $self->{line};
5468          $self->{column_prev} = $self->{column};
5469          $self->{column}++;
5470          $self->{nc}
5471              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5472        } else {
5473          $self->{set_nc}->($self);
5474        }
5475      
5476            redo A;
5477          } elsif ($self->{kwd} eq 'ATTLIS' and
5478                   $self->{nc} == 0x0054) { # T
5479            $self->{ct} = {type => ATTLIST_TOKEN, name => '',
5480                           line => $self->{line_prev},
5481                           column => $self->{column_prev} - 6};
5482            $self->{state} = DOCTYPE_MD_STATE;
5483            
5484        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5485          $self->{line_prev} = $self->{line};
5486          $self->{column_prev} = $self->{column};
5487          $self->{column}++;
5488          $self->{nc}
5489              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5490        } else {
5491          $self->{set_nc}->($self);
5492        }
5493      
5494            redo A;
5495          } else {
5496            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5497                            line => $self->{line_prev},
5498                            column => $self->{column_prev} - 1
5499                                 - (length $self->{kwd})
5500                                 + 1 * ($self->{nc} == -1));
5501            $self->{state} = BOGUS_COMMENT_STATE;
5502            ## Reconsume.
5503            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5504            redo A;
5505          }
5506        } elsif ($self->{state} == MD_NOTATION_STATE) {
5507          if ($self->{nc} == {
5508                'N' => 0x004F, # O
5509                'NO' => 0x0054, # T
5510                'NOT' => 0x0041, # A
5511                'NOTA' => 0x0054, # T
5512                'NOTAT' => 0x0049, # I
5513                'NOTATI' => 0x004F, # O
5514              }->{$self->{kwd}}) {
5515            ## Stay in the state.
5516            $self->{kwd} .= chr $self->{nc};
5517            
5518        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5519          $self->{line_prev} = $self->{line};
5520          $self->{column_prev} = $self->{column};
5521          $self->{column}++;
5522          $self->{nc}
5523              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5524        } else {
5525          $self->{set_nc}->($self);
5526        }
5527      
5528            redo A;
5529          } elsif ($self->{kwd} eq 'NOTATIO' and
5530                   $self->{nc} == 0x004E) { # N
5531            $self->{ct} = {type => NOTATION_TOKEN, name => '',
5532                           line => $self->{line_prev},
5533                           column => $self->{column_prev} - 6};
5534            $self->{state} = DOCTYPE_MD_STATE;
5535            
5536        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5537          $self->{line_prev} = $self->{line};
5538          $self->{column_prev} = $self->{column};
5539          $self->{column}++;
5540          $self->{nc}
5541              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5542        } else {
5543          $self->{set_nc}->($self);
5544        }
5545      
5546            redo A;
5547          } else {
5548            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus comment',
5549                            line => $self->{line_prev},
5550                            column => $self->{column_prev} - 1
5551                                - (length $self->{kwd})
5552                                + 1 * ($self->{nc} == -1));
5553            $self->{state} = BOGUS_COMMENT_STATE;
5554            ## Reconsume.
5555            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5556            redo A;
5557          }
5558        } elsif ($self->{state} == DOCTYPE_MD_STATE) {
5559          ## XML5: "DOCTYPE ENTITY state", "DOCTYPE ATTLIST state", and
5560          ## "DOCTYPE NOTATION state".
5561    
5562          if ($is_space->{$self->{nc}}) {
5563            ## XML5: [NOTATION] Switch to the "DOCTYPE NOTATION identifier state".
5564            $self->{state} = BEFORE_MD_NAME_STATE;
5565            
5566        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5567          $self->{line_prev} = $self->{line};
5568          $self->{column_prev} = $self->{column};
5569          $self->{column}++;
5570          $self->{nc}
5571              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5572        } else {
5573          $self->{set_nc}->($self);
5574        }
5575      
5576            redo A;
5577          } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
5578                   $self->{nc} == 0x0025) { # %
5579            ## XML5: Switch to the "DOCTYPE bogus comment state".
5580            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before md name'); ## TODO: type
5581            $self->{state} = DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE;
5582            
5583        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5584          $self->{line_prev} = $self->{line};
5585          $self->{column_prev} = $self->{column};
5586          $self->{column}++;
5587          $self->{nc}
5588              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5589        } else {
5590          $self->{set_nc}->($self);
5591        }
5592      
5593            redo A;
5594          } elsif ($self->{nc} == -1) {
5595            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
5596            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
5597            ## Reconsume.
5598            redo A;
5599          } elsif ($self->{nc} == 0x003E) { # >
5600            ## XML5: Switch to the "DOCTYPE bogus comment state".
5601            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md name'); ## TODO: type
5602            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5603            
5604        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5605          $self->{line_prev} = $self->{line};
5606          $self->{column_prev} = $self->{column};
5607          $self->{column}++;
5608          $self->{nc}
5609              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5610        } else {
5611          $self->{set_nc}->($self);
5612        }
5613      
5614            redo A;
5615          } else {
5616            ## XML5: Switch to the "DOCTYPE bogus comment state".
5617            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before md name'); ## TODO: type
5618            $self->{state} = BEFORE_MD_NAME_STATE;
5619            redo A;
5620          }
5621        } elsif ($self->{state} == BEFORE_MD_NAME_STATE) {
5622          ## XML5: "DOCTYPE ENTITY parameter state", "DOCTYPE ENTITY type
5623          ## before state", "DOCTYPE ATTLIST name before state".
5624    
5625          if ($is_space->{$self->{nc}}) {
5626            ## Stay in the state.
5627            
5628        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5629          $self->{line_prev} = $self->{line};
5630          $self->{column_prev} = $self->{column};
5631          $self->{column}++;
5632          $self->{nc}
5633              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5634        } else {
5635          $self->{set_nc}->($self);
5636        }
5637      
5638            redo A;
5639          } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
5640                   $self->{nc} == 0x0025) { # %
5641            $self->{state} = DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE;
5642            
5643        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5644          $self->{line_prev} = $self->{line};
5645          $self->{column_prev} = $self->{column};
5646          $self->{column}++;
5647          $self->{nc}
5648              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5649        } else {
5650          $self->{set_nc}->($self);
5651        }
5652      
5653            redo A;
5654          } elsif ($self->{nc} == 0x003E) { # >
5655            ## XML5: Same as "Anything else".
5656            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md name'); ## TODO: type
5657            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5658            
5659        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5660          $self->{line_prev} = $self->{line};
5661          $self->{column_prev} = $self->{column};
5662          $self->{column}++;
5663          $self->{nc}
5664              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5665        } else {
5666          $self->{set_nc}->($self);
5667        }
5668      
5669            redo A;
5670          } elsif ($self->{nc} == -1) {
5671            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
5672            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
5673            ## Reconsume.
5674            redo A;
5675          } else {
5676            ## XML5: [ATTLIST] Not defined yet.
5677            $self->{ct}->{name} .= chr $self->{nc};
5678            $self->{state} = MD_NAME_STATE;
5679            
5680        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5681          $self->{line_prev} = $self->{line};
5682          $self->{column_prev} = $self->{column};
5683          $self->{column}++;
5684          $self->{nc}
5685              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5686        } else {
5687          $self->{set_nc}->($self);
5688        }
5689      
5690            redo A;
5691          }
5692        } elsif ($self->{state} == DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE) {
5693          if ($is_space->{$self->{nc}}) {
5694            ## XML5: Switch to the "DOCTYPE ENTITY parameter state".
5695            $self->{ct}->{type} = PARAMETER_ENTITY_TOKEN;
5696            $self->{state} = BEFORE_MD_NAME_STATE;
5697            
5698        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5699          $self->{line_prev} = $self->{line};
5700          $self->{column_prev} = $self->{column};
5701          $self->{column}++;
5702          $self->{nc}
5703              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5704        } else {
5705          $self->{set_nc}->($self);
5706        }
5707      
5708            redo A;
5709          } elsif ($self->{nc} == 0x003E) { # >
5710            ## XML5: Same as "Anything else".
5711            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md name'); ## TODO: type
5712            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5713            
5714        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5715          $self->{line_prev} = $self->{line};
5716          $self->{column_prev} = $self->{column};
5717          $self->{column}++;
5718          $self->{nc}
5719              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5720        } else {
5721          $self->{set_nc}->($self);
5722        }
5723      
5724            redo A;
5725          } elsif ($self->{nc} == -1) {
5726            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md');
5727            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
5728            ## Reconsume.
5729            redo A;
5730          } else {
5731            ## XML5: No parse error.
5732            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no space after ENTITY percent'); ## TODO: type
5733            $self->{state} = BOGUS_COMMENT_STATE;
5734            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5735            ## Reconsume.
5736            redo A;
5737          }
5738        } elsif ($self->{state} == MD_NAME_STATE) {
5739          ## XML5: "DOCTYPE ENTITY name state" and "DOCTYPE ATTLIST name state".
5740          
5741          if ($is_space->{$self->{nc}}) {
5742            ## TODO:
5743            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
5744            
5745        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5746          $self->{line_prev} = $self->{line};
5747          $self->{column_prev} = $self->{column};
5748          $self->{column}++;
5749          $self->{nc}
5750              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5751        } else {
5752          $self->{set_nc}->($self);
5753        }
5754      
5755            redo A;
5756          } elsif ($self->{nc} == 0x003E) { # >
5757            if ($self->{ct}->{type} == ATTLIST_TOKEN) {
5758              #
5759            } else {
5760              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no md body'); ## TODO: type
5761            }
5762            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5763            
5764        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5765          $self->{line_prev} = $self->{line};
5766          $self->{column_prev} = $self->{column};
5767          $self->{column}++;
5768          $self->{nc}
5769              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5770        } else {
5771          $self->{set_nc}->($self);
5772        }
5773      
5774            return  ($self->{ct}); # ELEMENT/ENTITY/ATTLIST/NOTATION
5775            redo A;
5776          } elsif ($self->{nc} == -1) {
5777            ## XML5: [ATTLIST] No parse error.
5778            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md');
5779            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
5780            ## Reconsume.
5781            return  ($self->{ct}); # ELEMENT/ENTITY/ATTLIST/NOTATION
5782            redo A;
5783          } else {
5784            ## XML5: [ATTLIST] Not defined yet.
5785            $self->{ct}->{name} .= chr $self->{nc};
5786            ## Stay in the state.
5787            
5788        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5789          $self->{line_prev} = $self->{line};
5790          $self->{column_prev} = $self->{column};
5791          $self->{column}++;
5792          $self->{nc}
5793              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5794        } else {
5795          $self->{set_nc}->($self);
5796        }
5797      
5798            redo A;
5799          }
5800        } elsif ($self->{state} == DOCTYPE_ATTLIST_NAME_AFTER_STATE) {
5801          if ($is_space->{$self->{nc}}) {
5802            ## Stay in the state.
5803            
5804        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5805          $self->{line_prev} = $self->{line};
5806          $self->{column_prev} = $self->{column};
5807          $self->{column}++;
5808          $self->{nc}
5809              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5810        } else {
5811          $self->{set_nc}->($self);
5812        }
5813      
5814            redo A;
5815          } elsif ($self->{nc} == 0x003E) { # >
5816            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
5817            
5818        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
5819          $self->{line_prev} = $self->{line};
5820          $self->{column_prev} = $self->{column};
5821          $self->{column}++;
5822          $self->{nc}
5823              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
5824        } else {
5825          $self->{set_nc}->($self);
5826        }
5827      
5828            return  ($self->{ct}); # ATTLIST
5829            redo A;
5830          } elsif ($self->{nc} == -1) {
5831            ## XML5: No parse error.
5832            $self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed md'); ## TODO: type
5833            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
5834            redo A;
5835          } else {
5836            ## XML5: Not defined yet.
5837    
5838            ## TODO: ...
5839    
5840            $self->{state} = BOGUS_COMMENT_STATE;
5841            $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
5842            ## Reconsume.
5843            redo A;
5844          }
5845    
5846      } else {      } else {
5847        die "$0: $self->{state}: Unknown state";        die "$0: $self->{state}: Unknown state";
5848      }      }

Legend:
Removed from v.1.5  
changed lines
  Added in v.1.14

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24