/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm

Parent Directory | Revision Log | View Patch

revision 1.5 by wakaba, Tue Oct 14 14:38:59 2008 UTC revision 1.11 by wakaba, Wed Oct 15 10:50:38 2008 UTC
# Line 114  sub HEXREF_HEX_STATE () { 48 } Line 114  sub HEXREF_HEX_STATE () { 48 }
114  sub ENTITY_NAME_STATE () { 49 }  sub ENTITY_NAME_STATE () { 49 }
115  sub PCDATA_STATE () { 50 } # "data state" in the spec  sub PCDATA_STATE () { 50 } # "data state" in the spec
116    
117    ## XML states
118    sub PI_STATE () { 51 }
119    sub PI_TARGET_STATE () { 52 }
120    sub PI_TARGET_AFTER_STATE () { 53 }
121    sub PI_DATA_STATE () { 54 }
122    sub PI_AFTER_STATE () { 55 }
123    sub PI_DATA_AFTER_STATE () { 56 }
124    
125  ## Tree constructor state constants (see Whatpm::HTML for the full  ## Tree constructor state constants (see Whatpm::HTML for the full
126  ## list and descriptions)  ## list and descriptions)
127    
# Line 208  sub _initialize_tokenizer ($) { Line 216  sub _initialize_tokenizer ($) {
216    
217  ## A token has:  ## A token has:
218  ##   ->{type} == DOCTYPE_TOKEN, START_TAG_TOKEN, END_TAG_TOKEN, COMMENT_TOKEN,  ##   ->{type} == DOCTYPE_TOKEN, START_TAG_TOKEN, END_TAG_TOKEN, COMMENT_TOKEN,
219  ##       CHARACTER_TOKEN, or END_OF_FILE_TOKEN  ##       CHARACTER_TOKEN, END_OF_FILE_TOKEN, PI_TOKEN, or ABORT_TOKEN
220  ##   ->{name} (DOCTYPE_TOKEN)  ##   ->{name} (DOCTYPE_TOKEN)
221  ##   ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN)  ##   ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN)
222    ##   ->{target} (PI_TOKEN)
223  ##   ->{pubid} (DOCTYPE_TOKEN)  ##   ->{pubid} (DOCTYPE_TOKEN)
224  ##   ->{sysid} (DOCTYPE_TOKEN)  ##   ->{sysid} (DOCTYPE_TOKEN)
225  ##   ->{quirks} == 1 or 0 (DOCTYPE_TOKEN): "force-quirks" flag  ##   ->{quirks} == 1 or 0 (DOCTYPE_TOKEN): "force-quirks" flag
# Line 218  sub _initialize_tokenizer ($) { Line 227  sub _initialize_tokenizer ($) {
227  ##        ->{name}  ##        ->{name}
228  ##        ->{value}  ##        ->{value}
229  ##        ->{has_reference} == 1 or 0  ##        ->{has_reference} == 1 or 0
230  ##   ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN)  ##        ->{index}: Index of the attribute in a tag.
231    ##   ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN, PI_TOKEN)
232    ##   ->{has_reference} == 1 or 0 (CHARACTER_TOKEN)
233    ##   ->{last_index} (ELEMENT_TOKEN): Next attribute's index - 1.
234  ## NOTE: The "self-closing flag" is held as |$self->{self_closing}|.  ## NOTE: The "self-closing flag" is held as |$self->{self_closing}|.
235  ##     |->{self_closing}| is used to save the value of |$self->{self_closing}|  ##     |->{self_closing}| is used to save the value of |$self->{self_closing}|
236  ##     while the token is pushed back to the stack.  ##     while the token is pushed back to the stack.
# Line 498  sub _get_next_token ($) { Line 510  sub _get_next_token ($) {
510        return  ($token);        return  ($token);
511        redo A;        redo A;
512      } elsif ($self->{state} == TAG_OPEN_STATE) {      } elsif ($self->{state} == TAG_OPEN_STATE) {
513          ## XML5: "tag state".
514    
515        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
516          if ($self->{nc} == 0x002F) { # /          if ($self->{nc} == 0x002F) { # /
517                        
# Line 629  sub _get_next_token ($) { Line 643  sub _get_next_token ($) {
643    
644            redo A;            redo A;
645          } elsif ($self->{nc} == 0x003F) { # ?          } elsif ($self->{nc} == 0x003F) { # ?
646                        if ($self->{is_xml}) {
647            $self->{parse_error}->(level => $self->{level}->{must}, type => 'pio',              
648                            line => $self->{line_prev},              $self->{state} = PI_STATE;
649                            column => $self->{column_prev});              
650            $self->{state} = BOGUS_COMMENT_STATE;      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
651            $self->{ct} = {type => COMMENT_TOKEN, data => '',        $self->{line_prev} = $self->{line};
652                                      line => $self->{line_prev},        $self->{column_prev} = $self->{column};
653                                      column => $self->{column_prev},        $self->{column}++;
654                                     };        $self->{nc}
655            ## $self->{nc} is intentionally left as is            = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
656            redo A;      } else {
657          } else {        $self->{set_nc}->($self);
658        }
659      
660                redo A;
661              } else {
662                
663                $self->{parse_error}->(level => $self->{level}->{must}, type => 'pio',
664                                line => $self->{line_prev},
665                                column => $self->{column_prev});
666                $self->{state} = BOGUS_COMMENT_STATE;
667                $self->{ct} = {type => COMMENT_TOKEN, data => '',
668                               line => $self->{line_prev},
669                               column => $self->{column_prev},
670                              };
671                ## $self->{nc} is intentionally left as is
672                redo A;
673              }
674            } elsif (not $self->{is_xml} or $is_space->{$self->{nc}}) {
675                        
676            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago',            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare stago',
677                            line => $self->{line_prev},                            line => $self->{line_prev},
# Line 655  sub _get_next_token ($) { Line 686  sub _get_next_token ($) {
686                     });                     });
687    
688            redo A;            redo A;
689            } else {
690              ## XML5: "<:" is a parse error.
691              
692              $self->{ct} = {type => START_TAG_TOKEN,
693                                        tag_name => chr ($self->{nc}),
694                                        line => $self->{line_prev},
695                                        column => $self->{column_prev}};
696              $self->{state} = TAG_NAME_STATE;
697              
698        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
699          $self->{line_prev} = $self->{line};
700          $self->{column_prev} = $self->{column};
701          $self->{column}++;
702          $self->{nc}
703              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
704        } else {
705          $self->{set_nc}->($self);
706        }
707      
708              redo A;
709          }          }
710        } else {        } else {
711          die "$0: $self->{content_model} in tag open";          die "$0: $self->{content_model} in tag open";
# Line 663  sub _get_next_token ($) { Line 714  sub _get_next_token ($) {
714        ## NOTE: The "close tag open state" in the spec is implemented as        ## NOTE: The "close tag open state" in the spec is implemented as
715        ## |CLOSE_TAG_OPEN_STATE| and |CDATA_RCDATA_CLOSE_TAG_STATE|.        ## |CLOSE_TAG_OPEN_STATE| and |CDATA_RCDATA_CLOSE_TAG_STATE|.
716    
717          ## XML5: "end tag state".
718    
719        my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"        my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"
720        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA        if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
721          if (defined $self->{last_stag_name}) {          if (defined $self->{last_stag_name}) {
# Line 724  sub _get_next_token ($) { Line 777  sub _get_next_token ($) {
777        
778          redo A;          redo A;
779        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
           
780          $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty end tag',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'empty end tag',
781                          line => $self->{line_prev}, ## "<" in "</>"                          line => $self->{line_prev}, ## "<" in "</>"
782                          column => $self->{column_prev} - 1);                          column => $self->{column_prev} - 1);
783          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
784          $self->{s_kwd} = '';          $self->{s_kwd} = '';
785                    if ($self->{is_xml}) {
786              
787              ## XML5: No parse error.
788              
789              ## NOTE: This parser raises a parse error, since it supports
790              ## XML1, not XML5.
791    
792              ## NOTE: A short end tag token.
793              my $ct = {type => END_TAG_TOKEN,
794                        tag_name => '',
795                        line => $self->{line_prev},
796                        column => $self->{column_prev} - 1,
797                       };
798              
799        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
800          $self->{line_prev} = $self->{line};
801          $self->{column_prev} = $self->{column};
802          $self->{column}++;
803          $self->{nc}
804              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
805        } else {
806          $self->{set_nc}->($self);
807        }
808      
809              return  ($ct);
810            } else {
811              
812              
813      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
814        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
815        $self->{column_prev} = $self->{column};        $self->{column_prev} = $self->{column};
# Line 741  sub _get_next_token ($) { Line 820  sub _get_next_token ($) {
820        $self->{set_nc}->($self);        $self->{set_nc}->($self);
821      }      }
822        
823            }
824          redo A;          redo A;
825        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
826                    
# Line 754  sub _get_next_token ($) { Line 834  sub _get_next_token ($) {
834                   });                   });
835    
836          redo A;          redo A;
837        } else {        } elsif (not $self->{is_xml} or
838                   $is_space->{$self->{nc}}) {
839                    
840          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus end tag');          $self->{parse_error}->(level => $self->{level}->{must}, type => 'bogus end tag',
841                            line => $self->{line_prev}, # "<" of "</"
842                            column => $self->{column_prev} - 1);
843          $self->{state} = BOGUS_COMMENT_STATE;          $self->{state} = BOGUS_COMMENT_STATE;
844          $self->{ct} = {type => COMMENT_TOKEN, data => '',          $self->{ct} = {type => COMMENT_TOKEN, data => '',
845                                    line => $self->{line_prev}, # "<" of "</"                                    line => $self->{line_prev}, # "<" of "</"
# Line 769  sub _get_next_token ($) { Line 852  sub _get_next_token ($) {
852          ## generated from the bogus end tag, as defined in the          ## generated from the bogus end tag, as defined in the
853          ## "bogus comment state" entry.          ## "bogus comment state" entry.
854          redo A;          redo A;
855          } else {
856            ## XML5: "</:" is a parse error.
857            
858            $self->{ct} = {type => END_TAG_TOKEN,
859                           tag_name => chr ($self->{nc}),
860                           line => $l, column => $c};
861            $self->{state} = TAG_NAME_STATE; ## XML5: "end tag name state".
862            
863        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
864          $self->{line_prev} = $self->{line};
865          $self->{column_prev} = $self->{column};
866          $self->{column}++;
867          $self->{nc}
868              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
869        } else {
870          $self->{set_nc}->($self);
871        }
872      
873            redo A;
874        }        }
875      } elsif ($self->{state} == CDATA_RCDATA_CLOSE_TAG_STATE) {      } elsif ($self->{state} == CDATA_RCDATA_CLOSE_TAG_STATE) {
876        my $ch = substr $self->{last_stag_name}, length $self->{s_kwd}, 1;        my $ch = substr $self->{last_stag_name}, length $self->{s_kwd}, 1;
# Line 959  sub _get_next_token ($) { Line 1061  sub _get_next_token ($) {
1061          redo A;          redo A;
1062        }        }
1063      } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {      } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {
1064          ## XML5: "Tag attribute name before state".
1065    
1066        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
1067                    
1068          ## Stay in the state          ## Stay in the state
# Line 1071  sub _get_next_token ($) { Line 1175  sub _get_next_token ($) {
1175               0x003D => 1, # =               0x003D => 1, # =
1176              }->{$self->{nc}}) {              }->{$self->{nc}}) {
1177                        
1178              ## XML5: Not a parse error.
1179            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');
1180          } else {          } else {
1181                        
1182              ## XML5: ":" raises a parse error and is ignored.
1183          }          }
1184          $self->{ca}          $self->{ca}
1185              = {name => chr ($self->{nc}),              = {name => chr ($self->{nc}),
# Line 1094  sub _get_next_token ($) { Line 1200  sub _get_next_token ($) {
1200          redo A;          redo A;
1201        }        }
1202      } elsif ($self->{state} == ATTRIBUTE_NAME_STATE) {      } elsif ($self->{state} == ATTRIBUTE_NAME_STATE) {
1203          ## XML5: "Tag attribute name state".
1204    
1205        my $before_leave = sub {        my $before_leave = sub {
1206          if (exists $self->{ct}->{attributes} # start tag or end tag          if (exists $self->{ct}->{attributes} # start tag or end tag
1207              ->{$self->{ca}->{name}}) { # MUST              ->{$self->{ca}->{name}}) { # MUST
# Line 1104  sub _get_next_token ($) { Line 1212  sub _get_next_token ($) {
1212                        
1213            $self->{ct}->{attributes}->{$self->{ca}->{name}}            $self->{ct}->{attributes}->{$self->{ca}->{name}}
1214              = $self->{ca};              = $self->{ca};
1215              $self->{ca}->{index} = ++$self->{ct}->{last_index};
1216          }          }
1217        }; # $before_leave        }; # $before_leave
1218    
# Line 1140  sub _get_next_token ($) { Line 1249  sub _get_next_token ($) {
1249        
1250          redo A;          redo A;
1251        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
1252            if ($self->{is_xml}) {
1253              
1254              ## XML5: Not a parse error.
1255              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1256            } else {
1257              
1258            }
1259    
1260          $before_leave->();          $before_leave->();
1261          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1262                        
# Line 1189  sub _get_next_token ($) { Line 1306  sub _get_next_token ($) {
1306        
1307          redo A;          redo A;
1308        } elsif ($self->{nc} == 0x002F) { # /        } elsif ($self->{nc} == 0x002F) { # /
1309            if ($self->{is_xml}) {
1310              
1311              ## XML5: Not a parse error.
1312              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1313            } else {
1314              
1315            }
1316                    
1317          $before_leave->();          $before_leave->();
1318          $self->{state} = SELF_CLOSING_START_TAG_STATE;          $self->{state} = SELF_CLOSING_START_TAG_STATE;
# Line 1233  sub _get_next_token ($) { Line 1357  sub _get_next_token ($) {
1357          if ($self->{nc} == 0x0022 or # "          if ($self->{nc} == 0x0022 or # "
1358              $self->{nc} == 0x0027) { # '              $self->{nc} == 0x0027) { # '
1359                        
1360              ## XML5: Not a parse error.
1361            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');
1362          } else {          } else {
1363                        
# Line 1253  sub _get_next_token ($) { Line 1378  sub _get_next_token ($) {
1378          redo A;          redo A;
1379        }        }
1380      } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {      } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {
1381          ## XML5: "Tag attribute name after state".
1382          
1383        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
1384                    
1385          ## Stay in the state          ## Stay in the state
# Line 1284  sub _get_next_token ($) { Line 1411  sub _get_next_token ($) {
1411        
1412          redo A;          redo A;
1413        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
1414            if ($self->{is_xml}) {
1415              
1416              ## XML5: Not a parse error.
1417              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1418            } else {
1419              
1420            }
1421    
1422          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
1423                        
1424            $self->{last_stag_name} = $self->{ct}->{tag_name};            $self->{last_stag_name} = $self->{ct}->{tag_name};
# Line 1337  sub _get_next_token ($) { Line 1472  sub _get_next_token ($) {
1472        
1473          redo A;          redo A;
1474        } elsif ($self->{nc} == 0x002F) { # /        } elsif ($self->{nc} == 0x002F) { # /
1475            if ($self->{is_xml}) {
1476              
1477              ## XML5: Not a parse error.
1478              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1479            } else {
1480              
1481            }
1482                    
1483          $self->{state} = SELF_CLOSING_START_TAG_STATE;          $self->{state} = SELF_CLOSING_START_TAG_STATE;
1484                    
# Line 1376  sub _get_next_token ($) { Line 1518  sub _get_next_token ($) {
1518    
1519          redo A;          redo A;
1520        } else {        } else {
1521            if ($self->{is_xml}) {
1522              
1523              ## XML5: Not a parse error.
1524              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no attr value'); ## TODO: type
1525            } else {
1526              
1527            }
1528    
1529          if ($self->{nc} == 0x0022 or # "          if ($self->{nc} == 0x0022 or # "
1530              $self->{nc} == 0x0027) { # '              $self->{nc} == 0x0027) { # '
1531                        
1532              ## XML5: Not a parse error.
1533            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name');
1534          } else {          } else {
1535                        
# Line 1402  sub _get_next_token ($) { Line 1553  sub _get_next_token ($) {
1553          redo A;                  redo A;        
1554        }        }
1555      } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {      } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {
1556          ## XML5: "Tag attribute value before state".
1557    
1558        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
1559                    
1560          ## Stay in the state          ## Stay in the state
# Line 1513  sub _get_next_token ($) { Line 1666  sub _get_next_token ($) {
1666        } else {        } else {
1667          if ($self->{nc} == 0x003D) { # =          if ($self->{nc} == 0x003D) { # =
1668                        
1669              ## XML5: Not a parse error.
1670            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');
1671            } elsif ($self->{is_xml}) {
1672              
1673              ## XML5: No parse error.
1674              $self->{parse_error}->(level => $self->{level}->{must}, type => 'unquoted attr value'); ## TODO
1675          } else {          } else {
1676                        
1677          }          }
# Line 1533  sub _get_next_token ($) { Line 1691  sub _get_next_token ($) {
1691          redo A;          redo A;
1692        }        }
1693      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1694          ## XML5: "Tag attribute value double quoted state".
1695          
1696        if ($self->{nc} == 0x0022) { # "        if ($self->{nc} == 0x0022) { # "
1697                    
1698            ## XML5: "Tag attribute name before state".
1699          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1700                    
1701      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 1550  sub _get_next_token ($) { Line 1711  sub _get_next_token ($) {
1711          redo A;          redo A;
1712        } elsif ($self->{nc} == 0x0026) { # &        } elsif ($self->{nc} == 0x0026) { # &
1713                    
1714            ## XML5: Not defined yet.
1715    
1716          ## NOTE: In the spec, the tokenizer is switched to the          ## NOTE: In the spec, the tokenizer is switched to the
1717          ## "entity in attribute value state".  In this implementation, the          ## "entity in attribute value state".  In this implementation, the
1718          ## tokenizer is switched to the |ENTITY_STATE|, which is an          ## tokenizer is switched to the |ENTITY_STATE|, which is an
# Line 1594  sub _get_next_token ($) { Line 1757  sub _get_next_token ($) {
1757    
1758          redo A;          redo A;
1759        } else {        } else {
1760                    if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
1761              
1762              ## XML5: Not a parse error.
1763              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lt in attr value'); ## TODO: type
1764            } else {
1765              
1766            }
1767          $self->{ca}->{value} .= chr ($self->{nc});          $self->{ca}->{value} .= chr ($self->{nc});
1768          $self->{read_until}->($self->{ca}->{value},          $self->{read_until}->($self->{ca}->{value},
1769                                q["&],                                q["&<],
1770                                length $self->{ca}->{value});                                length $self->{ca}->{value});
1771    
1772          ## Stay in the state          ## Stay in the state
# Line 1615  sub _get_next_token ($) { Line 1784  sub _get_next_token ($) {
1784          redo A;          redo A;
1785        }        }
1786      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1787          ## XML5: "Tag attribute value single quoted state".
1788    
1789        if ($self->{nc} == 0x0027) { # '        if ($self->{nc} == 0x0027) { # '
1790                    
1791            ## XML5: "Before attribute name state" (sic).
1792          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;          $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1793                    
1794      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 1632  sub _get_next_token ($) { Line 1804  sub _get_next_token ($) {
1804          redo A;          redo A;
1805        } elsif ($self->{nc} == 0x0026) { # &        } elsif ($self->{nc} == 0x0026) { # &
1806                    
1807            ## XML5: Not defined yet.
1808    
1809          ## NOTE: In the spec, the tokenizer is switched to the          ## NOTE: In the spec, the tokenizer is switched to the
1810          ## "entity in attribute value state".  In this implementation, the          ## "entity in attribute value state".  In this implementation, the
1811          ## tokenizer is switched to the |ENTITY_STATE|, which is an          ## tokenizer is switched to the |ENTITY_STATE|, which is an
# Line 1676  sub _get_next_token ($) { Line 1850  sub _get_next_token ($) {
1850    
1851          redo A;          redo A;
1852        } else {        } else {
1853                    if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
1854              
1855              ## XML5: Not a parse error.
1856              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lt in attr value'); ## TODO: type
1857            } else {
1858              
1859            }
1860          $self->{ca}->{value} .= chr ($self->{nc});          $self->{ca}->{value} .= chr ($self->{nc});
1861          $self->{read_until}->($self->{ca}->{value},          $self->{read_until}->($self->{ca}->{value},
1862                                q['&],                                q['&<],
1863                                length $self->{ca}->{value});                                length $self->{ca}->{value});
1864    
1865          ## Stay in the state          ## Stay in the state
# Line 1697  sub _get_next_token ($) { Line 1877  sub _get_next_token ($) {
1877          redo A;          redo A;
1878        }        }
1879      } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {
1880          ## XML5: "Tag attribute value unquoted state".
1881    
1882        if ($is_space->{$self->{nc}}) {        if ($is_space->{$self->{nc}}) {
1883                    
1884            ## XML5: "Tag attribute name before state".
1885          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1886                    
1887      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
# Line 1714  sub _get_next_token ($) { Line 1897  sub _get_next_token ($) {
1897          redo A;          redo A;
1898        } elsif ($self->{nc} == 0x0026) { # &        } elsif ($self->{nc} == 0x0026) { # &
1899                    
1900    
1901            ## XML5: Not defined yet.
1902    
1903          ## NOTE: In the spec, the tokenizer is switched to the          ## NOTE: In the spec, the tokenizer is switched to the
1904          ## "entity in attribute value state".  In this implementation, the          ## "entity in attribute value state".  In this implementation, the
1905          ## tokenizer is switched to the |ENTITY_STATE|, which is an          ## tokenizer is switched to the |ENTITY_STATE|, which is an
# Line 1797  sub _get_next_token ($) { Line 1983  sub _get_next_token ($) {
1983               0x003D => 1, # =               0x003D => 1, # =
1984              }->{$self->{nc}}) {              }->{$self->{nc}}) {
1985                        
1986              ## XML5: Not a parse error.
1987            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute value');
1988          } else {          } else {
1989                        
# Line 1913  sub _get_next_token ($) { Line 2100  sub _get_next_token ($) {
2100          redo A;          redo A;
2101        }        }
2102      } elsif ($self->{state} == SELF_CLOSING_START_TAG_STATE) {      } elsif ($self->{state} == SELF_CLOSING_START_TAG_STATE) {
2103          ## XML5: "Empty tag state".
2104    
2105        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2106          if ($self->{ct}->{type} == END_TAG_TOKEN) {          if ($self->{ct}->{type} == END_TAG_TOKEN) {
2107                        
# Line 1964  sub _get_next_token ($) { Line 2153  sub _get_next_token ($) {
2153          } else {          } else {
2154            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
2155          }          }
2156            ## XML5: "Tag attribute name before state".
2157          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2158          $self->{s_kwd} = '';          $self->{s_kwd} = '';
2159          ## Reconsume.          ## Reconsume.
# Line 2106  sub _get_next_token ($) { Line 2296  sub _get_next_token ($) {
2296                                    line => $self->{line_prev},                                    line => $self->{line_prev},
2297                                    column => $self->{column_prev} - 2,                                    column => $self->{column_prev} - 2,
2298                                   };                                   };
2299          $self->{state} = COMMENT_START_STATE;          $self->{state} = COMMENT_START_STATE; ## XML5: "comment state".
2300                    
2301      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {      if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
2302        $self->{line_prev} = $self->{line};        $self->{line_prev} = $self->{line};
# Line 2169  sub _get_next_token ($) { Line 2359  sub _get_next_token ($) {
2359        } elsif ((length $self->{s_kwd}) == 6 and        } elsif ((length $self->{s_kwd}) == 6 and
2360                 ($self->{nc} == 0x0045 or # E                 ($self->{nc} == 0x0045 or # E
2361                  $self->{nc} == 0x0065)) { # e                  $self->{nc} == 0x0065)) { # e
2362                    if ($self->{s_kwd} ne 'DOCTYP') {
2363              
2364              ## XML5: case-sensitive.
2365              $self->{parse_error}->(level => $self->{level}->{must}, type => 'lowercase keyword', ## TODO
2366                              text => 'DOCTYPE',
2367                              line => $self->{line_prev},
2368                              column => $self->{column_prev} - 5);
2369            } else {
2370              
2371            }
2372          $self->{state} = DOCTYPE_STATE;          $self->{state} = DOCTYPE_STATE;
2373          $self->{ct} = {type => DOCTYPE_TOKEN,          $self->{ct} = {type => DOCTYPE_TOKEN,
2374                                    quirks => 1,                                    quirks => 1,
# Line 2227  sub _get_next_token ($) { Line 2426  sub _get_next_token ($) {
2426          redo A;          redo A;
2427        } elsif ($self->{s_kwd} eq '[CDATA' and        } elsif ($self->{s_kwd} eq '[CDATA' and
2428                 $self->{nc} == 0x005B) { # [                 $self->{nc} == 0x005B) { # [
2429                    if ($self->{is_xml} and
2430                not $self->{tainted} and
2431                @{$self->{open_elements} or []} == 0) {
2432              
2433              $self->{parse_error}->(level => $self->{level}->{must}, type => 'cdata outside of root element',
2434                              line => $self->{line_prev},
2435                              column => $self->{column_prev} - 7);
2436              $self->{tainted} = 1;
2437            } else {
2438              
2439            }
2440    
2441          $self->{ct} = {type => CHARACTER_TOKEN,          $self->{ct} = {type => CHARACTER_TOKEN,
2442                                    data => '',                                    data => '',
2443                                    line => $self->{line_prev},                                    line => $self->{line_prev},
# Line 2435  sub _get_next_token ($) { Line 2645  sub _get_next_token ($) {
2645          redo A;          redo A;
2646        }        }
2647      } elsif ($self->{state} == COMMENT_END_DASH_STATE) {      } elsif ($self->{state} == COMMENT_END_DASH_STATE) {
2648          ## XML5: "comment dash state".
2649    
2650        if ($self->{nc} == 0x002D) { # -        if ($self->{nc} == 0x002D) { # -
2651                    
2652          $self->{state} = COMMENT_END_STATE;          $self->{state} = COMMENT_END_STATE;
# Line 2500  sub _get_next_token ($) { Line 2712  sub _get_next_token ($) {
2712          redo A;          redo A;
2713        } elsif ($self->{nc} == 0x002D) { # -        } elsif ($self->{nc} == 0x002D) { # -
2714                    
2715            ## XML5: Not a parse error.
2716          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',
2717                          line => $self->{line_prev},                          line => $self->{line_prev},
2718                          column => $self->{column_prev});                          column => $self->{column_prev});
# Line 2529  sub _get_next_token ($) { Line 2742  sub _get_next_token ($) {
2742          redo A;          redo A;
2743        } else {        } else {
2744                    
2745            ## XML5: Not a parse error.
2746          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',          $self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment',
2747                          line => $self->{line_prev},                          line => $self->{line_prev},
2748                          column => $self->{column_prev});                          column => $self->{column_prev});
# Line 3614  sub _get_next_token ($) { Line 3828  sub _get_next_token ($) {
3828        ## NOTE: "CDATA section state" in the spec is jointly implemented        ## NOTE: "CDATA section state" in the spec is jointly implemented
3829        ## by three states, |CDATA_SECTION_STATE|, |CDATA_SECTION_MSE1_STATE|,        ## by three states, |CDATA_SECTION_STATE|, |CDATA_SECTION_MSE1_STATE|,
3830        ## and |CDATA_SECTION_MSE2_STATE|.        ## and |CDATA_SECTION_MSE2_STATE|.
3831    
3832          ## XML5: "CDATA state".
3833                
3834        if ($self->{nc} == 0x005D) { # ]        if ($self->{nc} == 0x005D) { # ]
3835                    
# Line 3631  sub _get_next_token ($) { Line 3847  sub _get_next_token ($) {
3847        
3848          redo A;          redo A;
3849        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
3850            if ($self->{is_xml}) {
3851              
3852              $self->{parse_error}->(level => $self->{level}->{must}, type => 'no mse'); ## TODO: type
3853            } else {
3854              
3855            }
3856    
3857          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3858          $self->{s_kwd} = '';          $self->{s_kwd} = '';
3859                    ## Reconsume.
     if ($self->{char_buffer_pos} < length $self->{char_buffer}) {  
       $self->{line_prev} = $self->{line};  
       $self->{column_prev} = $self->{column};  
       $self->{column}++;  
       $self->{nc}  
           = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);  
     } else {  
       $self->{set_nc}->($self);  
     }  
     
3860          if (length $self->{ct}->{data}) { # character          if (length $self->{ct}->{data}) { # character
3861                        
3862            return  ($self->{ct}); # character            return  ($self->{ct}); # character
# Line 3676  sub _get_next_token ($) { Line 3889  sub _get_next_token ($) {
3889    
3890        ## ISSUE: "text tokens" in spec.        ## ISSUE: "text tokens" in spec.
3891      } elsif ($self->{state} == CDATA_SECTION_MSE1_STATE) {      } elsif ($self->{state} == CDATA_SECTION_MSE1_STATE) {
3892          ## XML5: "CDATA bracket state".
3893    
3894        if ($self->{nc} == 0x005D) { # ]        if ($self->{nc} == 0x005D) { # ]
3895                    
3896          $self->{state} = CDATA_SECTION_MSE2_STATE;          $self->{state} = CDATA_SECTION_MSE2_STATE;
# Line 3693  sub _get_next_token ($) { Line 3908  sub _get_next_token ($) {
3908          redo A;          redo A;
3909        } else {        } else {
3910                    
3911            ## XML5: If EOF, "]" is not appended and the state is changed to the data state.
3912          $self->{ct}->{data} .= ']';          $self->{ct}->{data} .= ']';
3913          $self->{state} = CDATA_SECTION_STATE;          $self->{state} = CDATA_SECTION_STATE; ## XML5: Stay in the state.
3914          ## Reconsume.          ## Reconsume.
3915          redo A;          redo A;
3916        }        }
3917      } elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) {      } elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) {
3918          ## XML5: "CDATA end state".
3919    
3920        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
3921          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
3922          $self->{s_kwd} = '';          $self->{s_kwd} = '';
# Line 3741  sub _get_next_token ($) { Line 3959  sub _get_next_token ($) {
3959                    
3960          $self->{ct}->{data} .= ']]'; # character          $self->{ct}->{data} .= ']]'; # character
3961          $self->{state} = CDATA_SECTION_STATE;          $self->{state} = CDATA_SECTION_STATE;
3962          ## Reconsume.          ## Reconsume. ## XML5: Emit.
3963          redo A;          redo A;
3964        }        }
3965      } elsif ($self->{state} == ENTITY_STATE) {      } elsif ($self->{state} == ENTITY_STATE) {
# Line 3952  sub _get_next_token ($) { Line 4170  sub _get_next_token ($) {
4170          $self->{s_kwd} = '';          $self->{s_kwd} = '';
4171          ## Reconsume.          ## Reconsume.
4172          return  ({type => CHARACTER_TOKEN, data => chr $code,          return  ({type => CHARACTER_TOKEN, data => chr $code,
4173                      has_reference => 1,
4174                    line => $l, column => $c,                    line => $l, column => $c,
4175                   });                   });
4176          redo A;          redo A;
# Line 4104  sub _get_next_token ($) { Line 4323  sub _get_next_token ($) {
4323          $self->{s_kwd} = '';          $self->{s_kwd} = '';
4324          ## Reconsume.          ## Reconsume.
4325          return  ({type => CHARACTER_TOKEN, data => chr $code,          return  ({type => CHARACTER_TOKEN, data => chr $code,
4326                      has_reference => 1,
4327                    line => $l, column => $c,                    line => $l, column => $c,
4328                   });                   });
4329          redo A;          redo A;
# Line 4229  sub _get_next_token ($) { Line 4449  sub _get_next_token ($) {
4449          ## Reconsume.          ## Reconsume.
4450          return  ({type => CHARACTER_TOKEN,          return  ({type => CHARACTER_TOKEN,
4451                    data => $data,                    data => $data,
4452                      has_reference => $has_ref,
4453                    line => $self->{line_prev},                    line => $self->{line_prev},
4454                    column => $self->{column_prev} + 1 - length $self->{s_kwd},                    column => $self->{column_prev} + 1 - length $self->{s_kwd},
4455                   });                   });
# Line 4242  sub _get_next_token ($) { Line 4463  sub _get_next_token ($) {
4463          ## Reconsume.          ## Reconsume.
4464          redo A;          redo A;
4465        }        }
4466    
4467        ## XML-only states
4468    
4469        } elsif ($self->{state} == PI_STATE) {
4470          if ($is_space->{$self->{nc}} or
4471              $self->{nc} == 0x003F or # ? ## XML5: Same as "Anything else"
4472              $self->{nc} == -1) {
4473            $self->{parse_error}->(level => $self->{level}->{must}, type => 'bare pio', ## TODO: type
4474                            line => $self->{line_prev},
4475                            column => $self->{column_prev}
4476                                - 1 * ($self->{nc} != -1));
4477            $self->{state} = BOGUS_COMMENT_STATE;
4478            ## Reconsume.
4479            $self->{ct} = {type => COMMENT_TOKEN,
4480                           data => '?',
4481                           line => $self->{line_prev},
4482                           column => $self->{column_prev}
4483                               - 1 * ($self->{nc} != -1),
4484                          };
4485            redo A;
4486          } else {
4487            $self->{ct} = {type => PI_TOKEN,
4488                           target => chr $self->{nc},
4489                           data => '',
4490                           line => $self->{line_prev},
4491                           column => $self->{column_prev} - 1,
4492                          };
4493            $self->{state} = PI_TARGET_STATE;
4494            
4495        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4496          $self->{line_prev} = $self->{line};
4497          $self->{column_prev} = $self->{column};
4498          $self->{column}++;
4499          $self->{nc}
4500              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4501        } else {
4502          $self->{set_nc}->($self);
4503        }
4504      
4505            redo A;
4506          }
4507        } elsif ($self->{state} == PI_TARGET_STATE) {
4508          if ($is_space->{$self->{nc}}) {
4509            $self->{state} = PI_TARGET_AFTER_STATE;
4510            
4511        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4512          $self->{line_prev} = $self->{line};
4513          $self->{column_prev} = $self->{column};
4514          $self->{column}++;
4515          $self->{nc}
4516              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4517        } else {
4518          $self->{set_nc}->($self);
4519        }
4520      
4521            redo A;
4522          } elsif ($self->{nc} == -1) {
4523            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type
4524            $self->{state} = DATA_STATE;
4525            $self->{s_kwd} = '';
4526            ## Reconsume.
4527            return  ($self->{ct}); # pi
4528            redo A;
4529          } elsif ($self->{nc} == 0x003F) { # ?
4530            $self->{state} = PI_AFTER_STATE;
4531            
4532        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4533          $self->{line_prev} = $self->{line};
4534          $self->{column_prev} = $self->{column};
4535          $self->{column}++;
4536          $self->{nc}
4537              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4538        } else {
4539          $self->{set_nc}->($self);
4540        }
4541      
4542            redo A;
4543          } else {
4544            ## XML5: typo ("tag name" -> "target")
4545            $self->{ct}->{target} .= chr $self->{nc}; # pi
4546            
4547        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4548          $self->{line_prev} = $self->{line};
4549          $self->{column_prev} = $self->{column};
4550          $self->{column}++;
4551          $self->{nc}
4552              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4553        } else {
4554          $self->{set_nc}->($self);
4555        }
4556      
4557            redo A;
4558          }
4559        } elsif ($self->{state} == PI_TARGET_AFTER_STATE) {
4560          if ($is_space->{$self->{nc}}) {
4561            ## Stay in the state.
4562            
4563        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4564          $self->{line_prev} = $self->{line};
4565          $self->{column_prev} = $self->{column};
4566          $self->{column}++;
4567          $self->{nc}
4568              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4569        } else {
4570          $self->{set_nc}->($self);
4571        }
4572      
4573            redo A;
4574          } else {
4575            $self->{state} = PI_DATA_STATE;
4576            ## Reprocess.
4577            redo A;
4578          }
4579        } elsif ($self->{state} == PI_DATA_STATE) {
4580          if ($self->{nc} == 0x003F) { # ?
4581            $self->{state} = PI_DATA_AFTER_STATE;
4582            
4583        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4584          $self->{line_prev} = $self->{line};
4585          $self->{column_prev} = $self->{column};
4586          $self->{column}++;
4587          $self->{nc}
4588              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4589        } else {
4590          $self->{set_nc}->($self);
4591        }
4592      
4593            redo A;
4594          } elsif ($self->{nc} == -1) {
4595            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no pic'); ## TODO: type
4596            $self->{state} = DATA_STATE;
4597            $self->{s_kwd} = '';
4598            ## Reprocess.
4599            return  ($self->{ct}); # pi
4600            redo A;
4601          } else {
4602            $self->{ct}->{data} .= chr $self->{nc}; # pi
4603            $self->{read_until}->($self->{ct}->{data}, q[?],
4604                                  length $self->{ct}->{data});
4605            ## Stay in the state.
4606            
4607        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4608          $self->{line_prev} = $self->{line};
4609          $self->{column_prev} = $self->{column};
4610          $self->{column}++;
4611          $self->{nc}
4612              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4613        } else {
4614          $self->{set_nc}->($self);
4615        }
4616      
4617            ## Reprocess.
4618            redo A;
4619          }
4620        } elsif ($self->{state} == PI_AFTER_STATE) {
4621          if ($self->{nc} == 0x003E) { # >
4622            $self->{state} = DATA_STATE;
4623            $self->{s_kwd} = '';
4624            
4625        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4626          $self->{line_prev} = $self->{line};
4627          $self->{column_prev} = $self->{column};
4628          $self->{column}++;
4629          $self->{nc}
4630              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4631        } else {
4632          $self->{set_nc}->($self);
4633        }
4634      
4635            return  ($self->{ct}); # pi
4636            redo A;
4637          } elsif ($self->{nc} == 0x003F) { # ?
4638            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no s after target', ## TODO: type
4639                            line => $self->{line_prev},
4640                            column => $self->{column_prev}); ## XML5: no error
4641            $self->{ct}->{data} .= '?';
4642            $self->{state} = PI_DATA_AFTER_STATE;
4643            
4644        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4645          $self->{line_prev} = $self->{line};
4646          $self->{column_prev} = $self->{column};
4647          $self->{column}++;
4648          $self->{nc}
4649              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4650        } else {
4651          $self->{set_nc}->($self);
4652        }
4653      
4654            redo A;
4655          } else {
4656            $self->{parse_error}->(level => $self->{level}->{must}, type => 'no s after target', ## TODO: type
4657                            line => $self->{line_prev},
4658                            column => $self->{column_prev}
4659                                + 1 * ($self->{nc} == -1)); ## XML5: no error
4660            $self->{ct}->{data} .= '?'; ## XML5: not appended
4661            $self->{state} = PI_DATA_STATE;
4662            ## Reprocess.
4663            redo A;
4664          }
4665        } elsif ($self->{state} == PI_DATA_AFTER_STATE) {
4666          ## XML5: Same as "pi after state" in XML5
4667          if ($self->{nc} == 0x003E) { # >
4668            $self->{state} = DATA_STATE;
4669            $self->{s_kwd} = '';
4670            
4671        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4672          $self->{line_prev} = $self->{line};
4673          $self->{column_prev} = $self->{column};
4674          $self->{column}++;
4675          $self->{nc}
4676              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4677        } else {
4678          $self->{set_nc}->($self);
4679        }
4680      
4681            return  ($self->{ct}); # pi
4682            redo A;
4683          } elsif ($self->{nc} == 0x003F) { # ?
4684            $self->{ct}->{data} .= '?';
4685            ## Stay in the state.
4686            
4687        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
4688          $self->{line_prev} = $self->{line};
4689          $self->{column_prev} = $self->{column};
4690          $self->{column}++;
4691          $self->{nc}
4692              = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
4693        } else {
4694          $self->{set_nc}->($self);
4695        }
4696      
4697            redo A;
4698          } else {
4699            $self->{ct}->{data} .= '?'; ## XML5: not appended
4700            $self->{state} = PI_DATA_STATE;
4701            ## Reprocess.
4702            redo A;
4703          }
4704            
4705      } else {      } else {
4706        die "$0: $self->{state}: Unknown state";        die "$0: $self->{state}: Unknown state";
4707      }      }

Legend:
Removed from v.1.5  
changed lines
  Added in v.1.11

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24