/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Diff of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory | Revision Log | View Patch

revision 1.116 by wakaba, Mon Mar 17 13:23:39 2008 UTC | revision 1.120 by wakaba, Thu Mar 20 03:57:00 2008 UTC
# Line 466  sub _get_next_token ($) { Line 466  sub _get_next_token ($) {
466        # Anything else        # Anything else
467        my $token = {type => CHARACTER_TOKEN,        my $token = {type => CHARACTER_TOKEN,
468                     data => chr $self->{next_char},                     data => chr $self->{next_char},
469                     line => $self->{line}, column => $self->{column}};                     line => $self->{line}, column => $self->{column},
470                      };
471        ## Stay in the data state        ## Stay in the data state
472        !!!next-input-character;        !!!next-input-character;
473    
# Line 486  sub _get_next_token ($) { Line 487  sub _get_next_token ($) {
487        unless (defined $token) {        unless (defined $token) {
488          !!!cp (13);          !!!cp (13);
489          !!!emit ({type => CHARACTER_TOKEN, data => '&',          !!!emit ({type => CHARACTER_TOKEN, data => '&',
490                    line => $l, column => $c});                    line => $l, column => $c,
491                     });
492        } else {        } else {
493          !!!cp (14);          !!!cp (14);
494          !!!emit ($token);          !!!emit ($token);
# Line 507  sub _get_next_token ($) { Line 509  sub _get_next_token ($) {
509    
510            !!!emit ({type => CHARACTER_TOKEN, data => '<',            !!!emit ({type => CHARACTER_TOKEN, data => '<',
511                      line => $self->{line_prev},                      line => $self->{line_prev},
512                      column => $self->{column_prev}});                      column => $self->{column_prev},
513                       });
514    
515            redo A;            redo A;
516          }          }
# Line 553  sub _get_next_token ($) { Line 556  sub _get_next_token ($) {
556    
557            !!!emit ({type => CHARACTER_TOKEN, data => '<>',            !!!emit ({type => CHARACTER_TOKEN, data => '<>',
558                      line => $self->{line_prev},                      line => $self->{line_prev},
559                      column => $self->{column_prev}});                      column => $self->{column_prev},
560                       });
561    
562            redo A;            redo A;
563          } elsif ($self->{next_char} == 0x003F) { # ?          } elsif ($self->{next_char} == 0x003F) { # ?
# Line 564  sub _get_next_token ($) { Line 568  sub _get_next_token ($) {
568            $self->{state} = BOGUS_COMMENT_STATE;            $self->{state} = BOGUS_COMMENT_STATE;
569            $self->{current_token} = {type => COMMENT_TOKEN, data => '',            $self->{current_token} = {type => COMMENT_TOKEN, data => '',
570                                      line => $self->{line_prev},                                      line => $self->{line_prev},
571                                      column => $self->{column_prev}};                                      column => $self->{column_prev},
572                                       };
573            ## $self->{next_char} is intentionally left as is            ## $self->{next_char} is intentionally left as is
574            redo A;            redo A;
575          } else {          } else {
# Line 575  sub _get_next_token ($) { Line 580  sub _get_next_token ($) {
580    
581            !!!emit ({type => CHARACTER_TOKEN, data => '<',            !!!emit ({type => CHARACTER_TOKEN, data => '<',
582                      line => $self->{line_prev},                      line => $self->{line_prev},
583                      column => $self->{column_prev}});                      column => $self->{column_prev},
584                       });
585    
586            redo A;            redo A;
587          }          }
# Line 604  sub _get_next_token ($) { Line 610  sub _get_next_token ($) {
610                $self->{state} = DATA_STATE;                $self->{state} = DATA_STATE;
611    
612                !!!emit ({type => CHARACTER_TOKEN, data => '</',                !!!emit ({type => CHARACTER_TOKEN, data => '</',
613                          line => $l, column => $c});                          line => $l, column => $c,
614                           });
615        
616                redo A;                redo A;
617              }              }
# Line 624  sub _get_next_token ($) { Line 631  sub _get_next_token ($) {
631              !!!back-next-input-character (@next_char);              !!!back-next-input-character (@next_char);
632              $self->{state} = DATA_STATE;              $self->{state} = DATA_STATE;
633              !!!emit ({type => CHARACTER_TOKEN, data => '</',              !!!emit ({type => CHARACTER_TOKEN, data => '</',
634                        line => $l, column => $c});                        line => $l, column => $c,
635                         });
636              redo A;              redo A;
637            } else {            } else {
638              !!!cp (27);              !!!cp (27);
# Line 638  sub _get_next_token ($) { Line 646  sub _get_next_token ($) {
646            # next-input-character is already done            # next-input-character is already done
647            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
648            !!!emit ({type => CHARACTER_TOKEN, data => '</',            !!!emit ({type => CHARACTER_TOKEN, data => '</',
649                      line => $l, column => $c});                      line => $l, column => $c,
650                       });
651            redo A;            redo A;
652          }          }
653        }        }
# Line 677  sub _get_next_token ($) { Line 686  sub _get_next_token ($) {
686          # reconsume          # reconsume
687    
688          !!!emit ({type => CHARACTER_TOKEN, data => '</',          !!!emit ({type => CHARACTER_TOKEN, data => '</',
689                    line => $l, column => $c});                    line => $l, column => $c,
690                     });
691    
692          redo A;          redo A;
693        } else {        } else {
# Line 686  sub _get_next_token ($) { Line 696  sub _get_next_token ($) {
696          $self->{state} = BOGUS_COMMENT_STATE;          $self->{state} = BOGUS_COMMENT_STATE;
697          $self->{current_token} = {type => COMMENT_TOKEN, data => '',          $self->{current_token} = {type => COMMENT_TOKEN, data => '',
698                                    line => $self->{line_prev}, # "<" of "</"                                    line => $self->{line_prev}, # "<" of "</"
699                                    column => $self->{column_prev} - 1};                                    column => $self->{column_prev} - 1,
700                                     };
701          ## $self->{next_char} is intentionally left as is          ## $self->{next_char} is intentionally left as is
702          redo A;          redo A;
703        }        }
# Line 703  sub _get_next_token ($) { Line 714  sub _get_next_token ($) {
714        } elsif ($self->{next_char} == 0x003E) { # >        } elsif ($self->{next_char} == 0x003E) { # >
715          if ($self->{current_token}->{type} == START_TAG_TOKEN) {          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
716            !!!cp (35);            !!!cp (35);
           $self->{current_token}->{first_start_tag}  
               = not defined $self->{last_emitted_start_tag_name};  
717            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
718          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
719            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
# Line 736  sub _get_next_token ($) { Line 745  sub _get_next_token ($) {
745          !!!parse-error (type => 'unclosed tag');          !!!parse-error (type => 'unclosed tag');
746          if ($self->{current_token}->{type} == START_TAG_TOKEN) {          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
747            !!!cp (39);            !!!cp (39);
           $self->{current_token}->{first_start_tag}  
               = not defined $self->{last_emitted_start_tag_name};  
748            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
749          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
750            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
# Line 793  sub _get_next_token ($) { Line 800  sub _get_next_token ($) {
800        } elsif ($self->{next_char} == 0x003E) { # >        } elsif ($self->{next_char} == 0x003E) { # >
801          if ($self->{current_token}->{type} == START_TAG_TOKEN) {          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
802            !!!cp (46);            !!!cp (46);
           $self->{current_token}->{first_start_tag}  
               = not defined $self->{last_emitted_start_tag_name};  
803            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
804          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
805            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
# Line 816  sub _get_next_token ($) { Line 821  sub _get_next_token ($) {
821        } elsif (0x0041 <= $self->{next_char} and        } elsif (0x0041 <= $self->{next_char} and
822                 $self->{next_char} <= 0x005A) { # A..Z                 $self->{next_char} <= 0x005A) { # A..Z
823          !!!cp (49);          !!!cp (49);
824          $self->{current_attribute} = {name => chr ($self->{next_char} + 0x0020),          $self->{current_attribute}
825                                value => ''};              = {name => chr ($self->{next_char} + 0x0020),
826                   value => '',
827                   line => $self->{line}, column => $self->{column}};
828          $self->{state} = ATTRIBUTE_NAME_STATE;          $self->{state} = ATTRIBUTE_NAME_STATE;
829          !!!next-input-character;          !!!next-input-character;
830          redo A;          redo A;
# Line 840  sub _get_next_token ($) { Line 847  sub _get_next_token ($) {
847          !!!parse-error (type => 'unclosed tag');          !!!parse-error (type => 'unclosed tag');
848          if ($self->{current_token}->{type} == START_TAG_TOKEN) {          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
849            !!!cp (52);            !!!cp (52);
           $self->{current_token}->{first_start_tag}  
               = not defined $self->{last_emitted_start_tag_name};  
850            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
851          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
852            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
# Line 871  sub _get_next_token ($) { Line 876  sub _get_next_token ($) {
876          } else {          } else {
877            !!!cp (56);            !!!cp (56);
878          }          }
879          $self->{current_attribute} = {name => chr ($self->{next_char}),          $self->{current_attribute}
880                                value => ''};              = {name => chr ($self->{next_char}),
881                   value => '',
882                   line => $self->{line}, column => $self->{column}};
883          $self->{state} = ATTRIBUTE_NAME_STATE;          $self->{state} = ATTRIBUTE_NAME_STATE;
884          !!!next-input-character;          !!!next-input-character;
885          redo A;          redo A;
# Line 882  sub _get_next_token ($) { Line 889  sub _get_next_token ($) {
889          if (exists $self->{current_token}->{attributes} # start tag or end tag          if (exists $self->{current_token}->{attributes} # start tag or end tag
890              ->{$self->{current_attribute}->{name}}) { # MUST              ->{$self->{current_attribute}->{name}}) { # MUST
891            !!!cp (57);            !!!cp (57);
892            !!!parse-error (type => 'duplicate attribute:'.$self->{current_attribute}->{name});            !!!parse-error (type => 'duplicate attribute:'.$self->{current_attribute}->{name}, line => $self->{current_attribute}->{line}, column => $self->{current_attribute}->{column});
893            ## Discard $self->{current_attribute} # MUST            ## Discard $self->{current_attribute} # MUST
894          } else {          } else {
895            !!!cp (58);            !!!cp (58);
# Line 911  sub _get_next_token ($) { Line 918  sub _get_next_token ($) {
918          $before_leave->();          $before_leave->();
919          if ($self->{current_token}->{type} == START_TAG_TOKEN) {          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
920            !!!cp (61);            !!!cp (61);
           $self->{current_token}->{first_start_tag}  
               = not defined $self->{last_emitted_start_tag_name};  
921            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
922          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
923            !!!cp (62);            !!!cp (62);
# Line 957  sub _get_next_token ($) { Line 962  sub _get_next_token ($) {
962          $before_leave->();          $before_leave->();
963          if ($self->{current_token}->{type} == START_TAG_TOKEN) {          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
964            !!!cp (66);            !!!cp (66);
           $self->{current_token}->{first_start_tag}  
               = not defined $self->{last_emitted_start_tag_name};  
965            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
966          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
967            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
# Line 1009  sub _get_next_token ($) { Line 1012  sub _get_next_token ($) {
1012        } elsif ($self->{next_char} == 0x003E) { # >        } elsif ($self->{next_char} == 0x003E) { # >
1013          if ($self->{current_token}->{type} == START_TAG_TOKEN) {          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1014            !!!cp (73);            !!!cp (73);
           $self->{current_token}->{first_start_tag}  
               = not defined $self->{last_emitted_start_tag_name};  
1015            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1016          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1017            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
# Line 1033  sub _get_next_token ($) { Line 1034  sub _get_next_token ($) {
1034        } elsif (0x0041 <= $self->{next_char} and        } elsif (0x0041 <= $self->{next_char} and
1035                 $self->{next_char} <= 0x005A) { # A..Z                 $self->{next_char} <= 0x005A) { # A..Z
1036          !!!cp (76);          !!!cp (76);
1037          $self->{current_attribute} = {name => chr ($self->{next_char} + 0x0020),          $self->{current_attribute}
1038                                value => ''};              = {name => chr ($self->{next_char} + 0x0020),
1039                   value => '',
1040                   line => $self->{line}, column => $self->{column}};
1041          $self->{state} = ATTRIBUTE_NAME_STATE;          $self->{state} = ATTRIBUTE_NAME_STATE;
1042          !!!next-input-character;          !!!next-input-character;
1043          redo A;          redo A;
# Line 1058  sub _get_next_token ($) { Line 1061  sub _get_next_token ($) {
1061          !!!parse-error (type => 'unclosed tag');          !!!parse-error (type => 'unclosed tag');
1062          if ($self->{current_token}->{type} == START_TAG_TOKEN) {          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1063            !!!cp (79);            !!!cp (79);
           $self->{current_token}->{first_start_tag}  
               = not defined $self->{last_emitted_start_tag_name};  
1064            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1065          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1066            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
# Line 1081  sub _get_next_token ($) { Line 1082  sub _get_next_token ($) {
1082          redo A;          redo A;
1083        } else {        } else {
1084          !!!cp (82);          !!!cp (82);
1085          $self->{current_attribute} = {name => chr ($self->{next_char}),          $self->{current_attribute}
1086                                value => ''};              = {name => chr ($self->{next_char}),
1087                   value => '',
1088                   line => $self->{line}, column => $self->{column}};
1089          $self->{state} = ATTRIBUTE_NAME_STATE;          $self->{state} = ATTRIBUTE_NAME_STATE;
1090          !!!next-input-character;          !!!next-input-character;
1091          redo A;                  redo A;        
# Line 1115  sub _get_next_token ($) { Line 1118  sub _get_next_token ($) {
1118        } elsif ($self->{next_char} == 0x003E) { # >        } elsif ($self->{next_char} == 0x003E) { # >
1119          if ($self->{current_token}->{type} == START_TAG_TOKEN) {          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1120            !!!cp (87);            !!!cp (87);
           $self->{current_token}->{first_start_tag}  
               = not defined $self->{last_emitted_start_tag_name};  
1121            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1122          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1123            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
# Line 1140  sub _get_next_token ($) { Line 1141  sub _get_next_token ($) {
1141          !!!parse-error (type => 'unclosed tag');          !!!parse-error (type => 'unclosed tag');
1142          if ($self->{current_token}->{type} == START_TAG_TOKEN) {          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1143            !!!cp (90);            !!!cp (90);
           $self->{current_token}->{first_start_tag}  
               = not defined $self->{last_emitted_start_tag_name};  
1144            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1145          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1146            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
# Line 1189  sub _get_next_token ($) { Line 1188  sub _get_next_token ($) {
1188          !!!parse-error (type => 'unclosed attribute value');          !!!parse-error (type => 'unclosed attribute value');
1189          if ($self->{current_token}->{type} == START_TAG_TOKEN) {          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1190            !!!cp (97);            !!!cp (97);
           $self->{current_token}->{first_start_tag}  
               = not defined $self->{last_emitted_start_tag_name};  
1191            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1192          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1193            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
# Line 1233  sub _get_next_token ($) { Line 1230  sub _get_next_token ($) {
1230          !!!parse-error (type => 'unclosed attribute value');          !!!parse-error (type => 'unclosed attribute value');
1231          if ($self->{current_token}->{type} == START_TAG_TOKEN) {          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1232            !!!cp (103);            !!!cp (103);
           $self->{current_token}->{first_start_tag}  
               = not defined $self->{last_emitted_start_tag_name};  
1233            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1234          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1235            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
# Line 1280  sub _get_next_token ($) { Line 1275  sub _get_next_token ($) {
1275        } elsif ($self->{next_char} == 0x003E) { # >        } elsif ($self->{next_char} == 0x003E) { # >
1276          if ($self->{current_token}->{type} == START_TAG_TOKEN) {          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1277            !!!cp (109);            !!!cp (109);
           $self->{current_token}->{first_start_tag}  
               = not defined $self->{last_emitted_start_tag_name};  
1278            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1279          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1280            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
# Line 1305  sub _get_next_token ($) { Line 1298  sub _get_next_token ($) {
1298          !!!parse-error (type => 'unclosed tag');          !!!parse-error (type => 'unclosed tag');
1299          if ($self->{current_token}->{type} == START_TAG_TOKEN) {          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1300            !!!cp (112);            !!!cp (112);
           $self->{current_token}->{first_start_tag}  
               = not defined $self->{last_emitted_start_tag_name};  
1301            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1302          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1303            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
# Line 1377  sub _get_next_token ($) { Line 1368  sub _get_next_token ($) {
1368        } elsif ($self->{next_char} == 0x003E) { # >        } elsif ($self->{next_char} == 0x003E) { # >
1369          if ($self->{current_token}->{type} == START_TAG_TOKEN) {          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1370            !!!cp (119);            !!!cp (119);
           $self->{current_token}->{first_start_tag}  
               = not defined $self->{last_emitted_start_tag_name};  
1371            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1372          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1373            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
# Line 1466  sub _get_next_token ($) { Line 1455  sub _get_next_token ($) {
1455          if ($self->{next_char} == 0x002D) { # -          if ($self->{next_char} == 0x002D) { # -
1456            !!!cp (127);            !!!cp (127);
1457            $self->{current_token} = {type => COMMENT_TOKEN, data => '',            $self->{current_token} = {type => COMMENT_TOKEN, data => '',
1458                                      line => $l, column => $c};                                      line => $l, column => $c,
1459                                       };
1460            $self->{state} = COMMENT_START_STATE;            $self->{state} = COMMENT_START_STATE;
1461            !!!next-input-character;            !!!next-input-character;
1462            redo A;            redo A;
# Line 1504  sub _get_next_token ($) { Line 1494  sub _get_next_token ($) {
1494                      $self->{state} = DOCTYPE_STATE;                      $self->{state} = DOCTYPE_STATE;
1495                      $self->{current_token} = {type => DOCTYPE_TOKEN,                      $self->{current_token} = {type => DOCTYPE_TOKEN,
1496                                                quirks => 1,                                                quirks => 1,
1497                                                line => $l, column => $c};                                                line => $l, column => $c,
1498                                                 };
1499                      !!!next-input-character;                      !!!next-input-character;
1500                      redo A;                      redo A;
1501                    } else {                    } else {
# Line 1534  sub _get_next_token ($) { Line 1525  sub _get_next_token ($) {
1525        !!!back-next-input-character (@next_char);        !!!back-next-input-character (@next_char);
1526        $self->{state} = BOGUS_COMMENT_STATE;        $self->{state} = BOGUS_COMMENT_STATE;
1527        $self->{current_token} = {type => COMMENT_TOKEN, data => '',        $self->{current_token} = {type => COMMENT_TOKEN, data => '',
1528                                  line => $l, column => $c};                                  line => $l, column => $c,
1529                                   };
1530        redo A;        redo A;
1531                
1532        ## ISSUE: typos in spec: chacacters, is is a parse error        ## ISSUE: typos in spec: chacacters, is is a parse error
# Line 2332  sub _tokenize_attempt_to_consume_an_enti Line 2324  sub _tokenize_attempt_to_consume_an_enti
2324          }          }
2325    
2326          return {type => CHARACTER_TOKEN, data => chr $code,          return {type => CHARACTER_TOKEN, data => chr $code,
2327                  has_reference => 1, line => $l, column => $c};                  has_reference => 1,
2328                    line => $l, column => $c,
2329                   };
2330        } # X        } # X
2331      } elsif (0x0030 <= $self->{next_char} and      } elsif (0x0030 <= $self->{next_char} and
2332               $self->{next_char} <= 0x0039) { # 0..9               $self->{next_char} <= 0x0039) { # 0..9
# Line 2375  sub _tokenize_attempt_to_consume_an_enti Line 2369  sub _tokenize_attempt_to_consume_an_enti
2369        }        }
2370                
2371        return {type => CHARACTER_TOKEN, data => chr $code, has_reference => 1,        return {type => CHARACTER_TOKEN, data => chr $code, has_reference => 1,
2372                line => $l, column => $c};                line => $l, column => $c,
2373                 };
2374      } else {      } else {
2375        !!!cp (1019);        !!!cp (1019);
2376        !!!parse-error (type => 'bare nero', line => $l, column => $c);        !!!parse-error (type => 'bare nero', line => $l, column => $c);
# Line 2429  sub _tokenize_attempt_to_consume_an_enti Line 2424  sub _tokenize_attempt_to_consume_an_enti
2424      if ($match > 0) {      if ($match > 0) {
2425        !!!cp (1023);        !!!cp (1023);
2426        return {type => CHARACTER_TOKEN, data => $value, has_reference => 1,        return {type => CHARACTER_TOKEN, data => $value, has_reference => 1,
2427                line => $l, column => $c};                line => $l, column => $c,
2428                 };
2429      } elsif ($match < 0) {      } elsif ($match < 0) {
2430        !!!parse-error (type => 'no refc', line => $l, column => $c);        !!!parse-error (type => 'no refc', line => $l, column => $c);
2431        if ($in_attr and $match < -1) {        if ($in_attr and $match < -1) {
2432          !!!cp (1024);          !!!cp (1024);
2433          return {type => CHARACTER_TOKEN, data => '&'.$entity_name,          return {type => CHARACTER_TOKEN, data => '&'.$entity_name,
2434                  line => $l, column => $c};                  line => $l, column => $c,
2435                   };
2436        } else {        } else {
2437          !!!cp (1025);          !!!cp (1025);
2438          return {type => CHARACTER_TOKEN, data => $value, has_reference => 1,          return {type => CHARACTER_TOKEN, data => $value, has_reference => 1,
2439                  line => $l, column => $c};                  line => $l, column => $c,
2440                   };
2441        }        }
2442      } else {      } else {
2443        !!!cp (1026);        !!!cp (1026);
2444        !!!parse-error (type => 'bare ero', line => $l, column => $c);        !!!parse-error (type => 'bare ero', line => $l, column => $c);
2445        ## NOTE: "No characters are consumed" in the spec.        ## NOTE: "No characters are consumed" in the spec.
2446        return {type => CHARACTER_TOKEN, data => '&'.$value,        return {type => CHARACTER_TOKEN, data => '&'.$value,
2447                line => $l, column => $c};                line => $l, column => $c,
2448                 };
2449      }      }
2450    } else {    } else {
2451      !!!cp (1027);      !!!cp (1027);
# Line 2757  sub _tree_construction_root_element ($) Line 2756  sub _tree_construction_root_element ($)
2756              !!!cp ('t24');              !!!cp ('t24');
2757              $self->{application_cache_selection}              $self->{application_cache_selection}
2758                  ->($token->{attributes}->{manifest}->{value});                  ->($token->{attributes}->{manifest}->{value});
2759              ## ISSUE: No relative reference resolution?              ## ISSUE: Spec is unclear on relative references.
2760                ## According to Hixie (#whatwg 2008-03-19), it should be
2761                ## resolved against the base URI of the document in HTML
2762                ## or xml:base of the element in XHTML.
2763            } else {            } else {
2764              !!!cp ('t25');              !!!cp ('t25');
2765              $self->{application_cache_selection}->(undef);              $self->{application_cache_selection}->(undef);
# Line 5938  sub _tree_construction_main ($) { Line 5940  sub _tree_construction_main ($) {
5940            if ($prompt_attr) {            if ($prompt_attr) {
5941              !!!cp ('t390');              !!!cp ('t390');
5942              push @tokens, {type => CHARACTER_TOKEN, data => $prompt_attr->{value},              push @tokens, {type => CHARACTER_TOKEN, data => $prompt_attr->{value},
5943                             line => $token->{line}, column => $token->{column}};                             #line => $token->{line}, column => $token->{column},
5944                              };
5945            } else {            } else {
5946              !!!cp ('t391');              !!!cp ('t391');
5947              push @tokens, {type => CHARACTER_TOKEN,              push @tokens, {type => CHARACTER_TOKEN,
5948                             data => 'This is a searchable index. Insert your search keywords here: ',                             data => 'This is a searchable index. Insert your search keywords here: ',
5949                             line => $token->{line}, column => $token->{column}}; # SHOULD                             #line => $token->{line}, column => $token->{column},
5950                              }; # SHOULD
5951              ## TODO: make this configurable              ## TODO: make this configurable
5952            }            }
5953            push @tokens,            push @tokens,

Legend:
Removed from v.1.116  
changed lines
  Added in v.1.120

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24