/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src

Diff of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory | Revision Log | View Patch

-revision 1.116 by wakaba,
Mon Mar 17 13:23:39 2008 UTC
+revision 1.120 by wakaba,
Thu Mar 20 03:57:00 2008 UTC
 Line 466 
 sub _get_next_token ($) {
        # Anything else
        my $token = {type => CHARACTER_TOKEN,
                     data => chr $self->{next_char},
-                    line => $self->{line}, column => $self->{column}};
+                    line => $self->{line}, column => $self->{column},
+                   };
        ## Stay in the data state
        !!!next-input-character;
-Line 486 
 sub _get_next_token ($) {
+Line 487 
 sub _get_next_token ($) {
        unless (defined $token) {
          !!!cp (13);
          !!!emit ({type => CHARACTER_TOKEN, data => '&',
-                   line => $l, column => $c});
+                   line => $l, column => $c,
+                  });
        } else {
          !!!cp (14);
          !!!emit ($token);
-Line 507 
 sub _get_next_token ($) {
+Line 509 
 sub _get_next_token ($) {
            !!!emit ({type => CHARACTER_TOKEN, data => '<',
                      line => $self->{line_prev},
-                     column => $self->{column_prev}});
+                     column => $self->{column_prev},
+                    });
            redo A;
          }
-Line 553 
 sub _get_next_token ($) {
+Line 556 
 sub _get_next_token ($) {
            !!!emit ({type => CHARACTER_TOKEN, data => '<>',
                      line => $self->{line_prev},
-                     column => $self->{column_prev}});
+                     column => $self->{column_prev},
+                    });
            redo A;
          } elsif ($self->{next_char} == 0x003F) { # ?
-Line 564 
 sub _get_next_token ($) {
+Line 568 
 sub _get_next_token ($) {
            $self->{state} = BOGUS_COMMENT_STATE;
            $self->{current_token} = {type => COMMENT_TOKEN, data => '',
                                      line => $self->{line_prev},
-                                     column => $self->{column_prev}};
+                                     column => $self->{column_prev},
+                                    };
            ## $self->{next_char} is intentionally left as is
            redo A;
          } else {
-Line 575 
 sub _get_next_token ($) {
+Line 580 
 sub _get_next_token ($) {
            !!!emit ({type => CHARACTER_TOKEN, data => '<',
                      line => $self->{line_prev},
-                     column => $self->{column_prev}});
+                     column => $self->{column_prev},
+                    });
            redo A;
          }
-Line 604 
 sub _get_next_token ($) {
+Line 610 
 sub _get_next_token ($) {
                $self->{state} = DATA_STATE;
                !!!emit ({type => CHARACTER_TOKEN, data => '</',
-                         line => $l, column => $c});
+                         line => $l, column => $c,
+                        });
                redo A;
              }
-Line 624 
 sub _get_next_token ($) {
+Line 631 
 sub _get_next_token ($) {
              !!!back-next-input-character (@next_char);
              $self->{state} = DATA_STATE;
              !!!emit ({type => CHARACTER_TOKEN, data => '</',
-                       line => $l, column => $c});
+                       line => $l, column => $c,
+                      });
              redo A;
            } else {
              !!!cp (27);
-Line 638 
 sub _get_next_token ($) {
+Line 646 
 sub _get_next_token ($) {
            # next-input-character is already done
            $self->{state} = DATA_STATE;
            !!!emit ({type => CHARACTER_TOKEN, data => '</',
-                     line => $l, column => $c});
+                     line => $l, column => $c,
+                    });
            redo A;
          }
        }
-Line 677 
 sub _get_next_token ($) {
+Line 686 
 sub _get_next_token ($) {
          # reconsume
          !!!emit ({type => CHARACTER_TOKEN, data => '</',
-                   line => $l, column => $c});
+                   line => $l, column => $c,
+                  });
          redo A;
        } else {
-Line 686 
 sub _get_next_token ($) {
+Line 696 
 sub _get_next_token ($) {
          $self->{state} = BOGUS_COMMENT_STATE;
          $self->{current_token} = {type => COMMENT_TOKEN, data => '',
                                    line => $self->{line_prev}, # "<" of "</"
-                                   column => $self->{column_prev} - 1};
+                                   column => $self->{column_prev} - 1,
+                                  };
          ## $self->{next_char} is intentionally left as is
          redo A;
        }
-Line 703 
 sub _get_next_token ($) {
+Line 714 
 sub _get_next_token ($) {
        } elsif ($self->{next_char} == 0x003E) { # >
          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
            !!!cp (35);
-           $self->{current_token}->{first_start_tag}
-               = not defined $self->{last_emitted_start_tag_name};
            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
-Line 736 
 sub _get_next_token ($) {
+Line 745 
 sub _get_next_token ($) {
          !!!parse-error (type => 'unclosed tag');
          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
            !!!cp (39);
-           $self->{current_token}->{first_start_tag}
-               = not defined $self->{last_emitted_start_tag_name};
            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
-Line 793 
 sub _get_next_token ($) {
+Line 800 
 sub _get_next_token ($) {
        } elsif ($self->{next_char} == 0x003E) { # >
          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
            !!!cp (46);
-           $self->{current_token}->{first_start_tag}
-               = not defined $self->{last_emitted_start_tag_name};
            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
-Line 816 
 sub _get_next_token ($) {
+Line 821 
 sub _get_next_token ($) {
        } elsif (0x0041 <= $self->{next_char} and
                 $self->{next_char} <= 0x005A) { # A..Z
          !!!cp (49);
-         $self->{current_attribute} = {name => chr ($self->{next_char} + 0x0020),
+         $self->{current_attribute}
-                               value => ''};
+             = {name => chr ($self->{next_char} + 0x0020),
+                value => '',
+                line => $self->{line}, column => $self->{column}};
          $self->{state} = ATTRIBUTE_NAME_STATE;
          !!!next-input-character;
          redo A;
-Line 840 
 sub _get_next_token ($) {
+Line 847 
 sub _get_next_token ($) {
          !!!parse-error (type => 'unclosed tag');
          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
            !!!cp (52);
-           $self->{current_token}->{first_start_tag}
-               = not defined $self->{last_emitted_start_tag_name};
            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
-Line 871 
 sub _get_next_token ($) {
+Line 876 
 sub _get_next_token ($) {
          } else {
            !!!cp (56);
          }
-         $self->{current_attribute} = {name => chr ($self->{next_char}),
+         $self->{current_attribute}
-                               value => ''};
+             = {name => chr ($self->{next_char}),
+                value => '',
+                line => $self->{line}, column => $self->{column}};
          $self->{state} = ATTRIBUTE_NAME_STATE;
          !!!next-input-character;
          redo A;
-Line 882 
 sub _get_next_token ($) {
+Line 889 
 sub _get_next_token ($) {
          if (exists $self->{current_token}->{attributes} # start tag or end tag
              ->{$self->{current_attribute}->{name}}) { # MUST
            !!!cp (57);
-           !!!parse-error (type => 'duplicate attribute:'.$self->{current_attribute}->{name});
+           !!!parse-error (type => 'duplicate attribute:'.$self->{current_attribute}->{name}, line => $self->{current_attribute}->{line}, column => $self->{current_attribute}->{column});
            ## Discard $self->{current_attribute} # MUST
          } else {
            !!!cp (58);
-Line 911 
 sub _get_next_token ($) {
+Line 918 
 sub _get_next_token ($) {
          $before_leave->();
          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
            !!!cp (61);
-           $self->{current_token}->{first_start_tag}
-               = not defined $self->{last_emitted_start_tag_name};
            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
            !!!cp (62);
-Line 957 
 sub _get_next_token ($) {
+Line 962 
 sub _get_next_token ($) {
          $before_leave->();
          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
            !!!cp (66);
-           $self->{current_token}->{first_start_tag}
-               = not defined $self->{last_emitted_start_tag_name};
            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
-Line 1009 
 sub _get_next_token ($) {
+Line 1012 
 sub _get_next_token ($) {
        } elsif ($self->{next_char} == 0x003E) { # >
          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
            !!!cp (73);
-           $self->{current_token}->{first_start_tag}
-               = not defined $self->{last_emitted_start_tag_name};
            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
-Line 1033 
 sub _get_next_token ($) {
+Line 1034 
 sub _get_next_token ($) {
        } elsif (0x0041 <= $self->{next_char} and
                 $self->{next_char} <= 0x005A) { # A..Z
          !!!cp (76);
-         $self->{current_attribute} = {name => chr ($self->{next_char} + 0x0020),
+         $self->{current_attribute}
-                               value => ''};
+             = {name => chr ($self->{next_char} + 0x0020),
+                value => '',
+                line => $self->{line}, column => $self->{column}};
          $self->{state} = ATTRIBUTE_NAME_STATE;
          !!!next-input-character;
          redo A;
-Line 1058 
 sub _get_next_token ($) {
+Line 1061 
 sub _get_next_token ($) {
          !!!parse-error (type => 'unclosed tag');
          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
            !!!cp (79);
-           $self->{current_token}->{first_start_tag}
-               = not defined $self->{last_emitted_start_tag_name};
            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
-Line 1081 
 sub _get_next_token ($) {
+Line 1082 
 sub _get_next_token ($) {
          redo A;
        } else {
          !!!cp (82);
-         $self->{current_attribute} = {name => chr ($self->{next_char}),
+         $self->{current_attribute}
-                               value => ''};
+             = {name => chr ($self->{next_char}),
+                value => '',
+                line => $self->{line}, column => $self->{column}};
          $self->{state} = ATTRIBUTE_NAME_STATE;
          !!!next-input-character;
          redo A;
-Line 1115 
 sub _get_next_token ($) {
+Line 1118 
 sub _get_next_token ($) {
        } elsif ($self->{next_char} == 0x003E) { # >
          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
            !!!cp (87);
-           $self->{current_token}->{first_start_tag}
-               = not defined $self->{last_emitted_start_tag_name};
            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
-Line 1140 
 sub _get_next_token ($) {
+Line 1141 
 sub _get_next_token ($) {
          !!!parse-error (type => 'unclosed tag');
          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
            !!!cp (90);
-           $self->{current_token}->{first_start_tag}
-               = not defined $self->{last_emitted_start_tag_name};
            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
-Line 1189 
 sub _get_next_token ($) {
+Line 1188 
 sub _get_next_token ($) {
          !!!parse-error (type => 'unclosed attribute value');
          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
            !!!cp (97);
-           $self->{current_token}->{first_start_tag}
-               = not defined $self->{last_emitted_start_tag_name};
            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
-Line 1233 
 sub _get_next_token ($) {
+Line 1230 
 sub _get_next_token ($) {
          !!!parse-error (type => 'unclosed attribute value');
          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
            !!!cp (103);
-           $self->{current_token}->{first_start_tag}
-               = not defined $self->{last_emitted_start_tag_name};
            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
-Line 1280 
 sub _get_next_token ($) {
+Line 1275 
 sub _get_next_token ($) {
        } elsif ($self->{next_char} == 0x003E) { # >
          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
            !!!cp (109);
-           $self->{current_token}->{first_start_tag}
-               = not defined $self->{last_emitted_start_tag_name};
            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
-Line 1305 
 sub _get_next_token ($) {
+Line 1298 
 sub _get_next_token ($) {
          !!!parse-error (type => 'unclosed tag');
          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
            !!!cp (112);
-           $self->{current_token}->{first_start_tag}
-               = not defined $self->{last_emitted_start_tag_name};
            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
-Line 1377 
 sub _get_next_token ($) {
+Line 1368 
 sub _get_next_token ($) {
        } elsif ($self->{next_char} == 0x003E) { # >
          if ($self->{current_token}->{type} == START_TAG_TOKEN) {
            !!!cp (119);
-           $self->{current_token}->{first_start_tag}
-               = not defined $self->{last_emitted_start_tag_name};
            $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
          } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
            $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
-Line 1466 
 sub _get_next_token ($) {
+Line 1455 
 sub _get_next_token ($) {
          if ($self->{next_char} == 0x002D) { # -
            !!!cp (127);
            $self->{current_token} = {type => COMMENT_TOKEN, data => '',
-                                     line => $l, column => $c};
+                                     line => $l, column => $c,
+                                    };
            $self->{state} = COMMENT_START_STATE;
            !!!next-input-character;
            redo A;
-Line 1504 
 sub _get_next_token ($) {
+Line 1494 
 sub _get_next_token ($) {
                      $self->{state} = DOCTYPE_STATE;
                      $self->{current_token} = {type => DOCTYPE_TOKEN,
                                                quirks => 1,
-                                               line => $l, column => $c};
+                                               line => $l, column => $c,
+                                              };
                      !!!next-input-character;
                      redo A;
                    } else {
-Line 1534 
 sub _get_next_token ($) {
+Line 1525 
 sub _get_next_token ($) {
        !!!back-next-input-character (@next_char);
        $self->{state} = BOGUS_COMMENT_STATE;
        $self->{current_token} = {type => COMMENT_TOKEN, data => '',
-                                 line => $l, column => $c};
+                                 line => $l, column => $c,
+                                };
        redo A;
        ## ISSUE: typos in spec: chacacters, is is a parse error
-Line 2332 
 sub _tokenize_attempt_to_consume_an_enti
+Line 2324 
 sub _tokenize_attempt_to_consume_an_enti
          }
          return {type => CHARACTER_TOKEN, data => chr $code,
-                 has_reference => 1, line => $l, column => $c};
+                 has_reference => 1,
+                 line => $l, column => $c,
+                };
        } # X
      } elsif (0x0030 <= $self->{next_char} and
               $self->{next_char} <= 0x0039) { # 0..9
-Line 2375 
 sub _tokenize_attempt_to_consume_an_enti
+Line 2369 
 sub _tokenize_attempt_to_consume_an_enti
        }
        return {type => CHARACTER_TOKEN, data => chr $code, has_reference => 1,
-               line => $l, column => $c};
+               line => $l, column => $c,
+              };
      } else {
        !!!cp (1019);
        !!!parse-error (type => 'bare nero', line => $l, column => $c);
-Line 2429 
 sub _tokenize_attempt_to_consume_an_enti
+Line 2424 
 sub _tokenize_attempt_to_consume_an_enti
      if ($match > 0) {
        !!!cp (1023);
        return {type => CHARACTER_TOKEN, data => $value, has_reference => 1,
-               line => $l, column => $c};
+               line => $l, column => $c,
+              };
      } elsif ($match < 0) {
        !!!parse-error (type => 'no refc', line => $l, column => $c);
        if ($in_attr and $match < -1) {
          !!!cp (1024);
          return {type => CHARACTER_TOKEN, data => '&'.$entity_name,
-                 line => $l, column => $c};
+                 line => $l, column => $c,
+                };
        } else {
          !!!cp (1025);
          return {type => CHARACTER_TOKEN, data => $value, has_reference => 1,
-                 line => $l, column => $c};
+                 line => $l, column => $c,
+                };
        }
      } else {
        !!!cp (1026);
        !!!parse-error (type => 'bare ero', line => $l, column => $c);
        ## NOTE: "No characters are consumed" in the spec.
        return {type => CHARACTER_TOKEN, data => '&'.$value,
-               line => $l, column => $c};
+               line => $l, column => $c,
+              };
      }
    } else {
      !!!cp (1027);
-Line 2757 
 sub _tree_construction_root_element ($)
+Line 2756 
 sub _tree_construction_root_element ($)
              !!!cp ('t24');
              $self->{application_cache_selection}
                  ->($token->{attributes}->{manifest}->{value});
-             ## ISSUE: No relative reference resolution?
+             ## ISSUE: Spec is unclear on relative references.
+             ## According to Hixie (#whatwg 2008-03-19), it should be
+             ## resolved against the base URI of the document in HTML
+             ## or xml:base of the element in XHTML.
            } else {
              !!!cp ('t25');
              $self->{application_cache_selection}->(undef);
-Line 5938 
 sub _tree_construction_main ($) {
+Line 5940 
 sub _tree_construction_main ($) {
            if ($prompt_attr) {
              !!!cp ('t390');
              push @tokens, {type => CHARACTER_TOKEN, data => $prompt_attr->{value},
-                            line => $token->{line}, column => $token->{column}};
+                            #line => $token->{line}, column => $token->{column},
+                           };
            } else {
              !!!cp ('t391');
              push @tokens, {type => CHARACTER_TOKEN,
                             data => 'This is a searchable index. Insert your search keywords here: ',
-                            line => $token->{line}, column => $token->{column}}; # SHOULD
+                            #line => $token->{line}, column => $token->{column},
+                           }; # SHOULD
              ## TODO: make this configurable
            }
            push @tokens,

 Legend:

-Removed from v.1.116
 changed lines (shown as a "-" line paired with its "+" replacement)
+Added in v.1.120

admin@suikawiki.org	ViewVC Help
Powered by ViewVC 1.1.24