/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker.pm
Suika

Diff of /markup/html/whatpm/Whatpm/ContentChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.16 by wakaba, Sun May 20 07:12:11 2007 UTC revision 1.17 by wakaba, Sun May 20 11:12:25 2007 UTC
# Line 142  my $ElementDefault = { Line 142  my $ElementDefault = {
142          || $AttrChecker->{$attr_ns}->{''};          || $AttrChecker->{$attr_ns}->{''};
143        if ($checker) {        if ($checker) {
144          $checker->($self, $attr);          $checker->($self, $attr);
145          } else {
146            $self->{onerror}->(node => $attr, type => 'attribute not supported');
147        }        }
       ## Don't check otherwise, since "element type not supported" warning  
       ## will be reported by the element checker.  
148      }      }
149    },    },
150  };  };
# Line 736  my $HTMLIMTAttrChecker = sub { Line 736  my $HTMLIMTAttrChecker = sub {
736    ## TODO: Warn unless registered    ## TODO: Warn unless registered
737  }; # $HTMLIMTAttrChecker  }; # $HTMLIMTAttrChecker
738    
739    my $HTMLLanguageTagAttrChecker = sub {
740      my ($self, $attr) = @_;
741      if ($attr->value eq '') {
742        $self->{onerror}->(node => $attr, type => 'language tag syntax error');
743      }
744      ## TODO: RFC 3066 test
745      ## ISSUE: RFC 4646 (3066bis)?
746    }; # $HTMLLanguageTagAttrChecker
747    
748    ## "A valid media query [MQ]"
749    my $HTMLMQAttrChecker = sub {
750      ## ISSUE: What is "a valid media query"?
751    }; # $HTMLMQAttrChecker
752    
753  my $HTMLEventHandlerAttrChecker = sub {  my $HTMLEventHandlerAttrChecker = sub {
754    ## TODO: MUST contain valid ECMAScript code matching the    ## TODO: MUST contain valid ECMAScript code matching the
755    ## ECMAScript |FunctionBody| production. [ECMA262]    ## ECMAScript |FunctionBody| production. [ECMA262]
# Line 744  my $HTMLEventHandlerAttrChecker = sub { Line 758  my $HTMLEventHandlerAttrChecker = sub {
758    ## ISSUE: Other script languages?    ## ISSUE: Other script languages?
759  }; # $HTMLEventHandlerAttrChecker  }; # $HTMLEventHandlerAttrChecker
760    
761    my $HTMLUsemapAttrChecker = sub {
762      my ($self, $attr) = @_;
763      ## MUST be a valid hashed ID reference to a |map| element
764      my $value = $attr->value;
765      if ($value =~ s/^#//) {
766        ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
767        push @{$self->{usemap}}, [$value => $attr];
768      } else {
769        $self->{onerror}->(node => $attr, type => 'hashed idref syntax error');
770      }
771    }; # $HTMLUsemapAttrChecker
772    
773    my $HTMLTargetAttrChecker = sub {
774      my ($self, $attr) = @_;
775      my $value = $attr->value;
776      if ($value =~ /^_/) {
777        $value = lc $value; ## ISSUE: ASCII case-insentitive?
778        unless ({
779                 _self => 1, _parent => 1, _top => 1,
780                }->{$value}) {
781          $self->{onerror}->(node => $attr,
782                             type => 'reserved browsing context name');
783        }
784      } else {
785        #$ ISSUE: An empty string is conforming?
786      }
787    }; # $HTMLTargetAttrChecker
788    
789  my $HTMLAttrChecker = {  my $HTMLAttrChecker = {
790    id => sub {    id => sub {
791        ## NOTE: |map| has its own variant of |id=""| checker
792      my ($self, $attr) = @_;      my ($self, $attr) = @_;
793      my $value = $attr->value;      my $value = $attr->value;
794      unless (length $value > 0) {      if (length $value > 0) {
       ## NOTE: MUST contain at least one character  
       $self->{onerror}->(node => $attr, type => 'attribute value is empty');  
     } else {  
795        if ($self->{id}->{$value}) {        if ($self->{id}->{$value}) {
796          $self->{onerror}->(node => $attr, type => 'duplicate ID');          $self->{onerror}->(node => $attr, type => 'duplicate ID');
797        } else {        } else {
798          $self->{id}->{$value} = 1;          $self->{id}->{$value} = 1;
799        }        }
800        } else {
801          ## NOTE: MUST contain at least one character
802          $self->{onerror}->(node => $attr, type => 'attribute value is empty');
803      }      }
804    },    },
805    title => sub {}, ## NOTE: No conformance creteria    title => sub {}, ## NOTE: No conformance creteria
806    lang => sub {    lang => sub {
807      ## TODO: RFC 3066 test      ## TODO: RFC 3066 or empty test
808      ## ISSUE: RFC 4646 (3066bis)?      ## ISSUE: RFC 4646 (3066bis)?
809      ## TODO: HTML vs XHTML      ## TODO: HTML vs XHTML
810    },    },
# Line 963  $Element->{$HTML_NS}->{title} = { Line 1006  $Element->{$HTML_NS}->{title} = {
1006  $Element->{$HTML_NS}->{base} = {  $Element->{$HTML_NS}->{base} = {
1007    attrs_checker => $GetHTMLAttrsChecker->({    attrs_checker => $GetHTMLAttrsChecker->({
1008      href => $HTMLURIAttrChecker,      href => $HTMLURIAttrChecker,
1009      ## TODO: target      target => $HTMLTargetAttrChecker,
1010    }),    }),
1011    checker => $HTMLEmptyChecker,    checker => $HTMLEmptyChecker,
1012  };  };
# Line 974  $Element->{$HTML_NS}->{link} = { Line 1017  $Element->{$HTML_NS}->{link} = {
1017      $GetHTMLAttrsChecker->({      $GetHTMLAttrsChecker->({
1018        href => $HTMLURIAttrChecker,        href => $HTMLURIAttrChecker,
1019        rel => $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker, ## TODO: registered? check        rel => $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker, ## TODO: registered? check
1020        ## TODO: media        media => $HTMLMQAttrChecker,
1021        ## TODO: hreflang        hreflang => $HTMLLanguageTagAttrChecker,
1022        type => $HTMLIMTAttrChecker,        type => $HTMLIMTAttrChecker,
1023        ## NOTE: Though |title| has special semantics,        ## NOTE: Though |title| has special semantics,
1024        ## syntactically same as the |title| as global attribute.        ## syntactically same as the |title| as global attribute.
# Line 1103  $Element->{$HTML_NS}->{meta} = { Line 1146  $Element->{$HTML_NS}->{meta} = {
1146  $Element->{$HTML_NS}->{style} = {  $Element->{$HTML_NS}->{style} = {
1147    attrs_checker => $GetHTMLAttrsChecker->({    attrs_checker => $GetHTMLAttrsChecker->({
1148      type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language      type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
1149      ## TODO: media      media => $HTMLMQAttrChecker,
1150      scoped => $GetHTMLBooleanAttrChecker->('scoped'),      scoped => $GetHTMLBooleanAttrChecker->('scoped'),
1151      ## NOTE: |title| has special semantics for |style|s, but is syntactically      ## NOTE: |title| has special semantics for |style|s, but is syntactically
1152      ## not different      ## not different
# Line 1504  $Element->{$HTML_NS}->{dd} = { Line 1547  $Element->{$HTML_NS}->{dd} = {
1547  };  };
1548    
1549  $Element->{$HTML_NS}->{a} = {  $Element->{$HTML_NS}->{a} = {
1550    attrs_checker => do {    attrs_checker => sub {
1551        my ($self, $todo) = @_;
1552      my %attr;      my %attr;
1553      my $all_checker = $GetHTMLAttrsChecker->({      for my $attr (@{$todo->{node}->attributes}) {
1554        href => sub { $HTMLURIAttrChecker->(@_); $attr{href} = $_[1] },        my $attr_ns = $attr->namespace_uri;
1555        ## TODO: target        $attr_ns = '' unless defined $attr_ns;
1556        ping => sub { $HTMLSpaceURIsAttrChecker->(@_); $attr{ping} = $_[1] },        my $attr_ln = $attr->manakai_local_name;
1557        rel => sub {        my $checker;
1558          $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker->(@_); ## TODO: registered? check        if ($attr_ns eq '') {
1559          $attr{rel} = $_[1];          $checker = {
1560        },                       target => $HTMLTargetAttrChecker,
1561        ## TODO: media                       href => $HTMLURIAttrChecker,
1562        ## TODO: hreflang                       ping => $HTMLSpaceURIsAttrChecker,
1563        type => sub { $HTMLIMTAttrChecker->(@_); $attr{type} = $_[1] },                       rel => $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker, ## TODO: registered? check
1564      });                       media => $HTMLMQAttrChecker,
1565      sub {                       hreflang => $HTMLLanguageTagAttrChecker,
1566        $all_checker->(@_);                       type => $HTMLIMTAttrChecker,
1567        unless (defined $attr{href}) {                     }->{$attr_ln};
1568          for (qw/target ping rel media hreflang type/) {          if ($checker) {
1569            if (defined $attr{$_}) {            $attr{$attr_ln} = $attr;
1570              $_[0]->{onerror}->(node => $attr{$_},          } else {
1571                                 type => 'attribute not allowed');            $checker = $HTMLAttrChecker->{$attr_ln};
           }  
1572          }          }
1573        }        }
1574        %attr = ();        $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1575      };          || $AttrChecker->{$attr_ns}->{''};
1576          if ($checker) {
1577            $checker->($self, $attr) if ref $checker;
1578          } else {
1579            $self->{onerror}->(node => $attr, type => 'attribute not supported');
1580            ## ISSUE: No comformance createria for unknown attributes in the spec
1581          }
1582        }
1583    
1584        unless (defined $attr{href}) {
1585          for (qw/target ping rel media hreflang type/) {
1586            if (defined $attr{$_}) {
1587              $self->{onerror}->(node => $attr{$_},
1588                                 type => 'attribute not allowed');
1589            }
1590          }
1591        }
1592    },    },
1593    checker => sub {    checker => sub {
1594      my ($self, $todo) = @_;      my ($self, $todo) = @_;
# Line 1722  $Element->{$HTML_NS}->{del} = { Line 1781  $Element->{$HTML_NS}->{del} = {
1781  ## TODO: figure  ## TODO: figure
1782    
1783  $Element->{$HTML_NS}->{img} = {  $Element->{$HTML_NS}->{img} = {
1784    attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO    attrs_checker => sub {
1785        my ($self, $todo) = @_;
1786        $GetHTMLAttrsChecker->({
1787          alt => sub { }, ## NOTE: No syntactical requirement
1788          src => $HTMLURIAttrChecker,
1789          usemap => $HTMLUsemapAttrChecker,
1790          ismap => $GetHTMLBooleanAttrChecker->('ismap'), ## TODO: MUST ancestor <a>
1791          ## TODO: height
1792          ## TODO: width
1793        })->($self, $todo);
1794        unless ($todo->{node}->has_attribute_ns (undef, 'alt')) {
1795          $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:alt');
1796        }
1797        unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
1798          $self->{onerror}->(node => $todo->{node}, type => 'attribute missing:src');
1799        }
1800      },
1801    checker => $HTMLEmptyChecker,    checker => $HTMLEmptyChecker,
1802  };  };
1803    
# Line 1774  $Element->{$HTML_NS}->{embed} = { Line 1849  $Element->{$HTML_NS}->{embed} = {
1849  };  };
1850    
1851  $Element->{$HTML_NS}->{object} = {  $Element->{$HTML_NS}->{object} = {
1852    attrs_checker => $GetHTMLAttrsChecker->({    attrs_checker => sub {
1853      data => $HTMLURIAttrChecker,      my ($self, $todo) = @_;
1854      type => $HTMLIMTAttrChecker, ## TODO: one of |data| and |type| is required      $GetHTMLAttrsChecker->({
1855      ## TODO: usemap        data => $HTMLURIAttrChecker,
1856      ## TODO: width        type => $HTMLIMTAttrChecker,
1857      ## TODO: height        usemap => $HTMLUsemapAttrChecker,
1858    }),        ## TODO: width
1859          ## TODO: height
1860        })->($self, $todo);
1861        unless ($todo->{node}->has_attribute_ns (undef, 'data')) {
1862          unless ($todo->{node}->has_attribute_ns (undef, 'type')) {
1863            $self->{onerror}->(node => $todo->{node},
1864                               type => 'attribute missing:data|type');
1865          }
1866        }
1867      },
1868    checker => $ElementDefault->{checker}, ## TODO    checker => $ElementDefault->{checker}, ## TODO
1869  };  };
1870    
# Line 1830  $Element->{$HTML_NS}->{audio} = { Line 1914  $Element->{$HTML_NS}->{audio} = {
1914  };  };
1915    
1916  $Element->{$HTML_NS}->{source} = {  $Element->{$HTML_NS}->{source} = {
1917    attrs_checker => $GetHTMLAttrsChecker->({    attrs_checker => sub {
1918      src => $HTMLURIAttrChecker, # TODO: REQUIRED      my ($self, $todo) = @_;
1919      type => $HTMLIMTAttrChecker,      $GetHTMLAttrsChecker->({
1920      ## TODO: media        src => $HTMLURIAttrChecker,
1921    }),        type => $HTMLIMTAttrChecker,
1922          media => $HTMLMQAttrChecker,
1923        })->($self, $todo);
1924        unless ($todo->{node}->has_attribute_ns (undef, 'src')) {
1925          $self->{onerror}->(node => $todo->{node},
1926                             type => 'attribute missing:src');
1927        }
1928      },
1929    checker => $HTMLEmptyChecker,    checker => $HTMLEmptyChecker,
1930  };  };
1931    
# Line 1847  $Element->{$HTML_NS}->{canvas} = { Line 1938  $Element->{$HTML_NS}->{canvas} = {
1938  };  };
1939    
1940  $Element->{$HTML_NS}->{map} = {  $Element->{$HTML_NS}->{map} = {
1941    attrs_checker => $GetHTMLAttrsChecker->({}),    attrs_checker => $GetHTMLAttrsChecker->({
1942        id => sub {
1943          ## NOTE: same as global |id=""|, with |$self->{map}| registeration
1944          my ($self, $attr) = @_;
1945          my $value = $attr->value;
1946          if (length $value > 0) {
1947            if ($self->{id}->{$value}) {
1948              $self->{onerror}->(node => $attr, type => 'duplicate ID');
1949            } else {
1950              $self->{id}->{$value} = 1;
1951            }
1952          } else {
1953            ## NOTE: MUST contain at least one character
1954            $self->{onerror}->(node => $attr, type => 'attribute value is empty');
1955          }
1956          $self->{map}->{$value} ||= $attr;
1957        },
1958      }),
1959    checker => $HTMLBlockChecker,    checker => $HTMLBlockChecker,
1960  };  };
1961    
# Line 1881  $Element->{$HTML_NS}->{area} = { Line 1989  $Element->{$HTML_NS}->{area} = {
1989                                              type => 'syntax error');                                              type => 'syntax error');
1990                         }                         }
1991                       },                       },
1992                       ## TODO: coords                       target => $HTMLTargetAttrChecker,
                      target => sub { $self->{onerror}->(node => $attr, type => 'attribute not supported') }, ## TODO  
1993                       href => $HTMLURIAttrChecker,                       href => $HTMLURIAttrChecker,
1994                       ping => $HTMLSpaceURIsAttrChecker,                       ping => $HTMLSpaceURIsAttrChecker,
1995                       rel => $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker, ## TODO: registered? check                       rel => $HTMLUnorderedSetOfSpaceSeparatedTokensAttrChecker, ## TODO: registered? check
1996                       media => sub { $self->{onerror}->(node => $attr, type => 'attribute not supported') }, ## TODO                       media => $HTMLMQAttrChecker,
1997                       hreflang => sub { $self->{onerror}->(node => $attr, type => 'attribute not supported') }, ## TODO                       hreflang => $HTMLLanguageTagAttrChecker,
1998                       type => $HTMLIMTAttrChecker,                       type => $HTMLIMTAttrChecker,
1999                     }->{$attr_ln};                     }->{$attr_ln};
2000          if ($checker) {          if ($checker) {
# Line 2110  $Element->{$HTML_NS}->{caption} = { Line 2217  $Element->{$HTML_NS}->{caption} = {
2217  };  };
2218    
2219  $Element->{$HTML_NS}->{colgroup} = {  $Element->{$HTML_NS}->{colgroup} = {
2220    attrs_checker => $GetHTMLAttrsChecker->({}), ## TODO    attrs_checker => $GetHTMLAttrsChecker->({
2221        span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2222          ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
2223          ## TODO: "attribute not supported" if |col|.
2224          ## ISSUE: MUST NOT if any |col|?
2225          ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
2226      }),
2227    checker => sub {    checker => sub {
2228      my ($self, $todo) = @_;      my ($self, $todo) = @_;
2229      my $el = $todo->{node};      my $el = $todo->{node};
# Line 2445  sub check_element ($$$) { Line 2558  sub check_element ($$$) {
2558    $self->{minuses} = {};    $self->{minuses} = {};
2559    $self->{onerror} = $onerror;    $self->{onerror} = $onerror;
2560    $self->{id} = {};    $self->{id} = {};
2561      $self->{usemap} = [];
2562      $self->{map} = {};
2563    
2564    my @todo = ({type => 'element', node => $el});    my @todo = ({type => 'element', node => $el});
2565    while (@todo) {    while (@todo) {
# Line 2483  sub check_element ($$$) { Line 2598  sub check_element ($$$) {
2598        $self->_remove_minuses ($todo);        $self->_remove_minuses ($todo);
2599      }      }
2600    }    }
2601    
2602      for (@{$self->{usemap}}) {
2603        unless ($self->{map}->{$_->[0]}) {
2604          $self->{onerror}->(node => $_->[1], type => 'no referenced map');
2605        }
2606      }
2607    
2608      delete $self->{minuses};
2609      delete $self->{onerror};
2610      delete $self->{id};
2611      delete $self->{usemap};
2612      delete $self->{map};
2613  } # check_element  } # check_element
2614    
2615  sub _add_minuses ($@) {  sub _add_minuses ($@) {

Legend:
Removed from v.1.16  
changed lines
  Added in v.1.17

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24