/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/Atom.pm
Suika

Diff of /markup/html/whatpm/Whatpm/ContentChecker/Atom.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.13 by wakaba, Thu Mar 20 08:27:38 2008 UTC revision 1.17 by wakaba, Thu Mar 20 10:58:17 2008 UTC
# Line 5  require Whatpm::ContentChecker; Line 5  require Whatpm::ContentChecker;
5  require Whatpm::URIChecker;  require Whatpm::URIChecker;
6    
7  my $ATOM_NS = q<http://www.w3.org/2005/Atom>;  my $ATOM_NS = q<http://www.w3.org/2005/Atom>;
8    my $THR_NS = q<http://purl.org/syndication/thread/1.0>;
9    my $FH_NS = q<http://purl.org/syndication/history/1.0>;
10  my $LINK_REL = q<http://www.iana.org/assignments/relation/>;  my $LINK_REL = q<http://www.iana.org/assignments/relation/>;
11    
12  sub FEATURE_RFC4287 () {  sub FEATURE_RFC4287 () {
# Line 12  sub FEATURE_RFC4287 () { Line 14  sub FEATURE_RFC4287 () {
14    Whatpm::ContentChecker::FEATURE_ALLOWED    Whatpm::ContentChecker::FEATURE_ALLOWED
15  }  }
16    
17    sub FEATURE_RFC4685 () {
18      Whatpm::ContentChecker::FEATURE_STATUS_CR |
19      Whatpm::ContentChecker::FEATURE_ALLOWED
20    }
21    
22  ## MUST be well-formed XML (RFC 4287 references XML 1.0 REC 20040204)  ## MUST be well-formed XML (RFC 4287 references XML 1.0 REC 20040204)
23    
24  ## NOTE: Commants and PIs are not explicitly allowed.  ## NOTE: Commants and PIs are not explicitly allowed.
# Line 398  $Element->{$ATOM_NS}->{entry} = { Line 405  $Element->{$ATOM_NS}->{entry} = {
405             rights => 1,             rights => 1,
406             source => 1,             source => 1,
407             summary => 1,             summary => 1,
            ## TODO: MUST if child::content/@src | child::content/@type = IMT, !text/ !/xml !+xml  
408             title => 1,             title => 1,
409             updated => 1,             updated => 1,
410            }->{$child_ln}) {            }->{$child_ln}) {
# Line 441  $Element->{$ATOM_NS}->{entry} = { Line 447  $Element->{$ATOM_NS}->{entry} = {
447        if ($not_allowed) {        if ($not_allowed) {
448          $self->{onerror}->(node => $child_el, type => 'element not allowed');          $self->{onerror}->(node => $child_el, type => 'element not allowed');
449        }        }
450        } elsif ($child_nsuri eq $THR_NS and $child_ln eq 'in-reply-to') {
451          ## ISSUE: Where |thr:in-reply-to| is allowed is not explicit;y
452          ## defined in RFC 4685.
453          #
454        } elsif ($child_nsuri eq $THR_NS and $child_ln eq 'total') {
455          #
456      } else {      } else {
457        ## TODO: extension element        ## TODO: extension element
458        $self->{onerror}->(node => $child_el, type => 'element not allowed');        $self->{onerror}->(node => $child_el, type => 'element not allowed');
# Line 509  $Element->{$ATOM_NS}->{entry} = { Line 521  $Element->{$ATOM_NS}->{entry} = {
521        $self->{onerror}->(node => $item->{node},        $self->{onerror}->(node => $item->{node},
522                           type => 'element missing:atom|link|alternate');                           type => 'element missing:atom|link|alternate');
523      }      }
524    
525        if ($element_state->{require_summary} and
526            not $element_state->{has_element}->{summary}) {
527          $self->{onerror}->(node => $item->{node},
528                             type => 'element missing:atom|summary',
529                             level => $self->{must_level});
530        }
531    },    },
532  };  };
533    
# Line 639  $Element->{$ATOM_NS}->{content} = { Line 658  $Element->{$ATOM_NS}->{content} = {
658        my ($self, $attr, $item, $element_state) = @_;        my ($self, $attr, $item, $element_state) = @_;
659    
660        $element_state->{has_src} = 1;        $element_state->{has_src} = 1;
661          $item->{parent_state}->{require_summary} = 1;
662    
663        ## NOTE: There MUST NOT be any white space.        ## NOTE: There MUST NOT be any white space.
664        Whatpm::URIChecker->check_iri_reference ($attr->value, sub {        Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
# Line 686  $Element->{$ATOM_NS}->{content} = { Line 706  $Element->{$ATOM_NS}->{content} = {
706          }          }
707        }        }
708    
709        if ($value =~ m![+/][Xx][Mm][Ll]\z!) {        if ({text => 1, html => 1, xhtml => 1}->{$value}) {
710            #
711          } elsif ($value =~ m![+/][Xx][Mm][Ll]\z!) {
712          ## ISSUE: There is no definition for "XML media type" in RFC 3023.          ## ISSUE: There is no definition for "XML media type" in RFC 3023.
713          ## Is |application/xml-dtd| an XML media type?          ## Is |application/xml-dtd| an XML media type?
714          $value = 'xml';          $value = 'xml';
# Line 695  $Element->{$ATOM_NS}->{content} = { Line 717  $Element->{$ATOM_NS}->{content} = {
717        } elsif ($value =~ m!^(?>message|multipart)/!i) {        } elsif ($value =~ m!^(?>message|multipart)/!i) {
718          $self->{onerror}->(node => $attr, type => 'IMT:composite',          $self->{onerror}->(node => $attr, type => 'IMT:composite',
719                             level => $self->{must_level});                             level => $self->{must_level});
720            $item->{parent_state}->{require_summary} = 1;
721          } else {
722            $item->{parent_state}->{require_summary} = 1;
723        }        }
724    
725        $element_state->{type} = $value;        $element_state->{type} = $value;
# Line 775  $Element->{$ATOM_NS}->{content} = { Line 800  $Element->{$ATOM_NS}->{content} = {
800          $self->{onerror}->(node => $item->{node},          $self->{onerror}->(node => $item->{node},
801                             type => 'attribute missing:type',                             type => 'attribute missing:type',
802                             level => $self->{should_level});                             level => $self->{should_level});
803        }        } elsif ($element_state->{type} eq 'text' or
804        if ($element_state->{type} eq 'text' or                 $element_state->{type} eq 'html' or
805            $element_state->{type} eq 'html' or                 $element_state->{type} eq 'xhtml') {
           $element_state->{type} eq 'xhtml') {  
806          $self->{onerror}          $self->{onerror}
807              ->(node => $item->{node}->get_attribute_node_ns (undef, 'type'),              ->(node => $item->{node}->get_attribute_node_ns (undef, 'type'),
808                 type => 'not IMT', level => $self->{must_level});                 type => 'not IMT', level => $self->{must_level});
# Line 956  $Element->{$ATOM_NS}->{id} = { Line 980  $Element->{$ATOM_NS}->{id} = {
980    },    },
981  };  };
982    
983  $Element->{$ATOM_NS}->{link} = {  my $AtomIMTAttrChecker = sub {
   %AtomChecker,  
   check_attrs => $GetAtomAttrsChecker->({  
     href => sub {  
       my ($self, $attr) = @_;  
       ## NOTE: There MUST NOT be any white space.  
       Whatpm::URIChecker->check_iri_reference ($attr->value, sub {  
         my %opt = @_;  
         $self->{onerror}->(node => $attr, level => $opt{level},  
                            type => 'URI::'.$opt{type}.  
                            (defined $opt{position} ? ':'.$opt{position} : ''));  
       });  
     },  
     hreflang => $AtomLanguageTagAttrChecker,  
     length => sub { }, # No MUST; in octets.  
     rel => sub { # MUST  
       my ($self, $attr) = @_;  
       my $value = $attr->value;  
       if ($value =~ /\A(?>[0-9A-Za-z._~!\$&'()*+,;=\x{A0}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFEF}\x{10000}-\x{1FFFD}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}\x{40000}-\x{4FFFD}\x{50000}-\x{5FFFD}\x{60000}-\x{6FFFD}\x{70000}-\x{7FFFD}\x{80000}-\x{8FFFD}\x{90000}-\x{9FFFD}\x{A0000}-\x{AFFFD}\x{B0000}-\x{BFFFD}\x{C0000}-\x{CFFFD}\x{D0000}-\x{DFFFD}\x{E1000}-\x{EFFFD}-]|%[0-9A-Fa-f][0-9A-Fa-f]|\@)+\z/) {  
         $value = $LINK_REL . $value;  
       }  
   
       ## NOTE: There MUST NOT be any white space.  
       Whatpm::URIChecker->check_iri ($value, sub {  
         my %opt = @_;  
         $self->{onerror}->(node => $attr, level => $opt{level},  
                            type => 'URI::'.$opt{type}.  
                            (defined $opt{position} ? ':'.$opt{position} : ''));  
       });  
   
       ## TODO: Warn if unregistered  
     },  
     title => sub { }, # No MUST  
     type => sub {  
       ## NOTE: MUST be a MIME media type.  What is "MIME media type"?  
984        my ($self, $attr) = @_;        my ($self, $attr) = @_;
985        my $value = $attr->value;        my $value = $attr->value;
986        my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;        my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
# Line 1018  $Element->{$ATOM_NS}->{link} = { Line 1008  $Element->{$ATOM_NS}->{link} = {
1008        } else {        } else {
1009          $self->{onerror}->(node => $attr, type => 'IMT:syntax error');          $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
1010        }        }
1011    }; # $AtomIMTAttrChecker
1012    
1013    my $AtomIRIReferenceAttrChecker = sub {
1014      my ($self, $attr) = @_;
1015      ## NOTE: There MUST NOT be any white space.
1016      Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
1017        my %opt = @_;
1018        $self->{onerror}->(node => $attr, level => $opt{level},
1019                           type => 'URI::'.$opt{type}.
1020                           (defined $opt{position} ? ':'.$opt{position} : ''));
1021      });
1022    }; # $AtomIRIReferenceAttrChecker
1023    
1024    $Element->{$ATOM_NS}->{link} = {
1025      %AtomChecker,
1026      check_attrs => $GetAtomAttrsChecker->({
1027        href => $AtomIRIReferenceAttrChecker,
1028        hreflang => $AtomLanguageTagAttrChecker,
1029        length => sub { }, # No MUST; in octets.
1030        rel => sub { # MUST
1031          my ($self, $attr) = @_;
1032          my $value = $attr->value;
1033          if ($value =~ /\A(?>[0-9A-Za-z._~!\$&'()*+,;=\x{A0}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFEF}\x{10000}-\x{1FFFD}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}\x{40000}-\x{4FFFD}\x{50000}-\x{5FFFD}\x{60000}-\x{6FFFD}\x{70000}-\x{7FFFD}\x{80000}-\x{8FFFD}\x{90000}-\x{9FFFD}\x{A0000}-\x{AFFFD}\x{B0000}-\x{BFFFD}\x{C0000}-\x{CFFFD}\x{D0000}-\x{DFFFD}\x{E1000}-\x{EFFFD}-]|%[0-9A-Fa-f][0-9A-Fa-f]|\@)+\z/) {
1034            $value = $LINK_REL . $value;
1035          }
1036    
1037          ## NOTE: There MUST NOT be any white space.
1038          Whatpm::URIChecker->check_iri ($value, sub {
1039            my %opt = @_;
1040            $self->{onerror}->(node => $attr, level => $opt{level},
1041                               type => 'URI::'.$opt{type}.
1042                               (defined $opt{position} ? ':'.$opt{position} : ''));
1043          });
1044    
1045          ## TODO: Warn if unregistered
1046    
1047          ## TODO: rel=license [RFC 4946]
1048          ## MUST NOT multiple rel=license with same href="",type="" pairs
1049          ## href="" SHOULD be dereferencable
1050          ## title="" SHOULD be there if multiple rel=license
1051          ## MUST NOT "unspecified" and other rel=license
1052      },      },
1053        title => sub { }, # No MUST
1054        type => $AtomIMTAttrChecker,
1055        ## NOTE: MUST be a MIME media type.  What is "MIME media type"?
1056    }, {    }, {
1057      href => FEATURE_RFC4287,      href => FEATURE_RFC4287,
1058      hreflang => FEATURE_RFC4287,      hreflang => FEATURE_RFC4287,
# Line 1026  $Element->{$ATOM_NS}->{link} = { Line 1060  $Element->{$ATOM_NS}->{link} = {
1060      rel => FEATURE_RFC4287,      rel => FEATURE_RFC4287,
1061      title => FEATURE_RFC4287,      title => FEATURE_RFC4287,
1062      type => FEATURE_RFC4287,      type => FEATURE_RFC4287,
1063    
1064        ## TODO: thr:count
1065        ## TODO: thr:updated
1066    }),    }),
1067    check_start =>  sub {    check_start =>  sub {
1068      my ($self, $item, $element_state) = @_;      my ($self, $item, $element_state) = @_;
# Line 1074  $Element->{$ATOM_NS}->{logo} = { Line 1111  $Element->{$ATOM_NS}->{logo} = {
1111    
1112  $Element->{$ATOM_NS}->{published} = \%AtomDateConstruct;  $Element->{$ATOM_NS}->{published} = \%AtomDateConstruct;
1113    
1114  $Element->{$ATOM_NS}->{rights} = \%AtomDateConstruct;  $Element->{$ATOM_NS}->{rights} = \%AtomTextConstruct;
1115  ## NOTE: SHOULD NOT be used to convey machine-readable information.  ## NOTE: SHOULD NOT be used to convey machine-readable information.
1116    
1117  $Element->{$ATOM_NS}->{source} = {  $Element->{$ATOM_NS}->{source} = {
# Line 1110  $Element->{$ATOM_NS}->{source} = { Line 1147  $Element->{$ATOM_NS}->{source} = {
1147            $not_allowed = 1;            $not_allowed = 1;
1148          }          }
1149        } elsif ($child_ln eq 'link') {        } elsif ($child_ln eq 'link') {
1150          if ($child_ln->rel eq $LINK_REL . 'alternate') {          if ($child_el->rel eq $LINK_REL . 'alternate') {
1151            my $type = $child_ln->get_attribute_ns (undef, 'type');            my $type = $child_el->get_attribute_ns (undef, 'type');
1152            $type = '' unless defined $type;            $type = '' unless defined $type;
1153            my $hreflang = $child_ln->get_attribute_ns (undef, 'hreflang');            my $hreflang = $child_el->get_attribute_ns (undef, 'hreflang');
1154            $hreflang = '' unless defined $hreflang;            $hreflang = '' unless defined $hreflang;
1155            my $key = 'link:'.(defined $type ? ':'.$type : '').':'.            my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
1156                (defined $hreflang ? ':'.$hreflang : '');                (defined $hreflang ? ':'.$hreflang : '');
# Line 1164  $Element->{$ATOM_NS}->{updated} = \%Atom Line 1201  $Element->{$ATOM_NS}->{updated} = \%Atom
1201    
1202  ## TODO: simple extension element and structured extension element  ## TODO: simple extension element and structured extension element
1203    
1204    ## -- Atom Threading 1.0 [RFC 4685]
1205    
1206    $Element->{$THR_NS}->{''} = {
1207      %AtomChecker,
1208      status => 0,
1209    };
1210    
1211    ## ISSUE: Strictly speaking, thr:* element/attribute,
1212    ## where * is an undefined local name, is not disallowed.
1213    
1214    $Element->{$THR_NS}->{'in-reply-to'} = {
1215      %AtomChecker,
1216      status => FEATURE_RFC4685,
1217      check_attrs => $GetAtomAttrsChecker->({
1218        href => $AtomIRIReferenceAttrChecker,
1219            ## TODO: fact-level.
1220            ## TODO: MUST be dereferencable.
1221        ref => sub {
1222          my ($self, $attr, $item, $element_state) = @_;
1223          $element_state->{has_ref} = 1;
1224    
1225          ## NOTE: Same as |atom:id|.
1226          ## NOTE: There MUST NOT be any white space.
1227          Whatpm::URIChecker->check_iri ($attr->value, sub {
1228            my %opt = @_;
1229            $self->{onerror}->(node => $attr, level => $opt{level},
1230                               type => 'URI::'.$opt{type}.
1231                               (defined $opt{position} ? ':'.$opt{position} : ''));
1232          });
1233    
1234          ## TODO: Check against ID guideline...
1235        },
1236        source => $AtomIRIReferenceAttrChecker,
1237            ## TODO: fact-level.
1238            ## TODO: MUST be dereferencable.
1239        type => $AtomIMTAttrChecker,
1240            ## TODO: fact-level.
1241      }, {
1242        href => FEATURE_RFC4685,
1243        source => FEATURE_RFC4685,
1244        ref => FEATURE_RFC4685,
1245        type => FEATURE_RFC4685,
1246      }),
1247      check_end => sub {
1248        my ($self, $item, $element_state) = @_;
1249      
1250        unless ($element_state->{has_ref}) {
1251          $self->{onerror}->(node => $item->{node},
1252                             type => 'attribute missing:ref',
1253                             level => $self->{must_level});
1254        }
1255    
1256        $AtomChecker{check_end}->(@_);
1257      },
1258      ## NOTE: Content model has no constraint.
1259    };
1260    
1261    $Element->{$THR_NS}->{total} = {
1262      %AtomChecker,
1263      check_start =>  sub {
1264        my ($self, $item, $element_state) = @_;
1265        $element_state->{value} = '';
1266      },
1267      check_child_element => sub {
1268        my ($self, $item, $child_el, $child_nsuri, $child_ln,
1269            $child_is_transparent, $element_state) = @_;
1270    
1271        if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
1272          $self->{onerror}->(node => $child_el,
1273                             type => 'element not allowed:minus',
1274                             level => $self->{must_level});
1275        } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1276          #
1277        } else {
1278          $self->{onerror}->(node => $child_el,
1279                             type => 'element not allowed',
1280                             level => $self->{must_level});
1281        }
1282      },
1283      check_child_text => sub {
1284        my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1285        $element_state->{value} .= $child_node->data;
1286      },
1287      check_end => sub {
1288        my ($self, $item, $element_state) = @_;
1289    
1290        ## NOTE: xsd:nonNegativeInteger
1291        unless ($element_state->{value} =~ /\A(?>[0-9]+|-0+)\z/) {
1292          $self->{onerror}->(node => $item->{node},
1293                             type => 'syntax error', ## TODO:
1294                             level => $self->{must_level});
1295        }
1296    
1297        $AtomChecker{check_end}->(@_);
1298      },
1299    };
1300    
1301    ## TODO: fh:complete
1302    
1303    ## TODO: fh:archive
1304    
1305    ## TODO: Check as archive document, page feed document, ...
1306    
1307    ## TODO: APP [RFC 5023]
1308    
1309  $Whatpm::ContentChecker::Namespace->{$ATOM_NS}->{loaded} = 1;  $Whatpm::ContentChecker::Namespace->{$ATOM_NS}->{loaded} = 1;
1310    $Whatpm::ContentChecker::Namespace->{$THR_NS}->{loaded} = 1;
1311    
1312  1;  1;

Legend:
Removed from v.1.13  
changed lines
  Added in v.1.17

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24