5 |
require Whatpm::URIChecker; |
require Whatpm::URIChecker; |
6 |
|
|
7 |
my $ATOM_NS = q<http://www.w3.org/2005/Atom>; |
my $ATOM_NS = q<http://www.w3.org/2005/Atom>; |
8 |
|
my $THR_NS = q<http://purl.org/syndication/thread/1.0>; |
9 |
|
my $FH_NS = q<http://purl.org/syndication/history/1.0>; |
10 |
my $LINK_REL = q<http://www.iana.org/assignments/relation/>; |
my $LINK_REL = q<http://www.iana.org/assignments/relation/>; |
11 |
|
|
12 |
sub FEATURE_RFC4287 () { |
sub FEATURE_RFC4287 () { |
14 |
Whatpm::ContentChecker::FEATURE_ALLOWED |
Whatpm::ContentChecker::FEATURE_ALLOWED |
15 |
} |
} |
16 |
|
|
17 |
|
sub FEATURE_RFC4685 () { |
18 |
|
Whatpm::ContentChecker::FEATURE_STATUS_CR | |
19 |
|
Whatpm::ContentChecker::FEATURE_ALLOWED |
20 |
|
} |
21 |
|
|
22 |
## MUST be well-formed XML (RFC 4287 references XML 1.0 REC 20040204) |
## MUST be well-formed XML (RFC 4287 references XML 1.0 REC 20040204) |
23 |
|
|
24 |
## NOTE: Commants and PIs are not explicitly allowed. |
## NOTE: Commants and PIs are not explicitly allowed. |
405 |
rights => 1, |
rights => 1, |
406 |
source => 1, |
source => 1, |
407 |
summary => 1, |
summary => 1, |
|
## TODO: MUST if child::content/@src | child::content/@type = IMT, !text/ !/xml !+xml |
|
408 |
title => 1, |
title => 1, |
409 |
updated => 1, |
updated => 1, |
410 |
}->{$child_ln}) { |
}->{$child_ln}) { |
447 |
if ($not_allowed) { |
if ($not_allowed) { |
448 |
$self->{onerror}->(node => $child_el, type => 'element not allowed'); |
$self->{onerror}->(node => $child_el, type => 'element not allowed'); |
449 |
} |
} |
450 |
|
} elsif ($child_nsuri eq $THR_NS and $child_ln eq 'in-reply-to') { |
451 |
|
## ISSUE: Where |thr:in-reply-to| is allowed is not explicit;y |
452 |
|
## defined in RFC 4685. |
453 |
|
# |
454 |
|
} elsif ($child_nsuri eq $THR_NS and $child_ln eq 'total') { |
455 |
|
# |
456 |
} else { |
} else { |
457 |
## TODO: extension element |
## TODO: extension element |
458 |
$self->{onerror}->(node => $child_el, type => 'element not allowed'); |
$self->{onerror}->(node => $child_el, type => 'element not allowed'); |
521 |
$self->{onerror}->(node => $item->{node}, |
$self->{onerror}->(node => $item->{node}, |
522 |
type => 'element missing:atom|link|alternate'); |
type => 'element missing:atom|link|alternate'); |
523 |
} |
} |
524 |
|
|
525 |
|
if ($element_state->{require_summary} and |
526 |
|
not $element_state->{has_element}->{summary}) { |
527 |
|
$self->{onerror}->(node => $item->{node}, |
528 |
|
type => 'element missing:atom|summary', |
529 |
|
level => $self->{must_level}); |
530 |
|
} |
531 |
}, |
}, |
532 |
}; |
}; |
533 |
|
|
658 |
my ($self, $attr, $item, $element_state) = @_; |
my ($self, $attr, $item, $element_state) = @_; |
659 |
|
|
660 |
$element_state->{has_src} = 1; |
$element_state->{has_src} = 1; |
661 |
|
$item->{parent_state}->{require_summary} = 1; |
662 |
|
|
663 |
## NOTE: There MUST NOT be any white space. |
## NOTE: There MUST NOT be any white space. |
664 |
Whatpm::URIChecker->check_iri_reference ($attr->value, sub { |
Whatpm::URIChecker->check_iri_reference ($attr->value, sub { |
706 |
} |
} |
707 |
} |
} |
708 |
|
|
709 |
if ($value =~ m![+/][Xx][Mm][Ll]\z!) { |
if ({text => 1, html => 1, xhtml => 1}->{$value}) { |
710 |
|
# |
711 |
|
} elsif ($value =~ m![+/][Xx][Mm][Ll]\z!) { |
712 |
## ISSUE: There is no definition for "XML media type" in RFC 3023. |
## ISSUE: There is no definition for "XML media type" in RFC 3023. |
713 |
## Is |application/xml-dtd| an XML media type? |
## Is |application/xml-dtd| an XML media type? |
714 |
$value = 'xml'; |
$value = 'xml'; |
717 |
} elsif ($value =~ m!^(?>message|multipart)/!i) { |
} elsif ($value =~ m!^(?>message|multipart)/!i) { |
718 |
$self->{onerror}->(node => $attr, type => 'IMT:composite', |
$self->{onerror}->(node => $attr, type => 'IMT:composite', |
719 |
level => $self->{must_level}); |
level => $self->{must_level}); |
720 |
|
$item->{parent_state}->{require_summary} = 1; |
721 |
|
} else { |
722 |
|
$item->{parent_state}->{require_summary} = 1; |
723 |
} |
} |
724 |
|
|
725 |
$element_state->{type} = $value; |
$element_state->{type} = $value; |
800 |
$self->{onerror}->(node => $item->{node}, |
$self->{onerror}->(node => $item->{node}, |
801 |
type => 'attribute missing:type', |
type => 'attribute missing:type', |
802 |
level => $self->{should_level}); |
level => $self->{should_level}); |
803 |
} |
} elsif ($element_state->{type} eq 'text' or |
804 |
if ($element_state->{type} eq 'text' or |
$element_state->{type} eq 'html' or |
805 |
$element_state->{type} eq 'html' or |
$element_state->{type} eq 'xhtml') { |
|
$element_state->{type} eq 'xhtml') { |
|
806 |
$self->{onerror} |
$self->{onerror} |
807 |
->(node => $item->{node}->get_attribute_node_ns (undef, 'type'), |
->(node => $item->{node}->get_attribute_node_ns (undef, 'type'), |
808 |
type => 'not IMT', level => $self->{must_level}); |
type => 'not IMT', level => $self->{must_level}); |
980 |
}, |
}, |
981 |
}; |
}; |
982 |
|
|
983 |
$Element->{$ATOM_NS}->{link} = { |
my $AtomIMTAttrChecker = sub { |
|
%AtomChecker, |
|
|
check_attrs => $GetAtomAttrsChecker->({ |
|
|
href => sub { |
|
|
my ($self, $attr) = @_; |
|
|
## NOTE: There MUST NOT be any white space. |
|
|
Whatpm::URIChecker->check_iri_reference ($attr->value, sub { |
|
|
my %opt = @_; |
|
|
$self->{onerror}->(node => $attr, level => $opt{level}, |
|
|
type => 'URI::'.$opt{type}. |
|
|
(defined $opt{position} ? ':'.$opt{position} : '')); |
|
|
}); |
|
|
}, |
|
|
hreflang => $AtomLanguageTagAttrChecker, |
|
|
length => sub { }, # No MUST; in octets. |
|
|
rel => sub { # MUST |
|
|
my ($self, $attr) = @_; |
|
|
my $value = $attr->value; |
|
|
if ($value =~ /\A(?>[0-9A-Za-z._~!\$&'()*+,;=\x{A0}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFEF}\x{10000}-\x{1FFFD}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}\x{40000}-\x{4FFFD}\x{50000}-\x{5FFFD}\x{60000}-\x{6FFFD}\x{70000}-\x{7FFFD}\x{80000}-\x{8FFFD}\x{90000}-\x{9FFFD}\x{A0000}-\x{AFFFD}\x{B0000}-\x{BFFFD}\x{C0000}-\x{CFFFD}\x{D0000}-\x{DFFFD}\x{E1000}-\x{EFFFD}-]|%[0-9A-Fa-f][0-9A-Fa-f]|\@)+\z/) { |
|
|
$value = $LINK_REL . $value; |
|
|
} |
|
|
|
|
|
## NOTE: There MUST NOT be any white space. |
|
|
Whatpm::URIChecker->check_iri ($value, sub { |
|
|
my %opt = @_; |
|
|
$self->{onerror}->(node => $attr, level => $opt{level}, |
|
|
type => 'URI::'.$opt{type}. |
|
|
(defined $opt{position} ? ':'.$opt{position} : '')); |
|
|
}); |
|
|
|
|
|
## TODO: Warn if unregistered |
|
|
}, |
|
|
title => sub { }, # No MUST |
|
|
type => sub { |
|
|
## NOTE: MUST be a MIME media type. What is "MIME media type"? |
|
984 |
my ($self, $attr) = @_; |
my ($self, $attr) = @_; |
985 |
my $value = $attr->value; |
my $value = $attr->value; |
986 |
my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/; |
my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/; |
1008 |
} else { |
} else { |
1009 |
$self->{onerror}->(node => $attr, type => 'IMT:syntax error'); |
$self->{onerror}->(node => $attr, type => 'IMT:syntax error'); |
1010 |
} |
} |
1011 |
|
}; # $AtomIMTAttrChecker |
1012 |
|
|
1013 |
|
my $AtomIRIReferenceAttrChecker = sub { |
1014 |
|
my ($self, $attr) = @_; |
1015 |
|
## NOTE: There MUST NOT be any white space. |
1016 |
|
Whatpm::URIChecker->check_iri_reference ($attr->value, sub { |
1017 |
|
my %opt = @_; |
1018 |
|
$self->{onerror}->(node => $attr, level => $opt{level}, |
1019 |
|
type => 'URI::'.$opt{type}. |
1020 |
|
(defined $opt{position} ? ':'.$opt{position} : '')); |
1021 |
|
}); |
1022 |
|
}; # $AtomIRIReferenceAttrChecker |
1023 |
|
|
1024 |
|
$Element->{$ATOM_NS}->{link} = { |
1025 |
|
%AtomChecker, |
1026 |
|
check_attrs => $GetAtomAttrsChecker->({ |
1027 |
|
href => $AtomIRIReferenceAttrChecker, |
1028 |
|
hreflang => $AtomLanguageTagAttrChecker, |
1029 |
|
length => sub { }, # No MUST; in octets. |
1030 |
|
rel => sub { # MUST |
1031 |
|
my ($self, $attr) = @_; |
1032 |
|
my $value = $attr->value; |
1033 |
|
if ($value =~ /\A(?>[0-9A-Za-z._~!\$&'()*+,;=\x{A0}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFEF}\x{10000}-\x{1FFFD}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}\x{40000}-\x{4FFFD}\x{50000}-\x{5FFFD}\x{60000}-\x{6FFFD}\x{70000}-\x{7FFFD}\x{80000}-\x{8FFFD}\x{90000}-\x{9FFFD}\x{A0000}-\x{AFFFD}\x{B0000}-\x{BFFFD}\x{C0000}-\x{CFFFD}\x{D0000}-\x{DFFFD}\x{E1000}-\x{EFFFD}-]|%[0-9A-Fa-f][0-9A-Fa-f]|\@)+\z/) { |
1034 |
|
$value = $LINK_REL . $value; |
1035 |
|
} |
1036 |
|
|
1037 |
|
## NOTE: There MUST NOT be any white space. |
1038 |
|
Whatpm::URIChecker->check_iri ($value, sub { |
1039 |
|
my %opt = @_; |
1040 |
|
$self->{onerror}->(node => $attr, level => $opt{level}, |
1041 |
|
type => 'URI::'.$opt{type}. |
1042 |
|
(defined $opt{position} ? ':'.$opt{position} : '')); |
1043 |
|
}); |
1044 |
|
|
1045 |
|
## TODO: Warn if unregistered |
1046 |
|
|
1047 |
|
## TODO: rel=license [RFC 4946] |
1048 |
|
## MUST NOT multiple rel=license with same href="",type="" pairs |
1049 |
|
## href="" SHOULD be dereferencable |
1050 |
|
## title="" SHOULD be there if multiple rel=license |
1051 |
|
## MUST NOT "unspecified" and other rel=license |
1052 |
}, |
}, |
1053 |
|
title => sub { }, # No MUST |
1054 |
|
type => $AtomIMTAttrChecker, |
1055 |
|
## NOTE: MUST be a MIME media type. What is "MIME media type"? |
1056 |
}, { |
}, { |
1057 |
href => FEATURE_RFC4287, |
href => FEATURE_RFC4287, |
1058 |
hreflang => FEATURE_RFC4287, |
hreflang => FEATURE_RFC4287, |
1060 |
rel => FEATURE_RFC4287, |
rel => FEATURE_RFC4287, |
1061 |
title => FEATURE_RFC4287, |
title => FEATURE_RFC4287, |
1062 |
type => FEATURE_RFC4287, |
type => FEATURE_RFC4287, |
1063 |
|
|
1064 |
|
## TODO: thr:count |
1065 |
|
## TODO: thr:updated |
1066 |
}), |
}), |
1067 |
check_start => sub { |
check_start => sub { |
1068 |
my ($self, $item, $element_state) = @_; |
my ($self, $item, $element_state) = @_; |
1111 |
|
|
1112 |
$Element->{$ATOM_NS}->{published} = \%AtomDateConstruct; |
$Element->{$ATOM_NS}->{published} = \%AtomDateConstruct; |
1113 |
|
|
1114 |
$Element->{$ATOM_NS}->{rights} = \%AtomDateConstruct; |
$Element->{$ATOM_NS}->{rights} = \%AtomTextConstruct; |
1115 |
## NOTE: SHOULD NOT be used to convey machine-readable information. |
## NOTE: SHOULD NOT be used to convey machine-readable information. |
1116 |
|
|
1117 |
$Element->{$ATOM_NS}->{source} = { |
$Element->{$ATOM_NS}->{source} = { |
1147 |
$not_allowed = 1; |
$not_allowed = 1; |
1148 |
} |
} |
1149 |
} elsif ($child_ln eq 'link') { |
} elsif ($child_ln eq 'link') { |
1150 |
if ($child_ln->rel eq $LINK_REL . 'alternate') { |
if ($child_el->rel eq $LINK_REL . 'alternate') { |
1151 |
my $type = $child_ln->get_attribute_ns (undef, 'type'); |
my $type = $child_el->get_attribute_ns (undef, 'type'); |
1152 |
$type = '' unless defined $type; |
$type = '' unless defined $type; |
1153 |
my $hreflang = $child_ln->get_attribute_ns (undef, 'hreflang'); |
my $hreflang = $child_el->get_attribute_ns (undef, 'hreflang'); |
1154 |
$hreflang = '' unless defined $hreflang; |
$hreflang = '' unless defined $hreflang; |
1155 |
my $key = 'link:'.(defined $type ? ':'.$type : '').':'. |
my $key = 'link:'.(defined $type ? ':'.$type : '').':'. |
1156 |
(defined $hreflang ? ':'.$hreflang : ''); |
(defined $hreflang ? ':'.$hreflang : ''); |
1201 |
|
|
1202 |
## TODO: simple extension element and structured extension element |
## TODO: simple extension element and structured extension element |
1203 |
|
|
1204 |
|
## -- Atom Threading 1.0 [RFC 4685] |
1205 |
|
|
1206 |
|
$Element->{$THR_NS}->{''} = { |
1207 |
|
%AtomChecker, |
1208 |
|
status => 0, |
1209 |
|
}; |
1210 |
|
|
1211 |
|
## ISSUE: Strictly speaking, thr:* element/attribute, |
1212 |
|
## where * is an undefined local name, is not disallowed. |
1213 |
|
|
1214 |
|
$Element->{$THR_NS}->{'in-reply-to'} = { |
1215 |
|
%AtomChecker, |
1216 |
|
status => FEATURE_RFC4685, |
1217 |
|
check_attrs => $GetAtomAttrsChecker->({ |
1218 |
|
href => $AtomIRIReferenceAttrChecker, |
1219 |
|
## TODO: fact-level. |
1220 |
|
## TODO: MUST be dereferencable. |
1221 |
|
ref => sub { |
1222 |
|
my ($self, $attr, $item, $element_state) = @_; |
1223 |
|
$element_state->{has_ref} = 1; |
1224 |
|
|
1225 |
|
## NOTE: Same as |atom:id|. |
1226 |
|
## NOTE: There MUST NOT be any white space. |
1227 |
|
Whatpm::URIChecker->check_iri ($attr->value, sub { |
1228 |
|
my %opt = @_; |
1229 |
|
$self->{onerror}->(node => $attr, level => $opt{level}, |
1230 |
|
type => 'URI::'.$opt{type}. |
1231 |
|
(defined $opt{position} ? ':'.$opt{position} : '')); |
1232 |
|
}); |
1233 |
|
|
1234 |
|
## TODO: Check against ID guideline... |
1235 |
|
}, |
1236 |
|
source => $AtomIRIReferenceAttrChecker, |
1237 |
|
## TODO: fact-level. |
1238 |
|
## TODO: MUST be dereferencable. |
1239 |
|
type => $AtomIMTAttrChecker, |
1240 |
|
## TODO: fact-level. |
1241 |
|
}, { |
1242 |
|
href => FEATURE_RFC4685, |
1243 |
|
source => FEATURE_RFC4685, |
1244 |
|
ref => FEATURE_RFC4685, |
1245 |
|
type => FEATURE_RFC4685, |
1246 |
|
}), |
1247 |
|
check_end => sub { |
1248 |
|
my ($self, $item, $element_state) = @_; |
1249 |
|
|
1250 |
|
unless ($element_state->{has_ref}) { |
1251 |
|
$self->{onerror}->(node => $item->{node}, |
1252 |
|
type => 'attribute missing:ref', |
1253 |
|
level => $self->{must_level}); |
1254 |
|
} |
1255 |
|
|
1256 |
|
$AtomChecker{check_end}->(@_); |
1257 |
|
}, |
1258 |
|
## NOTE: Content model has no constraint. |
1259 |
|
}; |
1260 |
|
|
1261 |
|
$Element->{$THR_NS}->{total} = { |
1262 |
|
%AtomChecker, |
1263 |
|
check_start => sub { |
1264 |
|
my ($self, $item, $element_state) = @_; |
1265 |
|
$element_state->{value} = ''; |
1266 |
|
}, |
1267 |
|
check_child_element => sub { |
1268 |
|
my ($self, $item, $child_el, $child_nsuri, $child_ln, |
1269 |
|
$child_is_transparent, $element_state) = @_; |
1270 |
|
|
1271 |
|
if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) { |
1272 |
|
$self->{onerror}->(node => $child_el, |
1273 |
|
type => 'element not allowed:minus', |
1274 |
|
level => $self->{must_level}); |
1275 |
|
} elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) { |
1276 |
|
# |
1277 |
|
} else { |
1278 |
|
$self->{onerror}->(node => $child_el, |
1279 |
|
type => 'element not allowed', |
1280 |
|
level => $self->{must_level}); |
1281 |
|
} |
1282 |
|
}, |
1283 |
|
check_child_text => sub { |
1284 |
|
my ($self, $item, $child_node, $has_significant, $element_state) = @_; |
1285 |
|
$element_state->{value} .= $child_node->data; |
1286 |
|
}, |
1287 |
|
check_end => sub { |
1288 |
|
my ($self, $item, $element_state) = @_; |
1289 |
|
|
1290 |
|
## NOTE: xsd:nonNegativeInteger |
1291 |
|
unless ($element_state->{value} =~ /\A(?>[0-9]+|-0+)\z/) { |
1292 |
|
$self->{onerror}->(node => $item->{node}, |
1293 |
|
type => 'syntax error', ## TODO: |
1294 |
|
level => $self->{must_level}); |
1295 |
|
} |
1296 |
|
|
1297 |
|
$AtomChecker{check_end}->(@_); |
1298 |
|
}, |
1299 |
|
}; |
1300 |
|
|
1301 |
|
## TODO: fh:complete |
1302 |
|
|
1303 |
|
## TODO: fh:archive |
1304 |
|
|
1305 |
|
## TODO: Check as archive document, page feed document, ... |
1306 |
|
|
1307 |
|
## TODO: APP [RFC 5023] |
1308 |
|
|
1309 |
$Whatpm::ContentChecker::Namespace->{$ATOM_NS}->{loaded} = 1; |
$Whatpm::ContentChecker::Namespace->{$ATOM_NS}->{loaded} = 1; |
1310 |
|
$Whatpm::ContentChecker::Namespace->{$THR_NS}->{loaded} = 1; |
1311 |
|
|
1312 |
1; |
1; |