/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/Atom.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/Atom.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.19 - (hide annotations) (download)
Fri Aug 15 16:02:02 2008 UTC (16 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.18: +11 -11 lines
++ whatpm/Whatpm/ChangeLog	15 Aug 2008 16:01:09 -0000
2008-08-16  Wakaba  <wakaba@suika.fam.cx>

	* ContentChecker.pm, RDFXML.pm: Pass {level} object to language tag
	and URL checkers.  Support for more error levels for bogus
	langauge tag and URL "standards".

	* LangTag.pm, URIChecker.pm: Support for new style error
	level reporting.

++ whatpm/Whatpm/ContentChecker/ChangeLog	15 Aug 2008 16:01:45 -0000
2008-08-16  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm, Atom.pm: Pass {level} object to language tag
	and URL checkers.

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5 wakaba 1.2 require Whatpm::URIChecker;
6    
7 wakaba 1.1 my $ATOM_NS = q<http://www.w3.org/2005/Atom>;
8 wakaba 1.15 my $THR_NS = q<http://purl.org/syndication/thread/1.0>;
9 wakaba 1.16 my $FH_NS = q<http://purl.org/syndication/history/1.0>;
10 wakaba 1.3 my $LINK_REL = q<http://www.iana.org/assignments/relation/>;
11 wakaba 1.1
12 wakaba 1.8 sub FEATURE_RFC4287 () {
13     Whatpm::ContentChecker::FEATURE_STATUS_CR |
14     Whatpm::ContentChecker::FEATURE_ALLOWED
15     }
16    
17 wakaba 1.15 sub FEATURE_RFC4685 () {
18     Whatpm::ContentChecker::FEATURE_STATUS_CR |
19     Whatpm::ContentChecker::FEATURE_ALLOWED
20     }
21    
22 wakaba 1.1 ## MUST be well-formed XML (RFC 4287 references XML 1.0 REC 20040204)
23    
24     ## NOTE: Commants and PIs are not explicitly allowed.
25    
26     our $AttrChecker;
27    
28     ## Any element MAY have xml:base, xml:lang
29     my $GetAtomAttrsChecker = sub {
30     my $element_specific_checker = shift;
31 wakaba 1.8 my $element_specific_status = shift;
32 wakaba 1.1 return sub {
33 wakaba 1.8 my ($self, $todo, $element_state) = @_;
34 wakaba 1.1 for my $attr (@{$todo->{node}->attributes}) {
35     my $attr_ns = $attr->namespace_uri;
36     $attr_ns = '' unless defined $attr_ns;
37     my $attr_ln = $attr->manakai_local_name;
38     my $checker;
39     if ($attr_ns eq '') {
40     $checker = $element_specific_checker->{$attr_ln};
41     } else {
42     $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
43     || $AttrChecker->{$attr_ns}->{''};
44     }
45     if ($checker) {
46 wakaba 1.8 $checker->($self, $attr, $todo, $element_state);
47     } elsif ($attr_ln eq '') {
48     #
49 wakaba 1.1 } else {
50 wakaba 1.18 $self->{onerror}->(node => $attr,
51     type => 'unknown attribute',
52     level => $self->{level}->{uncertain});
53 wakaba 1.1 ## ISSUE: No comformance createria for unknown attributes in the spec
54     }
55 wakaba 1.8
56     if ($attr_ns eq '') {
57     $self->_attr_status_info ($attr, $element_specific_status->{$attr_ln});
58     }
59     ## TODO: global attribute
60 wakaba 1.1 }
61     };
62     }; # $GetAtomAttrsChecker
63    
64 wakaba 1.4 my $AtomLanguageTagAttrChecker = sub {
65     ## NOTE: See also $HTMLLanguageTagAttrChecker in HTML.pm.
66    
67     my ($self, $attr) = @_;
68     my $value = $attr->value;
69     require Whatpm::LangTag;
70     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
71 wakaba 1.18 $self->{onerror}->(@_, node => $attr);
72 wakaba 1.19 }, $self->{level});
73 wakaba 1.4 ## ISSUE: RFC 4646 (3066bis)?
74     }; # $AtomLanguageTagAttrChecker
75    
76 wakaba 1.8 my %AtomChecker = (
77     %Whatpm::ContentChecker::AnyChecker,
78     status => FEATURE_RFC4287,
79     check_attrs => $GetAtomAttrsChecker->({}, {}),
80     );
81    
82     my %AtomTextConstruct = (
83     %AtomChecker,
84     check_start => sub {
85     my ($self, $item, $element_state) = @_;
86     $element_state->{type} = 'text';
87     $element_state->{value} = '';
88     },
89     check_attrs => $GetAtomAttrsChecker->({
90     type => sub {
91     my ($self, $attr, $item, $element_state) = @_;
92     my $value = $attr->value;
93     if ($value eq 'text' or $value eq 'html' or $value eq 'xhtml') { # MUST
94     $element_state->{type} = $value;
95 wakaba 1.1 } else {
96 wakaba 1.8 ## NOTE: IMT MUST NOT be used here.
97 wakaba 1.18 $self->{onerror}->(node => $attr,
98     type => 'invalid attribute value',
99     level => $self->{level}->{must});
100 wakaba 1.1 }
101 wakaba 1.8 }, # checked in |checker|
102     }, {
103     type => FEATURE_RFC4287,
104     }),
105     check_child_element => sub {
106     my ($self, $item, $child_el, $child_nsuri, $child_ln,
107     $child_is_transparent, $element_state) = @_;
108     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
109     $self->{onerror}->(node => $child_el,
110     type => 'element not allowed:minus',
111 wakaba 1.18 level => $self->{level}->{must});
112 wakaba 1.8 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
113     #
114     } else {
115     if ($element_state->{type} eq 'text' or
116     $element_state->{type} eq 'html') { # MUST NOT
117     $self->{onerror}->(node => $child_el,
118     type => 'element not allowed:atom|TextConstruct',
119 wakaba 1.18 level => $self->{level}->{must});
120 wakaba 1.8 } elsif ($element_state->{type} eq 'xhtml') {
121     if ($child_nsuri eq q<http://www.w3.org/1999/xhtml> and
122     $child_ln eq 'div') { # MUST
123     if ($element_state->{has_div}) {
124     $self->{onerror}
125     ->(node => $child_el,
126     type => 'element not allowed:atom|TextConstruct',
127 wakaba 1.18 level => $self->{level}->{must});
128 wakaba 1.8 } else {
129     $element_state->{has_div} = 1;
130 wakaba 1.1 ## TODO: SHOULD be suitable for handling as HTML [XHTML10]
131     }
132 wakaba 1.8 } else {
133     $self->{onerror}->(node => $child_el,
134     type => 'element not allowed:atom|TextConstruct',
135 wakaba 1.18 level => $self->{level}->{must});
136 wakaba 1.1 }
137 wakaba 1.8 } else {
138     die "atom:TextConstruct type error: $element_state->{type}";
139 wakaba 1.1 }
140 wakaba 1.8 }
141     },
142     check_child_text => sub {
143     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
144     if ($element_state->{type} eq 'text') {
145     #
146     } elsif ($element_state->{type} eq 'html') {
147     $element_state->{value} .= $child_node->text_content;
148     ## NOTE: Markup MUST be escaped.
149     } elsif ($element_state->{type} eq 'xhtml') {
150     if ($has_significant) {
151     $self->{onerror}->(node => $child_node,
152     type => 'character not allowed:atom|TextConstruct',
153 wakaba 1.18 level => $self->{level}->{must});
154 wakaba 1.1 }
155 wakaba 1.8 } else {
156     die "atom:TextConstruct type error: $element_state->{type}";
157 wakaba 1.1 }
158     },
159 wakaba 1.8 check_end => sub {
160     my ($self, $item, $element_state) = @_;
161 wakaba 1.13 if ($element_state->{type} eq 'xhtml') {
162     unless ($element_state->{has_div}) {
163     $self->{onerror}->(node => $item->{node},
164 wakaba 1.18 type => 'child element missing',
165     text => 'div',
166     level => $self->{level}->{must});
167 wakaba 1.13 }
168     } elsif ($element_state->{type} eq 'html') {
169     ## TODO: SHOULD be suitable for handling as HTML [HTML4]
170     # markup MUST be escaped
171     $self->{onsubdoc}->({s => $element_state->{value},
172     container_node => $item->{node},
173     media_type => 'text/html',
174     inner_html_element => 'div',
175     is_char_string => 1});
176 wakaba 1.8 }
177 wakaba 1.1
178 wakaba 1.8 $AtomChecker{check_end}->(@_);
179     },
180     ); # %AtomTextConstruct
181 wakaba 1.1
182 wakaba 1.8 my %AtomPersonConstruct = (
183     %AtomChecker,
184     check_child_element => sub {
185     my ($self, $item, $child_el, $child_nsuri, $child_ln,
186     $child_is_transparent, $element_state) = @_;
187     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
188     $self->{onerror}->(node => $child_el,
189     type => 'element not allowed:minus',
190 wakaba 1.18 level => $self->{level}->{must});
191 wakaba 1.8 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
192     #
193     } elsif ($child_nsuri eq $ATOM_NS) {
194     if ($child_ln eq 'name') {
195     if ($element_state->{has_name}) {
196     $self->{onerror}
197     ->(node => $child_el,
198     type => 'element not allowed:atom|PersonConstruct',
199 wakaba 1.18 level => $self->{level}->{must});
200 wakaba 1.8 } else {
201     $element_state->{has_name} = 1;
202     }
203     } elsif ($child_ln eq 'uri') {
204     if ($element_state->{has_uri}) {
205     $self->{onerror}
206     ->(node => $child_el,
207     type => 'element not allowed:atom|PersonConstruct',
208 wakaba 1.18 level => $self->{level}->{must});
209 wakaba 1.1 } else {
210 wakaba 1.8 $element_state->{has_uri} = 1;
211 wakaba 1.1 }
212 wakaba 1.8 } elsif ($child_ln eq 'email') {
213     if ($element_state->{has_email}) {
214     $self->{onerror}
215     ->(node => $child_el,
216     type => 'element not allowed:atom|PersonConstruct',
217 wakaba 1.18 level => $self->{level}->{must});
218 wakaba 1.8 } else {
219     $element_state->{has_email} = 1;
220 wakaba 1.7 }
221 wakaba 1.8 } else {
222     $self->{onerror}
223     ->(node => $child_el,
224     type => 'element not allowed:atom|PersonConstruct',
225 wakaba 1.18 level => $self->{level}->{must});
226 wakaba 1.1 }
227 wakaba 1.8 } else {
228     $self->{onerror}
229     ->(node => $child_el,
230     type => 'element not allowed:atom|PersonConstruct',
231 wakaba 1.18 level => $self->{level}->{must});
232 wakaba 1.1 }
233 wakaba 1.8 ## TODO: extension element
234     },
235     check_child_text => sub {
236     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
237     if ($has_significant) {
238     $self->{onerror}->(node => $child_node,
239     type => 'character not allowed:atom|PersonConstruct',
240 wakaba 1.18 level => $self->{level}->{must});
241 wakaba 1.8 }
242     },
243     check_end => sub {
244     my ($self, $item, $element_state) = @_;
245 wakaba 1.1
246 wakaba 1.8 unless ($element_state->{has_name}) {
247     $self->{onerror}->(node => $item->{node},
248 wakaba 1.18 type => 'child element missing:atom',
249     text => 'name',
250     level => $self->{level}->{must});
251 wakaba 1.1 }
252    
253 wakaba 1.8 $AtomChecker{check_end}->(@_);
254 wakaba 1.1 },
255 wakaba 1.8 ); # %AtomPersonConstruct
256 wakaba 1.1
257 wakaba 1.2 our $Element;
258    
259 wakaba 1.8 $Element->{$ATOM_NS}->{''} = {
260     %AtomChecker,
261     status => 0,
262     };
263    
264 wakaba 1.2 $Element->{$ATOM_NS}->{name} = {
265 wakaba 1.8 %AtomChecker,
266    
267 wakaba 1.2 ## NOTE: Strictly speaking, structure and semantics for atom:name
268     ## element outside of Person construct is not defined.
269    
270 wakaba 1.8 ## NOTE: No constraint.
271 wakaba 1.2 };
272    
273     $Element->{$ATOM_NS}->{uri} = {
274 wakaba 1.8 %AtomChecker,
275    
276 wakaba 1.2 ## NOTE: Strictly speaking, structure and semantics for atom:uri
277     ## element outside of Person construct is not defined.
278 wakaba 1.8
279     ## NOTE: Elements are not explicitly disallowed.
280    
281     check_start => sub {
282     my ($self, $item, $element_state) = @_;
283     $element_state->{value} = '';
284     },
285     check_child_text => sub {
286     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
287     $element_state->{value} .= $child_node->data;
288     },
289     check_end => sub {
290     my ($self, $item, $element_state) = @_;
291 wakaba 1.2
292     ## NOTE: There MUST NOT be any white space.
293 wakaba 1.8 Whatpm::URIChecker->check_iri_reference ($element_state->{value}, sub {
294 wakaba 1.18 $self->{onerror}->(@_, node => $item->{node});
295 wakaba 1.19 }, $self->{level});
296 wakaba 1.2
297 wakaba 1.8 $AtomChecker{check_end}->(@_);
298 wakaba 1.2 },
299     };
300    
301     $Element->{$ATOM_NS}->{email} = {
302 wakaba 1.8 %AtomChecker,
303    
304 wakaba 1.2 ## NOTE: Strictly speaking, structure and semantics for atom:email
305     ## element outside of Person construct is not defined.
306 wakaba 1.8
307     ## NOTE: Elements are not explicitly disallowed.
308    
309     check_end => sub {
310     my ($self, $item, $element_state) = @_;
311 wakaba 1.2
312     ## TODO: addr-spec
313 wakaba 1.8 $self->{onerror}->(node => $item->{node},
314     type => 'addr-spec not supported',
315 wakaba 1.18 level => $self->{level}->{uncertain});
316 wakaba 1.2
317 wakaba 1.8 $AtomChecker{check_end}->(@_);
318 wakaba 1.2 },
319     };
320    
321 wakaba 1.1 ## MUST NOT be any white space
322 wakaba 1.8 my %AtomDateConstruct = (
323     %AtomChecker,
324    
325     ## NOTE: It does not explicitly say that there MUST NOT be any element.
326    
327     check_start => sub {
328     my ($self, $item, $element_state) = @_;
329     $element_state->{value} = '';
330     },
331     check_child_text => sub {
332     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
333     $element_state->{value} .= $child_node->data;
334     },
335     check_end => sub {
336     my ($self, $item, $element_state) = @_;
337 wakaba 1.1
338 wakaba 1.4 ## MUST: RFC 3339 |date-time| with uppercase |T| and |Z|
339 wakaba 1.8 if ($element_state->{value} =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})(?>\.[0-9]+)?(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
340 wakaba 1.4 my ($y, $M, $d, $h, $m, $s, $zh, $zm)
341     = ($1, $2, $3, $4, $5, $6, $7, $8);
342 wakaba 1.8 my $node = $item->{node};
343 wakaba 1.4
344     ## Check additional constraints described or referenced in
345     ## comments of ABNF rules for |date-time|.
346     if (0 < $M and $M < 13) {
347     $self->{onerror}->(node => $node, type => 'datetime:bad day',
348 wakaba 1.18 level => $self->{level}->{must})
349 wakaba 1.4 if $d < 1 or
350     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
351     $self->{onerror}->(node => $node, type => 'datetime:bad day',
352 wakaba 1.18 level => $self->{level}->{must})
353 wakaba 1.4 if $M == 2 and $d == 29 and
354     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
355     } else {
356     $self->{onerror}->(node => $node, type => 'datetime:bad month',
357 wakaba 1.18 level => $self->{level}->{must});
358 wakaba 1.4 }
359     $self->{onerror}->(node => $node, type => 'datetime:bad hour',
360 wakaba 1.18 level => $self->{level}->{must}) if $h > 23;
361 wakaba 1.4 $self->{onerror}->(node => $node, type => 'datetime:bad minute',
362 wakaba 1.18 level => $self->{level}->{must}) if $m > 59;
363 wakaba 1.4 $self->{onerror}->(node => $node, type => 'datetime:bad second',
364 wakaba 1.18 level => $self->{level}->{must})
365 wakaba 1.4 if $s > 60; ## NOTE: Validness of leap seconds are not checked.
366     $self->{onerror}->(node => $node, type => 'datetime:bad timezone hour',
367 wakaba 1.18 level => $self->{level}->{must}) if $zh > 23;
368 wakaba 1.4 $self->{onerror}->(node => $node, type => 'datetime:bad timezone minute',
369 wakaba 1.18 level => $self->{level}->{must}) if $zm > 59;
370 wakaba 1.4 } else {
371 wakaba 1.8 $self->{onerror}->(node => $item->{node},
372 wakaba 1.4 type => 'datetime:syntax error',
373 wakaba 1.18 level => $self->{level}->{must});
374 wakaba 1.4 }
375     ## NOTE: SHOULD be accurate as possible (cannot be checked)
376 wakaba 1.1
377 wakaba 1.8 $AtomChecker{check_end}->(@_);
378 wakaba 1.1 },
379 wakaba 1.8 ); # %AtomDateConstruct
380 wakaba 1.1
381 wakaba 1.2 $Element->{$ATOM_NS}->{entry} = {
382 wakaba 1.8 %AtomChecker,
383 wakaba 1.1 is_root => 1,
384 wakaba 1.8 check_child_element => sub {
385     my ($self, $item, $child_el, $child_nsuri, $child_ln,
386     $child_is_transparent, $element_state) = @_;
387    
388     ## NOTE: metadata elements, followed by atom:entry* (no explicit MAY)
389    
390     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
391     $self->{onerror}->(node => $child_el,
392     type => 'element not allowed:minus',
393 wakaba 1.18 level => $self->{level}->{must});
394 wakaba 1.8 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
395     #
396     } elsif ($child_nsuri eq $ATOM_NS) {
397     my $not_allowed;
398     if ({ # MUST (0, 1)
399     content => 1,
400     id => 1,
401     published => 1,
402     rights => 1,
403     source => 1,
404     summary => 1,
405     title => 1,
406     updated => 1,
407     }->{$child_ln}) {
408     unless ($element_state->{has_element}->{$child_ln}) {
409     $element_state->{has_element}->{$child_ln} = 1;
410     $not_allowed = $element_state->{has_element}->{entry};
411     } else {
412     $not_allowed = 1;
413     }
414     } elsif ($child_ln eq 'link') { # MAY
415     if ($child_el->rel eq $LINK_REL . 'alternate') {
416     my $type = $child_el->get_attribute_ns (undef, 'type');
417     $type = '' unless defined $type;
418     my $hreflang = $child_el->get_attribute_ns (undef, 'hreflang');
419     $hreflang = '' unless defined $hreflang;
420     my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
421     (defined $hreflang ? ':'.$hreflang : '');
422     unless ($element_state->{has_element}->{$key}) {
423     $element_state->{has_element}->{$key} = 1;
424     $element_state->{has_element}->{'link.alternate'} = 1;
425 wakaba 1.1 } else {
426     $not_allowed = 1;
427     }
428     }
429 wakaba 1.8
430     ## NOTE: MAY
431     $not_allowed ||= $element_state->{has_element}->{entry};
432     } elsif ({ # MAY
433     category => 1,
434     contributor => 1,
435     }->{$child_ln}) {
436     $not_allowed = $element_state->{has_element}->{entry};
437 wakaba 1.9 } elsif ($child_ln eq 'author') { # MAY
438     $not_allowed = $element_state->{has_element}->{entry};
439 wakaba 1.12 $element_state->{has_author} = 1; # ./author | ./source/author
440     $element_state->{has_element}->{$child_ln} = 1; # ./author
441 wakaba 1.8 } else {
442     $not_allowed = 1;
443     }
444     if ($not_allowed) {
445 wakaba 1.18 $self->{onerror}->(node => $child_el, type => 'element not allowed',
446     level => $self->{level}->{must});
447 wakaba 1.1 }
448 wakaba 1.15 } elsif ($child_nsuri eq $THR_NS and $child_ln eq 'in-reply-to') {
449     ## ISSUE: Where |thr:in-reply-to| is allowed is not explicit;y
450     ## defined in RFC 4685.
451     #
452 wakaba 1.16 } elsif ($child_nsuri eq $THR_NS and $child_ln eq 'total') {
453     #
454 wakaba 1.8 } else {
455     ## TODO: extension element
456 wakaba 1.18 $self->{onerror}->(node => $child_el, type => 'element not allowed',
457     level => $self->{level}->{must});
458 wakaba 1.8 }
459     },
460     check_child_text => sub {
461     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
462     if ($has_significant) {
463     $self->{onerror}->(node => $child_node, type => 'character not allowed',
464 wakaba 1.18 level => $self->{level}->{must});
465 wakaba 1.1 }
466 wakaba 1.8 },
467     check_end => sub {
468     my ($self, $item, $element_state) = @_;
469 wakaba 1.1
470 wakaba 1.9 if ($element_state->{has_author}) {
471     ## NOTE: There is either a child atom:author element
472     ## or a child atom:source element which contains an atom:author
473     ## child element.
474     #
475     } else {
476     A: {
477     my $root = $item->{node}->owner_document->document_element;
478     if ($root and $root->manakai_local_name eq 'feed') {
479     my $nsuri = $root->namespace_uri;
480     if (defined $nsuri and $nsuri eq $ATOM_NS) {
481     ## NOTE: An Atom Feed Document.
482     for my $root_child (@{$root->child_nodes}) {
483     ## NOTE: Entity references are not supported.
484     next unless $root_child->node_type == 1; # ELEMENT_NODE
485     next unless $root_child->manakai_local_name eq 'author';
486     my $root_child_nsuri = $root_child->namespace_uri;
487     next unless defined $root_child_nsuri;
488     next unless $root_child_nsuri eq $ATOM_NS;
489     last A;
490     }
491     }
492     }
493    
494     $self->{onerror}->(node => $item->{node},
495 wakaba 1.18 type => 'child element missing:atom',
496     text => 'author',
497     level => $self->{level}->{must});
498 wakaba 1.9 } # A
499     }
500    
501 wakaba 1.12 unless ($element_state->{has_element}->{author}) {
502     $item->{parent_state}->{has_no_author_entry} = 1; # for atom:feed's check
503     }
504    
505 wakaba 1.1 ## TODO: If entry's with same id, then updated SHOULD be different
506    
507 wakaba 1.8 unless ($element_state->{has_element}->{id}) { # MUST
508     $self->{onerror}->(node => $item->{node},
509 wakaba 1.18 type => 'child element missing:atom',
510     text => 'id',
511     level => $self->{level}->{must});
512 wakaba 1.8 }
513     unless ($element_state->{has_element}->{title}) { # MUST
514     $self->{onerror}->(node => $item->{node},
515 wakaba 1.18 type => 'child element missing:atom',
516     text => 'title',
517     level => $self->{level}->{must});
518 wakaba 1.8 }
519     unless ($element_state->{has_element}->{updated}) { # MUST
520     $self->{onerror}->(node => $item->{node},
521 wakaba 1.18 type => 'child element missing:atom',
522     text => 'updated',
523     level => $self->{level}->{must});
524 wakaba 1.8 }
525     if (not $element_state->{has_element}->{content} and
526     not $element_state->{has_element}->{'link.alternate'}) {
527     $self->{onerror}->(node => $item->{node},
528 wakaba 1.18 type => 'child element missing:atom:link:alternate',
529     level => $self->{level}->{must});
530 wakaba 1.1 }
531 wakaba 1.14
532     if ($element_state->{require_summary} and
533     not $element_state->{has_element}->{summary}) {
534     $self->{onerror}->(node => $item->{node},
535 wakaba 1.18 type => 'child element missing:atom',
536     text => 'summary',
537     level => $self->{level}->{must});
538 wakaba 1.14 }
539 wakaba 1.1 },
540     };
541    
542     $Element->{$ATOM_NS}->{feed} = {
543 wakaba 1.8 %AtomChecker,
544 wakaba 1.1 is_root => 1,
545 wakaba 1.8 check_child_element => sub {
546     my ($self, $item, $child_el, $child_nsuri, $child_ln,
547     $child_is_transparent, $element_state) = @_;
548    
549     ## NOTE: metadata elements, followed by atom:entry* (no explicit MAY)
550    
551     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
552     $self->{onerror}->(node => $child_el,
553     type => 'element not allowed:minus',
554 wakaba 1.18 level => $self->{level}->{must});
555 wakaba 1.8 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
556     #
557     } elsif ($child_nsuri eq $ATOM_NS) {
558     my $not_allowed;
559     if ($child_ln eq 'entry') {
560     $element_state->{has_element}->{entry} = 1;
561     } elsif ({ # MUST (0, 1)
562     generator => 1,
563     icon => 1,
564     id => 1,
565     logo => 1,
566     rights => 1,
567     subtitle => 1,
568     title => 1,
569     updated => 1,
570     }->{$child_ln}) {
571     unless ($element_state->{has_element}->{$child_ln}) {
572     $element_state->{has_element}->{$child_ln} = 1;
573     $not_allowed = $element_state->{has_element}->{entry};
574     } else {
575     $not_allowed = 1;
576     }
577     } elsif ($child_ln eq 'link') {
578     my $rel = $child_el->rel;
579     if ($rel eq $LINK_REL . 'alternate') {
580     my $type = $child_el->get_attribute_ns (undef, 'type');
581     $type = '' unless defined $type;
582     my $hreflang = $child_el->get_attribute_ns (undef, 'hreflang');
583     $hreflang = '' unless defined $hreflang;
584     my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
585     (defined $hreflang ? ':'.$hreflang : '');
586     unless ($element_state->{has_element}->{$key}) {
587     $element_state->{has_element}->{$key} = 1;
588 wakaba 1.1 } else {
589     $not_allowed = 1;
590     }
591 wakaba 1.8 } elsif ($rel eq $LINK_REL . 'self') {
592     $element_state->{has_element}->{'link.self'} = 1;
593 wakaba 1.1 }
594 wakaba 1.8
595     ## NOTE: MAY
596     $not_allowed = $element_state->{has_element}->{entry};
597     } elsif ({ # MAY
598     category => 1,
599     contributor => 1,
600     }->{$child_ln}) {
601     $not_allowed = $element_state->{has_element}->{entry};
602 wakaba 1.12 } elsif ($child_ln eq 'author') { # MAY
603     $not_allowed = $element_state->{has_element}->{entry};
604     $element_state->{has_element}->{author} = 1;
605 wakaba 1.8 } else {
606     $not_allowed = 1;
607 wakaba 1.1 }
608 wakaba 1.18 $self->{onerror}->(node => $child_el, type => 'element not allowed',
609     level => $self->{level}->{must})
610 wakaba 1.8 if $not_allowed;
611     } else {
612     ## TODO: extension element
613 wakaba 1.18 $self->{onerror}->(node => $child_el, type => 'element not allowed',
614     level => $self->{level}->{must});
615 wakaba 1.1 }
616 wakaba 1.8 },
617     check_child_text => sub {
618     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
619     if ($has_significant) {
620     $self->{onerror}->(node => $child_node, type => 'character not allowed',
621 wakaba 1.18 level => $self->{level}->{must});
622 wakaba 1.8 }
623     },
624     check_end => sub {
625     my ($self, $item, $element_state) = @_;
626 wakaba 1.10
627 wakaba 1.12 if ($element_state->{has_no_author_entry} and
628     not $element_state->{has_element}->{author}) {
629 wakaba 1.10 $self->{onerror}->(node => $item->{node},
630 wakaba 1.18 type => 'child element missing:atom',
631     text => 'author',
632     level => $self->{level}->{must});
633 wakaba 1.11 ## ISSUE: If there is no |atom:entry| element,
634     ## there should be an |atom:author| element?
635 wakaba 1.10 }
636 wakaba 1.1
637     ## TODO: If entry's with same id, then updated SHOULD be different
638    
639 wakaba 1.8 unless ($element_state->{has_element}->{id}) { # MUST
640     $self->{onerror}->(node => $item->{node},
641 wakaba 1.18 type => 'child element missing:atom',
642     text => 'id',
643     level => $self->{level}->{must});
644 wakaba 1.1 }
645 wakaba 1.8 unless ($element_state->{has_element}->{title}) { # MUST
646     $self->{onerror}->(node => $item->{node},
647 wakaba 1.18 type => 'child element missing:atom',
648     text => 'title',
649     level => $self->{level}->{must});
650 wakaba 1.1 }
651 wakaba 1.8 unless ($element_state->{has_element}->{updated}) { # MUST
652     $self->{onerror}->(node => $item->{node},
653 wakaba 1.18 type => 'element missing:atom',
654     text => 'updated',
655     level => $self->{level}->{must});
656 wakaba 1.1 }
657 wakaba 1.8 unless ($element_state->{has_element}->{'link.self'}) {
658 wakaba 1.18 $self->{onerror}->(node => $item->{node},
659     type => 'element missing:atom:link:self',
660     level => $self->{level}->{should});
661 wakaba 1.3 }
662 wakaba 1.1
663 wakaba 1.8 $AtomChecker{check_end}->(@_);
664 wakaba 1.1 },
665     };
666    
667     $Element->{$ATOM_NS}->{content} = {
668 wakaba 1.8 %AtomChecker,
669     check_start => sub {
670     my ($self, $item, $element_state) = @_;
671     $element_state->{type} = 'text';
672 wakaba 1.13 $element_state->{value} = '';
673 wakaba 1.8 },
674     check_attrs => $GetAtomAttrsChecker->({
675     src => sub {
676     my ($self, $attr, $item, $element_state) = @_;
677    
678     $element_state->{has_src} = 1;
679 wakaba 1.14 $item->{parent_state}->{require_summary} = 1;
680 wakaba 1.8
681     ## NOTE: There MUST NOT be any white space.
682     Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
683 wakaba 1.18 $self->{onerror}->(@_, node => $item->{node});
684 wakaba 1.19 }, $self->{level});
685 wakaba 1.8 },
686     type => sub {
687     my ($self, $attr, $item, $element_state) = @_;
688    
689     $element_state->{has_type} = 1;
690 wakaba 1.1
691 wakaba 1.8 my $value = $attr->value;
692 wakaba 1.1 if ($value eq 'text' or $value eq 'html' or $value eq 'xhtml') {
693     # MUST
694     } else {
695 wakaba 1.3 ## NOTE: MUST be a MIME media type. What is "MIME media type"?
696     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
697     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
698     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
699     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
700     my @type = ($1, $2);
701     my $param = $3;
702     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
703     if (defined $2) {
704     push @type, $1 => $2;
705     } else {
706     my $n = $1;
707     my $v = $2;
708     $v =~ s/\\(.)/$1/gs;
709     push @type, $n => $v;
710     }
711     }
712     require Whatpm::IMTChecker;
713     Whatpm::IMTChecker->check_imt (sub {
714 wakaba 1.18 $self->{onerror}->(@_, node => $attr);
715 wakaba 1.3 }, @type);
716     } else {
717 wakaba 1.8 $self->{onerror}->(node => $attr, type => 'IMT:syntax error',
718 wakaba 1.18 level => $self->{level}->{must});
719 wakaba 1.3 }
720 wakaba 1.1 }
721    
722 wakaba 1.17 if ({text => 1, html => 1, xhtml => 1}->{$value}) {
723     #
724     } elsif ($value =~ m![+/][Xx][Mm][Ll]\z!) {
725 wakaba 1.8 ## ISSUE: There is no definition for "XML media type" in RFC 3023.
726     ## Is |application/xml-dtd| an XML media type?
727     $value = 'xml';
728     } elsif ($value =~ m!^[Tt][Ee][Xx][Tt]/!) {
729     $value = 'mime_text';
730     } elsif ($value =~ m!^(?>message|multipart)/!i) {
731     $self->{onerror}->(node => $attr, type => 'IMT:composite',
732 wakaba 1.18 level => $self->{level}->{must});
733 wakaba 1.14 $item->{parent_state}->{require_summary} = 1;
734     } else {
735     $item->{parent_state}->{require_summary} = 1;
736 wakaba 1.8 }
737 wakaba 1.1
738 wakaba 1.8 $element_state->{type} = $value;
739     },
740     }, {
741     src => FEATURE_RFC4287,
742     type => FEATURE_RFC4287,
743     }),
744     check_child_element => sub {
745     my ($self, $item, $child_el, $child_nsuri, $child_ln,
746     $child_is_transparent, $element_state) = @_;
747    
748     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
749     $self->{onerror}->(node => $child_el,
750     type => 'element not allowed:minus',
751 wakaba 1.18 level => $self->{level}->{must});
752 wakaba 1.8 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
753     #
754     } else {
755     if ($element_state->{type} eq 'text' or
756     $element_state->{type} eq 'html' or
757     $element_state->{type} eq 'mime_text') {
758     # MUST NOT
759     $self->{onerror}->(node => $child_el,
760     type => 'element not allowed:atom|content',
761 wakaba 1.18 level => $self->{level}->{must});
762 wakaba 1.8 } elsif ($element_state->{type} eq 'xhtml') {
763     if ($element_state->{has_div}) {
764     $self->{onerror}->(node => $child_el,
765     type => 'element not allowed:atom|content',
766 wakaba 1.18 level => $self->{level}->{must});
767 wakaba 1.8 } else {
768     ## TODO: SHOULD be suitable for handling as HTML [XHTML10]
769     $element_state->{has_div} = 1;
770     }
771     } elsif ($element_state->{type} eq 'xml') {
772     ## MAY contain elements
773     if ($element_state->{has_src}) {
774     $self->{onerror}->(node => $child_el,
775     type => 'element not allowed:atom|content',
776 wakaba 1.18 level => $self->{level}->{must});
777 wakaba 1.1 }
778 wakaba 1.8 } else {
779     ## NOTE: Elements are not explicitly disallowed.
780 wakaba 1.1 }
781 wakaba 1.8 }
782     },
783     ## NOTE: If @src, the element MUST be empty. What is "empty"?
784     ## Is |<e><!----></e>| empty? |<e>&e;</e>| where |&e;| has
785     ## empty replacement tree shuld be empty, since Atom is defined
786     ## in terms of XML Information Set where entities are expanded.
787     ## (but what if |&e;| is an unexpanded entity?)
788     check_child_text => sub {
789     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
790     if ($has_significant) {
791     if ($element_state->{has_src}) {
792     $self->{onerror}->(node => $child_node,
793 wakaba 1.18 type => 'character not allowed:empty',
794     level => $self->{level}->{must});
795 wakaba 1.8 } elsif ($element_state->{type} eq 'xhtml' or
796     $element_state->{type} eq 'xml') {
797     $self->{onerror}->(node => $child_node,
798     type => 'character not allowed:atom|content',
799 wakaba 1.18 level => $self->{level}->{must});
800 wakaba 1.1 }
801 wakaba 1.8 }
802 wakaba 1.1
803 wakaba 1.13 $element_state->{value} .= $child_node->data;
804 wakaba 1.1
805 wakaba 1.8 ## NOTE: type=text/* has no further restriction (i.e. the content don't
806     ## have to conform to the definition of the type).
807     },
808     check_end => sub {
809     my ($self, $item, $element_state) = @_;
810 wakaba 1.1
811 wakaba 1.8 if ($element_state->{has_src}) {
812     if (not $element_state->{has_type}) {
813     $self->{onerror}->(node => $item->{node},
814 wakaba 1.18 type => 'attribute missing',
815     text => 'type',
816     level => $self->{level}->{should});
817 wakaba 1.14 } elsif ($element_state->{type} eq 'text' or
818     $element_state->{type} eq 'html' or
819     $element_state->{type} eq 'xhtml') {
820 wakaba 1.8 $self->{onerror}
821     ->(node => $item->{node}->get_attribute_node_ns (undef, 'type'),
822 wakaba 1.18 type => 'not IMT', level => $self->{level}->{must});
823 wakaba 1.1 }
824 wakaba 1.8 }
825 wakaba 1.1
826 wakaba 1.8 if ($element_state->{type} eq 'xhtml') {
827     unless ($element_state->{has_div}) {
828     $self->{onerror}->(node => $item->{node},
829 wakaba 1.18 type => 'element missing',
830     text => 'div',
831     level => $self->{level}->{must});
832 wakaba 1.1 }
833 wakaba 1.11 } elsif ($element_state->{type} eq 'html') {
834     ## TODO: SHOULD be suitable for handling as HTML [HTML4]
835     # markup MUST be escaped
836 wakaba 1.13 $self->{onsubdoc}->({s => $element_state->{value},
837 wakaba 1.11 container_node => $item->{node},
838     media_type => 'text/html',
839     inner_html_element => 'div',
840     is_char_string => 1});
841 wakaba 1.8 } elsif ($element_state->{type} eq 'xml') {
842 wakaba 1.3 ## NOTE: SHOULD be suitable for handling as $value.
843 wakaba 1.1 ## If no @src, this would normally mean it contains a
844     ## single child element that would serve as the root element.
845 wakaba 1.8 $self->{onerror}->(node => $item->{node},
846     type => 'atom|content not supported',
847 wakaba 1.18 text => $item->{node}->get_attribute_ns
848     (undef, 'type'),
849     level => $self->{level}->{uncertain});
850 wakaba 1.8 } elsif ($element_state->{type} eq 'text' or
851     $element_state->{type} eq 'mime-text') {
852     #
853 wakaba 1.1 } else {
854     ## TODO: $s = valid Base64ed [RFC 3548] where
855     ## MAY leading and following "white space" (what?)
856     ## and lines separated by a single U+000A
857 wakaba 1.3
858     ## NOTE: SHOULD be suitable for the indicated media type.
859 wakaba 1.8 $self->{onerror}->(node => $item->{node},
860     type => 'atom|content not supported',
861 wakaba 1.18 text => $item->{node}->get_attribute_ns
862     (undef, 'type'),
863     level => $self->{level}->{uncertain});
864 wakaba 1.8 }
865 wakaba 1.1
866 wakaba 1.8 $AtomChecker{check_end}->(@_);
867 wakaba 1.1 },
868     };
869 wakaba 1.6 ## TODO: Tests for <html:nest/> in <atom:content/>
870 wakaba 1.1
871 wakaba 1.8 $Element->{$ATOM_NS}->{author} = \%AtomPersonConstruct;
872 wakaba 1.1
873     $Element->{$ATOM_NS}->{category} = {
874 wakaba 1.8 %AtomChecker,
875     check_attrs => $GetAtomAttrsChecker->({
876 wakaba 1.1 label => sub { 1 }, # no value constraint
877 wakaba 1.2 scheme => sub { # NOTE: No MUST.
878     my ($self, $attr) = @_;
879     ## NOTE: There MUST NOT be any white space.
880     Whatpm::URIChecker->check_iri ($attr->value, sub {
881 wakaba 1.18 $self->{onerror}->(@_, node => $attr);
882 wakaba 1.19 }, $self->{level});
883 wakaba 1.2 },
884 wakaba 1.8 term => sub {
885     my ($self, $attr, $item, $element_state) = @_;
886    
887     ## NOTE: No value constraint.
888    
889     $element_state->{has_term} = 1;
890     },
891     }, {
892     label => FEATURE_RFC4287,
893     scheme => FEATURE_RFC4287,
894     term => FEATURE_RFC4287,
895 wakaba 1.1 }),
896 wakaba 1.8 check_end => sub {
897     my ($self, $item, $element_state) = @_;
898     unless ($element_state->{has_term}) {
899     $self->{onerror}->(node => $item->{node},
900 wakaba 1.18 type => 'attribute missing',
901     text => 'term',
902     level => $self->{level}->{must});
903 wakaba 1.1 }
904    
905 wakaba 1.8 $AtomChecker{check_end}->(@_);
906 wakaba 1.1 },
907 wakaba 1.8 ## NOTE: Meaning of content is not defined.
908 wakaba 1.1 };
909    
910 wakaba 1.8 $Element->{$ATOM_NS}->{contributor} = \%AtomPersonConstruct;
911 wakaba 1.6
912     ## TODO: Anything below does not support <html:nest/> yet.
913 wakaba 1.1
914     $Element->{$ATOM_NS}->{generator} = {
915 wakaba 1.8 %AtomChecker,
916     check_attrs => $GetAtomAttrsChecker->({
917 wakaba 1.2 uri => sub { # MUST
918     my ($self, $attr) = @_;
919     ## NOTE: There MUST NOT be any white space.
920     Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
921 wakaba 1.18 $self->{onerror}->(@_, node => $attr);
922 wakaba 1.19 }, $self->{level});
923 wakaba 1.2 ## NOTE: Dereferencing SHOULD produce a representation
924     ## that is relevant to the agent.
925     },
926 wakaba 1.1 version => sub { 1 }, # no value constraint
927 wakaba 1.8 }, {
928     uri => FEATURE_RFC4287,
929     version => FEATURE_RFC4287,
930 wakaba 1.1 }),
931    
932 wakaba 1.8 ## NOTE: Elements are not explicitly disallowed.
933 wakaba 1.1
934 wakaba 1.8 ## NOTE: Content MUST be a string that is a human-readable name for
935     ## the generating agent.
936 wakaba 1.1 };
937    
938     $Element->{$ATOM_NS}->{icon} = {
939 wakaba 1.8 %AtomChecker,
940     check_start => sub {
941     my ($self, $item, $element_state) = @_;
942     $element_state->{value} = '';
943     },
944     ## NOTE: Elements are not explicitly disallowed.
945     check_child_text => sub {
946     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
947     $element_state->{value} .= $child_node->data;
948     },
949     check_end => sub {
950     my ($self, $item, $element_state) = @_;
951 wakaba 1.1
952 wakaba 1.2 ## NOTE: No MUST.
953     ## NOTE: There MUST NOT be any white space.
954 wakaba 1.8 Whatpm::URIChecker->check_iri_reference ($element_state->{value}, sub {
955 wakaba 1.18 $self->{onerror}->(@_, node => $item->{node});
956 wakaba 1.19 }, $self->{level});
957 wakaba 1.2
958 wakaba 1.1 ## NOTE: Image SHOULD be 1:1 and SHOULD be small
959    
960 wakaba 1.8 $AtomChecker{check_end}->(@_);
961 wakaba 1.1 },
962     };
963    
964     $Element->{$ATOM_NS}->{id} = {
965 wakaba 1.8 %AtomChecker,
966     check_start => sub {
967     my ($self, $item, $element_state) = @_;
968     $element_state->{value} = '';
969     },
970     ## NOTE: Elements are not explicitly disallowed.
971     check_child_text => sub {
972     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
973     $element_state->{value} .= $child_node->data;
974     },
975     check_end => sub {
976     my ($self, $item, $element_state) = @_;
977 wakaba 1.2
978     ## NOTE: There MUST NOT be any white space.
979 wakaba 1.8 Whatpm::URIChecker->check_iri ($element_state->{value}, sub {
980 wakaba 1.18 $self->{onerror}->(@_, node => $item->{node});
981 wakaba 1.19 }, $self->{level});
982 wakaba 1.2 ## TODO: SHOULD be normalized
983    
984 wakaba 1.8 $AtomChecker{check_end}->(@_);
985 wakaba 1.2 },
986     };
987    
988 wakaba 1.15 my $AtomIMTAttrChecker = sub {
989 wakaba 1.3 my ($self, $attr) = @_;
990     my $value = $attr->value;
991     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
992     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
993     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
994     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
995     my @type = ($1, $2);
996     my $param = $3;
997     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
998     if (defined $2) {
999     push @type, $1 => $2;
1000     } else {
1001     my $n = $1;
1002     my $v = $2;
1003     $v =~ s/\\(.)/$1/gs;
1004     push @type, $n => $v;
1005     }
1006     }
1007     require Whatpm::IMTChecker;
1008     Whatpm::IMTChecker->check_imt (sub {
1009 wakaba 1.18 $self->{onerror}->(@_, node => $attr);
1010 wakaba 1.3 }, @type);
1011     } else {
1012 wakaba 1.18 $self->{onerror}->(node => $attr, type => 'IMT:syntax error',
1013     level => $self->{level}->{must});
1014 wakaba 1.3 }
1015 wakaba 1.15 }; # $AtomIMTAttrChecker
1016    
1017     my $AtomIRIReferenceAttrChecker = sub {
1018     my ($self, $attr) = @_;
1019     ## NOTE: There MUST NOT be any white space.
1020     Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
1021 wakaba 1.18 $self->{onerror}->(@_, node => $attr);
1022 wakaba 1.19 }, $self->{level});
1023 wakaba 1.15 }; # $AtomIRIReferenceAttrChecker
1024    
1025     $Element->{$ATOM_NS}->{link} = {
1026     %AtomChecker,
1027     check_attrs => $GetAtomAttrsChecker->({
1028     href => $AtomIRIReferenceAttrChecker,
1029     hreflang => $AtomLanguageTagAttrChecker,
1030     length => sub { }, # No MUST; in octets.
1031     rel => sub { # MUST
1032     my ($self, $attr) = @_;
1033     my $value = $attr->value;
1034     if ($value =~ /\A(?>[0-9A-Za-z._~!\$&'()*+,;=\x{A0}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFEF}\x{10000}-\x{1FFFD}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}\x{40000}-\x{4FFFD}\x{50000}-\x{5FFFD}\x{60000}-\x{6FFFD}\x{70000}-\x{7FFFD}\x{80000}-\x{8FFFD}\x{90000}-\x{9FFFD}\x{A0000}-\x{AFFFD}\x{B0000}-\x{BFFFD}\x{C0000}-\x{CFFFD}\x{D0000}-\x{DFFFD}\x{E1000}-\x{EFFFD}-]|%[0-9A-Fa-f][0-9A-Fa-f]|\@)+\z/) {
1035     $value = $LINK_REL . $value;
1036     }
1037    
1038     ## NOTE: There MUST NOT be any white space.
1039     Whatpm::URIChecker->check_iri ($value, sub {
1040 wakaba 1.18 $self->{onerror}->(@_, node => $attr);
1041 wakaba 1.19 }, $self->{level});
1042 wakaba 1.15
1043     ## TODO: Warn if unregistered
1044 wakaba 1.16
1045     ## TODO: rel=license [RFC 4946]
1046     ## MUST NOT multiple rel=license with same href="",type="" pairs
1047     ## href="" SHOULD be dereferencable
1048     ## title="" SHOULD be there if multiple rel=license
1049     ## MUST NOT "unspecified" and other rel=license
1050 wakaba 1.3 },
1051 wakaba 1.15 title => sub { }, # No MUST
1052     type => $AtomIMTAttrChecker,
1053     ## NOTE: MUST be a MIME media type. What is "MIME media type"?
1054 wakaba 1.8 }, {
1055     href => FEATURE_RFC4287,
1056     hreflang => FEATURE_RFC4287,
1057     length => FEATURE_RFC4287,
1058     rel => FEATURE_RFC4287,
1059     title => FEATURE_RFC4287,
1060     type => FEATURE_RFC4287,
1061 wakaba 1.16
1062     ## TODO: thr:count
1063     ## TODO: thr:updated
1064 wakaba 1.2 }),
1065 wakaba 1.8 check_start => sub {
1066     my ($self, $item, $element_state) = @_;
1067 wakaba 1.2
1068 wakaba 1.8 unless ($item->{node}->has_attribute_ns (undef, 'href')) { # MUST
1069     $self->{onerror}->(node => $item->{node},
1070 wakaba 1.18 type => 'attribute missing',
1071     text => 'href',
1072     level => $self->{level}->{must});
1073 wakaba 1.2 }
1074    
1075 wakaba 1.8 if ($item->{node}->rel eq $LINK_REL . 'enclosure' and
1076     not $item->{node}->has_attribute_ns (undef, 'length')) {
1077 wakaba 1.18 $self->{onerror}->(node => $item->{node},
1078     type => 'attribute missing',
1079     text => 'length',
1080     level => $self->{level}->{should});
1081 wakaba 1.2 }
1082     },
1083     };
1084    
1085     $Element->{$ATOM_NS}->{logo} = {
1086 wakaba 1.8 %AtomChecker,
1087     ## NOTE: Child elements are not explicitly disallowed
1088     check_start => sub {
1089     my ($self, $item, $element_state) = @_;
1090     $element_state->{value} = '';
1091     },
1092     ## NOTE: Elements are not explicitly disallowed.
1093     check_child_text => sub {
1094     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1095     $element_state->{value} .= $child_node->data;
1096     },
1097     check_end => sub {
1098     my ($self, $item, $element_state) = @_;
1099 wakaba 1.1
1100 wakaba 1.2 ## NOTE: There MUST NOT be any white space.
1101 wakaba 1.8 Whatpm::URIChecker->check_iri_reference ($element_state->{value}, sub {
1102 wakaba 1.18 $self->{onerror}->(@_, node => $item->{node});
1103 wakaba 1.19 }, $self->{level});
1104 wakaba 1.2
1105     ## NOTE: Image SHOULD be 2:1
1106    
1107 wakaba 1.8 $AtomChecker{check_end}->(@_);
1108 wakaba 1.2 },
1109     };
1110    
1111 wakaba 1.8 $Element->{$ATOM_NS}->{published} = \%AtomDateConstruct;
1112 wakaba 1.2
1113 wakaba 1.17 $Element->{$ATOM_NS}->{rights} = \%AtomTextConstruct;
1114 wakaba 1.3 ## NOTE: SHOULD NOT be used to convey machine-readable information.
1115 wakaba 1.2
1116     $Element->{$ATOM_NS}->{source} = {
1117 wakaba 1.8 %AtomChecker,
1118     check_child_element => sub {
1119     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1120     $child_is_transparent, $element_state) = @_;
1121    
1122     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
1123     $self->{onerror}->(node => $child_el,
1124     type => 'element not allowed:minus',
1125 wakaba 1.18 level => $self->{level}->{must});
1126 wakaba 1.8 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1127     #
1128     } elsif ($child_nsuri eq $ATOM_NS) {
1129     my $not_allowed;
1130     if ($child_ln eq 'entry') {
1131     $element_state->{has_element}->{entry} = 1;
1132     } elsif ({
1133     generator => 1,
1134     icon => 1,
1135     id => 1,
1136     logo => 1,
1137     rights => 1,
1138     subtitle => 1,
1139     title => 1,
1140     updated => 1,
1141     }->{$child_ln}) {
1142     unless ($element_state->{has_element}->{$child_ln}) {
1143     $element_state->{has_element}->{$child_ln} = 1;
1144     $not_allowed = $element_state->{has_element}->{entry};
1145     } else {
1146     $not_allowed = 1;
1147     }
1148     } elsif ($child_ln eq 'link') {
1149 wakaba 1.17 if ($child_el->rel eq $LINK_REL . 'alternate') {
1150     my $type = $child_el->get_attribute_ns (undef, 'type');
1151 wakaba 1.8 $type = '' unless defined $type;
1152 wakaba 1.17 my $hreflang = $child_el->get_attribute_ns (undef, 'hreflang');
1153 wakaba 1.8 $hreflang = '' unless defined $hreflang;
1154     my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
1155     (defined $hreflang ? ':'.$hreflang : '');
1156     unless ($element_state->{has_element}->{$key}) {
1157     $element_state->{has_element}->{$key} = 1;
1158 wakaba 1.2 } else {
1159     $not_allowed = 1;
1160     }
1161     }
1162 wakaba 1.8 $not_allowed ||= $element_state->{has_element}->{entry};
1163     } elsif ({
1164     category => 1,
1165     contributor => 1,
1166     }->{$child_ln}) {
1167     $not_allowed = $element_state->{has_element}->{entry};
1168 wakaba 1.9 } elsif ($child_ln eq 'author') {
1169     $not_allowed = $element_state->{has_element}->{entry};
1170     $item->{parent_state}->{has_author} = 1; # parent::atom:entry's flag
1171 wakaba 1.8 } else {
1172     $not_allowed = 1;
1173     }
1174     if ($not_allowed) {
1175 wakaba 1.18 $self->{onerror}->(node => $child_el, type => 'element not allowed',
1176     level => $self->{level}->{must});
1177 wakaba 1.2 }
1178 wakaba 1.8 } else {
1179     ## TODO: extension element
1180 wakaba 1.18 $self->{onerror}->(node => $child_el, type => 'element not allowed',
1181     level => $self->{level}->{must});
1182 wakaba 1.8 }
1183     },
1184     check_child_text => sub {
1185     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1186     if ($has_significant) {
1187     $self->{onerror}->(node => $child_node, type => 'character not allowed',
1188 wakaba 1.18 level => $self->{level}->{must});
1189 wakaba 1.2 }
1190 wakaba 1.1 },
1191     };
1192 wakaba 1.2
1193 wakaba 1.8 $Element->{$ATOM_NS}->{subtitle} = \%AtomTextConstruct;
1194 wakaba 1.2
1195 wakaba 1.8 $Element->{$ATOM_NS}->{summary} = \%AtomTextConstruct;
1196 wakaba 1.2
1197 wakaba 1.8 $Element->{$ATOM_NS}->{title} = \%AtomTextConstruct;
1198 wakaba 1.2
1199 wakaba 1.8 $Element->{$ATOM_NS}->{updated} = \%AtomDateConstruct;
1200 wakaba 1.2
1201     ## TODO: signature element
1202    
1203     ## TODO: simple extension element and structured extension element
1204 wakaba 1.1
1205 wakaba 1.15 ## -- Atom Threading 1.0 [RFC 4685]
1206    
1207     $Element->{$THR_NS}->{''} = {
1208     %AtomChecker,
1209     status => 0,
1210     };
1211    
1212     ## ISSUE: Strictly speaking, thr:* element/attribute,
1213     ## where * is an undefined local name, is not disallowed.
1214    
1215     $Element->{$THR_NS}->{'in-reply-to'} = {
1216     %AtomChecker,
1217     status => FEATURE_RFC4685,
1218     check_attrs => $GetAtomAttrsChecker->({
1219     href => $AtomIRIReferenceAttrChecker,
1220     ## TODO: fact-level.
1221     ## TODO: MUST be dereferencable.
1222     ref => sub {
1223     my ($self, $attr, $item, $element_state) = @_;
1224     $element_state->{has_ref} = 1;
1225    
1226     ## NOTE: Same as |atom:id|.
1227     ## NOTE: There MUST NOT be any white space.
1228     Whatpm::URIChecker->check_iri ($attr->value, sub {
1229 wakaba 1.18 $self->{onerror}->(@_, node => $attr);
1230 wakaba 1.19 }, $self->{level});
1231 wakaba 1.15
1232     ## TODO: Check against ID guideline...
1233     },
1234     source => $AtomIRIReferenceAttrChecker,
1235     ## TODO: fact-level.
1236     ## TODO: MUST be dereferencable.
1237     type => $AtomIMTAttrChecker,
1238     ## TODO: fact-level.
1239     }, {
1240     href => FEATURE_RFC4685,
1241     source => FEATURE_RFC4685,
1242     ref => FEATURE_RFC4685,
1243     type => FEATURE_RFC4685,
1244     }),
1245     check_end => sub {
1246     my ($self, $item, $element_state) = @_;
1247    
1248     unless ($element_state->{has_ref}) {
1249     $self->{onerror}->(node => $item->{node},
1250 wakaba 1.18 type => 'attribute missing',
1251     text => 'ref',
1252     level => $self->{level}->{must});
1253 wakaba 1.15 }
1254    
1255     $AtomChecker{check_end}->(@_);
1256     },
1257     ## NOTE: Content model has no constraint.
1258     };
1259 wakaba 1.16
1260     $Element->{$THR_NS}->{total} = {
1261     %AtomChecker,
1262     check_start => sub {
1263     my ($self, $item, $element_state) = @_;
1264     $element_state->{value} = '';
1265     },
1266     check_child_element => sub {
1267     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1268     $child_is_transparent, $element_state) = @_;
1269    
1270     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
1271     $self->{onerror}->(node => $child_el,
1272     type => 'element not allowed:minus',
1273 wakaba 1.18 level => $self->{level}->{must});
1274 wakaba 1.16 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1275     #
1276     } else {
1277     $self->{onerror}->(node => $child_el,
1278     type => 'element not allowed',
1279 wakaba 1.18 level => $self->{level}->{must});
1280 wakaba 1.16 }
1281     },
1282     check_child_text => sub {
1283     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1284     $element_state->{value} .= $child_node->data;
1285     },
1286     check_end => sub {
1287     my ($self, $item, $element_state) = @_;
1288    
1289     ## NOTE: xsd:nonNegativeInteger
1290     unless ($element_state->{value} =~ /\A(?>[0-9]+|-0+)\z/) {
1291     $self->{onerror}->(node => $item->{node},
1292 wakaba 1.18 type => 'invalid attribute value',
1293     level => $self->{level}->{must});
1294 wakaba 1.16 }
1295    
1296     $AtomChecker{check_end}->(@_);
1297     },
1298     };
1299    
1300     ## TODO: fh:complete
1301    
1302     ## TODO: fh:archive
1303    
1304     ## TODO: Check as archive document, page feed document, ...
1305    
1306     ## TODO: APP [RFC 5023]
1307 wakaba 1.15
1308 wakaba 1.1 $Whatpm::ContentChecker::Namespace->{$ATOM_NS}->{loaded} = 1;
1309 wakaba 1.15 $Whatpm::ContentChecker::Namespace->{$THR_NS}->{loaded} = 1;
1310 wakaba 1.1
1311     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24