/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/Atom.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/Atom.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.17 - (hide annotations) (download)
Thu Mar 20 10:58:17 2008 UTC (16 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.16: +7 -5 lines
++ whatpm/t/ChangeLog	20 Mar 2008 10:58:07 -0000
	* content-model-atom-1.dat: New test data are added.

	* content-model-atom-2.dat: New test data are
	added from Atom test suite.

2008-03-20  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ContentChecker/ChangeLog	20 Mar 2008 10:57:00 -0000
	* Atom.pm: Don't raise an error even if there is
	no |atom:summary| element child in the |atom:entry|
	element when the |type| attribute of the |atom:content|
	element is set to |html|, |xhtml|, or |text|.
	The |atom:rights| elements were implemented
	as Date construct (!?).
	Typo in code for |rel=alternate| are fixed.

2008-03-20  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5 wakaba 1.2 require Whatpm::URIChecker;
6    
7 wakaba 1.1 my $ATOM_NS = q<http://www.w3.org/2005/Atom>;
8 wakaba 1.15 my $THR_NS = q<http://purl.org/syndication/thread/1.0>;
9 wakaba 1.16 my $FH_NS = q<http://purl.org/syndication/history/1.0>;
10 wakaba 1.3 my $LINK_REL = q<http://www.iana.org/assignments/relation/>;
11 wakaba 1.1
12 wakaba 1.8 sub FEATURE_RFC4287 () {
13     Whatpm::ContentChecker::FEATURE_STATUS_CR |
14     Whatpm::ContentChecker::FEATURE_ALLOWED
15     }
16    
17 wakaba 1.15 sub FEATURE_RFC4685 () {
18     Whatpm::ContentChecker::FEATURE_STATUS_CR |
19     Whatpm::ContentChecker::FEATURE_ALLOWED
20     }
21    
22 wakaba 1.1 ## MUST be well-formed XML (RFC 4287 references XML 1.0 REC 20040204)
23    
24     ## NOTE: Commants and PIs are not explicitly allowed.
25    
26     our $AttrChecker;
27    
28     ## Any element MAY have xml:base, xml:lang
29     my $GetAtomAttrsChecker = sub {
30     my $element_specific_checker = shift;
31 wakaba 1.8 my $element_specific_status = shift;
32 wakaba 1.1 return sub {
33 wakaba 1.8 my ($self, $todo, $element_state) = @_;
34 wakaba 1.1 for my $attr (@{$todo->{node}->attributes}) {
35     my $attr_ns = $attr->namespace_uri;
36     $attr_ns = '' unless defined $attr_ns;
37     my $attr_ln = $attr->manakai_local_name;
38     my $checker;
39     if ($attr_ns eq '') {
40     $checker = $element_specific_checker->{$attr_ln};
41     } else {
42     $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
43     || $AttrChecker->{$attr_ns}->{''};
44     }
45     if ($checker) {
46 wakaba 1.8 $checker->($self, $attr, $todo, $element_state);
47     } elsif ($attr_ln eq '') {
48     #
49 wakaba 1.1 } else {
50     $self->{onerror}->(node => $attr, level => 'unsupported',
51     type => 'attribute');
52     ## ISSUE: No comformance createria for unknown attributes in the spec
53     }
54 wakaba 1.8
55     if ($attr_ns eq '') {
56     $self->_attr_status_info ($attr, $element_specific_status->{$attr_ln});
57     }
58     ## TODO: global attribute
59 wakaba 1.1 }
60     };
61     }; # $GetAtomAttrsChecker
62    
63 wakaba 1.4 my $AtomLanguageTagAttrChecker = sub {
64     ## NOTE: See also $HTMLLanguageTagAttrChecker in HTML.pm.
65    
66     my ($self, $attr) = @_;
67     my $value = $attr->value;
68     require Whatpm::LangTag;
69     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
70     my %opt = @_;
71     my $type = 'LangTag:'.$opt{type};
72     $type .= ':' . $opt{subtag} if defined $opt{subtag};
73     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
74     level => $opt{level});
75     });
76     ## ISSUE: RFC 4646 (3066bis)?
77     }; # $AtomLanguageTagAttrChecker
78    
79 wakaba 1.8 my %AtomChecker = (
80     %Whatpm::ContentChecker::AnyChecker,
81     status => FEATURE_RFC4287,
82     check_attrs => $GetAtomAttrsChecker->({}, {}),
83     );
84    
85     my %AtomTextConstruct = (
86     %AtomChecker,
87     check_start => sub {
88     my ($self, $item, $element_state) = @_;
89     $element_state->{type} = 'text';
90     $element_state->{value} = '';
91     },
92     check_attrs => $GetAtomAttrsChecker->({
93     type => sub {
94     my ($self, $attr, $item, $element_state) = @_;
95     my $value = $attr->value;
96     if ($value eq 'text' or $value eq 'html' or $value eq 'xhtml') { # MUST
97     $element_state->{type} = $value;
98 wakaba 1.1 } else {
99 wakaba 1.8 ## NOTE: IMT MUST NOT be used here.
100 wakaba 1.1 $self->{onerror}->(node => $attr, type => 'keyword:invalid');
101     }
102 wakaba 1.8 }, # checked in |checker|
103     }, {
104     type => FEATURE_RFC4287,
105     }),
106     check_child_element => sub {
107     my ($self, $item, $child_el, $child_nsuri, $child_ln,
108     $child_is_transparent, $element_state) = @_;
109     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
110     $self->{onerror}->(node => $child_el,
111     type => 'element not allowed:minus',
112     level => $self->{must_level});
113     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
114     #
115     } else {
116     if ($element_state->{type} eq 'text' or
117     $element_state->{type} eq 'html') { # MUST NOT
118     $self->{onerror}->(node => $child_el,
119     type => 'element not allowed:atom|TextConstruct',
120     level => $self->{must_level});
121     } elsif ($element_state->{type} eq 'xhtml') {
122     if ($child_nsuri eq q<http://www.w3.org/1999/xhtml> and
123     $child_ln eq 'div') { # MUST
124     if ($element_state->{has_div}) {
125     $self->{onerror}
126     ->(node => $child_el,
127     type => 'element not allowed:atom|TextConstruct',
128     level => $self->{must_level});
129     } else {
130     $element_state->{has_div} = 1;
131 wakaba 1.1 ## TODO: SHOULD be suitable for handling as HTML [XHTML10]
132     }
133 wakaba 1.8 } else {
134     $self->{onerror}->(node => $child_el,
135     type => 'element not allowed:atom|TextConstruct',
136     level => $self->{must_level});
137 wakaba 1.1 }
138 wakaba 1.8 } else {
139     die "atom:TextConstruct type error: $element_state->{type}";
140 wakaba 1.1 }
141 wakaba 1.8 }
142     },
143     check_child_text => sub {
144     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
145     if ($element_state->{type} eq 'text') {
146     #
147     } elsif ($element_state->{type} eq 'html') {
148     $element_state->{value} .= $child_node->text_content;
149     ## NOTE: Markup MUST be escaped.
150     } elsif ($element_state->{type} eq 'xhtml') {
151     if ($has_significant) {
152     $self->{onerror}->(node => $child_node,
153     type => 'character not allowed:atom|TextConstruct',
154     level => $self->{must_level});
155 wakaba 1.1 }
156 wakaba 1.8 } else {
157     die "atom:TextConstruct type error: $element_state->{type}";
158 wakaba 1.1 }
159     },
160 wakaba 1.8 check_end => sub {
161     my ($self, $item, $element_state) = @_;
162 wakaba 1.13 if ($element_state->{type} eq 'xhtml') {
163     unless ($element_state->{has_div}) {
164     $self->{onerror}->(node => $item->{node},
165     type => 'element missing:div',
166     level => $self->{must_level});
167     }
168     } elsif ($element_state->{type} eq 'html') {
169     ## TODO: SHOULD be suitable for handling as HTML [HTML4]
170     # markup MUST be escaped
171     $self->{onsubdoc}->({s => $element_state->{value},
172     container_node => $item->{node},
173     media_type => 'text/html',
174     inner_html_element => 'div',
175     is_char_string => 1});
176 wakaba 1.8 }
177 wakaba 1.1
178 wakaba 1.8 $AtomChecker{check_end}->(@_);
179     },
180     ); # %AtomTextConstruct
181 wakaba 1.1
182 wakaba 1.8 my %AtomPersonConstruct = (
183     %AtomChecker,
184     check_child_element => sub {
185     my ($self, $item, $child_el, $child_nsuri, $child_ln,
186     $child_is_transparent, $element_state) = @_;
187     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
188     $self->{onerror}->(node => $child_el,
189     type => 'element not allowed:minus',
190     level => $self->{must_level});
191     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
192     #
193     } elsif ($child_nsuri eq $ATOM_NS) {
194     if ($child_ln eq 'name') {
195     if ($element_state->{has_name}) {
196     $self->{onerror}
197     ->(node => $child_el,
198     type => 'element not allowed:atom|PersonConstruct',
199     level => $self->{must_level});
200     } else {
201     $element_state->{has_name} = 1;
202     }
203     } elsif ($child_ln eq 'uri') {
204     if ($element_state->{has_uri}) {
205     $self->{onerror}
206     ->(node => $child_el,
207     type => 'element not allowed:atom|PersonConstruct',
208     level => $self->{must_level});
209 wakaba 1.1 } else {
210 wakaba 1.8 $element_state->{has_uri} = 1;
211 wakaba 1.1 }
212 wakaba 1.8 } elsif ($child_ln eq 'email') {
213     if ($element_state->{has_email}) {
214     $self->{onerror}
215     ->(node => $child_el,
216     type => 'element not allowed:atom|PersonConstruct',
217     level => $self->{must_level});
218     } else {
219     $element_state->{has_email} = 1;
220 wakaba 1.7 }
221 wakaba 1.8 } else {
222     $self->{onerror}
223     ->(node => $child_el,
224     type => 'element not allowed:atom|PersonConstruct',
225     level => $self->{must_level});
226 wakaba 1.1 }
227 wakaba 1.8 } else {
228     $self->{onerror}
229     ->(node => $child_el,
230     type => 'element not allowed:atom|PersonConstruct',
231     level => $self->{must_level});
232 wakaba 1.1 }
233 wakaba 1.8 ## TODO: extension element
234     },
235     check_child_text => sub {
236     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
237     if ($has_significant) {
238     $self->{onerror}->(node => $child_node,
239     type => 'character not allowed:atom|PersonConstruct',
240     level => $self->{must_level});
241     }
242     },
243     check_end => sub {
244     my ($self, $item, $element_state) = @_;
245 wakaba 1.1
246 wakaba 1.8 unless ($element_state->{has_name}) {
247     $self->{onerror}->(node => $item->{node},
248     type => 'element missing:atom|name',
249     level => $self->{must_level});
250 wakaba 1.1 }
251    
252 wakaba 1.8 $AtomChecker{check_end}->(@_);
253 wakaba 1.1 },
254 wakaba 1.8 ); # %AtomPersonConstruct
255 wakaba 1.1
256 wakaba 1.2 our $Element;
257    
258 wakaba 1.8 $Element->{$ATOM_NS}->{''} = {
259     %AtomChecker,
260     status => 0,
261     };
262    
263 wakaba 1.2 $Element->{$ATOM_NS}->{name} = {
264 wakaba 1.8 %AtomChecker,
265    
266 wakaba 1.2 ## NOTE: Strictly speaking, structure and semantics for atom:name
267     ## element outside of Person construct is not defined.
268    
269 wakaba 1.8 ## NOTE: No constraint.
270 wakaba 1.2 };
271    
272     $Element->{$ATOM_NS}->{uri} = {
273 wakaba 1.8 %AtomChecker,
274    
275 wakaba 1.2 ## NOTE: Strictly speaking, structure and semantics for atom:uri
276     ## element outside of Person construct is not defined.
277 wakaba 1.8
278     ## NOTE: Elements are not explicitly disallowed.
279    
280     check_start => sub {
281     my ($self, $item, $element_state) = @_;
282     $element_state->{value} = '';
283     },
284     check_child_text => sub {
285     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
286     $element_state->{value} .= $child_node->data;
287     },
288     check_end => sub {
289     my ($self, $item, $element_state) = @_;
290 wakaba 1.2
291     ## NOTE: There MUST NOT be any white space.
292 wakaba 1.8 Whatpm::URIChecker->check_iri_reference ($element_state->{value}, sub {
293 wakaba 1.2 my %opt = @_;
294 wakaba 1.8 $self->{onerror}->(node => $item->{node}, level => $opt{level},
295 wakaba 1.2 type => 'URI::'.$opt{type}.
296     (defined $opt{position} ? ':'.$opt{position} : ''));
297     });
298    
299 wakaba 1.8 $AtomChecker{check_end}->(@_);
300 wakaba 1.2 },
301     };
302    
303     $Element->{$ATOM_NS}->{email} = {
304 wakaba 1.8 %AtomChecker,
305    
306 wakaba 1.2 ## NOTE: Strictly speaking, structure and semantics for atom:email
307     ## element outside of Person construct is not defined.
308 wakaba 1.8
309     ## NOTE: Elements are not explicitly disallowed.
310    
311     check_end => sub {
312     my ($self, $item, $element_state) = @_;
313 wakaba 1.2
314     ## TODO: addr-spec
315 wakaba 1.8 $self->{onerror}->(node => $item->{node},
316     type => 'addr-spec not supported',
317     level => $self->{unsupported_level});
318 wakaba 1.2
319 wakaba 1.8 $AtomChecker{check_end}->(@_);
320 wakaba 1.2 },
321     };
322    
323 wakaba 1.1 ## MUST NOT be any white space
324 wakaba 1.8 my %AtomDateConstruct = (
325     %AtomChecker,
326    
327     ## NOTE: It does not explicitly say that there MUST NOT be any element.
328    
329     check_start => sub {
330     my ($self, $item, $element_state) = @_;
331     $element_state->{value} = '';
332     },
333     check_child_text => sub {
334     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
335     $element_state->{value} .= $child_node->data;
336     },
337     check_end => sub {
338     my ($self, $item, $element_state) = @_;
339 wakaba 1.1
340 wakaba 1.4 ## MUST: RFC 3339 |date-time| with uppercase |T| and |Z|
341 wakaba 1.8 if ($element_state->{value} =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})(?>\.[0-9]+)?(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
342 wakaba 1.4 my ($y, $M, $d, $h, $m, $s, $zh, $zm)
343     = ($1, $2, $3, $4, $5, $6, $7, $8);
344 wakaba 1.8 my $node = $item->{node};
345 wakaba 1.4
346     ## Check additional constraints described or referenced in
347     ## comments of ABNF rules for |date-time|.
348     my $level = $self->{must_level};
349     if (0 < $M and $M < 13) {
350     $self->{onerror}->(node => $node, type => 'datetime:bad day',
351     level => $level)
352     if $d < 1 or
353     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
354     $self->{onerror}->(node => $node, type => 'datetime:bad day',
355     level => $level)
356     if $M == 2 and $d == 29 and
357     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
358     } else {
359     $self->{onerror}->(node => $node, type => 'datetime:bad month',
360     level => $level);
361     }
362     $self->{onerror}->(node => $node, type => 'datetime:bad hour',
363     level => $level) if $h > 23;
364     $self->{onerror}->(node => $node, type => 'datetime:bad minute',
365     level => $level) if $m > 59;
366     $self->{onerror}->(node => $node, type => 'datetime:bad second',
367     level => $level)
368     if $s > 60; ## NOTE: Validness of leap seconds are not checked.
369     $self->{onerror}->(node => $node, type => 'datetime:bad timezone hour',
370     level => $level) if $zh > 23;
371     $self->{onerror}->(node => $node, type => 'datetime:bad timezone minute',
372     level => $level) if $zm > 59;
373     } else {
374 wakaba 1.8 $self->{onerror}->(node => $item->{node},
375 wakaba 1.4 type => 'datetime:syntax error',
376     level => $self->{must_level});
377     }
378     ## NOTE: SHOULD be accurate as possible (cannot be checked)
379 wakaba 1.1
380 wakaba 1.8 $AtomChecker{check_end}->(@_);
381 wakaba 1.1 },
382 wakaba 1.8 ); # %AtomDateConstruct
383 wakaba 1.1
384 wakaba 1.2 $Element->{$ATOM_NS}->{entry} = {
385 wakaba 1.8 %AtomChecker,
386 wakaba 1.1 is_root => 1,
387 wakaba 1.8 check_child_element => sub {
388     my ($self, $item, $child_el, $child_nsuri, $child_ln,
389     $child_is_transparent, $element_state) = @_;
390    
391     ## NOTE: metadata elements, followed by atom:entry* (no explicit MAY)
392    
393     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
394     $self->{onerror}->(node => $child_el,
395     type => 'element not allowed:minus',
396     level => $self->{must_level});
397     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
398     #
399     } elsif ($child_nsuri eq $ATOM_NS) {
400     my $not_allowed;
401     if ({ # MUST (0, 1)
402     content => 1,
403     id => 1,
404     published => 1,
405     rights => 1,
406     source => 1,
407     summary => 1,
408     title => 1,
409     updated => 1,
410     }->{$child_ln}) {
411     unless ($element_state->{has_element}->{$child_ln}) {
412     $element_state->{has_element}->{$child_ln} = 1;
413     $not_allowed = $element_state->{has_element}->{entry};
414     } else {
415     $not_allowed = 1;
416     }
417     } elsif ($child_ln eq 'link') { # MAY
418     if ($child_el->rel eq $LINK_REL . 'alternate') {
419     my $type = $child_el->get_attribute_ns (undef, 'type');
420     $type = '' unless defined $type;
421     my $hreflang = $child_el->get_attribute_ns (undef, 'hreflang');
422     $hreflang = '' unless defined $hreflang;
423     my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
424     (defined $hreflang ? ':'.$hreflang : '');
425     unless ($element_state->{has_element}->{$key}) {
426     $element_state->{has_element}->{$key} = 1;
427     $element_state->{has_element}->{'link.alternate'} = 1;
428 wakaba 1.1 } else {
429     $not_allowed = 1;
430     }
431     }
432 wakaba 1.8
433     ## NOTE: MAY
434     $not_allowed ||= $element_state->{has_element}->{entry};
435     } elsif ({ # MAY
436     category => 1,
437     contributor => 1,
438     }->{$child_ln}) {
439     $not_allowed = $element_state->{has_element}->{entry};
440 wakaba 1.9 } elsif ($child_ln eq 'author') { # MAY
441     $not_allowed = $element_state->{has_element}->{entry};
442 wakaba 1.12 $element_state->{has_author} = 1; # ./author | ./source/author
443     $element_state->{has_element}->{$child_ln} = 1; # ./author
444 wakaba 1.8 } else {
445     $not_allowed = 1;
446     }
447     if ($not_allowed) {
448     $self->{onerror}->(node => $child_el, type => 'element not allowed');
449 wakaba 1.1 }
450 wakaba 1.15 } elsif ($child_nsuri eq $THR_NS and $child_ln eq 'in-reply-to') {
451     ## ISSUE: Where |thr:in-reply-to| is allowed is not explicit;y
452     ## defined in RFC 4685.
453     #
454 wakaba 1.16 } elsif ($child_nsuri eq $THR_NS and $child_ln eq 'total') {
455     #
456 wakaba 1.8 } else {
457     ## TODO: extension element
458     $self->{onerror}->(node => $child_el, type => 'element not allowed');
459     }
460     },
461     check_child_text => sub {
462     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
463     if ($has_significant) {
464     $self->{onerror}->(node => $child_node, type => 'character not allowed',
465     level => $self->{must_level});
466 wakaba 1.1 }
467 wakaba 1.8 },
468     check_end => sub {
469     my ($self, $item, $element_state) = @_;
470 wakaba 1.1
471 wakaba 1.9 if ($element_state->{has_author}) {
472     ## NOTE: There is either a child atom:author element
473     ## or a child atom:source element which contains an atom:author
474     ## child element.
475     #
476     } else {
477     A: {
478     my $root = $item->{node}->owner_document->document_element;
479     if ($root and $root->manakai_local_name eq 'feed') {
480     my $nsuri = $root->namespace_uri;
481     if (defined $nsuri and $nsuri eq $ATOM_NS) {
482     ## NOTE: An Atom Feed Document.
483     for my $root_child (@{$root->child_nodes}) {
484     ## NOTE: Entity references are not supported.
485     next unless $root_child->node_type == 1; # ELEMENT_NODE
486     next unless $root_child->manakai_local_name eq 'author';
487     my $root_child_nsuri = $root_child->namespace_uri;
488     next unless defined $root_child_nsuri;
489     next unless $root_child_nsuri eq $ATOM_NS;
490     last A;
491     }
492     }
493     }
494    
495     $self->{onerror}->(node => $item->{node},
496     type => 'element missing:atom|author',
497     level => $self->{must_level});
498     } # A
499     }
500    
501 wakaba 1.12 unless ($element_state->{has_element}->{author}) {
502     $item->{parent_state}->{has_no_author_entry} = 1; # for atom:feed's check
503     }
504    
505 wakaba 1.1 ## TODO: If entry's with same id, then updated SHOULD be different
506    
507 wakaba 1.8 unless ($element_state->{has_element}->{id}) { # MUST
508     $self->{onerror}->(node => $item->{node},
509     type => 'element missing:atom|id');
510     }
511     unless ($element_state->{has_element}->{title}) { # MUST
512     $self->{onerror}->(node => $item->{node},
513     type => 'element missing:atom|title');
514     }
515     unless ($element_state->{has_element}->{updated}) { # MUST
516     $self->{onerror}->(node => $item->{node},
517     type => 'element missing:atom|updated');
518     }
519     if (not $element_state->{has_element}->{content} and
520     not $element_state->{has_element}->{'link.alternate'}) {
521     $self->{onerror}->(node => $item->{node},
522     type => 'element missing:atom|link|alternate');
523 wakaba 1.1 }
524 wakaba 1.14
525     if ($element_state->{require_summary} and
526     not $element_state->{has_element}->{summary}) {
527     $self->{onerror}->(node => $item->{node},
528     type => 'element missing:atom|summary',
529     level => $self->{must_level});
530     }
531 wakaba 1.1 },
532     };
533    
534     $Element->{$ATOM_NS}->{feed} = {
535 wakaba 1.8 %AtomChecker,
536 wakaba 1.1 is_root => 1,
537 wakaba 1.8 check_child_element => sub {
538     my ($self, $item, $child_el, $child_nsuri, $child_ln,
539     $child_is_transparent, $element_state) = @_;
540    
541     ## NOTE: metadata elements, followed by atom:entry* (no explicit MAY)
542    
543     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
544     $self->{onerror}->(node => $child_el,
545     type => 'element not allowed:minus',
546     level => $self->{must_level});
547     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
548     #
549     } elsif ($child_nsuri eq $ATOM_NS) {
550     my $not_allowed;
551     if ($child_ln eq 'entry') {
552     $element_state->{has_element}->{entry} = 1;
553     } elsif ({ # MUST (0, 1)
554     generator => 1,
555     icon => 1,
556     id => 1,
557     logo => 1,
558     rights => 1,
559     subtitle => 1,
560     title => 1,
561     updated => 1,
562     }->{$child_ln}) {
563     unless ($element_state->{has_element}->{$child_ln}) {
564     $element_state->{has_element}->{$child_ln} = 1;
565     $not_allowed = $element_state->{has_element}->{entry};
566     } else {
567     $not_allowed = 1;
568     }
569     } elsif ($child_ln eq 'link') {
570     my $rel = $child_el->rel;
571     if ($rel eq $LINK_REL . 'alternate') {
572     my $type = $child_el->get_attribute_ns (undef, 'type');
573     $type = '' unless defined $type;
574     my $hreflang = $child_el->get_attribute_ns (undef, 'hreflang');
575     $hreflang = '' unless defined $hreflang;
576     my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
577     (defined $hreflang ? ':'.$hreflang : '');
578     unless ($element_state->{has_element}->{$key}) {
579     $element_state->{has_element}->{$key} = 1;
580 wakaba 1.1 } else {
581     $not_allowed = 1;
582     }
583 wakaba 1.8 } elsif ($rel eq $LINK_REL . 'self') {
584     $element_state->{has_element}->{'link.self'} = 1;
585 wakaba 1.1 }
586 wakaba 1.8
587     ## NOTE: MAY
588     $not_allowed = $element_state->{has_element}->{entry};
589     } elsif ({ # MAY
590     category => 1,
591     contributor => 1,
592     }->{$child_ln}) {
593     $not_allowed = $element_state->{has_element}->{entry};
594 wakaba 1.12 } elsif ($child_ln eq 'author') { # MAY
595     $not_allowed = $element_state->{has_element}->{entry};
596     $element_state->{has_element}->{author} = 1;
597 wakaba 1.8 } else {
598     $not_allowed = 1;
599 wakaba 1.1 }
600 wakaba 1.8 $self->{onerror}->(node => $child_el, type => 'element not allowed')
601     if $not_allowed;
602     } else {
603     ## TODO: extension element
604     $self->{onerror}->(node => $child_el, type => 'element not allowed');
605 wakaba 1.1 }
606 wakaba 1.8 },
607     check_child_text => sub {
608     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
609     if ($has_significant) {
610     $self->{onerror}->(node => $child_node, type => 'character not allowed',
611     level => $self->{must_level});
612     }
613     },
614     check_end => sub {
615     my ($self, $item, $element_state) = @_;
616 wakaba 1.10
617 wakaba 1.12 if ($element_state->{has_no_author_entry} and
618     not $element_state->{has_element}->{author}) {
619 wakaba 1.10 $self->{onerror}->(node => $item->{node},
620     type => 'element missing:atom|author',
621     level => $self->{must_level});
622 wakaba 1.11 ## ISSUE: If there is no |atom:entry| element,
623     ## there should be an |atom:author| element?
624 wakaba 1.10 }
625 wakaba 1.1
626     ## TODO: If entry's with same id, then updated SHOULD be different
627    
628 wakaba 1.8 unless ($element_state->{has_element}->{id}) { # MUST
629     $self->{onerror}->(node => $item->{node},
630     type => 'element missing:atom|id');
631 wakaba 1.1 }
632 wakaba 1.8 unless ($element_state->{has_element}->{title}) { # MUST
633     $self->{onerror}->(node => $item->{node},
634     type => 'element missing:atom|title');
635 wakaba 1.1 }
636 wakaba 1.8 unless ($element_state->{has_element}->{updated}) { # MUST
637     $self->{onerror}->(node => $item->{node},
638     type => 'element missing:atom|updated');
639 wakaba 1.1 }
640 wakaba 1.8 unless ($element_state->{has_element}->{'link.self'}) {
641     $self->{onerror}->(node => $item->{node}, level => 's',
642     type => 'element missing:atom|link|self');
643 wakaba 1.3 }
644 wakaba 1.1
645 wakaba 1.8 $AtomChecker{check_end}->(@_);
646 wakaba 1.1 },
647     };
648    
649     $Element->{$ATOM_NS}->{content} = {
650 wakaba 1.8 %AtomChecker,
651     check_start => sub {
652     my ($self, $item, $element_state) = @_;
653     $element_state->{type} = 'text';
654 wakaba 1.13 $element_state->{value} = '';
655 wakaba 1.8 },
656     check_attrs => $GetAtomAttrsChecker->({
657     src => sub {
658     my ($self, $attr, $item, $element_state) = @_;
659    
660     $element_state->{has_src} = 1;
661 wakaba 1.14 $item->{parent_state}->{require_summary} = 1;
662 wakaba 1.8
663     ## NOTE: There MUST NOT be any white space.
664     Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
665     my %opt = @_;
666     $self->{onerror}->(node => $item->{node}, level => $opt{level},
667     type => 'URI::'.$opt{type}.
668     (defined $opt{position} ? ':'.$opt{position} : ''));
669     });
670     },
671     type => sub {
672     my ($self, $attr, $item, $element_state) = @_;
673    
674     $element_state->{has_type} = 1;
675 wakaba 1.1
676 wakaba 1.8 my $value = $attr->value;
677 wakaba 1.1 if ($value eq 'text' or $value eq 'html' or $value eq 'xhtml') {
678     # MUST
679     } else {
680 wakaba 1.3 ## NOTE: MUST be a MIME media type. What is "MIME media type"?
681     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
682     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
683     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
684     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
685     my @type = ($1, $2);
686     my $param = $3;
687     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
688     if (defined $2) {
689     push @type, $1 => $2;
690     } else {
691     my $n = $1;
692     my $v = $2;
693     $v =~ s/\\(.)/$1/gs;
694     push @type, $n => $v;
695     }
696     }
697     require Whatpm::IMTChecker;
698     Whatpm::IMTChecker->check_imt (sub {
699     my %opt = @_;
700     $self->{onerror}->(node => $attr, level => $opt{level},
701     type => 'IMT:'.$opt{type});
702     }, @type);
703     } else {
704 wakaba 1.8 $self->{onerror}->(node => $attr, type => 'IMT:syntax error',
705     level => $self->{must_level});
706 wakaba 1.3 }
707 wakaba 1.1 }
708    
709 wakaba 1.17 if ({text => 1, html => 1, xhtml => 1}->{$value}) {
710     #
711     } elsif ($value =~ m![+/][Xx][Mm][Ll]\z!) {
712 wakaba 1.8 ## ISSUE: There is no definition for "XML media type" in RFC 3023.
713     ## Is |application/xml-dtd| an XML media type?
714     $value = 'xml';
715     } elsif ($value =~ m!^[Tt][Ee][Xx][Tt]/!) {
716     $value = 'mime_text';
717     } elsif ($value =~ m!^(?>message|multipart)/!i) {
718     $self->{onerror}->(node => $attr, type => 'IMT:composite',
719     level => $self->{must_level});
720 wakaba 1.14 $item->{parent_state}->{require_summary} = 1;
721     } else {
722     $item->{parent_state}->{require_summary} = 1;
723 wakaba 1.8 }
724 wakaba 1.1
725 wakaba 1.8 $element_state->{type} = $value;
726     },
727     }, {
728     src => FEATURE_RFC4287,
729     type => FEATURE_RFC4287,
730     }),
731     check_child_element => sub {
732     my ($self, $item, $child_el, $child_nsuri, $child_ln,
733     $child_is_transparent, $element_state) = @_;
734    
735     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
736     $self->{onerror}->(node => $child_el,
737     type => 'element not allowed:minus',
738     level => $self->{must_level});
739     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
740     #
741     } else {
742     if ($element_state->{type} eq 'text' or
743     $element_state->{type} eq 'html' or
744     $element_state->{type} eq 'mime_text') {
745     # MUST NOT
746     $self->{onerror}->(node => $child_el,
747     type => 'element not allowed:atom|content',
748     level => $self->{must_level});
749     } elsif ($element_state->{type} eq 'xhtml') {
750     if ($element_state->{has_div}) {
751     $self->{onerror}->(node => $child_el,
752     type => 'element not allowed:atom|content',
753     level => $self->{must_level});
754     } else {
755     ## TODO: SHOULD be suitable for handling as HTML [XHTML10]
756     $element_state->{has_div} = 1;
757     }
758     } elsif ($element_state->{type} eq 'xml') {
759     ## MAY contain elements
760     if ($element_state->{has_src}) {
761     $self->{onerror}->(node => $child_el,
762     type => 'element not allowed:atom|content',
763     level => $self->{must_level});
764 wakaba 1.1 }
765 wakaba 1.8 } else {
766     ## NOTE: Elements are not explicitly disallowed.
767 wakaba 1.1 }
768 wakaba 1.8 }
769     },
770     ## NOTE: If @src, the element MUST be empty. What is "empty"?
771     ## Is |<e><!----></e>| empty? |<e>&e;</e>| where |&e;| has
772     ## empty replacement tree shuld be empty, since Atom is defined
773     ## in terms of XML Information Set where entities are expanded.
774     ## (but what if |&e;| is an unexpanded entity?)
775     check_child_text => sub {
776     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
777     if ($has_significant) {
778     if ($element_state->{has_src}) {
779     $self->{onerror}->(node => $child_node,
780     type => 'character not allowed',
781     level => $self->{must_level});
782     } elsif ($element_state->{type} eq 'xhtml' or
783     $element_state->{type} eq 'xml') {
784     $self->{onerror}->(node => $child_node,
785     type => 'character not allowed:atom|content',
786     level => $self->{must_level});
787 wakaba 1.1 }
788 wakaba 1.8 }
789 wakaba 1.1
790 wakaba 1.13 $element_state->{value} .= $child_node->data;
791 wakaba 1.1
792 wakaba 1.8 ## NOTE: type=text/* has no further restriction (i.e. the content don't
793     ## have to conform to the definition of the type).
794     },
795     check_end => sub {
796     my ($self, $item, $element_state) = @_;
797 wakaba 1.1
798 wakaba 1.8 if ($element_state->{has_src}) {
799     if (not $element_state->{has_type}) {
800     $self->{onerror}->(node => $item->{node},
801     type => 'attribute missing:type',
802     level => $self->{should_level});
803 wakaba 1.14 } elsif ($element_state->{type} eq 'text' or
804     $element_state->{type} eq 'html' or
805     $element_state->{type} eq 'xhtml') {
806 wakaba 1.8 $self->{onerror}
807     ->(node => $item->{node}->get_attribute_node_ns (undef, 'type'),
808     type => 'not IMT', level => $self->{must_level});
809 wakaba 1.1 }
810 wakaba 1.8 }
811 wakaba 1.1
812 wakaba 1.8 if ($element_state->{type} eq 'xhtml') {
813     unless ($element_state->{has_div}) {
814     $self->{onerror}->(node => $item->{node},
815     type => 'element missing:div',
816     level => $self->{must_level});
817 wakaba 1.1 }
818 wakaba 1.11 } elsif ($element_state->{type} eq 'html') {
819     ## TODO: SHOULD be suitable for handling as HTML [HTML4]
820     # markup MUST be escaped
821 wakaba 1.13 $self->{onsubdoc}->({s => $element_state->{value},
822 wakaba 1.11 container_node => $item->{node},
823     media_type => 'text/html',
824     inner_html_element => 'div',
825     is_char_string => 1});
826 wakaba 1.8 } elsif ($element_state->{type} eq 'xml') {
827 wakaba 1.3 ## NOTE: SHOULD be suitable for handling as $value.
828 wakaba 1.1 ## If no @src, this would normally mean it contains a
829     ## single child element that would serve as the root element.
830 wakaba 1.8 $self->{onerror}->(node => $item->{node},
831     level => $self->{unsupported_level},
832     type => 'atom|content not supported',
833     value => $item->{node}->get_attribute_ns
834     (undef, 'type'));
835     } elsif ($element_state->{type} eq 'text' or
836     $element_state->{type} eq 'mime-text') {
837     #
838 wakaba 1.1 } else {
839     ## TODO: $s = valid Base64ed [RFC 3548] where
840     ## MAY leading and following "white space" (what?)
841     ## and lines separated by a single U+000A
842 wakaba 1.3
843     ## NOTE: SHOULD be suitable for the indicated media type.
844 wakaba 1.8 $self->{onerror}->(node => $item->{node},
845     level => $self->{unsupported_level},
846     type => 'atom|content not supported',
847     value => $item->{node}->get_attribute_ns
848     (undef, 'type'));
849     }
850 wakaba 1.1
851 wakaba 1.8 $AtomChecker{check_end}->(@_);
852 wakaba 1.1 },
853     };
854 wakaba 1.6 ## TODO: Tests for <html:nest/> in <atom:content/>
855 wakaba 1.1
856 wakaba 1.8 $Element->{$ATOM_NS}->{author} = \%AtomPersonConstruct;
857 wakaba 1.1
858     $Element->{$ATOM_NS}->{category} = {
859 wakaba 1.8 %AtomChecker,
860     check_attrs => $GetAtomAttrsChecker->({
861 wakaba 1.1 label => sub { 1 }, # no value constraint
862 wakaba 1.2 scheme => sub { # NOTE: No MUST.
863     my ($self, $attr) = @_;
864     ## NOTE: There MUST NOT be any white space.
865     Whatpm::URIChecker->check_iri ($attr->value, sub {
866     my %opt = @_;
867     $self->{onerror}->(node => $attr, level => $opt{level},
868     type => 'URI::'.$opt{type}.
869     (defined $opt{position} ? ':'.$opt{position} : ''));
870     });
871     },
872 wakaba 1.8 term => sub {
873     my ($self, $attr, $item, $element_state) = @_;
874    
875     ## NOTE: No value constraint.
876    
877     $element_state->{has_term} = 1;
878     },
879     }, {
880     label => FEATURE_RFC4287,
881     scheme => FEATURE_RFC4287,
882     term => FEATURE_RFC4287,
883 wakaba 1.1 }),
884 wakaba 1.8 check_end => sub {
885     my ($self, $item, $element_state) = @_;
886     unless ($element_state->{has_term}) {
887     $self->{onerror}->(node => $item->{node},
888 wakaba 1.1 type => 'attribute missing:term');
889     }
890    
891 wakaba 1.8 $AtomChecker{check_end}->(@_);
892 wakaba 1.1 },
893 wakaba 1.8 ## NOTE: Meaning of content is not defined.
894 wakaba 1.1 };
895    
896 wakaba 1.8 $Element->{$ATOM_NS}->{contributor} = \%AtomPersonConstruct;
897 wakaba 1.6
898     ## TODO: Anything below does not support <html:nest/> yet.
899 wakaba 1.1
900     $Element->{$ATOM_NS}->{generator} = {
901 wakaba 1.8 %AtomChecker,
902     check_attrs => $GetAtomAttrsChecker->({
903 wakaba 1.2 uri => sub { # MUST
904     my ($self, $attr) = @_;
905     ## NOTE: There MUST NOT be any white space.
906     Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
907     my %opt = @_;
908     $self->{onerror}->(node => $attr, level => $opt{level},
909     type => 'URI::'.$opt{type}.
910     (defined $opt{position} ? ':'.$opt{position} : ''));
911     });
912     ## NOTE: Dereferencing SHOULD produce a representation
913     ## that is relevant to the agent.
914     },
915 wakaba 1.1 version => sub { 1 }, # no value constraint
916 wakaba 1.8 }, {
917     uri => FEATURE_RFC4287,
918     version => FEATURE_RFC4287,
919 wakaba 1.1 }),
920    
921 wakaba 1.8 ## NOTE: Elements are not explicitly disallowed.
922 wakaba 1.1
923 wakaba 1.8 ## NOTE: Content MUST be a string that is a human-readable name for
924     ## the generating agent.
925 wakaba 1.1 };
926    
927     $Element->{$ATOM_NS}->{icon} = {
928 wakaba 1.8 %AtomChecker,
929     check_start => sub {
930     my ($self, $item, $element_state) = @_;
931     $element_state->{value} = '';
932     },
933     ## NOTE: Elements are not explicitly disallowed.
934     check_child_text => sub {
935     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
936     $element_state->{value} .= $child_node->data;
937     },
938     check_end => sub {
939     my ($self, $item, $element_state) = @_;
940 wakaba 1.1
941 wakaba 1.2 ## NOTE: No MUST.
942     ## NOTE: There MUST NOT be any white space.
943 wakaba 1.8 Whatpm::URIChecker->check_iri_reference ($element_state->{value}, sub {
944 wakaba 1.2 my %opt = @_;
945 wakaba 1.8 $self->{onerror}->(node => $item->{node}, level => $opt{level},
946 wakaba 1.2 type => 'URI::'.$opt{type}.
947     (defined $opt{position} ? ':'.$opt{position} : ''));
948     });
949    
950 wakaba 1.1 ## NOTE: Image SHOULD be 1:1 and SHOULD be small
951    
952 wakaba 1.8 $AtomChecker{check_end}->(@_);
953 wakaba 1.1 },
954     };
955    
956     $Element->{$ATOM_NS}->{id} = {
957 wakaba 1.8 %AtomChecker,
958     check_start => sub {
959     my ($self, $item, $element_state) = @_;
960     $element_state->{value} = '';
961     },
962     ## NOTE: Elements are not explicitly disallowed.
963     check_child_text => sub {
964     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
965     $element_state->{value} .= $child_node->data;
966     },
967     check_end => sub {
968     my ($self, $item, $element_state) = @_;
969 wakaba 1.2
970     ## NOTE: There MUST NOT be any white space.
971 wakaba 1.8 Whatpm::URIChecker->check_iri ($element_state->{value}, sub {
972 wakaba 1.2 my %opt = @_;
973 wakaba 1.8 $self->{onerror}->(node => $item->{node}, level => $opt{level},
974 wakaba 1.2 type => 'URI::'.$opt{type}.
975     (defined $opt{position} ? ':'.$opt{position} : ''));
976     });
977     ## TODO: SHOULD be normalized
978    
979 wakaba 1.8 $AtomChecker{check_end}->(@_);
980 wakaba 1.2 },
981     };
982    
983 wakaba 1.15 my $AtomIMTAttrChecker = sub {
984 wakaba 1.3 my ($self, $attr) = @_;
985     my $value = $attr->value;
986     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
987     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
988     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
989     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
990     my @type = ($1, $2);
991     my $param = $3;
992     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
993     if (defined $2) {
994     push @type, $1 => $2;
995     } else {
996     my $n = $1;
997     my $v = $2;
998     $v =~ s/\\(.)/$1/gs;
999     push @type, $n => $v;
1000     }
1001     }
1002     require Whatpm::IMTChecker;
1003     Whatpm::IMTChecker->check_imt (sub {
1004     my %opt = @_;
1005     $self->{onerror}->(node => $attr, level => $opt{level},
1006     type => 'IMT:'.$opt{type});
1007     }, @type);
1008     } else {
1009     $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
1010     }
1011 wakaba 1.15 }; # $AtomIMTAttrChecker
1012    
1013     my $AtomIRIReferenceAttrChecker = sub {
1014     my ($self, $attr) = @_;
1015     ## NOTE: There MUST NOT be any white space.
1016     Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
1017     my %opt = @_;
1018     $self->{onerror}->(node => $attr, level => $opt{level},
1019     type => 'URI::'.$opt{type}.
1020     (defined $opt{position} ? ':'.$opt{position} : ''));
1021     });
1022     }; # $AtomIRIReferenceAttrChecker
1023    
1024     $Element->{$ATOM_NS}->{link} = {
1025     %AtomChecker,
1026     check_attrs => $GetAtomAttrsChecker->({
1027     href => $AtomIRIReferenceAttrChecker,
1028     hreflang => $AtomLanguageTagAttrChecker,
1029     length => sub { }, # No MUST; in octets.
1030     rel => sub { # MUST
1031     my ($self, $attr) = @_;
1032     my $value = $attr->value;
1033     if ($value =~ /\A(?>[0-9A-Za-z._~!\$&'()*+,;=\x{A0}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFEF}\x{10000}-\x{1FFFD}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}\x{40000}-\x{4FFFD}\x{50000}-\x{5FFFD}\x{60000}-\x{6FFFD}\x{70000}-\x{7FFFD}\x{80000}-\x{8FFFD}\x{90000}-\x{9FFFD}\x{A0000}-\x{AFFFD}\x{B0000}-\x{BFFFD}\x{C0000}-\x{CFFFD}\x{D0000}-\x{DFFFD}\x{E1000}-\x{EFFFD}-]|%[0-9A-Fa-f][0-9A-Fa-f]|\@)+\z/) {
1034     $value = $LINK_REL . $value;
1035     }
1036    
1037     ## NOTE: There MUST NOT be any white space.
1038     Whatpm::URIChecker->check_iri ($value, sub {
1039     my %opt = @_;
1040     $self->{onerror}->(node => $attr, level => $opt{level},
1041     type => 'URI::'.$opt{type}.
1042     (defined $opt{position} ? ':'.$opt{position} : ''));
1043     });
1044    
1045     ## TODO: Warn if unregistered
1046 wakaba 1.16
1047     ## TODO: rel=license [RFC 4946]
1048     ## MUST NOT multiple rel=license with same href="",type="" pairs
1049     ## href="" SHOULD be dereferencable
1050     ## title="" SHOULD be there if multiple rel=license
1051     ## MUST NOT "unspecified" and other rel=license
1052 wakaba 1.3 },
1053 wakaba 1.15 title => sub { }, # No MUST
1054     type => $AtomIMTAttrChecker,
1055     ## NOTE: MUST be a MIME media type. What is "MIME media type"?
1056 wakaba 1.8 }, {
1057     href => FEATURE_RFC4287,
1058     hreflang => FEATURE_RFC4287,
1059     length => FEATURE_RFC4287,
1060     rel => FEATURE_RFC4287,
1061     title => FEATURE_RFC4287,
1062     type => FEATURE_RFC4287,
1063 wakaba 1.16
1064     ## TODO: thr:count
1065     ## TODO: thr:updated
1066 wakaba 1.2 }),
1067 wakaba 1.8 check_start => sub {
1068     my ($self, $item, $element_state) = @_;
1069 wakaba 1.2
1070 wakaba 1.8 unless ($item->{node}->has_attribute_ns (undef, 'href')) { # MUST
1071     $self->{onerror}->(node => $item->{node},
1072 wakaba 1.2 type => 'attribute missing:href');
1073     }
1074    
1075 wakaba 1.8 if ($item->{node}->rel eq $LINK_REL . 'enclosure' and
1076     not $item->{node}->has_attribute_ns (undef, 'length')) {
1077     $self->{onerror}->(node => $item->{node}, level => 's',
1078 wakaba 1.2 type => 'attribute missing:length');
1079     }
1080     },
1081     };
1082    
1083     $Element->{$ATOM_NS}->{logo} = {
1084 wakaba 1.8 %AtomChecker,
1085     ## NOTE: Child elements are not explicitly disallowed
1086     check_start => sub {
1087     my ($self, $item, $element_state) = @_;
1088     $element_state->{value} = '';
1089     },
1090     ## NOTE: Elements are not explicitly disallowed.
1091     check_child_text => sub {
1092     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1093     $element_state->{value} .= $child_node->data;
1094     },
1095     check_end => sub {
1096     my ($self, $item, $element_state) = @_;
1097 wakaba 1.1
1098 wakaba 1.2 ## NOTE: There MUST NOT be any white space.
1099 wakaba 1.8 Whatpm::URIChecker->check_iri_reference ($element_state->{value}, sub {
1100 wakaba 1.2 my %opt = @_;
1101 wakaba 1.8 $self->{onerror}->(node => $item->{node}, level => $opt{level},
1102 wakaba 1.2 type => 'URI::'.$opt{type}.
1103     (defined $opt{position} ? ':'.$opt{position} : ''));
1104     });
1105    
1106     ## NOTE: Image SHOULD be 2:1
1107    
1108 wakaba 1.8 $AtomChecker{check_end}->(@_);
1109 wakaba 1.2 },
1110     };
1111    
1112 wakaba 1.8 $Element->{$ATOM_NS}->{published} = \%AtomDateConstruct;
1113 wakaba 1.2
1114 wakaba 1.17 $Element->{$ATOM_NS}->{rights} = \%AtomTextConstruct;
1115 wakaba 1.3 ## NOTE: SHOULD NOT be used to convey machine-readable information.
1116 wakaba 1.2
1117     $Element->{$ATOM_NS}->{source} = {
1118 wakaba 1.8 %AtomChecker,
1119     check_child_element => sub {
1120     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1121     $child_is_transparent, $element_state) = @_;
1122    
1123     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
1124     $self->{onerror}->(node => $child_el,
1125     type => 'element not allowed:minus',
1126     level => $self->{must_level});
1127     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1128     #
1129     } elsif ($child_nsuri eq $ATOM_NS) {
1130     my $not_allowed;
1131     if ($child_ln eq 'entry') {
1132     $element_state->{has_element}->{entry} = 1;
1133     } elsif ({
1134     generator => 1,
1135     icon => 1,
1136     id => 1,
1137     logo => 1,
1138     rights => 1,
1139     subtitle => 1,
1140     title => 1,
1141     updated => 1,
1142     }->{$child_ln}) {
1143     unless ($element_state->{has_element}->{$child_ln}) {
1144     $element_state->{has_element}->{$child_ln} = 1;
1145     $not_allowed = $element_state->{has_element}->{entry};
1146     } else {
1147     $not_allowed = 1;
1148     }
1149     } elsif ($child_ln eq 'link') {
1150 wakaba 1.17 if ($child_el->rel eq $LINK_REL . 'alternate') {
1151     my $type = $child_el->get_attribute_ns (undef, 'type');
1152 wakaba 1.8 $type = '' unless defined $type;
1153 wakaba 1.17 my $hreflang = $child_el->get_attribute_ns (undef, 'hreflang');
1154 wakaba 1.8 $hreflang = '' unless defined $hreflang;
1155     my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
1156     (defined $hreflang ? ':'.$hreflang : '');
1157     unless ($element_state->{has_element}->{$key}) {
1158     $element_state->{has_element}->{$key} = 1;
1159 wakaba 1.2 } else {
1160     $not_allowed = 1;
1161     }
1162     }
1163 wakaba 1.8 $not_allowed ||= $element_state->{has_element}->{entry};
1164     } elsif ({
1165     category => 1,
1166     contributor => 1,
1167     }->{$child_ln}) {
1168     $not_allowed = $element_state->{has_element}->{entry};
1169 wakaba 1.9 } elsif ($child_ln eq 'author') {
1170     $not_allowed = $element_state->{has_element}->{entry};
1171     $item->{parent_state}->{has_author} = 1; # parent::atom:entry's flag
1172 wakaba 1.8 } else {
1173     $not_allowed = 1;
1174     }
1175     if ($not_allowed) {
1176     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1177 wakaba 1.2 }
1178 wakaba 1.8 } else {
1179     ## TODO: extension element
1180     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1181     }
1182     },
1183     check_child_text => sub {
1184     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1185     if ($has_significant) {
1186     $self->{onerror}->(node => $child_node, type => 'character not allowed',
1187     level => $self->{must_level});
1188 wakaba 1.2 }
1189 wakaba 1.1 },
1190     };
1191 wakaba 1.2
1192 wakaba 1.8 $Element->{$ATOM_NS}->{subtitle} = \%AtomTextConstruct;
1193 wakaba 1.2
1194 wakaba 1.8 $Element->{$ATOM_NS}->{summary} = \%AtomTextConstruct;
1195 wakaba 1.2
1196 wakaba 1.8 $Element->{$ATOM_NS}->{title} = \%AtomTextConstruct;
1197 wakaba 1.2
1198 wakaba 1.8 $Element->{$ATOM_NS}->{updated} = \%AtomDateConstruct;
1199 wakaba 1.2
1200     ## TODO: signature element
1201    
1202     ## TODO: simple extension element and structured extension element
1203 wakaba 1.1
1204 wakaba 1.15 ## -- Atom Threading 1.0 [RFC 4685]
1205    
1206     $Element->{$THR_NS}->{''} = {
1207     %AtomChecker,
1208     status => 0,
1209     };
1210    
1211     ## ISSUE: Strictly speaking, thr:* element/attribute,
1212     ## where * is an undefined local name, is not disallowed.
1213    
1214     $Element->{$THR_NS}->{'in-reply-to'} = {
1215     %AtomChecker,
1216     status => FEATURE_RFC4685,
1217     check_attrs => $GetAtomAttrsChecker->({
1218     href => $AtomIRIReferenceAttrChecker,
1219     ## TODO: fact-level.
1220     ## TODO: MUST be dereferencable.
1221     ref => sub {
1222     my ($self, $attr, $item, $element_state) = @_;
1223     $element_state->{has_ref} = 1;
1224    
1225     ## NOTE: Same as |atom:id|.
1226     ## NOTE: There MUST NOT be any white space.
1227     Whatpm::URIChecker->check_iri ($attr->value, sub {
1228     my %opt = @_;
1229     $self->{onerror}->(node => $attr, level => $opt{level},
1230     type => 'URI::'.$opt{type}.
1231     (defined $opt{position} ? ':'.$opt{position} : ''));
1232     });
1233    
1234     ## TODO: Check against ID guideline...
1235     },
1236     source => $AtomIRIReferenceAttrChecker,
1237     ## TODO: fact-level.
1238     ## TODO: MUST be dereferencable.
1239     type => $AtomIMTAttrChecker,
1240     ## TODO: fact-level.
1241     }, {
1242     href => FEATURE_RFC4685,
1243     source => FEATURE_RFC4685,
1244     ref => FEATURE_RFC4685,
1245     type => FEATURE_RFC4685,
1246     }),
1247     check_end => sub {
1248     my ($self, $item, $element_state) = @_;
1249    
1250     unless ($element_state->{has_ref}) {
1251     $self->{onerror}->(node => $item->{node},
1252     type => 'attribute missing:ref',
1253     level => $self->{must_level});
1254     }
1255    
1256     $AtomChecker{check_end}->(@_);
1257     },
1258     ## NOTE: Content model has no constraint.
1259     };
1260 wakaba 1.16
1261     $Element->{$THR_NS}->{total} = {
1262     %AtomChecker,
1263     check_start => sub {
1264     my ($self, $item, $element_state) = @_;
1265     $element_state->{value} = '';
1266     },
1267     check_child_element => sub {
1268     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1269     $child_is_transparent, $element_state) = @_;
1270    
1271     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
1272     $self->{onerror}->(node => $child_el,
1273     type => 'element not allowed:minus',
1274     level => $self->{must_level});
1275     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1276     #
1277     } else {
1278     $self->{onerror}->(node => $child_el,
1279     type => 'element not allowed',
1280     level => $self->{must_level});
1281     }
1282     },
1283     check_child_text => sub {
1284     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1285     $element_state->{value} .= $child_node->data;
1286     },
1287     check_end => sub {
1288     my ($self, $item, $element_state) = @_;
1289    
1290     ## NOTE: xsd:nonNegativeInteger
1291     unless ($element_state->{value} =~ /\A(?>[0-9]+|-0+)\z/) {
1292     $self->{onerror}->(node => $item->{node},
1293     type => 'syntax error', ## TODO:
1294     level => $self->{must_level});
1295     }
1296    
1297     $AtomChecker{check_end}->(@_);
1298     },
1299     };
1300    
1301     ## TODO: fh:complete
1302    
1303     ## TODO: fh:archive
1304    
1305     ## TODO: Check as archive document, page feed document, ...
1306    
1307     ## TODO: APP [RFC 5023]
1308 wakaba 1.15
1309 wakaba 1.1 $Whatpm::ContentChecker::Namespace->{$ATOM_NS}->{loaded} = 1;
1310 wakaba 1.15 $Whatpm::ContentChecker::Namespace->{$THR_NS}->{loaded} = 1;
1311 wakaba 1.1
1312     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24