/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/Atom.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/Atom.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.20 - (hide annotations) (download)
Fri Aug 29 13:34:36 2008 UTC (16 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.19: +6 -2 lines
++ whatpm/Whatpm/ChangeLog	29 Aug 2008 13:33:31 -0000
2008-08-29  Wakaba  <wakaba@suika.fam.cx>

	* IMTChecker.pm: Updated for the new error reporting architecture.

	* ContentChecker.pm: Error levels for IMTs are added.

++ whatpm/Whatpm/ContentChecker/ChangeLog	29 Aug 2008 13:34:24 -0000
2008-08-29  Wakaba  <wakaba@suika.fam.cx>

	* Atom.pm, HTML.pm: Made {level} inherited to the IMT checker.

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5 wakaba 1.2 require Whatpm::URIChecker;
6    
7 wakaba 1.1 my $ATOM_NS = q<http://www.w3.org/2005/Atom>;
8 wakaba 1.15 my $THR_NS = q<http://purl.org/syndication/thread/1.0>;
9 wakaba 1.16 my $FH_NS = q<http://purl.org/syndication/history/1.0>;
10 wakaba 1.3 my $LINK_REL = q<http://www.iana.org/assignments/relation/>;
11 wakaba 1.1
12 wakaba 1.8 sub FEATURE_RFC4287 () {
13     Whatpm::ContentChecker::FEATURE_STATUS_CR |
14     Whatpm::ContentChecker::FEATURE_ALLOWED
15     }
16    
17 wakaba 1.15 sub FEATURE_RFC4685 () {
18     Whatpm::ContentChecker::FEATURE_STATUS_CR |
19     Whatpm::ContentChecker::FEATURE_ALLOWED
20     }
21    
22 wakaba 1.1 ## MUST be well-formed XML (RFC 4287 references XML 1.0 REC 20040204)
23    
24     ## NOTE: Commants and PIs are not explicitly allowed.
25    
26     our $AttrChecker;
27    
28     ## Any element MAY have xml:base, xml:lang
29     my $GetAtomAttrsChecker = sub {
30     my $element_specific_checker = shift;
31 wakaba 1.8 my $element_specific_status = shift;
32 wakaba 1.1 return sub {
33 wakaba 1.8 my ($self, $todo, $element_state) = @_;
34 wakaba 1.1 for my $attr (@{$todo->{node}->attributes}) {
35     my $attr_ns = $attr->namespace_uri;
36     $attr_ns = '' unless defined $attr_ns;
37     my $attr_ln = $attr->manakai_local_name;
38     my $checker;
39     if ($attr_ns eq '') {
40     $checker = $element_specific_checker->{$attr_ln};
41     } else {
42     $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
43     || $AttrChecker->{$attr_ns}->{''};
44     }
45     if ($checker) {
46 wakaba 1.8 $checker->($self, $attr, $todo, $element_state);
47     } elsif ($attr_ln eq '') {
48     #
49 wakaba 1.1 } else {
50 wakaba 1.18 $self->{onerror}->(node => $attr,
51     type => 'unknown attribute',
52     level => $self->{level}->{uncertain});
53 wakaba 1.1 ## ISSUE: No comformance createria for unknown attributes in the spec
54     }
55 wakaba 1.8
56     if ($attr_ns eq '') {
57     $self->_attr_status_info ($attr, $element_specific_status->{$attr_ln});
58     }
59     ## TODO: global attribute
60 wakaba 1.1 }
61     };
62     }; # $GetAtomAttrsChecker
63    
64 wakaba 1.4 my $AtomLanguageTagAttrChecker = sub {
65     ## NOTE: See also $HTMLLanguageTagAttrChecker in HTML.pm.
66    
67     my ($self, $attr) = @_;
68     my $value = $attr->value;
69     require Whatpm::LangTag;
70     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
71 wakaba 1.18 $self->{onerror}->(@_, node => $attr);
72 wakaba 1.19 }, $self->{level});
73 wakaba 1.4 ## ISSUE: RFC 4646 (3066bis)?
74     }; # $AtomLanguageTagAttrChecker
75    
76 wakaba 1.8 my %AtomChecker = (
77     %Whatpm::ContentChecker::AnyChecker,
78     status => FEATURE_RFC4287,
79     check_attrs => $GetAtomAttrsChecker->({}, {}),
80     );
81    
82     my %AtomTextConstruct = (
83     %AtomChecker,
84     check_start => sub {
85     my ($self, $item, $element_state) = @_;
86     $element_state->{type} = 'text';
87     $element_state->{value} = '';
88     },
89     check_attrs => $GetAtomAttrsChecker->({
90     type => sub {
91     my ($self, $attr, $item, $element_state) = @_;
92     my $value = $attr->value;
93     if ($value eq 'text' or $value eq 'html' or $value eq 'xhtml') { # MUST
94     $element_state->{type} = $value;
95 wakaba 1.1 } else {
96 wakaba 1.8 ## NOTE: IMT MUST NOT be used here.
97 wakaba 1.18 $self->{onerror}->(node => $attr,
98     type => 'invalid attribute value',
99     level => $self->{level}->{must});
100 wakaba 1.1 }
101 wakaba 1.8 }, # checked in |checker|
102     }, {
103     type => FEATURE_RFC4287,
104     }),
105     check_child_element => sub {
106     my ($self, $item, $child_el, $child_nsuri, $child_ln,
107     $child_is_transparent, $element_state) = @_;
108     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
109     $self->{onerror}->(node => $child_el,
110     type => 'element not allowed:minus',
111 wakaba 1.18 level => $self->{level}->{must});
112 wakaba 1.8 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
113     #
114     } else {
115     if ($element_state->{type} eq 'text' or
116     $element_state->{type} eq 'html') { # MUST NOT
117     $self->{onerror}->(node => $child_el,
118     type => 'element not allowed:atom|TextConstruct',
119 wakaba 1.18 level => $self->{level}->{must});
120 wakaba 1.8 } elsif ($element_state->{type} eq 'xhtml') {
121     if ($child_nsuri eq q<http://www.w3.org/1999/xhtml> and
122     $child_ln eq 'div') { # MUST
123     if ($element_state->{has_div}) {
124     $self->{onerror}
125     ->(node => $child_el,
126     type => 'element not allowed:atom|TextConstruct',
127 wakaba 1.18 level => $self->{level}->{must});
128 wakaba 1.8 } else {
129     $element_state->{has_div} = 1;
130 wakaba 1.1 ## TODO: SHOULD be suitable for handling as HTML [XHTML10]
131     }
132 wakaba 1.8 } else {
133     $self->{onerror}->(node => $child_el,
134     type => 'element not allowed:atom|TextConstruct',
135 wakaba 1.18 level => $self->{level}->{must});
136 wakaba 1.1 }
137 wakaba 1.8 } else {
138     die "atom:TextConstruct type error: $element_state->{type}";
139 wakaba 1.1 }
140 wakaba 1.8 }
141     },
142     check_child_text => sub {
143     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
144     if ($element_state->{type} eq 'text') {
145     #
146     } elsif ($element_state->{type} eq 'html') {
147     $element_state->{value} .= $child_node->text_content;
148     ## NOTE: Markup MUST be escaped.
149     } elsif ($element_state->{type} eq 'xhtml') {
150     if ($has_significant) {
151     $self->{onerror}->(node => $child_node,
152     type => 'character not allowed:atom|TextConstruct',
153 wakaba 1.18 level => $self->{level}->{must});
154 wakaba 1.1 }
155 wakaba 1.8 } else {
156     die "atom:TextConstruct type error: $element_state->{type}";
157 wakaba 1.1 }
158     },
159 wakaba 1.8 check_end => sub {
160     my ($self, $item, $element_state) = @_;
161 wakaba 1.13 if ($element_state->{type} eq 'xhtml') {
162     unless ($element_state->{has_div}) {
163     $self->{onerror}->(node => $item->{node},
164 wakaba 1.18 type => 'child element missing',
165     text => 'div',
166     level => $self->{level}->{must});
167 wakaba 1.13 }
168     } elsif ($element_state->{type} eq 'html') {
169     ## TODO: SHOULD be suitable for handling as HTML [HTML4]
170     # markup MUST be escaped
171     $self->{onsubdoc}->({s => $element_state->{value},
172     container_node => $item->{node},
173     media_type => 'text/html',
174     inner_html_element => 'div',
175     is_char_string => 1});
176 wakaba 1.8 }
177 wakaba 1.1
178 wakaba 1.8 $AtomChecker{check_end}->(@_);
179     },
180     ); # %AtomTextConstruct
181 wakaba 1.1
182 wakaba 1.8 my %AtomPersonConstruct = (
183     %AtomChecker,
184     check_child_element => sub {
185     my ($self, $item, $child_el, $child_nsuri, $child_ln,
186     $child_is_transparent, $element_state) = @_;
187     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
188     $self->{onerror}->(node => $child_el,
189     type => 'element not allowed:minus',
190 wakaba 1.18 level => $self->{level}->{must});
191 wakaba 1.8 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
192     #
193     } elsif ($child_nsuri eq $ATOM_NS) {
194     if ($child_ln eq 'name') {
195     if ($element_state->{has_name}) {
196     $self->{onerror}
197     ->(node => $child_el,
198     type => 'element not allowed:atom|PersonConstruct',
199 wakaba 1.18 level => $self->{level}->{must});
200 wakaba 1.8 } else {
201     $element_state->{has_name} = 1;
202     }
203     } elsif ($child_ln eq 'uri') {
204     if ($element_state->{has_uri}) {
205     $self->{onerror}
206     ->(node => $child_el,
207     type => 'element not allowed:atom|PersonConstruct',
208 wakaba 1.18 level => $self->{level}->{must});
209 wakaba 1.1 } else {
210 wakaba 1.8 $element_state->{has_uri} = 1;
211 wakaba 1.1 }
212 wakaba 1.8 } elsif ($child_ln eq 'email') {
213     if ($element_state->{has_email}) {
214     $self->{onerror}
215     ->(node => $child_el,
216     type => 'element not allowed:atom|PersonConstruct',
217 wakaba 1.18 level => $self->{level}->{must});
218 wakaba 1.8 } else {
219     $element_state->{has_email} = 1;
220 wakaba 1.7 }
221 wakaba 1.8 } else {
222     $self->{onerror}
223     ->(node => $child_el,
224     type => 'element not allowed:atom|PersonConstruct',
225 wakaba 1.18 level => $self->{level}->{must});
226 wakaba 1.1 }
227 wakaba 1.8 } else {
228     $self->{onerror}
229     ->(node => $child_el,
230     type => 'element not allowed:atom|PersonConstruct',
231 wakaba 1.18 level => $self->{level}->{must});
232 wakaba 1.1 }
233 wakaba 1.8 ## TODO: extension element
234     },
235     check_child_text => sub {
236     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
237     if ($has_significant) {
238     $self->{onerror}->(node => $child_node,
239     type => 'character not allowed:atom|PersonConstruct',
240 wakaba 1.18 level => $self->{level}->{must});
241 wakaba 1.8 }
242     },
243     check_end => sub {
244     my ($self, $item, $element_state) = @_;
245 wakaba 1.1
246 wakaba 1.8 unless ($element_state->{has_name}) {
247     $self->{onerror}->(node => $item->{node},
248 wakaba 1.18 type => 'child element missing:atom',
249     text => 'name',
250     level => $self->{level}->{must});
251 wakaba 1.1 }
252    
253 wakaba 1.8 $AtomChecker{check_end}->(@_);
254 wakaba 1.1 },
255 wakaba 1.8 ); # %AtomPersonConstruct
256 wakaba 1.1
257 wakaba 1.2 our $Element;
258    
259 wakaba 1.8 $Element->{$ATOM_NS}->{''} = {
260     %AtomChecker,
261     status => 0,
262     };
263    
264 wakaba 1.2 $Element->{$ATOM_NS}->{name} = {
265 wakaba 1.8 %AtomChecker,
266    
267 wakaba 1.2 ## NOTE: Strictly speaking, structure and semantics for atom:name
268     ## element outside of Person construct is not defined.
269    
270 wakaba 1.8 ## NOTE: No constraint.
271 wakaba 1.2 };
272    
273     $Element->{$ATOM_NS}->{uri} = {
274 wakaba 1.8 %AtomChecker,
275    
276 wakaba 1.2 ## NOTE: Strictly speaking, structure and semantics for atom:uri
277     ## element outside of Person construct is not defined.
278 wakaba 1.8
279     ## NOTE: Elements are not explicitly disallowed.
280    
281     check_start => sub {
282     my ($self, $item, $element_state) = @_;
283     $element_state->{value} = '';
284     },
285     check_child_text => sub {
286     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
287     $element_state->{value} .= $child_node->data;
288     },
289     check_end => sub {
290     my ($self, $item, $element_state) = @_;
291 wakaba 1.2
292     ## NOTE: There MUST NOT be any white space.
293 wakaba 1.8 Whatpm::URIChecker->check_iri_reference ($element_state->{value}, sub {
294 wakaba 1.18 $self->{onerror}->(@_, node => $item->{node});
295 wakaba 1.19 }, $self->{level});
296 wakaba 1.2
297 wakaba 1.8 $AtomChecker{check_end}->(@_);
298 wakaba 1.2 },
299     };
300    
301     $Element->{$ATOM_NS}->{email} = {
302 wakaba 1.8 %AtomChecker,
303    
304 wakaba 1.2 ## NOTE: Strictly speaking, structure and semantics for atom:email
305     ## element outside of Person construct is not defined.
306 wakaba 1.8
307     ## NOTE: Elements are not explicitly disallowed.
308    
309     check_end => sub {
310     my ($self, $item, $element_state) = @_;
311 wakaba 1.2
312     ## TODO: addr-spec
313 wakaba 1.8 $self->{onerror}->(node => $item->{node},
314     type => 'addr-spec not supported',
315 wakaba 1.18 level => $self->{level}->{uncertain});
316 wakaba 1.2
317 wakaba 1.8 $AtomChecker{check_end}->(@_);
318 wakaba 1.2 },
319     };
320    
321 wakaba 1.1 ## MUST NOT be any white space
322 wakaba 1.8 my %AtomDateConstruct = (
323     %AtomChecker,
324    
325     ## NOTE: It does not explicitly say that there MUST NOT be any element.
326    
327     check_start => sub {
328     my ($self, $item, $element_state) = @_;
329     $element_state->{value} = '';
330     },
331     check_child_text => sub {
332     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
333     $element_state->{value} .= $child_node->data;
334     },
335     check_end => sub {
336     my ($self, $item, $element_state) = @_;
337 wakaba 1.1
338 wakaba 1.4 ## MUST: RFC 3339 |date-time| with uppercase |T| and |Z|
339 wakaba 1.8 if ($element_state->{value} =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})(?>\.[0-9]+)?(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
340 wakaba 1.4 my ($y, $M, $d, $h, $m, $s, $zh, $zm)
341     = ($1, $2, $3, $4, $5, $6, $7, $8);
342 wakaba 1.8 my $node = $item->{node};
343 wakaba 1.4
344     ## Check additional constraints described or referenced in
345     ## comments of ABNF rules for |date-time|.
346     if (0 < $M and $M < 13) {
347     $self->{onerror}->(node => $node, type => 'datetime:bad day',
348 wakaba 1.18 level => $self->{level}->{must})
349 wakaba 1.4 if $d < 1 or
350     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
351     $self->{onerror}->(node => $node, type => 'datetime:bad day',
352 wakaba 1.18 level => $self->{level}->{must})
353 wakaba 1.4 if $M == 2 and $d == 29 and
354     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
355     } else {
356     $self->{onerror}->(node => $node, type => 'datetime:bad month',
357 wakaba 1.18 level => $self->{level}->{must});
358 wakaba 1.4 }
359     $self->{onerror}->(node => $node, type => 'datetime:bad hour',
360 wakaba 1.18 level => $self->{level}->{must}) if $h > 23;
361 wakaba 1.4 $self->{onerror}->(node => $node, type => 'datetime:bad minute',
362 wakaba 1.18 level => $self->{level}->{must}) if $m > 59;
363 wakaba 1.4 $self->{onerror}->(node => $node, type => 'datetime:bad second',
364 wakaba 1.18 level => $self->{level}->{must})
365 wakaba 1.4 if $s > 60; ## NOTE: Validness of leap seconds are not checked.
366     $self->{onerror}->(node => $node, type => 'datetime:bad timezone hour',
367 wakaba 1.18 level => $self->{level}->{must}) if $zh > 23;
368 wakaba 1.4 $self->{onerror}->(node => $node, type => 'datetime:bad timezone minute',
369 wakaba 1.18 level => $self->{level}->{must}) if $zm > 59;
370 wakaba 1.4 } else {
371 wakaba 1.8 $self->{onerror}->(node => $item->{node},
372 wakaba 1.4 type => 'datetime:syntax error',
373 wakaba 1.18 level => $self->{level}->{must});
374 wakaba 1.4 }
375     ## NOTE: SHOULD be accurate as possible (cannot be checked)
376 wakaba 1.1
377 wakaba 1.8 $AtomChecker{check_end}->(@_);
378 wakaba 1.1 },
379 wakaba 1.8 ); # %AtomDateConstruct
380 wakaba 1.1
381 wakaba 1.2 $Element->{$ATOM_NS}->{entry} = {
382 wakaba 1.8 %AtomChecker,
383 wakaba 1.1 is_root => 1,
384 wakaba 1.8 check_child_element => sub {
385     my ($self, $item, $child_el, $child_nsuri, $child_ln,
386     $child_is_transparent, $element_state) = @_;
387    
388     ## NOTE: metadata elements, followed by atom:entry* (no explicit MAY)
389    
390     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
391     $self->{onerror}->(node => $child_el,
392     type => 'element not allowed:minus',
393 wakaba 1.18 level => $self->{level}->{must});
394 wakaba 1.8 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
395     #
396     } elsif ($child_nsuri eq $ATOM_NS) {
397     my $not_allowed;
398     if ({ # MUST (0, 1)
399     content => 1,
400     id => 1,
401     published => 1,
402     rights => 1,
403     source => 1,
404     summary => 1,
405     title => 1,
406     updated => 1,
407     }->{$child_ln}) {
408     unless ($element_state->{has_element}->{$child_ln}) {
409     $element_state->{has_element}->{$child_ln} = 1;
410     $not_allowed = $element_state->{has_element}->{entry};
411     } else {
412     $not_allowed = 1;
413     }
414     } elsif ($child_ln eq 'link') { # MAY
415     if ($child_el->rel eq $LINK_REL . 'alternate') {
416     my $type = $child_el->get_attribute_ns (undef, 'type');
417     $type = '' unless defined $type;
418     my $hreflang = $child_el->get_attribute_ns (undef, 'hreflang');
419     $hreflang = '' unless defined $hreflang;
420     my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
421     (defined $hreflang ? ':'.$hreflang : '');
422     unless ($element_state->{has_element}->{$key}) {
423     $element_state->{has_element}->{$key} = 1;
424     $element_state->{has_element}->{'link.alternate'} = 1;
425 wakaba 1.1 } else {
426     $not_allowed = 1;
427     }
428     }
429 wakaba 1.8
430     ## NOTE: MAY
431     $not_allowed ||= $element_state->{has_element}->{entry};
432     } elsif ({ # MAY
433     category => 1,
434     contributor => 1,
435     }->{$child_ln}) {
436     $not_allowed = $element_state->{has_element}->{entry};
437 wakaba 1.9 } elsif ($child_ln eq 'author') { # MAY
438     $not_allowed = $element_state->{has_element}->{entry};
439 wakaba 1.12 $element_state->{has_author} = 1; # ./author | ./source/author
440     $element_state->{has_element}->{$child_ln} = 1; # ./author
441 wakaba 1.8 } else {
442     $not_allowed = 1;
443     }
444     if ($not_allowed) {
445 wakaba 1.18 $self->{onerror}->(node => $child_el, type => 'element not allowed',
446     level => $self->{level}->{must});
447 wakaba 1.1 }
448 wakaba 1.15 } elsif ($child_nsuri eq $THR_NS and $child_ln eq 'in-reply-to') {
449     ## ISSUE: Where |thr:in-reply-to| is allowed is not explicit;y
450     ## defined in RFC 4685.
451     #
452 wakaba 1.16 } elsif ($child_nsuri eq $THR_NS and $child_ln eq 'total') {
453     #
454 wakaba 1.8 } else {
455     ## TODO: extension element
456 wakaba 1.18 $self->{onerror}->(node => $child_el, type => 'element not allowed',
457     level => $self->{level}->{must});
458 wakaba 1.8 }
459     },
460     check_child_text => sub {
461     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
462     if ($has_significant) {
463     $self->{onerror}->(node => $child_node, type => 'character not allowed',
464 wakaba 1.18 level => $self->{level}->{must});
465 wakaba 1.1 }
466 wakaba 1.8 },
467     check_end => sub {
468     my ($self, $item, $element_state) = @_;
469 wakaba 1.1
470 wakaba 1.9 if ($element_state->{has_author}) {
471     ## NOTE: There is either a child atom:author element
472     ## or a child atom:source element which contains an atom:author
473     ## child element.
474     #
475     } else {
476     A: {
477     my $root = $item->{node}->owner_document->document_element;
478     if ($root and $root->manakai_local_name eq 'feed') {
479     my $nsuri = $root->namespace_uri;
480     if (defined $nsuri and $nsuri eq $ATOM_NS) {
481     ## NOTE: An Atom Feed Document.
482     for my $root_child (@{$root->child_nodes}) {
483     ## NOTE: Entity references are not supported.
484     next unless $root_child->node_type == 1; # ELEMENT_NODE
485     next unless $root_child->manakai_local_name eq 'author';
486     my $root_child_nsuri = $root_child->namespace_uri;
487     next unless defined $root_child_nsuri;
488     next unless $root_child_nsuri eq $ATOM_NS;
489     last A;
490     }
491     }
492     }
493    
494     $self->{onerror}->(node => $item->{node},
495 wakaba 1.18 type => 'child element missing:atom',
496     text => 'author',
497     level => $self->{level}->{must});
498 wakaba 1.9 } # A
499     }
500    
501 wakaba 1.12 unless ($element_state->{has_element}->{author}) {
502     $item->{parent_state}->{has_no_author_entry} = 1; # for atom:feed's check
503     }
504    
505 wakaba 1.1 ## TODO: If entry's with same id, then updated SHOULD be different
506    
507 wakaba 1.8 unless ($element_state->{has_element}->{id}) { # MUST
508     $self->{onerror}->(node => $item->{node},
509 wakaba 1.18 type => 'child element missing:atom',
510     text => 'id',
511     level => $self->{level}->{must});
512 wakaba 1.8 }
513     unless ($element_state->{has_element}->{title}) { # MUST
514     $self->{onerror}->(node => $item->{node},
515 wakaba 1.18 type => 'child element missing:atom',
516     text => 'title',
517     level => $self->{level}->{must});
518 wakaba 1.8 }
519     unless ($element_state->{has_element}->{updated}) { # MUST
520     $self->{onerror}->(node => $item->{node},
521 wakaba 1.18 type => 'child element missing:atom',
522     text => 'updated',
523     level => $self->{level}->{must});
524 wakaba 1.8 }
525     if (not $element_state->{has_element}->{content} and
526     not $element_state->{has_element}->{'link.alternate'}) {
527     $self->{onerror}->(node => $item->{node},
528 wakaba 1.18 type => 'child element missing:atom:link:alternate',
529     level => $self->{level}->{must});
530 wakaba 1.1 }
531 wakaba 1.14
532     if ($element_state->{require_summary} and
533     not $element_state->{has_element}->{summary}) {
534     $self->{onerror}->(node => $item->{node},
535 wakaba 1.18 type => 'child element missing:atom',
536     text => 'summary',
537     level => $self->{level}->{must});
538 wakaba 1.14 }
539 wakaba 1.1 },
540     };
541    
542     $Element->{$ATOM_NS}->{feed} = {
543 wakaba 1.8 %AtomChecker,
544 wakaba 1.1 is_root => 1,
545 wakaba 1.8 check_child_element => sub {
546     my ($self, $item, $child_el, $child_nsuri, $child_ln,
547     $child_is_transparent, $element_state) = @_;
548    
549     ## NOTE: metadata elements, followed by atom:entry* (no explicit MAY)
550    
551     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
552     $self->{onerror}->(node => $child_el,
553     type => 'element not allowed:minus',
554 wakaba 1.18 level => $self->{level}->{must});
555 wakaba 1.8 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
556     #
557     } elsif ($child_nsuri eq $ATOM_NS) {
558     my $not_allowed;
559     if ($child_ln eq 'entry') {
560     $element_state->{has_element}->{entry} = 1;
561     } elsif ({ # MUST (0, 1)
562     generator => 1,
563     icon => 1,
564     id => 1,
565     logo => 1,
566     rights => 1,
567     subtitle => 1,
568     title => 1,
569     updated => 1,
570     }->{$child_ln}) {
571     unless ($element_state->{has_element}->{$child_ln}) {
572     $element_state->{has_element}->{$child_ln} = 1;
573     $not_allowed = $element_state->{has_element}->{entry};
574     } else {
575     $not_allowed = 1;
576     }
577     } elsif ($child_ln eq 'link') {
578     my $rel = $child_el->rel;
579     if ($rel eq $LINK_REL . 'alternate') {
580     my $type = $child_el->get_attribute_ns (undef, 'type');
581     $type = '' unless defined $type;
582     my $hreflang = $child_el->get_attribute_ns (undef, 'hreflang');
583     $hreflang = '' unless defined $hreflang;
584     my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
585     (defined $hreflang ? ':'.$hreflang : '');
586     unless ($element_state->{has_element}->{$key}) {
587     $element_state->{has_element}->{$key} = 1;
588 wakaba 1.1 } else {
589     $not_allowed = 1;
590     }
591 wakaba 1.8 } elsif ($rel eq $LINK_REL . 'self') {
592     $element_state->{has_element}->{'link.self'} = 1;
593 wakaba 1.1 }
594 wakaba 1.8
595     ## NOTE: MAY
596     $not_allowed = $element_state->{has_element}->{entry};
597     } elsif ({ # MAY
598     category => 1,
599     contributor => 1,
600     }->{$child_ln}) {
601     $not_allowed = $element_state->{has_element}->{entry};
602 wakaba 1.12 } elsif ($child_ln eq 'author') { # MAY
603     $not_allowed = $element_state->{has_element}->{entry};
604     $element_state->{has_element}->{author} = 1;
605 wakaba 1.8 } else {
606     $not_allowed = 1;
607 wakaba 1.1 }
608 wakaba 1.18 $self->{onerror}->(node => $child_el, type => 'element not allowed',
609     level => $self->{level}->{must})
610 wakaba 1.8 if $not_allowed;
611     } else {
612     ## TODO: extension element
613 wakaba 1.18 $self->{onerror}->(node => $child_el, type => 'element not allowed',
614     level => $self->{level}->{must});
615 wakaba 1.1 }
616 wakaba 1.8 },
617     check_child_text => sub {
618     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
619     if ($has_significant) {
620     $self->{onerror}->(node => $child_node, type => 'character not allowed',
621 wakaba 1.18 level => $self->{level}->{must});
622 wakaba 1.8 }
623     },
624     check_end => sub {
625     my ($self, $item, $element_state) = @_;
626 wakaba 1.10
627 wakaba 1.12 if ($element_state->{has_no_author_entry} and
628     not $element_state->{has_element}->{author}) {
629 wakaba 1.10 $self->{onerror}->(node => $item->{node},
630 wakaba 1.18 type => 'child element missing:atom',
631     text => 'author',
632     level => $self->{level}->{must});
633 wakaba 1.11 ## ISSUE: If there is no |atom:entry| element,
634     ## there should be an |atom:author| element?
635 wakaba 1.10 }
636 wakaba 1.1
637     ## TODO: If entry's with same id, then updated SHOULD be different
638    
639 wakaba 1.8 unless ($element_state->{has_element}->{id}) { # MUST
640     $self->{onerror}->(node => $item->{node},
641 wakaba 1.18 type => 'child element missing:atom',
642     text => 'id',
643     level => $self->{level}->{must});
644 wakaba 1.1 }
645 wakaba 1.8 unless ($element_state->{has_element}->{title}) { # MUST
646     $self->{onerror}->(node => $item->{node},
647 wakaba 1.18 type => 'child element missing:atom',
648     text => 'title',
649     level => $self->{level}->{must});
650 wakaba 1.1 }
651 wakaba 1.8 unless ($element_state->{has_element}->{updated}) { # MUST
652     $self->{onerror}->(node => $item->{node},
653 wakaba 1.18 type => 'element missing:atom',
654     text => 'updated',
655     level => $self->{level}->{must});
656 wakaba 1.1 }
657 wakaba 1.8 unless ($element_state->{has_element}->{'link.self'}) {
658 wakaba 1.18 $self->{onerror}->(node => $item->{node},
659     type => 'element missing:atom:link:self',
660     level => $self->{level}->{should});
661 wakaba 1.3 }
662 wakaba 1.1
663 wakaba 1.8 $AtomChecker{check_end}->(@_);
664 wakaba 1.1 },
665     };
666    
667     $Element->{$ATOM_NS}->{content} = {
668 wakaba 1.8 %AtomChecker,
669     check_start => sub {
670     my ($self, $item, $element_state) = @_;
671     $element_state->{type} = 'text';
672 wakaba 1.13 $element_state->{value} = '';
673 wakaba 1.8 },
674     check_attrs => $GetAtomAttrsChecker->({
675     src => sub {
676     my ($self, $attr, $item, $element_state) = @_;
677    
678     $element_state->{has_src} = 1;
679 wakaba 1.14 $item->{parent_state}->{require_summary} = 1;
680 wakaba 1.8
681     ## NOTE: There MUST NOT be any white space.
682     Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
683 wakaba 1.18 $self->{onerror}->(@_, node => $item->{node});
684 wakaba 1.19 }, $self->{level});
685 wakaba 1.8 },
686     type => sub {
687     my ($self, $attr, $item, $element_state) = @_;
688    
689     $element_state->{has_type} = 1;
690 wakaba 1.1
691 wakaba 1.8 my $value = $attr->value;
692 wakaba 1.1 if ($value eq 'text' or $value eq 'html' or $value eq 'xhtml') {
693     # MUST
694     } else {
695 wakaba 1.3 ## NOTE: MUST be a MIME media type. What is "MIME media type"?
696     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
697     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
698     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
699     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
700     my @type = ($1, $2);
701     my $param = $3;
702     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
703     if (defined $2) {
704     push @type, $1 => $2;
705     } else {
706     my $n = $1;
707     my $v = $2;
708     $v =~ s/\\(.)/$1/gs;
709     push @type, $n => $v;
710     }
711     }
712     require Whatpm::IMTChecker;
713 wakaba 1.20 my $ic = Whatpm::IMTChecker->new;
714     $ic->{level} = $self->{level};
715     $ic->check_imt (sub {
716 wakaba 1.18 $self->{onerror}->(@_, node => $attr);
717 wakaba 1.3 }, @type);
718     } else {
719 wakaba 1.8 $self->{onerror}->(node => $attr, type => 'IMT:syntax error',
720 wakaba 1.18 level => $self->{level}->{must});
721 wakaba 1.3 }
722 wakaba 1.1 }
723    
724 wakaba 1.17 if ({text => 1, html => 1, xhtml => 1}->{$value}) {
725     #
726     } elsif ($value =~ m![+/][Xx][Mm][Ll]\z!) {
727 wakaba 1.8 ## ISSUE: There is no definition for "XML media type" in RFC 3023.
728     ## Is |application/xml-dtd| an XML media type?
729     $value = 'xml';
730     } elsif ($value =~ m!^[Tt][Ee][Xx][Tt]/!) {
731     $value = 'mime_text';
732     } elsif ($value =~ m!^(?>message|multipart)/!i) {
733     $self->{onerror}->(node => $attr, type => 'IMT:composite',
734 wakaba 1.18 level => $self->{level}->{must});
735 wakaba 1.14 $item->{parent_state}->{require_summary} = 1;
736     } else {
737     $item->{parent_state}->{require_summary} = 1;
738 wakaba 1.8 }
739 wakaba 1.1
740 wakaba 1.8 $element_state->{type} = $value;
741     },
742     }, {
743     src => FEATURE_RFC4287,
744     type => FEATURE_RFC4287,
745     }),
746     check_child_element => sub {
747     my ($self, $item, $child_el, $child_nsuri, $child_ln,
748     $child_is_transparent, $element_state) = @_;
749    
750     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
751     $self->{onerror}->(node => $child_el,
752     type => 'element not allowed:minus',
753 wakaba 1.18 level => $self->{level}->{must});
754 wakaba 1.8 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
755     #
756     } else {
757     if ($element_state->{type} eq 'text' or
758     $element_state->{type} eq 'html' or
759     $element_state->{type} eq 'mime_text') {
760     # MUST NOT
761     $self->{onerror}->(node => $child_el,
762     type => 'element not allowed:atom|content',
763 wakaba 1.18 level => $self->{level}->{must});
764 wakaba 1.8 } elsif ($element_state->{type} eq 'xhtml') {
765     if ($element_state->{has_div}) {
766     $self->{onerror}->(node => $child_el,
767     type => 'element not allowed:atom|content',
768 wakaba 1.18 level => $self->{level}->{must});
769 wakaba 1.8 } else {
770     ## TODO: SHOULD be suitable for handling as HTML [XHTML10]
771     $element_state->{has_div} = 1;
772     }
773     } elsif ($element_state->{type} eq 'xml') {
774     ## MAY contain elements
775     if ($element_state->{has_src}) {
776     $self->{onerror}->(node => $child_el,
777     type => 'element not allowed:atom|content',
778 wakaba 1.18 level => $self->{level}->{must});
779 wakaba 1.1 }
780 wakaba 1.8 } else {
781     ## NOTE: Elements are not explicitly disallowed.
782 wakaba 1.1 }
783 wakaba 1.8 }
784     },
785     ## NOTE: If @src, the element MUST be empty. What is "empty"?
786     ## Is |<e><!----></e>| empty? |<e>&e;</e>| where |&e;| has
787     ## empty replacement tree shuld be empty, since Atom is defined
788     ## in terms of XML Information Set where entities are expanded.
789     ## (but what if |&e;| is an unexpanded entity?)
790     check_child_text => sub {
791     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
792     if ($has_significant) {
793     if ($element_state->{has_src}) {
794     $self->{onerror}->(node => $child_node,
795 wakaba 1.18 type => 'character not allowed:empty',
796     level => $self->{level}->{must});
797 wakaba 1.8 } elsif ($element_state->{type} eq 'xhtml' or
798     $element_state->{type} eq 'xml') {
799     $self->{onerror}->(node => $child_node,
800     type => 'character not allowed:atom|content',
801 wakaba 1.18 level => $self->{level}->{must});
802 wakaba 1.1 }
803 wakaba 1.8 }
804 wakaba 1.1
805 wakaba 1.13 $element_state->{value} .= $child_node->data;
806 wakaba 1.1
807 wakaba 1.8 ## NOTE: type=text/* has no further restriction (i.e. the content don't
808     ## have to conform to the definition of the type).
809     },
810     check_end => sub {
811     my ($self, $item, $element_state) = @_;
812 wakaba 1.1
813 wakaba 1.8 if ($element_state->{has_src}) {
814     if (not $element_state->{has_type}) {
815     $self->{onerror}->(node => $item->{node},
816 wakaba 1.18 type => 'attribute missing',
817     text => 'type',
818     level => $self->{level}->{should});
819 wakaba 1.14 } elsif ($element_state->{type} eq 'text' or
820     $element_state->{type} eq 'html' or
821     $element_state->{type} eq 'xhtml') {
822 wakaba 1.8 $self->{onerror}
823     ->(node => $item->{node}->get_attribute_node_ns (undef, 'type'),
824 wakaba 1.18 type => 'not IMT', level => $self->{level}->{must});
825 wakaba 1.1 }
826 wakaba 1.8 }
827 wakaba 1.1
828 wakaba 1.8 if ($element_state->{type} eq 'xhtml') {
829     unless ($element_state->{has_div}) {
830     $self->{onerror}->(node => $item->{node},
831 wakaba 1.18 type => 'element missing',
832     text => 'div',
833     level => $self->{level}->{must});
834 wakaba 1.1 }
835 wakaba 1.11 } elsif ($element_state->{type} eq 'html') {
836     ## TODO: SHOULD be suitable for handling as HTML [HTML4]
837     # markup MUST be escaped
838 wakaba 1.13 $self->{onsubdoc}->({s => $element_state->{value},
839 wakaba 1.11 container_node => $item->{node},
840     media_type => 'text/html',
841     inner_html_element => 'div',
842     is_char_string => 1});
843 wakaba 1.8 } elsif ($element_state->{type} eq 'xml') {
844 wakaba 1.3 ## NOTE: SHOULD be suitable for handling as $value.
845 wakaba 1.1 ## If no @src, this would normally mean it contains a
846     ## single child element that would serve as the root element.
847 wakaba 1.8 $self->{onerror}->(node => $item->{node},
848     type => 'atom|content not supported',
849 wakaba 1.18 text => $item->{node}->get_attribute_ns
850     (undef, 'type'),
851     level => $self->{level}->{uncertain});
852 wakaba 1.8 } elsif ($element_state->{type} eq 'text' or
853     $element_state->{type} eq 'mime-text') {
854     #
855 wakaba 1.1 } else {
856     ## TODO: $s = valid Base64ed [RFC 3548] where
857     ## MAY leading and following "white space" (what?)
858     ## and lines separated by a single U+000A
859 wakaba 1.3
860     ## NOTE: SHOULD be suitable for the indicated media type.
861 wakaba 1.8 $self->{onerror}->(node => $item->{node},
862     type => 'atom|content not supported',
863 wakaba 1.18 text => $item->{node}->get_attribute_ns
864     (undef, 'type'),
865     level => $self->{level}->{uncertain});
866 wakaba 1.8 }
867 wakaba 1.1
868 wakaba 1.8 $AtomChecker{check_end}->(@_);
869 wakaba 1.1 },
870     };
871 wakaba 1.6 ## TODO: Tests for <html:nest/> in <atom:content/>
872 wakaba 1.1
873 wakaba 1.8 $Element->{$ATOM_NS}->{author} = \%AtomPersonConstruct;
874 wakaba 1.1
875     $Element->{$ATOM_NS}->{category} = {
876 wakaba 1.8 %AtomChecker,
877     check_attrs => $GetAtomAttrsChecker->({
878 wakaba 1.1 label => sub { 1 }, # no value constraint
879 wakaba 1.2 scheme => sub { # NOTE: No MUST.
880     my ($self, $attr) = @_;
881     ## NOTE: There MUST NOT be any white space.
882     Whatpm::URIChecker->check_iri ($attr->value, sub {
883 wakaba 1.18 $self->{onerror}->(@_, node => $attr);
884 wakaba 1.19 }, $self->{level});
885 wakaba 1.2 },
886 wakaba 1.8 term => sub {
887     my ($self, $attr, $item, $element_state) = @_;
888    
889     ## NOTE: No value constraint.
890    
891     $element_state->{has_term} = 1;
892     },
893     }, {
894     label => FEATURE_RFC4287,
895     scheme => FEATURE_RFC4287,
896     term => FEATURE_RFC4287,
897 wakaba 1.1 }),
898 wakaba 1.8 check_end => sub {
899     my ($self, $item, $element_state) = @_;
900     unless ($element_state->{has_term}) {
901     $self->{onerror}->(node => $item->{node},
902 wakaba 1.18 type => 'attribute missing',
903     text => 'term',
904     level => $self->{level}->{must});
905 wakaba 1.1 }
906    
907 wakaba 1.8 $AtomChecker{check_end}->(@_);
908 wakaba 1.1 },
909 wakaba 1.8 ## NOTE: Meaning of content is not defined.
910 wakaba 1.1 };
911    
912 wakaba 1.8 $Element->{$ATOM_NS}->{contributor} = \%AtomPersonConstruct;
913 wakaba 1.6
914     ## TODO: Anything below does not support <html:nest/> yet.
915 wakaba 1.1
916     $Element->{$ATOM_NS}->{generator} = {
917 wakaba 1.8 %AtomChecker,
918     check_attrs => $GetAtomAttrsChecker->({
919 wakaba 1.2 uri => sub { # MUST
920     my ($self, $attr) = @_;
921     ## NOTE: There MUST NOT be any white space.
922     Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
923 wakaba 1.18 $self->{onerror}->(@_, node => $attr);
924 wakaba 1.19 }, $self->{level});
925 wakaba 1.2 ## NOTE: Dereferencing SHOULD produce a representation
926     ## that is relevant to the agent.
927     },
928 wakaba 1.1 version => sub { 1 }, # no value constraint
929 wakaba 1.8 }, {
930     uri => FEATURE_RFC4287,
931     version => FEATURE_RFC4287,
932 wakaba 1.1 }),
933    
934 wakaba 1.8 ## NOTE: Elements are not explicitly disallowed.
935 wakaba 1.1
936 wakaba 1.8 ## NOTE: Content MUST be a string that is a human-readable name for
937     ## the generating agent.
938 wakaba 1.1 };
939    
940     $Element->{$ATOM_NS}->{icon} = {
941 wakaba 1.8 %AtomChecker,
942     check_start => sub {
943     my ($self, $item, $element_state) = @_;
944     $element_state->{value} = '';
945     },
946     ## NOTE: Elements are not explicitly disallowed.
947     check_child_text => sub {
948     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
949     $element_state->{value} .= $child_node->data;
950     },
951     check_end => sub {
952     my ($self, $item, $element_state) = @_;
953 wakaba 1.1
954 wakaba 1.2 ## NOTE: No MUST.
955     ## NOTE: There MUST NOT be any white space.
956 wakaba 1.8 Whatpm::URIChecker->check_iri_reference ($element_state->{value}, sub {
957 wakaba 1.18 $self->{onerror}->(@_, node => $item->{node});
958 wakaba 1.19 }, $self->{level});
959 wakaba 1.2
960 wakaba 1.1 ## NOTE: Image SHOULD be 1:1 and SHOULD be small
961    
962 wakaba 1.8 $AtomChecker{check_end}->(@_);
963 wakaba 1.1 },
964     };
965    
966     $Element->{$ATOM_NS}->{id} = {
967 wakaba 1.8 %AtomChecker,
968     check_start => sub {
969     my ($self, $item, $element_state) = @_;
970     $element_state->{value} = '';
971     },
972     ## NOTE: Elements are not explicitly disallowed.
973     check_child_text => sub {
974     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
975     $element_state->{value} .= $child_node->data;
976     },
977     check_end => sub {
978     my ($self, $item, $element_state) = @_;
979 wakaba 1.2
980     ## NOTE: There MUST NOT be any white space.
981 wakaba 1.8 Whatpm::URIChecker->check_iri ($element_state->{value}, sub {
982 wakaba 1.18 $self->{onerror}->(@_, node => $item->{node});
983 wakaba 1.19 }, $self->{level});
984 wakaba 1.2 ## TODO: SHOULD be normalized
985    
986 wakaba 1.8 $AtomChecker{check_end}->(@_);
987 wakaba 1.2 },
988     };
989    
990 wakaba 1.15 my $AtomIMTAttrChecker = sub {
991 wakaba 1.3 my ($self, $attr) = @_;
992     my $value = $attr->value;
993     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
994     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
995     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
996     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
997     my @type = ($1, $2);
998     my $param = $3;
999     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
1000     if (defined $2) {
1001     push @type, $1 => $2;
1002     } else {
1003     my $n = $1;
1004     my $v = $2;
1005     $v =~ s/\\(.)/$1/gs;
1006     push @type, $n => $v;
1007     }
1008     }
1009     require Whatpm::IMTChecker;
1010 wakaba 1.20 my $ic = Whatpm::IMTChecker->new;
1011     $ic->{level} = $self->{level};
1012     $ic->check_imt (sub {
1013 wakaba 1.18 $self->{onerror}->(@_, node => $attr);
1014 wakaba 1.3 }, @type);
1015     } else {
1016 wakaba 1.18 $self->{onerror}->(node => $attr, type => 'IMT:syntax error',
1017     level => $self->{level}->{must});
1018 wakaba 1.3 }
1019 wakaba 1.15 }; # $AtomIMTAttrChecker
1020    
1021     my $AtomIRIReferenceAttrChecker = sub {
1022     my ($self, $attr) = @_;
1023     ## NOTE: There MUST NOT be any white space.
1024     Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
1025 wakaba 1.18 $self->{onerror}->(@_, node => $attr);
1026 wakaba 1.19 }, $self->{level});
1027 wakaba 1.15 }; # $AtomIRIReferenceAttrChecker
1028    
1029     $Element->{$ATOM_NS}->{link} = {
1030     %AtomChecker,
1031     check_attrs => $GetAtomAttrsChecker->({
1032     href => $AtomIRIReferenceAttrChecker,
1033     hreflang => $AtomLanguageTagAttrChecker,
1034     length => sub { }, # No MUST; in octets.
1035     rel => sub { # MUST
1036     my ($self, $attr) = @_;
1037     my $value = $attr->value;
1038     if ($value =~ /\A(?>[0-9A-Za-z._~!\$&'()*+,;=\x{A0}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFEF}\x{10000}-\x{1FFFD}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}\x{40000}-\x{4FFFD}\x{50000}-\x{5FFFD}\x{60000}-\x{6FFFD}\x{70000}-\x{7FFFD}\x{80000}-\x{8FFFD}\x{90000}-\x{9FFFD}\x{A0000}-\x{AFFFD}\x{B0000}-\x{BFFFD}\x{C0000}-\x{CFFFD}\x{D0000}-\x{DFFFD}\x{E1000}-\x{EFFFD}-]|%[0-9A-Fa-f][0-9A-Fa-f]|\@)+\z/) {
1039     $value = $LINK_REL . $value;
1040     }
1041    
1042     ## NOTE: There MUST NOT be any white space.
1043     Whatpm::URIChecker->check_iri ($value, sub {
1044 wakaba 1.18 $self->{onerror}->(@_, node => $attr);
1045 wakaba 1.19 }, $self->{level});
1046 wakaba 1.15
1047     ## TODO: Warn if unregistered
1048 wakaba 1.16
1049     ## TODO: rel=license [RFC 4946]
1050     ## MUST NOT multiple rel=license with same href="",type="" pairs
1051     ## href="" SHOULD be dereferencable
1052     ## title="" SHOULD be there if multiple rel=license
1053     ## MUST NOT "unspecified" and other rel=license
1054 wakaba 1.3 },
1055 wakaba 1.15 title => sub { }, # No MUST
1056     type => $AtomIMTAttrChecker,
1057     ## NOTE: MUST be a MIME media type. What is "MIME media type"?
1058 wakaba 1.8 }, {
1059     href => FEATURE_RFC4287,
1060     hreflang => FEATURE_RFC4287,
1061     length => FEATURE_RFC4287,
1062     rel => FEATURE_RFC4287,
1063     title => FEATURE_RFC4287,
1064     type => FEATURE_RFC4287,
1065 wakaba 1.16
1066     ## TODO: thr:count
1067     ## TODO: thr:updated
1068 wakaba 1.2 }),
1069 wakaba 1.8 check_start => sub {
1070     my ($self, $item, $element_state) = @_;
1071 wakaba 1.2
1072 wakaba 1.8 unless ($item->{node}->has_attribute_ns (undef, 'href')) { # MUST
1073     $self->{onerror}->(node => $item->{node},
1074 wakaba 1.18 type => 'attribute missing',
1075     text => 'href',
1076     level => $self->{level}->{must});
1077 wakaba 1.2 }
1078    
1079 wakaba 1.8 if ($item->{node}->rel eq $LINK_REL . 'enclosure' and
1080     not $item->{node}->has_attribute_ns (undef, 'length')) {
1081 wakaba 1.18 $self->{onerror}->(node => $item->{node},
1082     type => 'attribute missing',
1083     text => 'length',
1084     level => $self->{level}->{should});
1085 wakaba 1.2 }
1086     },
1087     };
1088    
1089     $Element->{$ATOM_NS}->{logo} = {
1090 wakaba 1.8 %AtomChecker,
1091     ## NOTE: Child elements are not explicitly disallowed
1092     check_start => sub {
1093     my ($self, $item, $element_state) = @_;
1094     $element_state->{value} = '';
1095     },
1096     ## NOTE: Elements are not explicitly disallowed.
1097     check_child_text => sub {
1098     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1099     $element_state->{value} .= $child_node->data;
1100     },
1101     check_end => sub {
1102     my ($self, $item, $element_state) = @_;
1103 wakaba 1.1
1104 wakaba 1.2 ## NOTE: There MUST NOT be any white space.
1105 wakaba 1.8 Whatpm::URIChecker->check_iri_reference ($element_state->{value}, sub {
1106 wakaba 1.18 $self->{onerror}->(@_, node => $item->{node});
1107 wakaba 1.19 }, $self->{level});
1108 wakaba 1.2
1109     ## NOTE: Image SHOULD be 2:1
1110    
1111 wakaba 1.8 $AtomChecker{check_end}->(@_);
1112 wakaba 1.2 },
1113     };
1114    
1115 wakaba 1.8 $Element->{$ATOM_NS}->{published} = \%AtomDateConstruct;
1116 wakaba 1.2
1117 wakaba 1.17 $Element->{$ATOM_NS}->{rights} = \%AtomTextConstruct;
1118 wakaba 1.3 ## NOTE: SHOULD NOT be used to convey machine-readable information.
1119 wakaba 1.2
1120     $Element->{$ATOM_NS}->{source} = {
1121 wakaba 1.8 %AtomChecker,
1122     check_child_element => sub {
1123     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1124     $child_is_transparent, $element_state) = @_;
1125    
1126     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
1127     $self->{onerror}->(node => $child_el,
1128     type => 'element not allowed:minus',
1129 wakaba 1.18 level => $self->{level}->{must});
1130 wakaba 1.8 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1131     #
1132     } elsif ($child_nsuri eq $ATOM_NS) {
1133     my $not_allowed;
1134     if ($child_ln eq 'entry') {
1135     $element_state->{has_element}->{entry} = 1;
1136     } elsif ({
1137     generator => 1,
1138     icon => 1,
1139     id => 1,
1140     logo => 1,
1141     rights => 1,
1142     subtitle => 1,
1143     title => 1,
1144     updated => 1,
1145     }->{$child_ln}) {
1146     unless ($element_state->{has_element}->{$child_ln}) {
1147     $element_state->{has_element}->{$child_ln} = 1;
1148     $not_allowed = $element_state->{has_element}->{entry};
1149     } else {
1150     $not_allowed = 1;
1151     }
1152     } elsif ($child_ln eq 'link') {
1153 wakaba 1.17 if ($child_el->rel eq $LINK_REL . 'alternate') {
1154     my $type = $child_el->get_attribute_ns (undef, 'type');
1155 wakaba 1.8 $type = '' unless defined $type;
1156 wakaba 1.17 my $hreflang = $child_el->get_attribute_ns (undef, 'hreflang');
1157 wakaba 1.8 $hreflang = '' unless defined $hreflang;
1158     my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
1159     (defined $hreflang ? ':'.$hreflang : '');
1160     unless ($element_state->{has_element}->{$key}) {
1161     $element_state->{has_element}->{$key} = 1;
1162 wakaba 1.2 } else {
1163     $not_allowed = 1;
1164     }
1165     }
1166 wakaba 1.8 $not_allowed ||= $element_state->{has_element}->{entry};
1167     } elsif ({
1168     category => 1,
1169     contributor => 1,
1170     }->{$child_ln}) {
1171     $not_allowed = $element_state->{has_element}->{entry};
1172 wakaba 1.9 } elsif ($child_ln eq 'author') {
1173     $not_allowed = $element_state->{has_element}->{entry};
1174     $item->{parent_state}->{has_author} = 1; # parent::atom:entry's flag
1175 wakaba 1.8 } else {
1176     $not_allowed = 1;
1177     }
1178     if ($not_allowed) {
1179 wakaba 1.18 $self->{onerror}->(node => $child_el, type => 'element not allowed',
1180     level => $self->{level}->{must});
1181 wakaba 1.2 }
1182 wakaba 1.8 } else {
1183     ## TODO: extension element
1184 wakaba 1.18 $self->{onerror}->(node => $child_el, type => 'element not allowed',
1185     level => $self->{level}->{must});
1186 wakaba 1.8 }
1187     },
1188     check_child_text => sub {
1189     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1190     if ($has_significant) {
1191     $self->{onerror}->(node => $child_node, type => 'character not allowed',
1192 wakaba 1.18 level => $self->{level}->{must});
1193 wakaba 1.2 }
1194 wakaba 1.1 },
1195     };
1196 wakaba 1.2
1197 wakaba 1.8 $Element->{$ATOM_NS}->{subtitle} = \%AtomTextConstruct;
1198 wakaba 1.2
1199 wakaba 1.8 $Element->{$ATOM_NS}->{summary} = \%AtomTextConstruct;
1200 wakaba 1.2
1201 wakaba 1.8 $Element->{$ATOM_NS}->{title} = \%AtomTextConstruct;
1202 wakaba 1.2
1203 wakaba 1.8 $Element->{$ATOM_NS}->{updated} = \%AtomDateConstruct;
1204 wakaba 1.2
1205     ## TODO: signature element
1206    
1207     ## TODO: simple extension element and structured extension element
1208 wakaba 1.1
1209 wakaba 1.15 ## -- Atom Threading 1.0 [RFC 4685]
1210    
1211     $Element->{$THR_NS}->{''} = {
1212     %AtomChecker,
1213     status => 0,
1214     };
1215    
1216     ## ISSUE: Strictly speaking, thr:* element/attribute,
1217     ## where * is an undefined local name, is not disallowed.
1218    
1219     $Element->{$THR_NS}->{'in-reply-to'} = {
1220     %AtomChecker,
1221     status => FEATURE_RFC4685,
1222     check_attrs => $GetAtomAttrsChecker->({
1223     href => $AtomIRIReferenceAttrChecker,
1224     ## TODO: fact-level.
1225     ## TODO: MUST be dereferencable.
1226     ref => sub {
1227     my ($self, $attr, $item, $element_state) = @_;
1228     $element_state->{has_ref} = 1;
1229    
1230     ## NOTE: Same as |atom:id|.
1231     ## NOTE: There MUST NOT be any white space.
1232     Whatpm::URIChecker->check_iri ($attr->value, sub {
1233 wakaba 1.18 $self->{onerror}->(@_, node => $attr);
1234 wakaba 1.19 }, $self->{level});
1235 wakaba 1.15
1236     ## TODO: Check against ID guideline...
1237     },
1238     source => $AtomIRIReferenceAttrChecker,
1239     ## TODO: fact-level.
1240     ## TODO: MUST be dereferencable.
1241     type => $AtomIMTAttrChecker,
1242     ## TODO: fact-level.
1243     }, {
1244     href => FEATURE_RFC4685,
1245     source => FEATURE_RFC4685,
1246     ref => FEATURE_RFC4685,
1247     type => FEATURE_RFC4685,
1248     }),
1249     check_end => sub {
1250     my ($self, $item, $element_state) = @_;
1251    
1252     unless ($element_state->{has_ref}) {
1253     $self->{onerror}->(node => $item->{node},
1254 wakaba 1.18 type => 'attribute missing',
1255     text => 'ref',
1256     level => $self->{level}->{must});
1257 wakaba 1.15 }
1258    
1259     $AtomChecker{check_end}->(@_);
1260     },
1261     ## NOTE: Content model has no constraint.
1262     };
1263 wakaba 1.16
1264     $Element->{$THR_NS}->{total} = {
1265     %AtomChecker,
1266     check_start => sub {
1267     my ($self, $item, $element_state) = @_;
1268     $element_state->{value} = '';
1269     },
1270     check_child_element => sub {
1271     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1272     $child_is_transparent, $element_state) = @_;
1273    
1274     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
1275     $self->{onerror}->(node => $child_el,
1276     type => 'element not allowed:minus',
1277 wakaba 1.18 level => $self->{level}->{must});
1278 wakaba 1.16 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1279     #
1280     } else {
1281     $self->{onerror}->(node => $child_el,
1282     type => 'element not allowed',
1283 wakaba 1.18 level => $self->{level}->{must});
1284 wakaba 1.16 }
1285     },
1286     check_child_text => sub {
1287     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1288     $element_state->{value} .= $child_node->data;
1289     },
1290     check_end => sub {
1291     my ($self, $item, $element_state) = @_;
1292    
1293     ## NOTE: xsd:nonNegativeInteger
1294     unless ($element_state->{value} =~ /\A(?>[0-9]+|-0+)\z/) {
1295     $self->{onerror}->(node => $item->{node},
1296 wakaba 1.18 type => 'invalid attribute value',
1297     level => $self->{level}->{must});
1298 wakaba 1.16 }
1299    
1300     $AtomChecker{check_end}->(@_);
1301     },
1302     };
1303    
1304     ## TODO: fh:complete
1305    
1306     ## TODO: fh:archive
1307    
1308     ## TODO: Check as archive document, page feed document, ...
1309    
1310     ## TODO: APP [RFC 5023]
1311 wakaba 1.15
1312 wakaba 1.1 $Whatpm::ContentChecker::Namespace->{$ATOM_NS}->{loaded} = 1;
1313 wakaba 1.15 $Whatpm::ContentChecker::Namespace->{$THR_NS}->{loaded} = 1;
1314 wakaba 1.1
1315     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24