/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/Atom.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/Atom.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.16 - (hide annotations) (download)
Thu Mar 20 10:30:21 2008 UTC (16 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.15: +60 -0 lines
++ whatpm/t/ChangeLog	20 Mar 2008 10:00:26 -0000
	* content-model-atom-threading-1.dat: New test data
	on |thr:total| element are added

2008-03-20  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ContentChecker/ChangeLog	20 Mar 2008 09:59:59 -0000
	* Atom.pm: Support for |thr:total| element.
	Add notes on additional Atom namespaces.

2008-03-20  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5 wakaba 1.2 require Whatpm::URIChecker;
6    
7 wakaba 1.1 my $ATOM_NS = q<http://www.w3.org/2005/Atom>;
8 wakaba 1.15 my $THR_NS = q<http://purl.org/syndication/thread/1.0>;
9 wakaba 1.16 my $FH_NS = q<http://purl.org/syndication/history/1.0>;
10 wakaba 1.3 my $LINK_REL = q<http://www.iana.org/assignments/relation/>;
11 wakaba 1.1
12 wakaba 1.8 sub FEATURE_RFC4287 () {
13     Whatpm::ContentChecker::FEATURE_STATUS_CR |
14     Whatpm::ContentChecker::FEATURE_ALLOWED
15     }
16    
17 wakaba 1.15 sub FEATURE_RFC4685 () {
18     Whatpm::ContentChecker::FEATURE_STATUS_CR |
19     Whatpm::ContentChecker::FEATURE_ALLOWED
20     }
21    
22 wakaba 1.1 ## MUST be well-formed XML (RFC 4287 references XML 1.0 REC 20040204)
23    
24     ## NOTE: Commants and PIs are not explicitly allowed.
25    
26     our $AttrChecker;
27    
28     ## Any element MAY have xml:base, xml:lang
29     my $GetAtomAttrsChecker = sub {
30     my $element_specific_checker = shift;
31 wakaba 1.8 my $element_specific_status = shift;
32 wakaba 1.1 return sub {
33 wakaba 1.8 my ($self, $todo, $element_state) = @_;
34 wakaba 1.1 for my $attr (@{$todo->{node}->attributes}) {
35     my $attr_ns = $attr->namespace_uri;
36     $attr_ns = '' unless defined $attr_ns;
37     my $attr_ln = $attr->manakai_local_name;
38     my $checker;
39     if ($attr_ns eq '') {
40     $checker = $element_specific_checker->{$attr_ln};
41     } else {
42     $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
43     || $AttrChecker->{$attr_ns}->{''};
44     }
45     if ($checker) {
46 wakaba 1.8 $checker->($self, $attr, $todo, $element_state);
47     } elsif ($attr_ln eq '') {
48     #
49 wakaba 1.1 } else {
50     $self->{onerror}->(node => $attr, level => 'unsupported',
51     type => 'attribute');
52     ## ISSUE: No comformance createria for unknown attributes in the spec
53     }
54 wakaba 1.8
55     if ($attr_ns eq '') {
56     $self->_attr_status_info ($attr, $element_specific_status->{$attr_ln});
57     }
58     ## TODO: global attribute
59 wakaba 1.1 }
60     };
61     }; # $GetAtomAttrsChecker
62    
63 wakaba 1.4 my $AtomLanguageTagAttrChecker = sub {
64     ## NOTE: See also $HTMLLanguageTagAttrChecker in HTML.pm.
65    
66     my ($self, $attr) = @_;
67     my $value = $attr->value;
68     require Whatpm::LangTag;
69     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
70     my %opt = @_;
71     my $type = 'LangTag:'.$opt{type};
72     $type .= ':' . $opt{subtag} if defined $opt{subtag};
73     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
74     level => $opt{level});
75     });
76     ## ISSUE: RFC 4646 (3066bis)?
77     }; # $AtomLanguageTagAttrChecker
78    
79 wakaba 1.8 my %AtomChecker = (
80     %Whatpm::ContentChecker::AnyChecker,
81     status => FEATURE_RFC4287,
82     check_attrs => $GetAtomAttrsChecker->({}, {}),
83     );
84    
85     my %AtomTextConstruct = (
86     %AtomChecker,
87     check_start => sub {
88     my ($self, $item, $element_state) = @_;
89     $element_state->{type} = 'text';
90     $element_state->{value} = '';
91     },
92     check_attrs => $GetAtomAttrsChecker->({
93     type => sub {
94     my ($self, $attr, $item, $element_state) = @_;
95     my $value = $attr->value;
96     if ($value eq 'text' or $value eq 'html' or $value eq 'xhtml') { # MUST
97     $element_state->{type} = $value;
98 wakaba 1.1 } else {
99 wakaba 1.8 ## NOTE: IMT MUST NOT be used here.
100 wakaba 1.1 $self->{onerror}->(node => $attr, type => 'keyword:invalid');
101     }
102 wakaba 1.8 }, # checked in |checker|
103     }, {
104     type => FEATURE_RFC4287,
105     }),
106     check_child_element => sub {
107     my ($self, $item, $child_el, $child_nsuri, $child_ln,
108     $child_is_transparent, $element_state) = @_;
109     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
110     $self->{onerror}->(node => $child_el,
111     type => 'element not allowed:minus',
112     level => $self->{must_level});
113     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
114     #
115     } else {
116     if ($element_state->{type} eq 'text' or
117     $element_state->{type} eq 'html') { # MUST NOT
118     $self->{onerror}->(node => $child_el,
119     type => 'element not allowed:atom|TextConstruct',
120     level => $self->{must_level});
121     } elsif ($element_state->{type} eq 'xhtml') {
122     if ($child_nsuri eq q<http://www.w3.org/1999/xhtml> and
123     $child_ln eq 'div') { # MUST
124     if ($element_state->{has_div}) {
125     $self->{onerror}
126     ->(node => $child_el,
127     type => 'element not allowed:atom|TextConstruct',
128     level => $self->{must_level});
129     } else {
130     $element_state->{has_div} = 1;
131 wakaba 1.1 ## TODO: SHOULD be suitable for handling as HTML [XHTML10]
132     }
133 wakaba 1.8 } else {
134     $self->{onerror}->(node => $child_el,
135     type => 'element not allowed:atom|TextConstruct',
136     level => $self->{must_level});
137 wakaba 1.1 }
138 wakaba 1.8 } else {
139     die "atom:TextConstruct type error: $element_state->{type}";
140 wakaba 1.1 }
141 wakaba 1.8 }
142     },
143     check_child_text => sub {
144     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
145     if ($element_state->{type} eq 'text') {
146     #
147     } elsif ($element_state->{type} eq 'html') {
148     $element_state->{value} .= $child_node->text_content;
149     ## NOTE: Markup MUST be escaped.
150     } elsif ($element_state->{type} eq 'xhtml') {
151     if ($has_significant) {
152     $self->{onerror}->(node => $child_node,
153     type => 'character not allowed:atom|TextConstruct',
154     level => $self->{must_level});
155 wakaba 1.1 }
156 wakaba 1.8 } else {
157     die "atom:TextConstruct type error: $element_state->{type}";
158 wakaba 1.1 }
159     },
160 wakaba 1.8 check_end => sub {
161     my ($self, $item, $element_state) = @_;
162 wakaba 1.13 if ($element_state->{type} eq 'xhtml') {
163     unless ($element_state->{has_div}) {
164     $self->{onerror}->(node => $item->{node},
165     type => 'element missing:div',
166     level => $self->{must_level});
167     }
168     } elsif ($element_state->{type} eq 'html') {
169     ## TODO: SHOULD be suitable for handling as HTML [HTML4]
170     # markup MUST be escaped
171     $self->{onsubdoc}->({s => $element_state->{value},
172     container_node => $item->{node},
173     media_type => 'text/html',
174     inner_html_element => 'div',
175     is_char_string => 1});
176 wakaba 1.8 }
177 wakaba 1.1
178 wakaba 1.8 $AtomChecker{check_end}->(@_);
179     },
180     ); # %AtomTextConstruct
181 wakaba 1.1
182 wakaba 1.8 my %AtomPersonConstruct = (
183     %AtomChecker,
184     check_child_element => sub {
185     my ($self, $item, $child_el, $child_nsuri, $child_ln,
186     $child_is_transparent, $element_state) = @_;
187     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
188     $self->{onerror}->(node => $child_el,
189     type => 'element not allowed:minus',
190     level => $self->{must_level});
191     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
192     #
193     } elsif ($child_nsuri eq $ATOM_NS) {
194     if ($child_ln eq 'name') {
195     if ($element_state->{has_name}) {
196     $self->{onerror}
197     ->(node => $child_el,
198     type => 'element not allowed:atom|PersonConstruct',
199     level => $self->{must_level});
200     } else {
201     $element_state->{has_name} = 1;
202     }
203     } elsif ($child_ln eq 'uri') {
204     if ($element_state->{has_uri}) {
205     $self->{onerror}
206     ->(node => $child_el,
207     type => 'element not allowed:atom|PersonConstruct',
208     level => $self->{must_level});
209 wakaba 1.1 } else {
210 wakaba 1.8 $element_state->{has_uri} = 1;
211 wakaba 1.1 }
212 wakaba 1.8 } elsif ($child_ln eq 'email') {
213     if ($element_state->{has_email}) {
214     $self->{onerror}
215     ->(node => $child_el,
216     type => 'element not allowed:atom|PersonConstruct',
217     level => $self->{must_level});
218     } else {
219     $element_state->{has_email} = 1;
220 wakaba 1.7 }
221 wakaba 1.8 } else {
222     $self->{onerror}
223     ->(node => $child_el,
224     type => 'element not allowed:atom|PersonConstruct',
225     level => $self->{must_level});
226 wakaba 1.1 }
227 wakaba 1.8 } else {
228     $self->{onerror}
229     ->(node => $child_el,
230     type => 'element not allowed:atom|PersonConstruct',
231     level => $self->{must_level});
232 wakaba 1.1 }
233 wakaba 1.8 ## TODO: extension element
234     },
235     check_child_text => sub {
236     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
237     if ($has_significant) {
238     $self->{onerror}->(node => $child_node,
239     type => 'character not allowed:atom|PersonConstruct',
240     level => $self->{must_level});
241     }
242     },
243     check_end => sub {
244     my ($self, $item, $element_state) = @_;
245 wakaba 1.1
246 wakaba 1.8 unless ($element_state->{has_name}) {
247     $self->{onerror}->(node => $item->{node},
248     type => 'element missing:atom|name',
249     level => $self->{must_level});
250 wakaba 1.1 }
251    
252 wakaba 1.8 $AtomChecker{check_end}->(@_);
253 wakaba 1.1 },
254 wakaba 1.8 ); # %AtomPersonConstruct
255 wakaba 1.1
256 wakaba 1.2 our $Element;
257    
258 wakaba 1.8 $Element->{$ATOM_NS}->{''} = {
259     %AtomChecker,
260     status => 0,
261     };
262    
263 wakaba 1.2 $Element->{$ATOM_NS}->{name} = {
264 wakaba 1.8 %AtomChecker,
265    
266 wakaba 1.2 ## NOTE: Strictly speaking, structure and semantics for atom:name
267     ## element outside of Person construct is not defined.
268    
269 wakaba 1.8 ## NOTE: No constraint.
270 wakaba 1.2 };
271    
272     $Element->{$ATOM_NS}->{uri} = {
273 wakaba 1.8 %AtomChecker,
274    
275 wakaba 1.2 ## NOTE: Strictly speaking, structure and semantics for atom:uri
276     ## element outside of Person construct is not defined.
277 wakaba 1.8
278     ## NOTE: Elements are not explicitly disallowed.
279    
280     check_start => sub {
281     my ($self, $item, $element_state) = @_;
282     $element_state->{value} = '';
283     },
284     check_child_text => sub {
285     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
286     $element_state->{value} .= $child_node->data;
287     },
288     check_end => sub {
289     my ($self, $item, $element_state) = @_;
290 wakaba 1.2
291     ## NOTE: There MUST NOT be any white space.
292 wakaba 1.8 Whatpm::URIChecker->check_iri_reference ($element_state->{value}, sub {
293 wakaba 1.2 my %opt = @_;
294 wakaba 1.8 $self->{onerror}->(node => $item->{node}, level => $opt{level},
295 wakaba 1.2 type => 'URI::'.$opt{type}.
296     (defined $opt{position} ? ':'.$opt{position} : ''));
297     });
298    
299 wakaba 1.8 $AtomChecker{check_end}->(@_);
300 wakaba 1.2 },
301     };
302    
303     $Element->{$ATOM_NS}->{email} = {
304 wakaba 1.8 %AtomChecker,
305    
306 wakaba 1.2 ## NOTE: Strictly speaking, structure and semantics for atom:email
307     ## element outside of Person construct is not defined.
308 wakaba 1.8
309     ## NOTE: Elements are not explicitly disallowed.
310    
311     check_end => sub {
312     my ($self, $item, $element_state) = @_;
313 wakaba 1.2
314     ## TODO: addr-spec
315 wakaba 1.8 $self->{onerror}->(node => $item->{node},
316     type => 'addr-spec not supported',
317     level => $self->{unsupported_level});
318 wakaba 1.2
319 wakaba 1.8 $AtomChecker{check_end}->(@_);
320 wakaba 1.2 },
321     };
322    
323 wakaba 1.1 ## MUST NOT be any white space
324 wakaba 1.8 my %AtomDateConstruct = (
325     %AtomChecker,
326    
327     ## NOTE: It does not explicitly say that there MUST NOT be any element.
328    
329     check_start => sub {
330     my ($self, $item, $element_state) = @_;
331     $element_state->{value} = '';
332     },
333     check_child_text => sub {
334     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
335     $element_state->{value} .= $child_node->data;
336     },
337     check_end => sub {
338     my ($self, $item, $element_state) = @_;
339 wakaba 1.1
340 wakaba 1.4 ## MUST: RFC 3339 |date-time| with uppercase |T| and |Z|
341 wakaba 1.8 if ($element_state->{value} =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})(?>\.[0-9]+)?(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
342 wakaba 1.4 my ($y, $M, $d, $h, $m, $s, $zh, $zm)
343     = ($1, $2, $3, $4, $5, $6, $7, $8);
344 wakaba 1.8 my $node = $item->{node};
345 wakaba 1.4
346     ## Check additional constraints described or referenced in
347     ## comments of ABNF rules for |date-time|.
348     my $level = $self->{must_level};
349     if (0 < $M and $M < 13) {
350     $self->{onerror}->(node => $node, type => 'datetime:bad day',
351     level => $level)
352     if $d < 1 or
353     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
354     $self->{onerror}->(node => $node, type => 'datetime:bad day',
355     level => $level)
356     if $M == 2 and $d == 29 and
357     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
358     } else {
359     $self->{onerror}->(node => $node, type => 'datetime:bad month',
360     level => $level);
361     }
362     $self->{onerror}->(node => $node, type => 'datetime:bad hour',
363     level => $level) if $h > 23;
364     $self->{onerror}->(node => $node, type => 'datetime:bad minute',
365     level => $level) if $m > 59;
366     $self->{onerror}->(node => $node, type => 'datetime:bad second',
367     level => $level)
368     if $s > 60; ## NOTE: Validness of leap seconds are not checked.
369     $self->{onerror}->(node => $node, type => 'datetime:bad timezone hour',
370     level => $level) if $zh > 23;
371     $self->{onerror}->(node => $node, type => 'datetime:bad timezone minute',
372     level => $level) if $zm > 59;
373     } else {
374 wakaba 1.8 $self->{onerror}->(node => $item->{node},
375 wakaba 1.4 type => 'datetime:syntax error',
376     level => $self->{must_level});
377     }
378     ## NOTE: SHOULD be accurate as possible (cannot be checked)
379 wakaba 1.1
380 wakaba 1.8 $AtomChecker{check_end}->(@_);
381 wakaba 1.1 },
382 wakaba 1.8 ); # %AtomDateConstruct
383 wakaba 1.1
384 wakaba 1.2 $Element->{$ATOM_NS}->{entry} = {
385 wakaba 1.8 %AtomChecker,
386 wakaba 1.1 is_root => 1,
387 wakaba 1.8 check_child_element => sub {
388     my ($self, $item, $child_el, $child_nsuri, $child_ln,
389     $child_is_transparent, $element_state) = @_;
390    
391     ## NOTE: metadata elements, followed by atom:entry* (no explicit MAY)
392    
393     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
394     $self->{onerror}->(node => $child_el,
395     type => 'element not allowed:minus',
396     level => $self->{must_level});
397     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
398     #
399     } elsif ($child_nsuri eq $ATOM_NS) {
400     my $not_allowed;
401     if ({ # MUST (0, 1)
402     content => 1,
403     id => 1,
404     published => 1,
405     rights => 1,
406     source => 1,
407     summary => 1,
408     title => 1,
409     updated => 1,
410     }->{$child_ln}) {
411     unless ($element_state->{has_element}->{$child_ln}) {
412     $element_state->{has_element}->{$child_ln} = 1;
413     $not_allowed = $element_state->{has_element}->{entry};
414     } else {
415     $not_allowed = 1;
416     }
417     } elsif ($child_ln eq 'link') { # MAY
418     if ($child_el->rel eq $LINK_REL . 'alternate') {
419     my $type = $child_el->get_attribute_ns (undef, 'type');
420     $type = '' unless defined $type;
421     my $hreflang = $child_el->get_attribute_ns (undef, 'hreflang');
422     $hreflang = '' unless defined $hreflang;
423     my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
424     (defined $hreflang ? ':'.$hreflang : '');
425     unless ($element_state->{has_element}->{$key}) {
426     $element_state->{has_element}->{$key} = 1;
427     $element_state->{has_element}->{'link.alternate'} = 1;
428 wakaba 1.1 } else {
429     $not_allowed = 1;
430     }
431     }
432 wakaba 1.8
433     ## NOTE: MAY
434     $not_allowed ||= $element_state->{has_element}->{entry};
435     } elsif ({ # MAY
436     category => 1,
437     contributor => 1,
438     }->{$child_ln}) {
439     $not_allowed = $element_state->{has_element}->{entry};
440 wakaba 1.9 } elsif ($child_ln eq 'author') { # MAY
441     $not_allowed = $element_state->{has_element}->{entry};
442 wakaba 1.12 $element_state->{has_author} = 1; # ./author | ./source/author
443     $element_state->{has_element}->{$child_ln} = 1; # ./author
444 wakaba 1.8 } else {
445     $not_allowed = 1;
446     }
447     if ($not_allowed) {
448     $self->{onerror}->(node => $child_el, type => 'element not allowed');
449 wakaba 1.1 }
450 wakaba 1.15 } elsif ($child_nsuri eq $THR_NS and $child_ln eq 'in-reply-to') {
451     ## ISSUE: Where |thr:in-reply-to| is allowed is not explicit;y
452     ## defined in RFC 4685.
453     #
454 wakaba 1.16 } elsif ($child_nsuri eq $THR_NS and $child_ln eq 'total') {
455     #
456 wakaba 1.8 } else {
457     ## TODO: extension element
458     $self->{onerror}->(node => $child_el, type => 'element not allowed');
459     }
460     },
461     check_child_text => sub {
462     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
463     if ($has_significant) {
464     $self->{onerror}->(node => $child_node, type => 'character not allowed',
465     level => $self->{must_level});
466 wakaba 1.1 }
467 wakaba 1.8 },
468     check_end => sub {
469     my ($self, $item, $element_state) = @_;
470 wakaba 1.1
471 wakaba 1.9 if ($element_state->{has_author}) {
472     ## NOTE: There is either a child atom:author element
473     ## or a child atom:source element which contains an atom:author
474     ## child element.
475     #
476     } else {
477     A: {
478     my $root = $item->{node}->owner_document->document_element;
479     if ($root and $root->manakai_local_name eq 'feed') {
480     my $nsuri = $root->namespace_uri;
481     if (defined $nsuri and $nsuri eq $ATOM_NS) {
482     ## NOTE: An Atom Feed Document.
483     for my $root_child (@{$root->child_nodes}) {
484     ## NOTE: Entity references are not supported.
485     next unless $root_child->node_type == 1; # ELEMENT_NODE
486     next unless $root_child->manakai_local_name eq 'author';
487     my $root_child_nsuri = $root_child->namespace_uri;
488     next unless defined $root_child_nsuri;
489     next unless $root_child_nsuri eq $ATOM_NS;
490     last A;
491     }
492     }
493     }
494    
495     $self->{onerror}->(node => $item->{node},
496     type => 'element missing:atom|author',
497     level => $self->{must_level});
498     } # A
499     }
500    
501 wakaba 1.12 unless ($element_state->{has_element}->{author}) {
502     $item->{parent_state}->{has_no_author_entry} = 1; # for atom:feed's check
503     }
504    
505 wakaba 1.1 ## TODO: If entry's with same id, then updated SHOULD be different
506    
507 wakaba 1.8 unless ($element_state->{has_element}->{id}) { # MUST
508     $self->{onerror}->(node => $item->{node},
509     type => 'element missing:atom|id');
510     }
511     unless ($element_state->{has_element}->{title}) { # MUST
512     $self->{onerror}->(node => $item->{node},
513     type => 'element missing:atom|title');
514     }
515     unless ($element_state->{has_element}->{updated}) { # MUST
516     $self->{onerror}->(node => $item->{node},
517     type => 'element missing:atom|updated');
518     }
519     if (not $element_state->{has_element}->{content} and
520     not $element_state->{has_element}->{'link.alternate'}) {
521     $self->{onerror}->(node => $item->{node},
522     type => 'element missing:atom|link|alternate');
523 wakaba 1.1 }
524 wakaba 1.14
525     if ($element_state->{require_summary} and
526     not $element_state->{has_element}->{summary}) {
527     $self->{onerror}->(node => $item->{node},
528     type => 'element missing:atom|summary',
529     level => $self->{must_level});
530     }
531 wakaba 1.1 },
532     };
533    
534     $Element->{$ATOM_NS}->{feed} = {
535 wakaba 1.8 %AtomChecker,
536 wakaba 1.1 is_root => 1,
537 wakaba 1.8 check_child_element => sub {
538     my ($self, $item, $child_el, $child_nsuri, $child_ln,
539     $child_is_transparent, $element_state) = @_;
540    
541     ## NOTE: metadata elements, followed by atom:entry* (no explicit MAY)
542    
543     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
544     $self->{onerror}->(node => $child_el,
545     type => 'element not allowed:minus',
546     level => $self->{must_level});
547     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
548     #
549     } elsif ($child_nsuri eq $ATOM_NS) {
550     my $not_allowed;
551     if ($child_ln eq 'entry') {
552     $element_state->{has_element}->{entry} = 1;
553     } elsif ({ # MUST (0, 1)
554     generator => 1,
555     icon => 1,
556     id => 1,
557     logo => 1,
558     rights => 1,
559     subtitle => 1,
560     title => 1,
561     updated => 1,
562     }->{$child_ln}) {
563     unless ($element_state->{has_element}->{$child_ln}) {
564     $element_state->{has_element}->{$child_ln} = 1;
565     $not_allowed = $element_state->{has_element}->{entry};
566     } else {
567     $not_allowed = 1;
568     }
569     } elsif ($child_ln eq 'link') {
570     my $rel = $child_el->rel;
571     if ($rel eq $LINK_REL . 'alternate') {
572     my $type = $child_el->get_attribute_ns (undef, 'type');
573     $type = '' unless defined $type;
574     my $hreflang = $child_el->get_attribute_ns (undef, 'hreflang');
575     $hreflang = '' unless defined $hreflang;
576     my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
577     (defined $hreflang ? ':'.$hreflang : '');
578     unless ($element_state->{has_element}->{$key}) {
579     $element_state->{has_element}->{$key} = 1;
580 wakaba 1.1 } else {
581     $not_allowed = 1;
582     }
583 wakaba 1.8 } elsif ($rel eq $LINK_REL . 'self') {
584     $element_state->{has_element}->{'link.self'} = 1;
585 wakaba 1.1 }
586 wakaba 1.8
587     ## NOTE: MAY
588     $not_allowed = $element_state->{has_element}->{entry};
589     } elsif ({ # MAY
590     category => 1,
591     contributor => 1,
592     }->{$child_ln}) {
593     $not_allowed = $element_state->{has_element}->{entry};
594 wakaba 1.12 } elsif ($child_ln eq 'author') { # MAY
595     $not_allowed = $element_state->{has_element}->{entry};
596     $element_state->{has_element}->{author} = 1;
597 wakaba 1.8 } else {
598     $not_allowed = 1;
599 wakaba 1.1 }
600 wakaba 1.8 $self->{onerror}->(node => $child_el, type => 'element not allowed')
601     if $not_allowed;
602     } else {
603     ## TODO: extension element
604     $self->{onerror}->(node => $child_el, type => 'element not allowed');
605 wakaba 1.1 }
606 wakaba 1.8 },
607     check_child_text => sub {
608     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
609     if ($has_significant) {
610     $self->{onerror}->(node => $child_node, type => 'character not allowed',
611     level => $self->{must_level});
612     }
613     },
614     check_end => sub {
615     my ($self, $item, $element_state) = @_;
616 wakaba 1.10
617 wakaba 1.12 if ($element_state->{has_no_author_entry} and
618     not $element_state->{has_element}->{author}) {
619 wakaba 1.10 $self->{onerror}->(node => $item->{node},
620     type => 'element missing:atom|author',
621     level => $self->{must_level});
622 wakaba 1.11 ## ISSUE: If there is no |atom:entry| element,
623     ## there should be an |atom:author| element?
624 wakaba 1.10 }
625 wakaba 1.1
626     ## TODO: If entry's with same id, then updated SHOULD be different
627    
628 wakaba 1.8 unless ($element_state->{has_element}->{id}) { # MUST
629     $self->{onerror}->(node => $item->{node},
630     type => 'element missing:atom|id');
631 wakaba 1.1 }
632 wakaba 1.8 unless ($element_state->{has_element}->{title}) { # MUST
633     $self->{onerror}->(node => $item->{node},
634     type => 'element missing:atom|title');
635 wakaba 1.1 }
636 wakaba 1.8 unless ($element_state->{has_element}->{updated}) { # MUST
637     $self->{onerror}->(node => $item->{node},
638     type => 'element missing:atom|updated');
639 wakaba 1.1 }
640 wakaba 1.8 unless ($element_state->{has_element}->{'link.self'}) {
641     $self->{onerror}->(node => $item->{node}, level => 's',
642     type => 'element missing:atom|link|self');
643 wakaba 1.3 }
644 wakaba 1.1
645 wakaba 1.8 $AtomChecker{check_end}->(@_);
646 wakaba 1.1 },
647     };
648    
649     $Element->{$ATOM_NS}->{content} = {
650 wakaba 1.8 %AtomChecker,
651     check_start => sub {
652     my ($self, $item, $element_state) = @_;
653     $element_state->{type} = 'text';
654 wakaba 1.13 $element_state->{value} = '';
655 wakaba 1.8 },
656     check_attrs => $GetAtomAttrsChecker->({
657     src => sub {
658     my ($self, $attr, $item, $element_state) = @_;
659    
660     $element_state->{has_src} = 1;
661 wakaba 1.14 $item->{parent_state}->{require_summary} = 1;
662 wakaba 1.8
663     ## NOTE: There MUST NOT be any white space.
664     Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
665     my %opt = @_;
666     $self->{onerror}->(node => $item->{node}, level => $opt{level},
667     type => 'URI::'.$opt{type}.
668     (defined $opt{position} ? ':'.$opt{position} : ''));
669     });
670     },
671     type => sub {
672     my ($self, $attr, $item, $element_state) = @_;
673    
674     $element_state->{has_type} = 1;
675 wakaba 1.1
676 wakaba 1.8 my $value = $attr->value;
677 wakaba 1.1 if ($value eq 'text' or $value eq 'html' or $value eq 'xhtml') {
678     # MUST
679     } else {
680 wakaba 1.3 ## NOTE: MUST be a MIME media type. What is "MIME media type"?
681     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
682     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
683     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
684     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
685     my @type = ($1, $2);
686     my $param = $3;
687     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
688     if (defined $2) {
689     push @type, $1 => $2;
690     } else {
691     my $n = $1;
692     my $v = $2;
693     $v =~ s/\\(.)/$1/gs;
694     push @type, $n => $v;
695     }
696     }
697     require Whatpm::IMTChecker;
698     Whatpm::IMTChecker->check_imt (sub {
699     my %opt = @_;
700     $self->{onerror}->(node => $attr, level => $opt{level},
701     type => 'IMT:'.$opt{type});
702     }, @type);
703     } else {
704 wakaba 1.8 $self->{onerror}->(node => $attr, type => 'IMT:syntax error',
705     level => $self->{must_level});
706 wakaba 1.3 }
707 wakaba 1.1 }
708    
709 wakaba 1.8 if ($value =~ m![+/][Xx][Mm][Ll]\z!) {
710     ## ISSUE: There is no definition for "XML media type" in RFC 3023.
711     ## Is |application/xml-dtd| an XML media type?
712     $value = 'xml';
713     } elsif ($value =~ m!^[Tt][Ee][Xx][Tt]/!) {
714     $value = 'mime_text';
715     } elsif ($value =~ m!^(?>message|multipart)/!i) {
716     $self->{onerror}->(node => $attr, type => 'IMT:composite',
717     level => $self->{must_level});
718 wakaba 1.14 $item->{parent_state}->{require_summary} = 1;
719     } else {
720     $item->{parent_state}->{require_summary} = 1;
721 wakaba 1.8 }
722 wakaba 1.1
723 wakaba 1.8 $element_state->{type} = $value;
724     },
725     }, {
726     src => FEATURE_RFC4287,
727     type => FEATURE_RFC4287,
728     }),
729     check_child_element => sub {
730     my ($self, $item, $child_el, $child_nsuri, $child_ln,
731     $child_is_transparent, $element_state) = @_;
732    
733     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
734     $self->{onerror}->(node => $child_el,
735     type => 'element not allowed:minus',
736     level => $self->{must_level});
737     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
738     #
739     } else {
740     if ($element_state->{type} eq 'text' or
741     $element_state->{type} eq 'html' or
742     $element_state->{type} eq 'mime_text') {
743     # MUST NOT
744     $self->{onerror}->(node => $child_el,
745     type => 'element not allowed:atom|content',
746     level => $self->{must_level});
747     } elsif ($element_state->{type} eq 'xhtml') {
748     if ($element_state->{has_div}) {
749     $self->{onerror}->(node => $child_el,
750     type => 'element not allowed:atom|content',
751     level => $self->{must_level});
752     } else {
753     ## TODO: SHOULD be suitable for handling as HTML [XHTML10]
754     $element_state->{has_div} = 1;
755     }
756     } elsif ($element_state->{type} eq 'xml') {
757     ## MAY contain elements
758     if ($element_state->{has_src}) {
759     $self->{onerror}->(node => $child_el,
760     type => 'element not allowed:atom|content',
761     level => $self->{must_level});
762 wakaba 1.1 }
763 wakaba 1.8 } else {
764     ## NOTE: Elements are not explicitly disallowed.
765 wakaba 1.1 }
766 wakaba 1.8 }
767     },
768     ## NOTE: If @src, the element MUST be empty. What is "empty"?
769     ## Is |<e><!----></e>| empty? |<e>&e;</e>| where |&e;| has
770     ## empty replacement tree shuld be empty, since Atom is defined
771     ## in terms of XML Information Set where entities are expanded.
772     ## (but what if |&e;| is an unexpanded entity?)
773     check_child_text => sub {
774     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
775     if ($has_significant) {
776     if ($element_state->{has_src}) {
777     $self->{onerror}->(node => $child_node,
778     type => 'character not allowed',
779     level => $self->{must_level});
780     } elsif ($element_state->{type} eq 'xhtml' or
781     $element_state->{type} eq 'xml') {
782     $self->{onerror}->(node => $child_node,
783     type => 'character not allowed:atom|content',
784     level => $self->{must_level});
785 wakaba 1.1 }
786 wakaba 1.8 }
787 wakaba 1.1
788 wakaba 1.13 $element_state->{value} .= $child_node->data;
789 wakaba 1.1
790 wakaba 1.8 ## NOTE: type=text/* has no further restriction (i.e. the content don't
791     ## have to conform to the definition of the type).
792     },
793     check_end => sub {
794     my ($self, $item, $element_state) = @_;
795 wakaba 1.1
796 wakaba 1.8 if ($element_state->{has_src}) {
797     if (not $element_state->{has_type}) {
798     $self->{onerror}->(node => $item->{node},
799     type => 'attribute missing:type',
800     level => $self->{should_level});
801 wakaba 1.14 } elsif ($element_state->{type} eq 'text' or
802     $element_state->{type} eq 'html' or
803     $element_state->{type} eq 'xhtml') {
804 wakaba 1.8 $self->{onerror}
805     ->(node => $item->{node}->get_attribute_node_ns (undef, 'type'),
806     type => 'not IMT', level => $self->{must_level});
807 wakaba 1.1 }
808 wakaba 1.8 }
809 wakaba 1.1
810 wakaba 1.8 if ($element_state->{type} eq 'xhtml') {
811     unless ($element_state->{has_div}) {
812     $self->{onerror}->(node => $item->{node},
813     type => 'element missing:div',
814     level => $self->{must_level});
815 wakaba 1.1 }
816 wakaba 1.11 } elsif ($element_state->{type} eq 'html') {
817     ## TODO: SHOULD be suitable for handling as HTML [HTML4]
818     # markup MUST be escaped
819 wakaba 1.13 $self->{onsubdoc}->({s => $element_state->{value},
820 wakaba 1.11 container_node => $item->{node},
821     media_type => 'text/html',
822     inner_html_element => 'div',
823     is_char_string => 1});
824 wakaba 1.8 } elsif ($element_state->{type} eq 'xml') {
825 wakaba 1.3 ## NOTE: SHOULD be suitable for handling as $value.
826 wakaba 1.1 ## If no @src, this would normally mean it contains a
827     ## single child element that would serve as the root element.
828 wakaba 1.8 $self->{onerror}->(node => $item->{node},
829     level => $self->{unsupported_level},
830     type => 'atom|content not supported',
831     value => $item->{node}->get_attribute_ns
832     (undef, 'type'));
833     } elsif ($element_state->{type} eq 'text' or
834     $element_state->{type} eq 'mime-text') {
835     #
836 wakaba 1.1 } else {
837     ## TODO: $s = valid Base64ed [RFC 3548] where
838     ## MAY leading and following "white space" (what?)
839     ## and lines separated by a single U+000A
840 wakaba 1.3
841     ## NOTE: SHOULD be suitable for the indicated media type.
842 wakaba 1.8 $self->{onerror}->(node => $item->{node},
843     level => $self->{unsupported_level},
844     type => 'atom|content not supported',
845     value => $item->{node}->get_attribute_ns
846     (undef, 'type'));
847     }
848 wakaba 1.1
849 wakaba 1.8 $AtomChecker{check_end}->(@_);
850 wakaba 1.1 },
851     };
852 wakaba 1.6 ## TODO: Tests for <html:nest/> in <atom:content/>
853 wakaba 1.1
854 wakaba 1.8 $Element->{$ATOM_NS}->{author} = \%AtomPersonConstruct;
855 wakaba 1.1
856     $Element->{$ATOM_NS}->{category} = {
857 wakaba 1.8 %AtomChecker,
858     check_attrs => $GetAtomAttrsChecker->({
859 wakaba 1.1 label => sub { 1 }, # no value constraint
860 wakaba 1.2 scheme => sub { # NOTE: No MUST.
861     my ($self, $attr) = @_;
862     ## NOTE: There MUST NOT be any white space.
863     Whatpm::URIChecker->check_iri ($attr->value, sub {
864     my %opt = @_;
865     $self->{onerror}->(node => $attr, level => $opt{level},
866     type => 'URI::'.$opt{type}.
867     (defined $opt{position} ? ':'.$opt{position} : ''));
868     });
869     },
870 wakaba 1.8 term => sub {
871     my ($self, $attr, $item, $element_state) = @_;
872    
873     ## NOTE: No value constraint.
874    
875     $element_state->{has_term} = 1;
876     },
877     }, {
878     label => FEATURE_RFC4287,
879     scheme => FEATURE_RFC4287,
880     term => FEATURE_RFC4287,
881 wakaba 1.1 }),
882 wakaba 1.8 check_end => sub {
883     my ($self, $item, $element_state) = @_;
884     unless ($element_state->{has_term}) {
885     $self->{onerror}->(node => $item->{node},
886 wakaba 1.1 type => 'attribute missing:term');
887     }
888    
889 wakaba 1.8 $AtomChecker{check_end}->(@_);
890 wakaba 1.1 },
891 wakaba 1.8 ## NOTE: Meaning of content is not defined.
892 wakaba 1.1 };
893    
894 wakaba 1.8 $Element->{$ATOM_NS}->{contributor} = \%AtomPersonConstruct;
895 wakaba 1.6
896     ## TODO: Anything below does not support <html:nest/> yet.
897 wakaba 1.1
898     $Element->{$ATOM_NS}->{generator} = {
899 wakaba 1.8 %AtomChecker,
900     check_attrs => $GetAtomAttrsChecker->({
901 wakaba 1.2 uri => sub { # MUST
902     my ($self, $attr) = @_;
903     ## NOTE: There MUST NOT be any white space.
904     Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
905     my %opt = @_;
906     $self->{onerror}->(node => $attr, level => $opt{level},
907     type => 'URI::'.$opt{type}.
908     (defined $opt{position} ? ':'.$opt{position} : ''));
909     });
910     ## NOTE: Dereferencing SHOULD produce a representation
911     ## that is relevant to the agent.
912     },
913 wakaba 1.1 version => sub { 1 }, # no value constraint
914 wakaba 1.8 }, {
915     uri => FEATURE_RFC4287,
916     version => FEATURE_RFC4287,
917 wakaba 1.1 }),
918    
919 wakaba 1.8 ## NOTE: Elements are not explicitly disallowed.
920 wakaba 1.1
921 wakaba 1.8 ## NOTE: Content MUST be a string that is a human-readable name for
922     ## the generating agent.
923 wakaba 1.1 };
924    
925     $Element->{$ATOM_NS}->{icon} = {
926 wakaba 1.8 %AtomChecker,
927     check_start => sub {
928     my ($self, $item, $element_state) = @_;
929     $element_state->{value} = '';
930     },
931     ## NOTE: Elements are not explicitly disallowed.
932     check_child_text => sub {
933     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
934     $element_state->{value} .= $child_node->data;
935     },
936     check_end => sub {
937     my ($self, $item, $element_state) = @_;
938 wakaba 1.1
939 wakaba 1.2 ## NOTE: No MUST.
940     ## NOTE: There MUST NOT be any white space.
941 wakaba 1.8 Whatpm::URIChecker->check_iri_reference ($element_state->{value}, sub {
942 wakaba 1.2 my %opt = @_;
943 wakaba 1.8 $self->{onerror}->(node => $item->{node}, level => $opt{level},
944 wakaba 1.2 type => 'URI::'.$opt{type}.
945     (defined $opt{position} ? ':'.$opt{position} : ''));
946     });
947    
948 wakaba 1.1 ## NOTE: Image SHOULD be 1:1 and SHOULD be small
949    
950 wakaba 1.8 $AtomChecker{check_end}->(@_);
951 wakaba 1.1 },
952     };
953    
954     $Element->{$ATOM_NS}->{id} = {
955 wakaba 1.8 %AtomChecker,
956     check_start => sub {
957     my ($self, $item, $element_state) = @_;
958     $element_state->{value} = '';
959     },
960     ## NOTE: Elements are not explicitly disallowed.
961     check_child_text => sub {
962     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
963     $element_state->{value} .= $child_node->data;
964     },
965     check_end => sub {
966     my ($self, $item, $element_state) = @_;
967 wakaba 1.2
968     ## NOTE: There MUST NOT be any white space.
969 wakaba 1.8 Whatpm::URIChecker->check_iri ($element_state->{value}, sub {
970 wakaba 1.2 my %opt = @_;
971 wakaba 1.8 $self->{onerror}->(node => $item->{node}, level => $opt{level},
972 wakaba 1.2 type => 'URI::'.$opt{type}.
973     (defined $opt{position} ? ':'.$opt{position} : ''));
974     });
975     ## TODO: SHOULD be normalized
976    
977 wakaba 1.8 $AtomChecker{check_end}->(@_);
978 wakaba 1.2 },
979     };
980    
981 wakaba 1.15 my $AtomIMTAttrChecker = sub {
982 wakaba 1.3 my ($self, $attr) = @_;
983     my $value = $attr->value;
984     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
985     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
986     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
987     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
988     my @type = ($1, $2);
989     my $param = $3;
990     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
991     if (defined $2) {
992     push @type, $1 => $2;
993     } else {
994     my $n = $1;
995     my $v = $2;
996     $v =~ s/\\(.)/$1/gs;
997     push @type, $n => $v;
998     }
999     }
1000     require Whatpm::IMTChecker;
1001     Whatpm::IMTChecker->check_imt (sub {
1002     my %opt = @_;
1003     $self->{onerror}->(node => $attr, level => $opt{level},
1004     type => 'IMT:'.$opt{type});
1005     }, @type);
1006     } else {
1007     $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
1008     }
1009 wakaba 1.15 }; # $AtomIMTAttrChecker
1010    
1011     my $AtomIRIReferenceAttrChecker = sub {
1012     my ($self, $attr) = @_;
1013     ## NOTE: There MUST NOT be any white space.
1014     Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
1015     my %opt = @_;
1016     $self->{onerror}->(node => $attr, level => $opt{level},
1017     type => 'URI::'.$opt{type}.
1018     (defined $opt{position} ? ':'.$opt{position} : ''));
1019     });
1020     }; # $AtomIRIReferenceAttrChecker
1021    
1022     $Element->{$ATOM_NS}->{link} = {
1023     %AtomChecker,
1024     check_attrs => $GetAtomAttrsChecker->({
1025     href => $AtomIRIReferenceAttrChecker,
1026     hreflang => $AtomLanguageTagAttrChecker,
1027     length => sub { }, # No MUST; in octets.
1028     rel => sub { # MUST
1029     my ($self, $attr) = @_;
1030     my $value = $attr->value;
1031     if ($value =~ /\A(?>[0-9A-Za-z._~!\$&'()*+,;=\x{A0}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFEF}\x{10000}-\x{1FFFD}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}\x{40000}-\x{4FFFD}\x{50000}-\x{5FFFD}\x{60000}-\x{6FFFD}\x{70000}-\x{7FFFD}\x{80000}-\x{8FFFD}\x{90000}-\x{9FFFD}\x{A0000}-\x{AFFFD}\x{B0000}-\x{BFFFD}\x{C0000}-\x{CFFFD}\x{D0000}-\x{DFFFD}\x{E1000}-\x{EFFFD}-]|%[0-9A-Fa-f][0-9A-Fa-f]|\@)+\z/) {
1032     $value = $LINK_REL . $value;
1033     }
1034    
1035     ## NOTE: There MUST NOT be any white space.
1036     Whatpm::URIChecker->check_iri ($value, sub {
1037     my %opt = @_;
1038     $self->{onerror}->(node => $attr, level => $opt{level},
1039     type => 'URI::'.$opt{type}.
1040     (defined $opt{position} ? ':'.$opt{position} : ''));
1041     });
1042    
1043     ## TODO: Warn if unregistered
1044 wakaba 1.16
1045     ## TODO: rel=license [RFC 4946]
1046     ## MUST NOT multiple rel=license with same href="",type="" pairs
1047     ## href="" SHOULD be dereferencable
1048     ## title="" SHOULD be there if multiple rel=license
1049     ## MUST NOT "unspecified" and other rel=license
1050 wakaba 1.3 },
1051 wakaba 1.15 title => sub { }, # No MUST
1052     type => $AtomIMTAttrChecker,
1053     ## NOTE: MUST be a MIME media type. What is "MIME media type"?
1054 wakaba 1.8 }, {
1055     href => FEATURE_RFC4287,
1056     hreflang => FEATURE_RFC4287,
1057     length => FEATURE_RFC4287,
1058     rel => FEATURE_RFC4287,
1059     title => FEATURE_RFC4287,
1060     type => FEATURE_RFC4287,
1061 wakaba 1.16
1062     ## TODO: thr:count
1063     ## TODO: thr:updated
1064 wakaba 1.2 }),
1065 wakaba 1.8 check_start => sub {
1066     my ($self, $item, $element_state) = @_;
1067 wakaba 1.2
1068 wakaba 1.8 unless ($item->{node}->has_attribute_ns (undef, 'href')) { # MUST
1069     $self->{onerror}->(node => $item->{node},
1070 wakaba 1.2 type => 'attribute missing:href');
1071     }
1072    
1073 wakaba 1.8 if ($item->{node}->rel eq $LINK_REL . 'enclosure' and
1074     not $item->{node}->has_attribute_ns (undef, 'length')) {
1075     $self->{onerror}->(node => $item->{node}, level => 's',
1076 wakaba 1.2 type => 'attribute missing:length');
1077     }
1078     },
1079     };
1080    
1081     $Element->{$ATOM_NS}->{logo} = {
1082 wakaba 1.8 %AtomChecker,
1083     ## NOTE: Child elements are not explicitly disallowed
1084     check_start => sub {
1085     my ($self, $item, $element_state) = @_;
1086     $element_state->{value} = '';
1087     },
1088     ## NOTE: Elements are not explicitly disallowed.
1089     check_child_text => sub {
1090     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1091     $element_state->{value} .= $child_node->data;
1092     },
1093     check_end => sub {
1094     my ($self, $item, $element_state) = @_;
1095 wakaba 1.1
1096 wakaba 1.2 ## NOTE: There MUST NOT be any white space.
1097 wakaba 1.8 Whatpm::URIChecker->check_iri_reference ($element_state->{value}, sub {
1098 wakaba 1.2 my %opt = @_;
1099 wakaba 1.8 $self->{onerror}->(node => $item->{node}, level => $opt{level},
1100 wakaba 1.2 type => 'URI::'.$opt{type}.
1101     (defined $opt{position} ? ':'.$opt{position} : ''));
1102     });
1103    
1104     ## NOTE: Image SHOULD be 2:1
1105    
1106 wakaba 1.8 $AtomChecker{check_end}->(@_);
1107 wakaba 1.2 },
1108     };
1109    
1110 wakaba 1.8 $Element->{$ATOM_NS}->{published} = \%AtomDateConstruct;
1111 wakaba 1.2
1112 wakaba 1.8 $Element->{$ATOM_NS}->{rights} = \%AtomDateConstruct;
1113 wakaba 1.3 ## NOTE: SHOULD NOT be used to convey machine-readable information.
1114 wakaba 1.2
1115     $Element->{$ATOM_NS}->{source} = {
1116 wakaba 1.8 %AtomChecker,
1117     check_child_element => sub {
1118     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1119     $child_is_transparent, $element_state) = @_;
1120    
1121     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
1122     $self->{onerror}->(node => $child_el,
1123     type => 'element not allowed:minus',
1124     level => $self->{must_level});
1125     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1126     #
1127     } elsif ($child_nsuri eq $ATOM_NS) {
1128     my $not_allowed;
1129     if ($child_ln eq 'entry') {
1130     $element_state->{has_element}->{entry} = 1;
1131     } elsif ({
1132     generator => 1,
1133     icon => 1,
1134     id => 1,
1135     logo => 1,
1136     rights => 1,
1137     subtitle => 1,
1138     title => 1,
1139     updated => 1,
1140     }->{$child_ln}) {
1141     unless ($element_state->{has_element}->{$child_ln}) {
1142     $element_state->{has_element}->{$child_ln} = 1;
1143     $not_allowed = $element_state->{has_element}->{entry};
1144     } else {
1145     $not_allowed = 1;
1146     }
1147     } elsif ($child_ln eq 'link') {
1148     if ($child_ln->rel eq $LINK_REL . 'alternate') {
1149     my $type = $child_ln->get_attribute_ns (undef, 'type');
1150     $type = '' unless defined $type;
1151     my $hreflang = $child_ln->get_attribute_ns (undef, 'hreflang');
1152     $hreflang = '' unless defined $hreflang;
1153     my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
1154     (defined $hreflang ? ':'.$hreflang : '');
1155     unless ($element_state->{has_element}->{$key}) {
1156     $element_state->{has_element}->{$key} = 1;
1157 wakaba 1.2 } else {
1158     $not_allowed = 1;
1159     }
1160     }
1161 wakaba 1.8 $not_allowed ||= $element_state->{has_element}->{entry};
1162     } elsif ({
1163     category => 1,
1164     contributor => 1,
1165     }->{$child_ln}) {
1166     $not_allowed = $element_state->{has_element}->{entry};
1167 wakaba 1.9 } elsif ($child_ln eq 'author') {
1168     $not_allowed = $element_state->{has_element}->{entry};
1169     $item->{parent_state}->{has_author} = 1; # parent::atom:entry's flag
1170 wakaba 1.8 } else {
1171     $not_allowed = 1;
1172     }
1173     if ($not_allowed) {
1174     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1175 wakaba 1.2 }
1176 wakaba 1.8 } else {
1177     ## TODO: extension element
1178     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1179     }
1180     },
1181     check_child_text => sub {
1182     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1183     if ($has_significant) {
1184     $self->{onerror}->(node => $child_node, type => 'character not allowed',
1185     level => $self->{must_level});
1186 wakaba 1.2 }
1187 wakaba 1.1 },
1188     };
1189 wakaba 1.2
1190 wakaba 1.8 $Element->{$ATOM_NS}->{subtitle} = \%AtomTextConstruct;
1191 wakaba 1.2
1192 wakaba 1.8 $Element->{$ATOM_NS}->{summary} = \%AtomTextConstruct;
1193 wakaba 1.2
1194 wakaba 1.8 $Element->{$ATOM_NS}->{title} = \%AtomTextConstruct;
1195 wakaba 1.2
1196 wakaba 1.8 $Element->{$ATOM_NS}->{updated} = \%AtomDateConstruct;
1197 wakaba 1.2
1198     ## TODO: signature element
1199    
1200     ## TODO: simple extension element and structured extension element
1201 wakaba 1.1
1202 wakaba 1.15 ## -- Atom Threading 1.0 [RFC 4685]
1203    
1204     $Element->{$THR_NS}->{''} = {
1205     %AtomChecker,
1206     status => 0,
1207     };
1208    
1209     ## ISSUE: Strictly speaking, thr:* element/attribute,
1210     ## where * is an undefined local name, is not disallowed.
1211    
1212     $Element->{$THR_NS}->{'in-reply-to'} = {
1213     %AtomChecker,
1214     status => FEATURE_RFC4685,
1215     check_attrs => $GetAtomAttrsChecker->({
1216     href => $AtomIRIReferenceAttrChecker,
1217     ## TODO: fact-level.
1218     ## TODO: MUST be dereferencable.
1219     ref => sub {
1220     my ($self, $attr, $item, $element_state) = @_;
1221     $element_state->{has_ref} = 1;
1222    
1223     ## NOTE: Same as |atom:id|.
1224     ## NOTE: There MUST NOT be any white space.
1225     Whatpm::URIChecker->check_iri ($attr->value, sub {
1226     my %opt = @_;
1227     $self->{onerror}->(node => $attr, level => $opt{level},
1228     type => 'URI::'.$opt{type}.
1229     (defined $opt{position} ? ':'.$opt{position} : ''));
1230     });
1231    
1232     ## TODO: Check against ID guideline...
1233     },
1234     source => $AtomIRIReferenceAttrChecker,
1235     ## TODO: fact-level.
1236     ## TODO: MUST be dereferencable.
1237     type => $AtomIMTAttrChecker,
1238     ## TODO: fact-level.
1239     }, {
1240     href => FEATURE_RFC4685,
1241     source => FEATURE_RFC4685,
1242     ref => FEATURE_RFC4685,
1243     type => FEATURE_RFC4685,
1244     }),
1245     check_end => sub {
1246     my ($self, $item, $element_state) = @_;
1247    
1248     unless ($element_state->{has_ref}) {
1249     $self->{onerror}->(node => $item->{node},
1250     type => 'attribute missing:ref',
1251     level => $self->{must_level});
1252     }
1253    
1254     $AtomChecker{check_end}->(@_);
1255     },
1256     ## NOTE: Content model has no constraint.
1257     };
1258 wakaba 1.16
1259     $Element->{$THR_NS}->{total} = {
1260     %AtomChecker,
1261     check_start => sub {
1262     my ($self, $item, $element_state) = @_;
1263     $element_state->{value} = '';
1264     },
1265     check_child_element => sub {
1266     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1267     $child_is_transparent, $element_state) = @_;
1268    
1269     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
1270     $self->{onerror}->(node => $child_el,
1271     type => 'element not allowed:minus',
1272     level => $self->{must_level});
1273     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1274     #
1275     } else {
1276     $self->{onerror}->(node => $child_el,
1277     type => 'element not allowed',
1278     level => $self->{must_level});
1279     }
1280     },
1281     check_child_text => sub {
1282     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1283     $element_state->{value} .= $child_node->data;
1284     },
1285     check_end => sub {
1286     my ($self, $item, $element_state) = @_;
1287    
1288     ## NOTE: xsd:nonNegativeInteger
1289     unless ($element_state->{value} =~ /\A(?>[0-9]+|-0+)\z/) {
1290     $self->{onerror}->(node => $item->{node},
1291     type => 'syntax error', ## TODO:
1292     level => $self->{must_level});
1293     }
1294    
1295     $AtomChecker{check_end}->(@_);
1296     },
1297     };
1298    
1299     ## TODO: fh:complete
1300    
1301     ## TODO: fh:archive
1302    
1303     ## TODO: Check as archive document, page feed document, ...
1304    
1305     ## TODO: APP [RFC 5023]
1306 wakaba 1.15
1307 wakaba 1.1 $Whatpm::ContentChecker::Namespace->{$ATOM_NS}->{loaded} = 1;
1308 wakaba 1.15 $Whatpm::ContentChecker::Namespace->{$THR_NS}->{loaded} = 1;
1309 wakaba 1.1
1310     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24