/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/Atom.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/Atom.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.15 - (hide annotations) (download)
Thu Mar 20 09:38:47 2008 UTC (16 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.14: +107 -35 lines
++ whatpm/t/ChangeLog	20 Mar 2008 09:30:57 -0000
	* ContentChecker.t: |content-model-atom-threading-1.dat|
	added.

	* content-model-atom-threading-1.dat: New test file.

2008-03-20  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ContentChecker/ChangeLog	20 Mar 2008 09:31:15 -0000
	* Atom.pm: Support for |thr:in-reply-to| element.

2008-03-20  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5 wakaba 1.2 require Whatpm::URIChecker;
6    
7 wakaba 1.1 my $ATOM_NS = q<http://www.w3.org/2005/Atom>;
8 wakaba 1.15 my $THR_NS = q<http://purl.org/syndication/thread/1.0>;
9 wakaba 1.3 my $LINK_REL = q<http://www.iana.org/assignments/relation/>;
10 wakaba 1.1
11 wakaba 1.8 sub FEATURE_RFC4287 () {
12     Whatpm::ContentChecker::FEATURE_STATUS_CR |
13     Whatpm::ContentChecker::FEATURE_ALLOWED
14     }
15    
16 wakaba 1.15 sub FEATURE_RFC4685 () {
17     Whatpm::ContentChecker::FEATURE_STATUS_CR |
18     Whatpm::ContentChecker::FEATURE_ALLOWED
19     }
20    
21 wakaba 1.1 ## MUST be well-formed XML (RFC 4287 references XML 1.0 REC 20040204)
22    
23     ## NOTE: Commants and PIs are not explicitly allowed.
24    
25     our $AttrChecker;
26    
27     ## Any element MAY have xml:base, xml:lang
28     my $GetAtomAttrsChecker = sub {
29     my $element_specific_checker = shift;
30 wakaba 1.8 my $element_specific_status = shift;
31 wakaba 1.1 return sub {
32 wakaba 1.8 my ($self, $todo, $element_state) = @_;
33 wakaba 1.1 for my $attr (@{$todo->{node}->attributes}) {
34     my $attr_ns = $attr->namespace_uri;
35     $attr_ns = '' unless defined $attr_ns;
36     my $attr_ln = $attr->manakai_local_name;
37     my $checker;
38     if ($attr_ns eq '') {
39     $checker = $element_specific_checker->{$attr_ln};
40     } else {
41     $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
42     || $AttrChecker->{$attr_ns}->{''};
43     }
44     if ($checker) {
45 wakaba 1.8 $checker->($self, $attr, $todo, $element_state);
46     } elsif ($attr_ln eq '') {
47     #
48 wakaba 1.1 } else {
49     $self->{onerror}->(node => $attr, level => 'unsupported',
50     type => 'attribute');
51     ## ISSUE: No comformance createria for unknown attributes in the spec
52     }
53 wakaba 1.8
54     if ($attr_ns eq '') {
55     $self->_attr_status_info ($attr, $element_specific_status->{$attr_ln});
56     }
57     ## TODO: global attribute
58 wakaba 1.1 }
59     };
60     }; # $GetAtomAttrsChecker
61    
62 wakaba 1.4 my $AtomLanguageTagAttrChecker = sub {
63     ## NOTE: See also $HTMLLanguageTagAttrChecker in HTML.pm.
64    
65     my ($self, $attr) = @_;
66     my $value = $attr->value;
67     require Whatpm::LangTag;
68     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
69     my %opt = @_;
70     my $type = 'LangTag:'.$opt{type};
71     $type .= ':' . $opt{subtag} if defined $opt{subtag};
72     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
73     level => $opt{level});
74     });
75     ## ISSUE: RFC 4646 (3066bis)?
76     }; # $AtomLanguageTagAttrChecker
77    
78 wakaba 1.8 my %AtomChecker = (
79     %Whatpm::ContentChecker::AnyChecker,
80     status => FEATURE_RFC4287,
81     check_attrs => $GetAtomAttrsChecker->({}, {}),
82     );
83    
84     my %AtomTextConstruct = (
85     %AtomChecker,
86     check_start => sub {
87     my ($self, $item, $element_state) = @_;
88     $element_state->{type} = 'text';
89     $element_state->{value} = '';
90     },
91     check_attrs => $GetAtomAttrsChecker->({
92     type => sub {
93     my ($self, $attr, $item, $element_state) = @_;
94     my $value = $attr->value;
95     if ($value eq 'text' or $value eq 'html' or $value eq 'xhtml') { # MUST
96     $element_state->{type} = $value;
97 wakaba 1.1 } else {
98 wakaba 1.8 ## NOTE: IMT MUST NOT be used here.
99 wakaba 1.1 $self->{onerror}->(node => $attr, type => 'keyword:invalid');
100     }
101 wakaba 1.8 }, # checked in |checker|
102     }, {
103     type => FEATURE_RFC4287,
104     }),
105     check_child_element => sub {
106     my ($self, $item, $child_el, $child_nsuri, $child_ln,
107     $child_is_transparent, $element_state) = @_;
108     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
109     $self->{onerror}->(node => $child_el,
110     type => 'element not allowed:minus',
111     level => $self->{must_level});
112     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
113     #
114     } else {
115     if ($element_state->{type} eq 'text' or
116     $element_state->{type} eq 'html') { # MUST NOT
117     $self->{onerror}->(node => $child_el,
118     type => 'element not allowed:atom|TextConstruct',
119     level => $self->{must_level});
120     } elsif ($element_state->{type} eq 'xhtml') {
121     if ($child_nsuri eq q<http://www.w3.org/1999/xhtml> and
122     $child_ln eq 'div') { # MUST
123     if ($element_state->{has_div}) {
124     $self->{onerror}
125     ->(node => $child_el,
126     type => 'element not allowed:atom|TextConstruct',
127     level => $self->{must_level});
128     } else {
129     $element_state->{has_div} = 1;
130 wakaba 1.1 ## TODO: SHOULD be suitable for handling as HTML [XHTML10]
131     }
132 wakaba 1.8 } else {
133     $self->{onerror}->(node => $child_el,
134     type => 'element not allowed:atom|TextConstruct',
135     level => $self->{must_level});
136 wakaba 1.1 }
137 wakaba 1.8 } else {
138     die "atom:TextConstruct type error: $element_state->{type}";
139 wakaba 1.1 }
140 wakaba 1.8 }
141     },
142     check_child_text => sub {
143     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
144     if ($element_state->{type} eq 'text') {
145     #
146     } elsif ($element_state->{type} eq 'html') {
147     $element_state->{value} .= $child_node->text_content;
148     ## NOTE: Markup MUST be escaped.
149     } elsif ($element_state->{type} eq 'xhtml') {
150     if ($has_significant) {
151     $self->{onerror}->(node => $child_node,
152     type => 'character not allowed:atom|TextConstruct',
153     level => $self->{must_level});
154 wakaba 1.1 }
155 wakaba 1.8 } else {
156     die "atom:TextConstruct type error: $element_state->{type}";
157 wakaba 1.1 }
158     },
159 wakaba 1.8 check_end => sub {
160     my ($self, $item, $element_state) = @_;
161 wakaba 1.13 if ($element_state->{type} eq 'xhtml') {
162     unless ($element_state->{has_div}) {
163     $self->{onerror}->(node => $item->{node},
164     type => 'element missing:div',
165     level => $self->{must_level});
166     }
167     } elsif ($element_state->{type} eq 'html') {
168     ## TODO: SHOULD be suitable for handling as HTML [HTML4]
169     # markup MUST be escaped
170     $self->{onsubdoc}->({s => $element_state->{value},
171     container_node => $item->{node},
172     media_type => 'text/html',
173     inner_html_element => 'div',
174     is_char_string => 1});
175 wakaba 1.8 }
176 wakaba 1.1
177 wakaba 1.8 $AtomChecker{check_end}->(@_);
178     },
179     ); # %AtomTextConstruct
180 wakaba 1.1
181 wakaba 1.8 my %AtomPersonConstruct = (
182     %AtomChecker,
183     check_child_element => sub {
184     my ($self, $item, $child_el, $child_nsuri, $child_ln,
185     $child_is_transparent, $element_state) = @_;
186     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
187     $self->{onerror}->(node => $child_el,
188     type => 'element not allowed:minus',
189     level => $self->{must_level});
190     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
191     #
192     } elsif ($child_nsuri eq $ATOM_NS) {
193     if ($child_ln eq 'name') {
194     if ($element_state->{has_name}) {
195     $self->{onerror}
196     ->(node => $child_el,
197     type => 'element not allowed:atom|PersonConstruct',
198     level => $self->{must_level});
199     } else {
200     $element_state->{has_name} = 1;
201     }
202     } elsif ($child_ln eq 'uri') {
203     if ($element_state->{has_uri}) {
204     $self->{onerror}
205     ->(node => $child_el,
206     type => 'element not allowed:atom|PersonConstruct',
207     level => $self->{must_level});
208 wakaba 1.1 } else {
209 wakaba 1.8 $element_state->{has_uri} = 1;
210 wakaba 1.1 }
211 wakaba 1.8 } elsif ($child_ln eq 'email') {
212     if ($element_state->{has_email}) {
213     $self->{onerror}
214     ->(node => $child_el,
215     type => 'element not allowed:atom|PersonConstruct',
216     level => $self->{must_level});
217     } else {
218     $element_state->{has_email} = 1;
219 wakaba 1.7 }
220 wakaba 1.8 } else {
221     $self->{onerror}
222     ->(node => $child_el,
223     type => 'element not allowed:atom|PersonConstruct',
224     level => $self->{must_level});
225 wakaba 1.1 }
226 wakaba 1.8 } else {
227     $self->{onerror}
228     ->(node => $child_el,
229     type => 'element not allowed:atom|PersonConstruct',
230     level => $self->{must_level});
231 wakaba 1.1 }
232 wakaba 1.8 ## TODO: extension element
233     },
234     check_child_text => sub {
235     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
236     if ($has_significant) {
237     $self->{onerror}->(node => $child_node,
238     type => 'character not allowed:atom|PersonConstruct',
239     level => $self->{must_level});
240     }
241     },
242     check_end => sub {
243     my ($self, $item, $element_state) = @_;
244 wakaba 1.1
245 wakaba 1.8 unless ($element_state->{has_name}) {
246     $self->{onerror}->(node => $item->{node},
247     type => 'element missing:atom|name',
248     level => $self->{must_level});
249 wakaba 1.1 }
250    
251 wakaba 1.8 $AtomChecker{check_end}->(@_);
252 wakaba 1.1 },
253 wakaba 1.8 ); # %AtomPersonConstruct
254 wakaba 1.1
255 wakaba 1.2 our $Element;
256    
257 wakaba 1.8 $Element->{$ATOM_NS}->{''} = {
258     %AtomChecker,
259     status => 0,
260     };
261    
262 wakaba 1.2 $Element->{$ATOM_NS}->{name} = {
263 wakaba 1.8 %AtomChecker,
264    
265 wakaba 1.2 ## NOTE: Strictly speaking, structure and semantics for atom:name
266     ## element outside of Person construct is not defined.
267    
268 wakaba 1.8 ## NOTE: No constraint.
269 wakaba 1.2 };
270    
271     $Element->{$ATOM_NS}->{uri} = {
272 wakaba 1.8 %AtomChecker,
273    
274 wakaba 1.2 ## NOTE: Strictly speaking, structure and semantics for atom:uri
275     ## element outside of Person construct is not defined.
276 wakaba 1.8
277     ## NOTE: Elements are not explicitly disallowed.
278    
279     check_start => sub {
280     my ($self, $item, $element_state) = @_;
281     $element_state->{value} = '';
282     },
283     check_child_text => sub {
284     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
285     $element_state->{value} .= $child_node->data;
286     },
287     check_end => sub {
288     my ($self, $item, $element_state) = @_;
289 wakaba 1.2
290     ## NOTE: There MUST NOT be any white space.
291 wakaba 1.8 Whatpm::URIChecker->check_iri_reference ($element_state->{value}, sub {
292 wakaba 1.2 my %opt = @_;
293 wakaba 1.8 $self->{onerror}->(node => $item->{node}, level => $opt{level},
294 wakaba 1.2 type => 'URI::'.$opt{type}.
295     (defined $opt{position} ? ':'.$opt{position} : ''));
296     });
297    
298 wakaba 1.8 $AtomChecker{check_end}->(@_);
299 wakaba 1.2 },
300     };
301    
302     $Element->{$ATOM_NS}->{email} = {
303 wakaba 1.8 %AtomChecker,
304    
305 wakaba 1.2 ## NOTE: Strictly speaking, structure and semantics for atom:email
306     ## element outside of Person construct is not defined.
307 wakaba 1.8
308     ## NOTE: Elements are not explicitly disallowed.
309    
310     check_end => sub {
311     my ($self, $item, $element_state) = @_;
312 wakaba 1.2
313     ## TODO: addr-spec
314 wakaba 1.8 $self->{onerror}->(node => $item->{node},
315     type => 'addr-spec not supported',
316     level => $self->{unsupported_level});
317 wakaba 1.2
318 wakaba 1.8 $AtomChecker{check_end}->(@_);
319 wakaba 1.2 },
320     };
321    
322 wakaba 1.1 ## MUST NOT be any white space
323 wakaba 1.8 my %AtomDateConstruct = (
324     %AtomChecker,
325    
326     ## NOTE: It does not explicitly say that there MUST NOT be any element.
327    
328     check_start => sub {
329     my ($self, $item, $element_state) = @_;
330     $element_state->{value} = '';
331     },
332     check_child_text => sub {
333     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
334     $element_state->{value} .= $child_node->data;
335     },
336     check_end => sub {
337     my ($self, $item, $element_state) = @_;
338 wakaba 1.1
339 wakaba 1.4 ## MUST: RFC 3339 |date-time| with uppercase |T| and |Z|
340 wakaba 1.8 if ($element_state->{value} =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})(?>\.[0-9]+)?(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
341 wakaba 1.4 my ($y, $M, $d, $h, $m, $s, $zh, $zm)
342     = ($1, $2, $3, $4, $5, $6, $7, $8);
343 wakaba 1.8 my $node = $item->{node};
344 wakaba 1.4
345     ## Check additional constraints described or referenced in
346     ## comments of ABNF rules for |date-time|.
347     my $level = $self->{must_level};
348     if (0 < $M and $M < 13) {
349     $self->{onerror}->(node => $node, type => 'datetime:bad day',
350     level => $level)
351     if $d < 1 or
352     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
353     $self->{onerror}->(node => $node, type => 'datetime:bad day',
354     level => $level)
355     if $M == 2 and $d == 29 and
356     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
357     } else {
358     $self->{onerror}->(node => $node, type => 'datetime:bad month',
359     level => $level);
360     }
361     $self->{onerror}->(node => $node, type => 'datetime:bad hour',
362     level => $level) if $h > 23;
363     $self->{onerror}->(node => $node, type => 'datetime:bad minute',
364     level => $level) if $m > 59;
365     $self->{onerror}->(node => $node, type => 'datetime:bad second',
366     level => $level)
367     if $s > 60; ## NOTE: Validness of leap seconds are not checked.
368     $self->{onerror}->(node => $node, type => 'datetime:bad timezone hour',
369     level => $level) if $zh > 23;
370     $self->{onerror}->(node => $node, type => 'datetime:bad timezone minute',
371     level => $level) if $zm > 59;
372     } else {
373 wakaba 1.8 $self->{onerror}->(node => $item->{node},
374 wakaba 1.4 type => 'datetime:syntax error',
375     level => $self->{must_level});
376     }
377     ## NOTE: SHOULD be accurate as possible (cannot be checked)
378 wakaba 1.1
379 wakaba 1.8 $AtomChecker{check_end}->(@_);
380 wakaba 1.1 },
381 wakaba 1.8 ); # %AtomDateConstruct
382 wakaba 1.1
383 wakaba 1.2 $Element->{$ATOM_NS}->{entry} = {
384 wakaba 1.8 %AtomChecker,
385 wakaba 1.1 is_root => 1,
386 wakaba 1.8 check_child_element => sub {
387     my ($self, $item, $child_el, $child_nsuri, $child_ln,
388     $child_is_transparent, $element_state) = @_;
389    
390     ## NOTE: metadata elements, followed by atom:entry* (no explicit MAY)
391    
392     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
393     $self->{onerror}->(node => $child_el,
394     type => 'element not allowed:minus',
395     level => $self->{must_level});
396     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
397     #
398     } elsif ($child_nsuri eq $ATOM_NS) {
399     my $not_allowed;
400     if ({ # MUST (0, 1)
401     content => 1,
402     id => 1,
403     published => 1,
404     rights => 1,
405     source => 1,
406     summary => 1,
407     title => 1,
408     updated => 1,
409     }->{$child_ln}) {
410     unless ($element_state->{has_element}->{$child_ln}) {
411     $element_state->{has_element}->{$child_ln} = 1;
412     $not_allowed = $element_state->{has_element}->{entry};
413     } else {
414     $not_allowed = 1;
415     }
416     } elsif ($child_ln eq 'link') { # MAY
417     if ($child_el->rel eq $LINK_REL . 'alternate') {
418     my $type = $child_el->get_attribute_ns (undef, 'type');
419     $type = '' unless defined $type;
420     my $hreflang = $child_el->get_attribute_ns (undef, 'hreflang');
421     $hreflang = '' unless defined $hreflang;
422     my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
423     (defined $hreflang ? ':'.$hreflang : '');
424     unless ($element_state->{has_element}->{$key}) {
425     $element_state->{has_element}->{$key} = 1;
426     $element_state->{has_element}->{'link.alternate'} = 1;
427 wakaba 1.1 } else {
428     $not_allowed = 1;
429     }
430     }
431 wakaba 1.8
432     ## NOTE: MAY
433     $not_allowed ||= $element_state->{has_element}->{entry};
434     } elsif ({ # MAY
435     category => 1,
436     contributor => 1,
437     }->{$child_ln}) {
438     $not_allowed = $element_state->{has_element}->{entry};
439 wakaba 1.9 } elsif ($child_ln eq 'author') { # MAY
440     $not_allowed = $element_state->{has_element}->{entry};
441 wakaba 1.12 $element_state->{has_author} = 1; # ./author | ./source/author
442     $element_state->{has_element}->{$child_ln} = 1; # ./author
443 wakaba 1.8 } else {
444     $not_allowed = 1;
445     }
446     if ($not_allowed) {
447     $self->{onerror}->(node => $child_el, type => 'element not allowed');
448 wakaba 1.1 }
449 wakaba 1.15 } elsif ($child_nsuri eq $THR_NS and $child_ln eq 'in-reply-to') {
450     ## ISSUE: Where |thr:in-reply-to| is allowed is not explicit;y
451     ## defined in RFC 4685.
452     #
453 wakaba 1.8 } else {
454     ## TODO: extension element
455     $self->{onerror}->(node => $child_el, type => 'element not allowed');
456     }
457     },
458     check_child_text => sub {
459     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
460     if ($has_significant) {
461     $self->{onerror}->(node => $child_node, type => 'character not allowed',
462     level => $self->{must_level});
463 wakaba 1.1 }
464 wakaba 1.8 },
465     check_end => sub {
466     my ($self, $item, $element_state) = @_;
467 wakaba 1.1
468 wakaba 1.9 if ($element_state->{has_author}) {
469     ## NOTE: There is either a child atom:author element
470     ## or a child atom:source element which contains an atom:author
471     ## child element.
472     #
473     } else {
474     A: {
475     my $root = $item->{node}->owner_document->document_element;
476     if ($root and $root->manakai_local_name eq 'feed') {
477     my $nsuri = $root->namespace_uri;
478     if (defined $nsuri and $nsuri eq $ATOM_NS) {
479     ## NOTE: An Atom Feed Document.
480     for my $root_child (@{$root->child_nodes}) {
481     ## NOTE: Entity references are not supported.
482     next unless $root_child->node_type == 1; # ELEMENT_NODE
483     next unless $root_child->manakai_local_name eq 'author';
484     my $root_child_nsuri = $root_child->namespace_uri;
485     next unless defined $root_child_nsuri;
486     next unless $root_child_nsuri eq $ATOM_NS;
487     last A;
488     }
489     }
490     }
491    
492     $self->{onerror}->(node => $item->{node},
493     type => 'element missing:atom|author',
494     level => $self->{must_level});
495     } # A
496     }
497    
498 wakaba 1.12 unless ($element_state->{has_element}->{author}) {
499     $item->{parent_state}->{has_no_author_entry} = 1; # for atom:feed's check
500     }
501    
502 wakaba 1.1 ## TODO: If entry's with same id, then updated SHOULD be different
503    
504 wakaba 1.8 unless ($element_state->{has_element}->{id}) { # MUST
505     $self->{onerror}->(node => $item->{node},
506     type => 'element missing:atom|id');
507     }
508     unless ($element_state->{has_element}->{title}) { # MUST
509     $self->{onerror}->(node => $item->{node},
510     type => 'element missing:atom|title');
511     }
512     unless ($element_state->{has_element}->{updated}) { # MUST
513     $self->{onerror}->(node => $item->{node},
514     type => 'element missing:atom|updated');
515     }
516     if (not $element_state->{has_element}->{content} and
517     not $element_state->{has_element}->{'link.alternate'}) {
518     $self->{onerror}->(node => $item->{node},
519     type => 'element missing:atom|link|alternate');
520 wakaba 1.1 }
521 wakaba 1.14
522     if ($element_state->{require_summary} and
523     not $element_state->{has_element}->{summary}) {
524     $self->{onerror}->(node => $item->{node},
525     type => 'element missing:atom|summary',
526     level => $self->{must_level});
527     }
528 wakaba 1.1 },
529     };
530    
531     $Element->{$ATOM_NS}->{feed} = {
532 wakaba 1.8 %AtomChecker,
533 wakaba 1.1 is_root => 1,
534 wakaba 1.8 check_child_element => sub {
535     my ($self, $item, $child_el, $child_nsuri, $child_ln,
536     $child_is_transparent, $element_state) = @_;
537    
538     ## NOTE: metadata elements, followed by atom:entry* (no explicit MAY)
539    
540     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
541     $self->{onerror}->(node => $child_el,
542     type => 'element not allowed:minus',
543     level => $self->{must_level});
544     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
545     #
546     } elsif ($child_nsuri eq $ATOM_NS) {
547     my $not_allowed;
548     if ($child_ln eq 'entry') {
549     $element_state->{has_element}->{entry} = 1;
550     } elsif ({ # MUST (0, 1)
551     generator => 1,
552     icon => 1,
553     id => 1,
554     logo => 1,
555     rights => 1,
556     subtitle => 1,
557     title => 1,
558     updated => 1,
559     }->{$child_ln}) {
560     unless ($element_state->{has_element}->{$child_ln}) {
561     $element_state->{has_element}->{$child_ln} = 1;
562     $not_allowed = $element_state->{has_element}->{entry};
563     } else {
564     $not_allowed = 1;
565     }
566     } elsif ($child_ln eq 'link') {
567     my $rel = $child_el->rel;
568     if ($rel eq $LINK_REL . 'alternate') {
569     my $type = $child_el->get_attribute_ns (undef, 'type');
570     $type = '' unless defined $type;
571     my $hreflang = $child_el->get_attribute_ns (undef, 'hreflang');
572     $hreflang = '' unless defined $hreflang;
573     my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
574     (defined $hreflang ? ':'.$hreflang : '');
575     unless ($element_state->{has_element}->{$key}) {
576     $element_state->{has_element}->{$key} = 1;
577 wakaba 1.1 } else {
578     $not_allowed = 1;
579     }
580 wakaba 1.8 } elsif ($rel eq $LINK_REL . 'self') {
581     $element_state->{has_element}->{'link.self'} = 1;
582 wakaba 1.1 }
583 wakaba 1.8
584     ## NOTE: MAY
585     $not_allowed = $element_state->{has_element}->{entry};
586     } elsif ({ # MAY
587     category => 1,
588     contributor => 1,
589     }->{$child_ln}) {
590     $not_allowed = $element_state->{has_element}->{entry};
591 wakaba 1.12 } elsif ($child_ln eq 'author') { # MAY
592     $not_allowed = $element_state->{has_element}->{entry};
593     $element_state->{has_element}->{author} = 1;
594 wakaba 1.8 } else {
595     $not_allowed = 1;
596 wakaba 1.1 }
597 wakaba 1.8 $self->{onerror}->(node => $child_el, type => 'element not allowed')
598     if $not_allowed;
599     } else {
600     ## TODO: extension element
601     $self->{onerror}->(node => $child_el, type => 'element not allowed');
602 wakaba 1.1 }
603 wakaba 1.8 },
604     check_child_text => sub {
605     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
606     if ($has_significant) {
607     $self->{onerror}->(node => $child_node, type => 'character not allowed',
608     level => $self->{must_level});
609     }
610     },
611     check_end => sub {
612     my ($self, $item, $element_state) = @_;
613 wakaba 1.10
614 wakaba 1.12 if ($element_state->{has_no_author_entry} and
615     not $element_state->{has_element}->{author}) {
616 wakaba 1.10 $self->{onerror}->(node => $item->{node},
617     type => 'element missing:atom|author',
618     level => $self->{must_level});
619 wakaba 1.11 ## ISSUE: If there is no |atom:entry| element,
620     ## there should be an |atom:author| element?
621 wakaba 1.10 }
622 wakaba 1.1
623     ## TODO: If entry's with same id, then updated SHOULD be different
624    
625 wakaba 1.8 unless ($element_state->{has_element}->{id}) { # MUST
626     $self->{onerror}->(node => $item->{node},
627     type => 'element missing:atom|id');
628 wakaba 1.1 }
629 wakaba 1.8 unless ($element_state->{has_element}->{title}) { # MUST
630     $self->{onerror}->(node => $item->{node},
631     type => 'element missing:atom|title');
632 wakaba 1.1 }
633 wakaba 1.8 unless ($element_state->{has_element}->{updated}) { # MUST
634     $self->{onerror}->(node => $item->{node},
635     type => 'element missing:atom|updated');
636 wakaba 1.1 }
637 wakaba 1.8 unless ($element_state->{has_element}->{'link.self'}) {
638     $self->{onerror}->(node => $item->{node}, level => 's',
639     type => 'element missing:atom|link|self');
640 wakaba 1.3 }
641 wakaba 1.1
642 wakaba 1.8 $AtomChecker{check_end}->(@_);
643 wakaba 1.1 },
644     };
645    
646     $Element->{$ATOM_NS}->{content} = {
647 wakaba 1.8 %AtomChecker,
648     check_start => sub {
649     my ($self, $item, $element_state) = @_;
650     $element_state->{type} = 'text';
651 wakaba 1.13 $element_state->{value} = '';
652 wakaba 1.8 },
653     check_attrs => $GetAtomAttrsChecker->({
654     src => sub {
655     my ($self, $attr, $item, $element_state) = @_;
656    
657     $element_state->{has_src} = 1;
658 wakaba 1.14 $item->{parent_state}->{require_summary} = 1;
659 wakaba 1.8
660     ## NOTE: There MUST NOT be any white space.
661     Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
662     my %opt = @_;
663     $self->{onerror}->(node => $item->{node}, level => $opt{level},
664     type => 'URI::'.$opt{type}.
665     (defined $opt{position} ? ':'.$opt{position} : ''));
666     });
667     },
668     type => sub {
669     my ($self, $attr, $item, $element_state) = @_;
670    
671     $element_state->{has_type} = 1;
672 wakaba 1.1
673 wakaba 1.8 my $value = $attr->value;
674 wakaba 1.1 if ($value eq 'text' or $value eq 'html' or $value eq 'xhtml') {
675     # MUST
676     } else {
677 wakaba 1.3 ## NOTE: MUST be a MIME media type. What is "MIME media type"?
678     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
679     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
680     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
681     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
682     my @type = ($1, $2);
683     my $param = $3;
684     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
685     if (defined $2) {
686     push @type, $1 => $2;
687     } else {
688     my $n = $1;
689     my $v = $2;
690     $v =~ s/\\(.)/$1/gs;
691     push @type, $n => $v;
692     }
693     }
694     require Whatpm::IMTChecker;
695     Whatpm::IMTChecker->check_imt (sub {
696     my %opt = @_;
697     $self->{onerror}->(node => $attr, level => $opt{level},
698     type => 'IMT:'.$opt{type});
699     }, @type);
700     } else {
701 wakaba 1.8 $self->{onerror}->(node => $attr, type => 'IMT:syntax error',
702     level => $self->{must_level});
703 wakaba 1.3 }
704 wakaba 1.1 }
705    
706 wakaba 1.8 if ($value =~ m![+/][Xx][Mm][Ll]\z!) {
707     ## ISSUE: There is no definition for "XML media type" in RFC 3023.
708     ## Is |application/xml-dtd| an XML media type?
709     $value = 'xml';
710     } elsif ($value =~ m!^[Tt][Ee][Xx][Tt]/!) {
711     $value = 'mime_text';
712     } elsif ($value =~ m!^(?>message|multipart)/!i) {
713     $self->{onerror}->(node => $attr, type => 'IMT:composite',
714     level => $self->{must_level});
715 wakaba 1.14 $item->{parent_state}->{require_summary} = 1;
716     } else {
717     $item->{parent_state}->{require_summary} = 1;
718 wakaba 1.8 }
719 wakaba 1.1
720 wakaba 1.8 $element_state->{type} = $value;
721     },
722     }, {
723     src => FEATURE_RFC4287,
724     type => FEATURE_RFC4287,
725     }),
726     check_child_element => sub {
727     my ($self, $item, $child_el, $child_nsuri, $child_ln,
728     $child_is_transparent, $element_state) = @_;
729    
730     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
731     $self->{onerror}->(node => $child_el,
732     type => 'element not allowed:minus',
733     level => $self->{must_level});
734     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
735     #
736     } else {
737     if ($element_state->{type} eq 'text' or
738     $element_state->{type} eq 'html' or
739     $element_state->{type} eq 'mime_text') {
740     # MUST NOT
741     $self->{onerror}->(node => $child_el,
742     type => 'element not allowed:atom|content',
743     level => $self->{must_level});
744     } elsif ($element_state->{type} eq 'xhtml') {
745     if ($element_state->{has_div}) {
746     $self->{onerror}->(node => $child_el,
747     type => 'element not allowed:atom|content',
748     level => $self->{must_level});
749     } else {
750     ## TODO: SHOULD be suitable for handling as HTML [XHTML10]
751     $element_state->{has_div} = 1;
752     }
753     } elsif ($element_state->{type} eq 'xml') {
754     ## MAY contain elements
755     if ($element_state->{has_src}) {
756     $self->{onerror}->(node => $child_el,
757     type => 'element not allowed:atom|content',
758     level => $self->{must_level});
759 wakaba 1.1 }
760 wakaba 1.8 } else {
761     ## NOTE: Elements are not explicitly disallowed.
762 wakaba 1.1 }
763 wakaba 1.8 }
764     },
765     ## NOTE: If @src, the element MUST be empty. What is "empty"?
766     ## Is |<e><!----></e>| empty? |<e>&e;</e>| where |&e;| has
767     ## empty replacement tree shuld be empty, since Atom is defined
768     ## in terms of XML Information Set where entities are expanded.
769     ## (but what if |&e;| is an unexpanded entity?)
770     check_child_text => sub {
771     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
772     if ($has_significant) {
773     if ($element_state->{has_src}) {
774     $self->{onerror}->(node => $child_node,
775     type => 'character not allowed',
776     level => $self->{must_level});
777     } elsif ($element_state->{type} eq 'xhtml' or
778     $element_state->{type} eq 'xml') {
779     $self->{onerror}->(node => $child_node,
780     type => 'character not allowed:atom|content',
781     level => $self->{must_level});
782 wakaba 1.1 }
783 wakaba 1.8 }
784 wakaba 1.1
785 wakaba 1.13 $element_state->{value} .= $child_node->data;
786 wakaba 1.1
787 wakaba 1.8 ## NOTE: type=text/* has no further restriction (i.e. the content don't
788     ## have to conform to the definition of the type).
789     },
790     check_end => sub {
791     my ($self, $item, $element_state) = @_;
792 wakaba 1.1
793 wakaba 1.8 if ($element_state->{has_src}) {
794     if (not $element_state->{has_type}) {
795     $self->{onerror}->(node => $item->{node},
796     type => 'attribute missing:type',
797     level => $self->{should_level});
798 wakaba 1.14 } elsif ($element_state->{type} eq 'text' or
799     $element_state->{type} eq 'html' or
800     $element_state->{type} eq 'xhtml') {
801 wakaba 1.8 $self->{onerror}
802     ->(node => $item->{node}->get_attribute_node_ns (undef, 'type'),
803     type => 'not IMT', level => $self->{must_level});
804 wakaba 1.1 }
805 wakaba 1.8 }
806 wakaba 1.1
807 wakaba 1.8 if ($element_state->{type} eq 'xhtml') {
808     unless ($element_state->{has_div}) {
809     $self->{onerror}->(node => $item->{node},
810     type => 'element missing:div',
811     level => $self->{must_level});
812 wakaba 1.1 }
813 wakaba 1.11 } elsif ($element_state->{type} eq 'html') {
814     ## TODO: SHOULD be suitable for handling as HTML [HTML4]
815     # markup MUST be escaped
816 wakaba 1.13 $self->{onsubdoc}->({s => $element_state->{value},
817 wakaba 1.11 container_node => $item->{node},
818     media_type => 'text/html',
819     inner_html_element => 'div',
820     is_char_string => 1});
821 wakaba 1.8 } elsif ($element_state->{type} eq 'xml') {
822 wakaba 1.3 ## NOTE: SHOULD be suitable for handling as $value.
823 wakaba 1.1 ## If no @src, this would normally mean it contains a
824     ## single child element that would serve as the root element.
825 wakaba 1.8 $self->{onerror}->(node => $item->{node},
826     level => $self->{unsupported_level},
827     type => 'atom|content not supported',
828     value => $item->{node}->get_attribute_ns
829     (undef, 'type'));
830     } elsif ($element_state->{type} eq 'text' or
831     $element_state->{type} eq 'mime-text') {
832     #
833 wakaba 1.1 } else {
834     ## TODO: $s = valid Base64ed [RFC 3548] where
835     ## MAY leading and following "white space" (what?)
836     ## and lines separated by a single U+000A
837 wakaba 1.3
838     ## NOTE: SHOULD be suitable for the indicated media type.
839 wakaba 1.8 $self->{onerror}->(node => $item->{node},
840     level => $self->{unsupported_level},
841     type => 'atom|content not supported',
842     value => $item->{node}->get_attribute_ns
843     (undef, 'type'));
844     }
845 wakaba 1.1
846 wakaba 1.8 $AtomChecker{check_end}->(@_);
847 wakaba 1.1 },
848     };
849 wakaba 1.6 ## TODO: Tests for <html:nest/> in <atom:content/>
850 wakaba 1.1
851 wakaba 1.8 $Element->{$ATOM_NS}->{author} = \%AtomPersonConstruct;
852 wakaba 1.1
853     $Element->{$ATOM_NS}->{category} = {
854 wakaba 1.8 %AtomChecker,
855     check_attrs => $GetAtomAttrsChecker->({
856 wakaba 1.1 label => sub { 1 }, # no value constraint
857 wakaba 1.2 scheme => sub { # NOTE: No MUST.
858     my ($self, $attr) = @_;
859     ## NOTE: There MUST NOT be any white space.
860     Whatpm::URIChecker->check_iri ($attr->value, sub {
861     my %opt = @_;
862     $self->{onerror}->(node => $attr, level => $opt{level},
863     type => 'URI::'.$opt{type}.
864     (defined $opt{position} ? ':'.$opt{position} : ''));
865     });
866     },
867 wakaba 1.8 term => sub {
868     my ($self, $attr, $item, $element_state) = @_;
869    
870     ## NOTE: No value constraint.
871    
872     $element_state->{has_term} = 1;
873     },
874     }, {
875     label => FEATURE_RFC4287,
876     scheme => FEATURE_RFC4287,
877     term => FEATURE_RFC4287,
878 wakaba 1.1 }),
879 wakaba 1.8 check_end => sub {
880     my ($self, $item, $element_state) = @_;
881     unless ($element_state->{has_term}) {
882     $self->{onerror}->(node => $item->{node},
883 wakaba 1.1 type => 'attribute missing:term');
884     }
885    
886 wakaba 1.8 $AtomChecker{check_end}->(@_);
887 wakaba 1.1 },
888 wakaba 1.8 ## NOTE: Meaning of content is not defined.
889 wakaba 1.1 };
890    
891 wakaba 1.8 $Element->{$ATOM_NS}->{contributor} = \%AtomPersonConstruct;
892 wakaba 1.6
893     ## TODO: Anything below does not support <html:nest/> yet.
894 wakaba 1.1
895     $Element->{$ATOM_NS}->{generator} = {
896 wakaba 1.8 %AtomChecker,
897     check_attrs => $GetAtomAttrsChecker->({
898 wakaba 1.2 uri => sub { # MUST
899     my ($self, $attr) = @_;
900     ## NOTE: There MUST NOT be any white space.
901     Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
902     my %opt = @_;
903     $self->{onerror}->(node => $attr, level => $opt{level},
904     type => 'URI::'.$opt{type}.
905     (defined $opt{position} ? ':'.$opt{position} : ''));
906     });
907     ## NOTE: Dereferencing SHOULD produce a representation
908     ## that is relevant to the agent.
909     },
910 wakaba 1.1 version => sub { 1 }, # no value constraint
911 wakaba 1.8 }, {
912     uri => FEATURE_RFC4287,
913     version => FEATURE_RFC4287,
914 wakaba 1.1 }),
915    
916 wakaba 1.8 ## NOTE: Elements are not explicitly disallowed.
917 wakaba 1.1
918 wakaba 1.8 ## NOTE: Content MUST be a string that is a human-readable name for
919     ## the generating agent.
920 wakaba 1.1 };
921    
922     $Element->{$ATOM_NS}->{icon} = {
923 wakaba 1.8 %AtomChecker,
924     check_start => sub {
925     my ($self, $item, $element_state) = @_;
926     $element_state->{value} = '';
927     },
928     ## NOTE: Elements are not explicitly disallowed.
929     check_child_text => sub {
930     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
931     $element_state->{value} .= $child_node->data;
932     },
933     check_end => sub {
934     my ($self, $item, $element_state) = @_;
935 wakaba 1.1
936 wakaba 1.2 ## NOTE: No MUST.
937     ## NOTE: There MUST NOT be any white space.
938 wakaba 1.8 Whatpm::URIChecker->check_iri_reference ($element_state->{value}, sub {
939 wakaba 1.2 my %opt = @_;
940 wakaba 1.8 $self->{onerror}->(node => $item->{node}, level => $opt{level},
941 wakaba 1.2 type => 'URI::'.$opt{type}.
942     (defined $opt{position} ? ':'.$opt{position} : ''));
943     });
944    
945 wakaba 1.1 ## NOTE: Image SHOULD be 1:1 and SHOULD be small
946    
947 wakaba 1.8 $AtomChecker{check_end}->(@_);
948 wakaba 1.1 },
949     };
950    
951     $Element->{$ATOM_NS}->{id} = {
952 wakaba 1.8 %AtomChecker,
953     check_start => sub {
954     my ($self, $item, $element_state) = @_;
955     $element_state->{value} = '';
956     },
957     ## NOTE: Elements are not explicitly disallowed.
958     check_child_text => sub {
959     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
960     $element_state->{value} .= $child_node->data;
961     },
962     check_end => sub {
963     my ($self, $item, $element_state) = @_;
964 wakaba 1.2
965     ## NOTE: There MUST NOT be any white space.
966 wakaba 1.8 Whatpm::URIChecker->check_iri ($element_state->{value}, sub {
967 wakaba 1.2 my %opt = @_;
968 wakaba 1.8 $self->{onerror}->(node => $item->{node}, level => $opt{level},
969 wakaba 1.2 type => 'URI::'.$opt{type}.
970     (defined $opt{position} ? ':'.$opt{position} : ''));
971     });
972     ## TODO: SHOULD be normalized
973    
974 wakaba 1.8 $AtomChecker{check_end}->(@_);
975 wakaba 1.2 },
976     };
977    
978 wakaba 1.15 my $AtomIMTAttrChecker = sub {
979 wakaba 1.3 my ($self, $attr) = @_;
980     my $value = $attr->value;
981     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
982     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
983     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
984     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
985     my @type = ($1, $2);
986     my $param = $3;
987     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
988     if (defined $2) {
989     push @type, $1 => $2;
990     } else {
991     my $n = $1;
992     my $v = $2;
993     $v =~ s/\\(.)/$1/gs;
994     push @type, $n => $v;
995     }
996     }
997     require Whatpm::IMTChecker;
998     Whatpm::IMTChecker->check_imt (sub {
999     my %opt = @_;
1000     $self->{onerror}->(node => $attr, level => $opt{level},
1001     type => 'IMT:'.$opt{type});
1002     }, @type);
1003     } else {
1004     $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
1005     }
1006 wakaba 1.15 }; # $AtomIMTAttrChecker
1007    
1008     my $AtomIRIReferenceAttrChecker = sub {
1009     my ($self, $attr) = @_;
1010     ## NOTE: There MUST NOT be any white space.
1011     Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
1012     my %opt = @_;
1013     $self->{onerror}->(node => $attr, level => $opt{level},
1014     type => 'URI::'.$opt{type}.
1015     (defined $opt{position} ? ':'.$opt{position} : ''));
1016     });
1017     }; # $AtomIRIReferenceAttrChecker
1018    
1019     $Element->{$ATOM_NS}->{link} = {
1020     %AtomChecker,
1021     check_attrs => $GetAtomAttrsChecker->({
1022     href => $AtomIRIReferenceAttrChecker,
1023     hreflang => $AtomLanguageTagAttrChecker,
1024     length => sub { }, # No MUST; in octets.
1025     rel => sub { # MUST
1026     my ($self, $attr) = @_;
1027     my $value = $attr->value;
1028     if ($value =~ /\A(?>[0-9A-Za-z._~!\$&'()*+,;=\x{A0}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFEF}\x{10000}-\x{1FFFD}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}\x{40000}-\x{4FFFD}\x{50000}-\x{5FFFD}\x{60000}-\x{6FFFD}\x{70000}-\x{7FFFD}\x{80000}-\x{8FFFD}\x{90000}-\x{9FFFD}\x{A0000}-\x{AFFFD}\x{B0000}-\x{BFFFD}\x{C0000}-\x{CFFFD}\x{D0000}-\x{DFFFD}\x{E1000}-\x{EFFFD}-]|%[0-9A-Fa-f][0-9A-Fa-f]|\@)+\z/) {
1029     $value = $LINK_REL . $value;
1030     }
1031    
1032     ## NOTE: There MUST NOT be any white space.
1033     Whatpm::URIChecker->check_iri ($value, sub {
1034     my %opt = @_;
1035     $self->{onerror}->(node => $attr, level => $opt{level},
1036     type => 'URI::'.$opt{type}.
1037     (defined $opt{position} ? ':'.$opt{position} : ''));
1038     });
1039    
1040     ## TODO: Warn if unregistered
1041 wakaba 1.3 },
1042 wakaba 1.15 title => sub { }, # No MUST
1043     type => $AtomIMTAttrChecker,
1044     ## NOTE: MUST be a MIME media type. What is "MIME media type"?
1045 wakaba 1.8 }, {
1046     href => FEATURE_RFC4287,
1047     hreflang => FEATURE_RFC4287,
1048     length => FEATURE_RFC4287,
1049     rel => FEATURE_RFC4287,
1050     title => FEATURE_RFC4287,
1051     type => FEATURE_RFC4287,
1052 wakaba 1.2 }),
1053 wakaba 1.8 check_start => sub {
1054     my ($self, $item, $element_state) = @_;
1055 wakaba 1.2
1056 wakaba 1.8 unless ($item->{node}->has_attribute_ns (undef, 'href')) { # MUST
1057     $self->{onerror}->(node => $item->{node},
1058 wakaba 1.2 type => 'attribute missing:href');
1059     }
1060    
1061 wakaba 1.8 if ($item->{node}->rel eq $LINK_REL . 'enclosure' and
1062     not $item->{node}->has_attribute_ns (undef, 'length')) {
1063     $self->{onerror}->(node => $item->{node}, level => 's',
1064 wakaba 1.2 type => 'attribute missing:length');
1065     }
1066     },
1067     };
1068    
1069     $Element->{$ATOM_NS}->{logo} = {
1070 wakaba 1.8 %AtomChecker,
1071     ## NOTE: Child elements are not explicitly disallowed
1072     check_start => sub {
1073     my ($self, $item, $element_state) = @_;
1074     $element_state->{value} = '';
1075     },
1076     ## NOTE: Elements are not explicitly disallowed.
1077     check_child_text => sub {
1078     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1079     $element_state->{value} .= $child_node->data;
1080     },
1081     check_end => sub {
1082     my ($self, $item, $element_state) = @_;
1083 wakaba 1.1
1084 wakaba 1.2 ## NOTE: There MUST NOT be any white space.
1085 wakaba 1.8 Whatpm::URIChecker->check_iri_reference ($element_state->{value}, sub {
1086 wakaba 1.2 my %opt = @_;
1087 wakaba 1.8 $self->{onerror}->(node => $item->{node}, level => $opt{level},
1088 wakaba 1.2 type => 'URI::'.$opt{type}.
1089     (defined $opt{position} ? ':'.$opt{position} : ''));
1090     });
1091    
1092     ## NOTE: Image SHOULD be 2:1
1093    
1094 wakaba 1.8 $AtomChecker{check_end}->(@_);
1095 wakaba 1.2 },
1096     };
1097    
1098 wakaba 1.8 $Element->{$ATOM_NS}->{published} = \%AtomDateConstruct;
1099 wakaba 1.2
1100 wakaba 1.8 $Element->{$ATOM_NS}->{rights} = \%AtomDateConstruct;
1101 wakaba 1.3 ## NOTE: SHOULD NOT be used to convey machine-readable information.
1102 wakaba 1.2
1103     $Element->{$ATOM_NS}->{source} = {
1104 wakaba 1.8 %AtomChecker,
1105     check_child_element => sub {
1106     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1107     $child_is_transparent, $element_state) = @_;
1108    
1109     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
1110     $self->{onerror}->(node => $child_el,
1111     type => 'element not allowed:minus',
1112     level => $self->{must_level});
1113     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1114     #
1115     } elsif ($child_nsuri eq $ATOM_NS) {
1116     my $not_allowed;
1117     if ($child_ln eq 'entry') {
1118     $element_state->{has_element}->{entry} = 1;
1119     } elsif ({
1120     generator => 1,
1121     icon => 1,
1122     id => 1,
1123     logo => 1,
1124     rights => 1,
1125     subtitle => 1,
1126     title => 1,
1127     updated => 1,
1128     }->{$child_ln}) {
1129     unless ($element_state->{has_element}->{$child_ln}) {
1130     $element_state->{has_element}->{$child_ln} = 1;
1131     $not_allowed = $element_state->{has_element}->{entry};
1132     } else {
1133     $not_allowed = 1;
1134     }
1135     } elsif ($child_ln eq 'link') {
1136     if ($child_ln->rel eq $LINK_REL . 'alternate') {
1137     my $type = $child_ln->get_attribute_ns (undef, 'type');
1138     $type = '' unless defined $type;
1139     my $hreflang = $child_ln->get_attribute_ns (undef, 'hreflang');
1140     $hreflang = '' unless defined $hreflang;
1141     my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
1142     (defined $hreflang ? ':'.$hreflang : '');
1143     unless ($element_state->{has_element}->{$key}) {
1144     $element_state->{has_element}->{$key} = 1;
1145 wakaba 1.2 } else {
1146     $not_allowed = 1;
1147     }
1148     }
1149 wakaba 1.8 $not_allowed ||= $element_state->{has_element}->{entry};
1150     } elsif ({
1151     category => 1,
1152     contributor => 1,
1153     }->{$child_ln}) {
1154     $not_allowed = $element_state->{has_element}->{entry};
1155 wakaba 1.9 } elsif ($child_ln eq 'author') {
1156     $not_allowed = $element_state->{has_element}->{entry};
1157     $item->{parent_state}->{has_author} = 1; # parent::atom:entry's flag
1158 wakaba 1.8 } else {
1159     $not_allowed = 1;
1160     }
1161     if ($not_allowed) {
1162     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1163 wakaba 1.2 }
1164 wakaba 1.8 } else {
1165     ## TODO: extension element
1166     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1167     }
1168     },
1169     check_child_text => sub {
1170     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1171     if ($has_significant) {
1172     $self->{onerror}->(node => $child_node, type => 'character not allowed',
1173     level => $self->{must_level});
1174 wakaba 1.2 }
1175 wakaba 1.1 },
1176     };
1177 wakaba 1.2
1178 wakaba 1.8 $Element->{$ATOM_NS}->{subtitle} = \%AtomTextConstruct;
1179 wakaba 1.2
1180 wakaba 1.8 $Element->{$ATOM_NS}->{summary} = \%AtomTextConstruct;
1181 wakaba 1.2
1182 wakaba 1.8 $Element->{$ATOM_NS}->{title} = \%AtomTextConstruct;
1183 wakaba 1.2
1184 wakaba 1.8 $Element->{$ATOM_NS}->{updated} = \%AtomDateConstruct;
1185 wakaba 1.2
1186     ## TODO: signature element
1187    
1188     ## TODO: simple extension element and structured extension element
1189 wakaba 1.1
1190 wakaba 1.15 ## -- Atom Threading 1.0 [RFC 4685]
1191    
1192     $Element->{$THR_NS}->{''} = {
1193     %AtomChecker,
1194     status => 0,
1195     };
1196    
1197     ## ISSUE: Strictly speaking, thr:* element/attribute,
1198     ## where * is an undefined local name, is not disallowed.
1199    
1200     $Element->{$THR_NS}->{'in-reply-to'} = {
1201     %AtomChecker,
1202     status => FEATURE_RFC4685,
1203     check_attrs => $GetAtomAttrsChecker->({
1204     href => $AtomIRIReferenceAttrChecker,
1205     ## TODO: fact-level.
1206     ## TODO: MUST be dereferencable.
1207     ref => sub {
1208     my ($self, $attr, $item, $element_state) = @_;
1209     $element_state->{has_ref} = 1;
1210    
1211     ## NOTE: Same as |atom:id|.
1212     ## NOTE: There MUST NOT be any white space.
1213     Whatpm::URIChecker->check_iri ($attr->value, sub {
1214     my %opt = @_;
1215     $self->{onerror}->(node => $attr, level => $opt{level},
1216     type => 'URI::'.$opt{type}.
1217     (defined $opt{position} ? ':'.$opt{position} : ''));
1218     });
1219    
1220     ## TODO: Check against ID guideline...
1221     },
1222     source => $AtomIRIReferenceAttrChecker,
1223     ## TODO: fact-level.
1224     ## TODO: MUST be dereferencable.
1225     type => $AtomIMTAttrChecker,
1226     ## TODO: fact-level.
1227     }, {
1228     href => FEATURE_RFC4685,
1229     source => FEATURE_RFC4685,
1230     ref => FEATURE_RFC4685,
1231     type => FEATURE_RFC4685,
1232     }),
1233     check_end => sub {
1234     my ($self, $item, $element_state) = @_;
1235    
1236     unless ($element_state->{has_ref}) {
1237     $self->{onerror}->(node => $item->{node},
1238     type => 'attribute missing:ref',
1239     level => $self->{must_level});
1240     }
1241    
1242     $AtomChecker{check_end}->(@_);
1243     },
1244     ## NOTE: Content model has no constraint.
1245     };
1246    
1247 wakaba 1.1 $Whatpm::ContentChecker::Namespace->{$ATOM_NS}->{loaded} = 1;
1248 wakaba 1.15 $Whatpm::ContentChecker::Namespace->{$THR_NS}->{loaded} = 1;
1249 wakaba 1.1
1250     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24