/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/Atom.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/Atom.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.21 - (hide annotations) (download)
Sat Aug 30 10:26:39 2008 UTC (16 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.20: +1 -1 lines
++ whatpm/t/ChangeLog	30 Aug 2008 10:22:30 -0000
	* ContentChecker.t: Updated for latest version of the
	Whatpm::ContentChecker module.

	* content-model-1.dat, content-model-2.dat, content-model-3.dat,
	content-model-4.dat, content-model-6.dat, content-model-atom-1.dat,
	content-model-atom-2.dat, content-model-atom-threading-1.dat,
	table-1.dat: Results updated.

2008-08-30  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	30 Aug 2008 10:24:24 -0000
	* ContentChecker.pm: Error level definition for |xml_id_error|
	was missing.

	* URIChecker.pm: The end of the URL should be marked as the
	error location for an empty path error.  The position
	between the userinfo and the port components should be
	marked as the error location for an empty host error.

2008-08-30  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ContentChecker/ChangeLog	30 Aug 2008 10:26:28 -0000
2008-08-30  Wakaba  <wakaba@suika.fam.cx>

	* Atom.pm: s/element missing/child element missing/ for
	consistency.

	* HTML.pm: Typos fixed.
	(pre): "No significant content" error was unintentionally
	disabled.  s/element missing/child element missing/ for
	consistency.

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5 wakaba 1.2 require Whatpm::URIChecker;
6    
7 wakaba 1.1 my $ATOM_NS = q<http://www.w3.org/2005/Atom>;
8 wakaba 1.15 my $THR_NS = q<http://purl.org/syndication/thread/1.0>;
9 wakaba 1.16 my $FH_NS = q<http://purl.org/syndication/history/1.0>;
10 wakaba 1.3 my $LINK_REL = q<http://www.iana.org/assignments/relation/>;
11 wakaba 1.1
12 wakaba 1.8 sub FEATURE_RFC4287 () {
13     Whatpm::ContentChecker::FEATURE_STATUS_CR |
14     Whatpm::ContentChecker::FEATURE_ALLOWED
15     }
16    
17 wakaba 1.15 sub FEATURE_RFC4685 () {
18     Whatpm::ContentChecker::FEATURE_STATUS_CR |
19     Whatpm::ContentChecker::FEATURE_ALLOWED
20     }
21    
22 wakaba 1.1 ## MUST be well-formed XML (RFC 4287 references XML 1.0 REC 20040204)
23    
24     ## NOTE: Commants and PIs are not explicitly allowed.
25    
26     our $AttrChecker;
27    
28     ## Any element MAY have xml:base, xml:lang
29     my $GetAtomAttrsChecker = sub {
30     my $element_specific_checker = shift;
31 wakaba 1.8 my $element_specific_status = shift;
32 wakaba 1.1 return sub {
33 wakaba 1.8 my ($self, $todo, $element_state) = @_;
34 wakaba 1.1 for my $attr (@{$todo->{node}->attributes}) {
35     my $attr_ns = $attr->namespace_uri;
36     $attr_ns = '' unless defined $attr_ns;
37     my $attr_ln = $attr->manakai_local_name;
38     my $checker;
39     if ($attr_ns eq '') {
40     $checker = $element_specific_checker->{$attr_ln};
41     } else {
42     $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
43     || $AttrChecker->{$attr_ns}->{''};
44     }
45     if ($checker) {
46 wakaba 1.8 $checker->($self, $attr, $todo, $element_state);
47     } elsif ($attr_ln eq '') {
48     #
49 wakaba 1.1 } else {
50 wakaba 1.18 $self->{onerror}->(node => $attr,
51     type => 'unknown attribute',
52     level => $self->{level}->{uncertain});
53 wakaba 1.1 ## ISSUE: No comformance createria for unknown attributes in the spec
54     }
55 wakaba 1.8
56     if ($attr_ns eq '') {
57     $self->_attr_status_info ($attr, $element_specific_status->{$attr_ln});
58     }
59     ## TODO: global attribute
60 wakaba 1.1 }
61     };
62     }; # $GetAtomAttrsChecker
63    
64 wakaba 1.4 my $AtomLanguageTagAttrChecker = sub {
65     ## NOTE: See also $HTMLLanguageTagAttrChecker in HTML.pm.
66    
67     my ($self, $attr) = @_;
68     my $value = $attr->value;
69     require Whatpm::LangTag;
70     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
71 wakaba 1.18 $self->{onerror}->(@_, node => $attr);
72 wakaba 1.19 }, $self->{level});
73 wakaba 1.4 ## ISSUE: RFC 4646 (3066bis)?
74     }; # $AtomLanguageTagAttrChecker
75    
76 wakaba 1.8 my %AtomChecker = (
77     %Whatpm::ContentChecker::AnyChecker,
78     status => FEATURE_RFC4287,
79     check_attrs => $GetAtomAttrsChecker->({}, {}),
80     );
81    
82     my %AtomTextConstruct = (
83     %AtomChecker,
84     check_start => sub {
85     my ($self, $item, $element_state) = @_;
86     $element_state->{type} = 'text';
87     $element_state->{value} = '';
88     },
89     check_attrs => $GetAtomAttrsChecker->({
90     type => sub {
91     my ($self, $attr, $item, $element_state) = @_;
92     my $value = $attr->value;
93     if ($value eq 'text' or $value eq 'html' or $value eq 'xhtml') { # MUST
94     $element_state->{type} = $value;
95 wakaba 1.1 } else {
96 wakaba 1.8 ## NOTE: IMT MUST NOT be used here.
97 wakaba 1.18 $self->{onerror}->(node => $attr,
98     type => 'invalid attribute value',
99     level => $self->{level}->{must});
100 wakaba 1.1 }
101 wakaba 1.8 }, # checked in |checker|
102     }, {
103     type => FEATURE_RFC4287,
104     }),
105     check_child_element => sub {
106     my ($self, $item, $child_el, $child_nsuri, $child_ln,
107     $child_is_transparent, $element_state) = @_;
108     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
109     $self->{onerror}->(node => $child_el,
110     type => 'element not allowed:minus',
111 wakaba 1.18 level => $self->{level}->{must});
112 wakaba 1.8 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
113     #
114     } else {
115     if ($element_state->{type} eq 'text' or
116     $element_state->{type} eq 'html') { # MUST NOT
117     $self->{onerror}->(node => $child_el,
118     type => 'element not allowed:atom|TextConstruct',
119 wakaba 1.18 level => $self->{level}->{must});
120 wakaba 1.8 } elsif ($element_state->{type} eq 'xhtml') {
121     if ($child_nsuri eq q<http://www.w3.org/1999/xhtml> and
122     $child_ln eq 'div') { # MUST
123     if ($element_state->{has_div}) {
124     $self->{onerror}
125     ->(node => $child_el,
126     type => 'element not allowed:atom|TextConstruct',
127 wakaba 1.18 level => $self->{level}->{must});
128 wakaba 1.8 } else {
129     $element_state->{has_div} = 1;
130 wakaba 1.1 ## TODO: SHOULD be suitable for handling as HTML [XHTML10]
131     }
132 wakaba 1.8 } else {
133     $self->{onerror}->(node => $child_el,
134     type => 'element not allowed:atom|TextConstruct',
135 wakaba 1.18 level => $self->{level}->{must});
136 wakaba 1.1 }
137 wakaba 1.8 } else {
138     die "atom:TextConstruct type error: $element_state->{type}";
139 wakaba 1.1 }
140 wakaba 1.8 }
141     },
142     check_child_text => sub {
143     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
144     if ($element_state->{type} eq 'text') {
145     #
146     } elsif ($element_state->{type} eq 'html') {
147     $element_state->{value} .= $child_node->text_content;
148     ## NOTE: Markup MUST be escaped.
149     } elsif ($element_state->{type} eq 'xhtml') {
150     if ($has_significant) {
151     $self->{onerror}->(node => $child_node,
152     type => 'character not allowed:atom|TextConstruct',
153 wakaba 1.18 level => $self->{level}->{must});
154 wakaba 1.1 }
155 wakaba 1.8 } else {
156     die "atom:TextConstruct type error: $element_state->{type}";
157 wakaba 1.1 }
158     },
159 wakaba 1.8 check_end => sub {
160     my ($self, $item, $element_state) = @_;
161 wakaba 1.13 if ($element_state->{type} eq 'xhtml') {
162     unless ($element_state->{has_div}) {
163     $self->{onerror}->(node => $item->{node},
164 wakaba 1.18 type => 'child element missing',
165     text => 'div',
166     level => $self->{level}->{must});
167 wakaba 1.13 }
168     } elsif ($element_state->{type} eq 'html') {
169     ## TODO: SHOULD be suitable for handling as HTML [HTML4]
170     # markup MUST be escaped
171     $self->{onsubdoc}->({s => $element_state->{value},
172     container_node => $item->{node},
173     media_type => 'text/html',
174     inner_html_element => 'div',
175     is_char_string => 1});
176 wakaba 1.8 }
177 wakaba 1.1
178 wakaba 1.8 $AtomChecker{check_end}->(@_);
179     },
180     ); # %AtomTextConstruct
181 wakaba 1.1
182 wakaba 1.8 my %AtomPersonConstruct = (
183     %AtomChecker,
184     check_child_element => sub {
185     my ($self, $item, $child_el, $child_nsuri, $child_ln,
186     $child_is_transparent, $element_state) = @_;
187     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
188     $self->{onerror}->(node => $child_el,
189     type => 'element not allowed:minus',
190 wakaba 1.18 level => $self->{level}->{must});
191 wakaba 1.8 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
192     #
193     } elsif ($child_nsuri eq $ATOM_NS) {
194     if ($child_ln eq 'name') {
195     if ($element_state->{has_name}) {
196     $self->{onerror}
197     ->(node => $child_el,
198     type => 'element not allowed:atom|PersonConstruct',
199 wakaba 1.18 level => $self->{level}->{must});
200 wakaba 1.8 } else {
201     $element_state->{has_name} = 1;
202     }
203     } elsif ($child_ln eq 'uri') {
204     if ($element_state->{has_uri}) {
205     $self->{onerror}
206     ->(node => $child_el,
207     type => 'element not allowed:atom|PersonConstruct',
208 wakaba 1.18 level => $self->{level}->{must});
209 wakaba 1.1 } else {
210 wakaba 1.8 $element_state->{has_uri} = 1;
211 wakaba 1.1 }
212 wakaba 1.8 } elsif ($child_ln eq 'email') {
213     if ($element_state->{has_email}) {
214     $self->{onerror}
215     ->(node => $child_el,
216     type => 'element not allowed:atom|PersonConstruct',
217 wakaba 1.18 level => $self->{level}->{must});
218 wakaba 1.8 } else {
219     $element_state->{has_email} = 1;
220 wakaba 1.7 }
221 wakaba 1.8 } else {
222     $self->{onerror}
223     ->(node => $child_el,
224     type => 'element not allowed:atom|PersonConstruct',
225 wakaba 1.18 level => $self->{level}->{must});
226 wakaba 1.1 }
227 wakaba 1.8 } else {
228     $self->{onerror}
229     ->(node => $child_el,
230     type => 'element not allowed:atom|PersonConstruct',
231 wakaba 1.18 level => $self->{level}->{must});
232 wakaba 1.1 }
233 wakaba 1.8 ## TODO: extension element
234     },
235     check_child_text => sub {
236     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
237     if ($has_significant) {
238     $self->{onerror}->(node => $child_node,
239     type => 'character not allowed:atom|PersonConstruct',
240 wakaba 1.18 level => $self->{level}->{must});
241 wakaba 1.8 }
242     },
243     check_end => sub {
244     my ($self, $item, $element_state) = @_;
245 wakaba 1.1
246 wakaba 1.8 unless ($element_state->{has_name}) {
247     $self->{onerror}->(node => $item->{node},
248 wakaba 1.18 type => 'child element missing:atom',
249     text => 'name',
250     level => $self->{level}->{must});
251 wakaba 1.1 }
252    
253 wakaba 1.8 $AtomChecker{check_end}->(@_);
254 wakaba 1.1 },
255 wakaba 1.8 ); # %AtomPersonConstruct
256 wakaba 1.1
257 wakaba 1.2 our $Element;
258    
259 wakaba 1.8 $Element->{$ATOM_NS}->{''} = {
260     %AtomChecker,
261     status => 0,
262     };
263    
264 wakaba 1.2 $Element->{$ATOM_NS}->{name} = {
265 wakaba 1.8 %AtomChecker,
266    
267 wakaba 1.2 ## NOTE: Strictly speaking, structure and semantics for atom:name
268     ## element outside of Person construct is not defined.
269    
270 wakaba 1.8 ## NOTE: No constraint.
271 wakaba 1.2 };
272    
273     $Element->{$ATOM_NS}->{uri} = {
274 wakaba 1.8 %AtomChecker,
275    
276 wakaba 1.2 ## NOTE: Strictly speaking, structure and semantics for atom:uri
277     ## element outside of Person construct is not defined.
278 wakaba 1.8
279     ## NOTE: Elements are not explicitly disallowed.
280    
281     check_start => sub {
282     my ($self, $item, $element_state) = @_;
283     $element_state->{value} = '';
284     },
285     check_child_text => sub {
286     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
287     $element_state->{value} .= $child_node->data;
288     },
289     check_end => sub {
290     my ($self, $item, $element_state) = @_;
291 wakaba 1.2
292     ## NOTE: There MUST NOT be any white space.
293 wakaba 1.8 Whatpm::URIChecker->check_iri_reference ($element_state->{value}, sub {
294 wakaba 1.18 $self->{onerror}->(@_, node => $item->{node});
295 wakaba 1.19 }, $self->{level});
296 wakaba 1.2
297 wakaba 1.8 $AtomChecker{check_end}->(@_);
298 wakaba 1.2 },
299     };
300    
301     $Element->{$ATOM_NS}->{email} = {
302 wakaba 1.8 %AtomChecker,
303    
304 wakaba 1.2 ## NOTE: Strictly speaking, structure and semantics for atom:email
305     ## element outside of Person construct is not defined.
306 wakaba 1.8
307     ## NOTE: Elements are not explicitly disallowed.
308    
309     check_end => sub {
310     my ($self, $item, $element_state) = @_;
311 wakaba 1.2
312     ## TODO: addr-spec
313 wakaba 1.8 $self->{onerror}->(node => $item->{node},
314     type => 'addr-spec not supported',
315 wakaba 1.18 level => $self->{level}->{uncertain});
316 wakaba 1.2
317 wakaba 1.8 $AtomChecker{check_end}->(@_);
318 wakaba 1.2 },
319     };
320    
321 wakaba 1.1 ## MUST NOT be any white space
322 wakaba 1.8 my %AtomDateConstruct = (
323     %AtomChecker,
324    
325     ## NOTE: It does not explicitly say that there MUST NOT be any element.
326    
327     check_start => sub {
328     my ($self, $item, $element_state) = @_;
329     $element_state->{value} = '';
330     },
331     check_child_text => sub {
332     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
333     $element_state->{value} .= $child_node->data;
334     },
335     check_end => sub {
336     my ($self, $item, $element_state) = @_;
337 wakaba 1.1
338 wakaba 1.4 ## MUST: RFC 3339 |date-time| with uppercase |T| and |Z|
339 wakaba 1.8 if ($element_state->{value} =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})(?>\.[0-9]+)?(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
340 wakaba 1.4 my ($y, $M, $d, $h, $m, $s, $zh, $zm)
341     = ($1, $2, $3, $4, $5, $6, $7, $8);
342 wakaba 1.8 my $node = $item->{node};
343 wakaba 1.4
344     ## Check additional constraints described or referenced in
345     ## comments of ABNF rules for |date-time|.
346     if (0 < $M and $M < 13) {
347     $self->{onerror}->(node => $node, type => 'datetime:bad day',
348 wakaba 1.18 level => $self->{level}->{must})
349 wakaba 1.4 if $d < 1 or
350     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
351     $self->{onerror}->(node => $node, type => 'datetime:bad day',
352 wakaba 1.18 level => $self->{level}->{must})
353 wakaba 1.4 if $M == 2 and $d == 29 and
354     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
355     } else {
356     $self->{onerror}->(node => $node, type => 'datetime:bad month',
357 wakaba 1.18 level => $self->{level}->{must});
358 wakaba 1.4 }
359     $self->{onerror}->(node => $node, type => 'datetime:bad hour',
360 wakaba 1.18 level => $self->{level}->{must}) if $h > 23;
361 wakaba 1.4 $self->{onerror}->(node => $node, type => 'datetime:bad minute',
362 wakaba 1.18 level => $self->{level}->{must}) if $m > 59;
363 wakaba 1.4 $self->{onerror}->(node => $node, type => 'datetime:bad second',
364 wakaba 1.18 level => $self->{level}->{must})
365 wakaba 1.4 if $s > 60; ## NOTE: Validness of leap seconds are not checked.
366     $self->{onerror}->(node => $node, type => 'datetime:bad timezone hour',
367 wakaba 1.18 level => $self->{level}->{must}) if $zh > 23;
368 wakaba 1.4 $self->{onerror}->(node => $node, type => 'datetime:bad timezone minute',
369 wakaba 1.18 level => $self->{level}->{must}) if $zm > 59;
370 wakaba 1.4 } else {
371 wakaba 1.8 $self->{onerror}->(node => $item->{node},
372 wakaba 1.4 type => 'datetime:syntax error',
373 wakaba 1.18 level => $self->{level}->{must});
374 wakaba 1.4 }
375     ## NOTE: SHOULD be accurate as possible (cannot be checked)
376 wakaba 1.1
377 wakaba 1.8 $AtomChecker{check_end}->(@_);
378 wakaba 1.1 },
379 wakaba 1.8 ); # %AtomDateConstruct
380 wakaba 1.1
381 wakaba 1.2 $Element->{$ATOM_NS}->{entry} = {
382 wakaba 1.8 %AtomChecker,
383 wakaba 1.1 is_root => 1,
384 wakaba 1.8 check_child_element => sub {
385     my ($self, $item, $child_el, $child_nsuri, $child_ln,
386     $child_is_transparent, $element_state) = @_;
387    
388     ## NOTE: metadata elements, followed by atom:entry* (no explicit MAY)
389    
390     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
391     $self->{onerror}->(node => $child_el,
392     type => 'element not allowed:minus',
393 wakaba 1.18 level => $self->{level}->{must});
394 wakaba 1.8 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
395     #
396     } elsif ($child_nsuri eq $ATOM_NS) {
397     my $not_allowed;
398     if ({ # MUST (0, 1)
399     content => 1,
400     id => 1,
401     published => 1,
402     rights => 1,
403     source => 1,
404     summary => 1,
405     title => 1,
406     updated => 1,
407     }->{$child_ln}) {
408     unless ($element_state->{has_element}->{$child_ln}) {
409     $element_state->{has_element}->{$child_ln} = 1;
410     $not_allowed = $element_state->{has_element}->{entry};
411     } else {
412     $not_allowed = 1;
413     }
414     } elsif ($child_ln eq 'link') { # MAY
415     if ($child_el->rel eq $LINK_REL . 'alternate') {
416     my $type = $child_el->get_attribute_ns (undef, 'type');
417     $type = '' unless defined $type;
418     my $hreflang = $child_el->get_attribute_ns (undef, 'hreflang');
419     $hreflang = '' unless defined $hreflang;
420     my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
421     (defined $hreflang ? ':'.$hreflang : '');
422     unless ($element_state->{has_element}->{$key}) {
423     $element_state->{has_element}->{$key} = 1;
424     $element_state->{has_element}->{'link.alternate'} = 1;
425 wakaba 1.1 } else {
426     $not_allowed = 1;
427     }
428     }
429 wakaba 1.8
430     ## NOTE: MAY
431     $not_allowed ||= $element_state->{has_element}->{entry};
432     } elsif ({ # MAY
433     category => 1,
434     contributor => 1,
435     }->{$child_ln}) {
436     $not_allowed = $element_state->{has_element}->{entry};
437 wakaba 1.9 } elsif ($child_ln eq 'author') { # MAY
438     $not_allowed = $element_state->{has_element}->{entry};
439 wakaba 1.12 $element_state->{has_author} = 1; # ./author | ./source/author
440     $element_state->{has_element}->{$child_ln} = 1; # ./author
441 wakaba 1.8 } else {
442     $not_allowed = 1;
443     }
444     if ($not_allowed) {
445 wakaba 1.18 $self->{onerror}->(node => $child_el, type => 'element not allowed',
446     level => $self->{level}->{must});
447 wakaba 1.1 }
448 wakaba 1.15 } elsif ($child_nsuri eq $THR_NS and $child_ln eq 'in-reply-to') {
449     ## ISSUE: Where |thr:in-reply-to| is allowed is not explicit;y
450     ## defined in RFC 4685.
451     #
452 wakaba 1.16 } elsif ($child_nsuri eq $THR_NS and $child_ln eq 'total') {
453     #
454 wakaba 1.8 } else {
455     ## TODO: extension element
456 wakaba 1.18 $self->{onerror}->(node => $child_el, type => 'element not allowed',
457     level => $self->{level}->{must});
458 wakaba 1.8 }
459     },
460     check_child_text => sub {
461     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
462     if ($has_significant) {
463     $self->{onerror}->(node => $child_node, type => 'character not allowed',
464 wakaba 1.18 level => $self->{level}->{must});
465 wakaba 1.1 }
466 wakaba 1.8 },
467     check_end => sub {
468     my ($self, $item, $element_state) = @_;
469 wakaba 1.1
470 wakaba 1.9 if ($element_state->{has_author}) {
471     ## NOTE: There is either a child atom:author element
472     ## or a child atom:source element which contains an atom:author
473     ## child element.
474     #
475     } else {
476     A: {
477     my $root = $item->{node}->owner_document->document_element;
478     if ($root and $root->manakai_local_name eq 'feed') {
479     my $nsuri = $root->namespace_uri;
480     if (defined $nsuri and $nsuri eq $ATOM_NS) {
481     ## NOTE: An Atom Feed Document.
482     for my $root_child (@{$root->child_nodes}) {
483     ## NOTE: Entity references are not supported.
484     next unless $root_child->node_type == 1; # ELEMENT_NODE
485     next unless $root_child->manakai_local_name eq 'author';
486     my $root_child_nsuri = $root_child->namespace_uri;
487     next unless defined $root_child_nsuri;
488     next unless $root_child_nsuri eq $ATOM_NS;
489     last A;
490     }
491     }
492     }
493    
494     $self->{onerror}->(node => $item->{node},
495 wakaba 1.18 type => 'child element missing:atom',
496     text => 'author',
497     level => $self->{level}->{must});
498 wakaba 1.9 } # A
499     }
500    
501 wakaba 1.12 unless ($element_state->{has_element}->{author}) {
502     $item->{parent_state}->{has_no_author_entry} = 1; # for atom:feed's check
503     }
504    
505 wakaba 1.1 ## TODO: If entry's with same id, then updated SHOULD be different
506    
507 wakaba 1.8 unless ($element_state->{has_element}->{id}) { # MUST
508     $self->{onerror}->(node => $item->{node},
509 wakaba 1.18 type => 'child element missing:atom',
510     text => 'id',
511     level => $self->{level}->{must});
512 wakaba 1.8 }
513     unless ($element_state->{has_element}->{title}) { # MUST
514     $self->{onerror}->(node => $item->{node},
515 wakaba 1.18 type => 'child element missing:atom',
516     text => 'title',
517     level => $self->{level}->{must});
518 wakaba 1.8 }
519     unless ($element_state->{has_element}->{updated}) { # MUST
520     $self->{onerror}->(node => $item->{node},
521 wakaba 1.18 type => 'child element missing:atom',
522     text => 'updated',
523     level => $self->{level}->{must});
524 wakaba 1.8 }
525     if (not $element_state->{has_element}->{content} and
526     not $element_state->{has_element}->{'link.alternate'}) {
527     $self->{onerror}->(node => $item->{node},
528 wakaba 1.18 type => 'child element missing:atom:link:alternate',
529     level => $self->{level}->{must});
530 wakaba 1.1 }
531 wakaba 1.14
532     if ($element_state->{require_summary} and
533     not $element_state->{has_element}->{summary}) {
534     $self->{onerror}->(node => $item->{node},
535 wakaba 1.18 type => 'child element missing:atom',
536     text => 'summary',
537     level => $self->{level}->{must});
538 wakaba 1.14 }
539 wakaba 1.1 },
540     };
541    
542     $Element->{$ATOM_NS}->{feed} = {
543 wakaba 1.8 %AtomChecker,
544 wakaba 1.1 is_root => 1,
545 wakaba 1.8 check_child_element => sub {
546     my ($self, $item, $child_el, $child_nsuri, $child_ln,
547     $child_is_transparent, $element_state) = @_;
548    
549     ## NOTE: metadata elements, followed by atom:entry* (no explicit MAY)
550    
551     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
552     $self->{onerror}->(node => $child_el,
553     type => 'element not allowed:minus',
554 wakaba 1.18 level => $self->{level}->{must});
555 wakaba 1.8 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
556     #
557     } elsif ($child_nsuri eq $ATOM_NS) {
558     my $not_allowed;
559     if ($child_ln eq 'entry') {
560     $element_state->{has_element}->{entry} = 1;
561     } elsif ({ # MUST (0, 1)
562     generator => 1,
563     icon => 1,
564     id => 1,
565     logo => 1,
566     rights => 1,
567     subtitle => 1,
568     title => 1,
569     updated => 1,
570     }->{$child_ln}) {
571     unless ($element_state->{has_element}->{$child_ln}) {
572     $element_state->{has_element}->{$child_ln} = 1;
573     $not_allowed = $element_state->{has_element}->{entry};
574     } else {
575     $not_allowed = 1;
576     }
577     } elsif ($child_ln eq 'link') {
578     my $rel = $child_el->rel;
579     if ($rel eq $LINK_REL . 'alternate') {
580     my $type = $child_el->get_attribute_ns (undef, 'type');
581     $type = '' unless defined $type;
582     my $hreflang = $child_el->get_attribute_ns (undef, 'hreflang');
583     $hreflang = '' unless defined $hreflang;
584     my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
585     (defined $hreflang ? ':'.$hreflang : '');
586     unless ($element_state->{has_element}->{$key}) {
587     $element_state->{has_element}->{$key} = 1;
588 wakaba 1.1 } else {
589     $not_allowed = 1;
590     }
591 wakaba 1.8 } elsif ($rel eq $LINK_REL . 'self') {
592     $element_state->{has_element}->{'link.self'} = 1;
593 wakaba 1.1 }
594 wakaba 1.8
595     ## NOTE: MAY
596     $not_allowed = $element_state->{has_element}->{entry};
597     } elsif ({ # MAY
598     category => 1,
599     contributor => 1,
600     }->{$child_ln}) {
601     $not_allowed = $element_state->{has_element}->{entry};
602 wakaba 1.12 } elsif ($child_ln eq 'author') { # MAY
603     $not_allowed = $element_state->{has_element}->{entry};
604     $element_state->{has_element}->{author} = 1;
605 wakaba 1.8 } else {
606     $not_allowed = 1;
607 wakaba 1.1 }
608 wakaba 1.18 $self->{onerror}->(node => $child_el, type => 'element not allowed',
609     level => $self->{level}->{must})
610 wakaba 1.8 if $not_allowed;
611     } else {
612     ## TODO: extension element
613 wakaba 1.18 $self->{onerror}->(node => $child_el, type => 'element not allowed',
614     level => $self->{level}->{must});
615 wakaba 1.1 }
616 wakaba 1.8 },
617     check_child_text => sub {
618     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
619     if ($has_significant) {
620     $self->{onerror}->(node => $child_node, type => 'character not allowed',
621 wakaba 1.18 level => $self->{level}->{must});
622 wakaba 1.8 }
623     },
624     check_end => sub {
625     my ($self, $item, $element_state) = @_;
626 wakaba 1.10
627 wakaba 1.12 if ($element_state->{has_no_author_entry} and
628     not $element_state->{has_element}->{author}) {
629 wakaba 1.10 $self->{onerror}->(node => $item->{node},
630 wakaba 1.18 type => 'child element missing:atom',
631     text => 'author',
632     level => $self->{level}->{must});
633 wakaba 1.11 ## ISSUE: If there is no |atom:entry| element,
634     ## there should be an |atom:author| element?
635 wakaba 1.10 }
636 wakaba 1.1
637     ## TODO: If entry's with same id, then updated SHOULD be different
638    
639 wakaba 1.8 unless ($element_state->{has_element}->{id}) { # MUST
640     $self->{onerror}->(node => $item->{node},
641 wakaba 1.18 type => 'child element missing:atom',
642     text => 'id',
643     level => $self->{level}->{must});
644 wakaba 1.1 }
645 wakaba 1.8 unless ($element_state->{has_element}->{title}) { # MUST
646     $self->{onerror}->(node => $item->{node},
647 wakaba 1.18 type => 'child element missing:atom',
648     text => 'title',
649     level => $self->{level}->{must});
650 wakaba 1.1 }
651 wakaba 1.8 unless ($element_state->{has_element}->{updated}) { # MUST
652     $self->{onerror}->(node => $item->{node},
653 wakaba 1.18 type => 'element missing:atom',
654     text => 'updated',
655     level => $self->{level}->{must});
656 wakaba 1.1 }
657 wakaba 1.8 unless ($element_state->{has_element}->{'link.self'}) {
658 wakaba 1.18 $self->{onerror}->(node => $item->{node},
659     type => 'element missing:atom:link:self',
660     level => $self->{level}->{should});
661 wakaba 1.3 }
662 wakaba 1.1
663 wakaba 1.8 $AtomChecker{check_end}->(@_);
664 wakaba 1.1 },
665     };
666    
667     $Element->{$ATOM_NS}->{content} = {
668 wakaba 1.8 %AtomChecker,
669     check_start => sub {
670     my ($self, $item, $element_state) = @_;
671     $element_state->{type} = 'text';
672 wakaba 1.13 $element_state->{value} = '';
673 wakaba 1.8 },
674     check_attrs => $GetAtomAttrsChecker->({
675     src => sub {
676     my ($self, $attr, $item, $element_state) = @_;
677    
678     $element_state->{has_src} = 1;
679 wakaba 1.14 $item->{parent_state}->{require_summary} = 1;
680 wakaba 1.8
681     ## NOTE: There MUST NOT be any white space.
682     Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
683 wakaba 1.18 $self->{onerror}->(@_, node => $item->{node});
684 wakaba 1.19 }, $self->{level});
685 wakaba 1.8 },
686     type => sub {
687     my ($self, $attr, $item, $element_state) = @_;
688    
689     $element_state->{has_type} = 1;
690 wakaba 1.1
691 wakaba 1.8 my $value = $attr->value;
692 wakaba 1.1 if ($value eq 'text' or $value eq 'html' or $value eq 'xhtml') {
693     # MUST
694     } else {
695 wakaba 1.3 ## NOTE: MUST be a MIME media type. What is "MIME media type"?
696     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
697     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
698     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
699     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
700     my @type = ($1, $2);
701     my $param = $3;
702     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
703     if (defined $2) {
704     push @type, $1 => $2;
705     } else {
706     my $n = $1;
707     my $v = $2;
708     $v =~ s/\\(.)/$1/gs;
709     push @type, $n => $v;
710     }
711     }
712     require Whatpm::IMTChecker;
713 wakaba 1.20 my $ic = Whatpm::IMTChecker->new;
714     $ic->{level} = $self->{level};
715     $ic->check_imt (sub {
716 wakaba 1.18 $self->{onerror}->(@_, node => $attr);
717 wakaba 1.3 }, @type);
718     } else {
719 wakaba 1.8 $self->{onerror}->(node => $attr, type => 'IMT:syntax error',
720 wakaba 1.18 level => $self->{level}->{must});
721 wakaba 1.3 }
722 wakaba 1.1 }
723    
724 wakaba 1.17 if ({text => 1, html => 1, xhtml => 1}->{$value}) {
725     #
726     } elsif ($value =~ m![+/][Xx][Mm][Ll]\z!) {
727 wakaba 1.8 ## ISSUE: There is no definition for "XML media type" in RFC 3023.
728     ## Is |application/xml-dtd| an XML media type?
729     $value = 'xml';
730     } elsif ($value =~ m!^[Tt][Ee][Xx][Tt]/!) {
731     $value = 'mime_text';
732     } elsif ($value =~ m!^(?>message|multipart)/!i) {
733     $self->{onerror}->(node => $attr, type => 'IMT:composite',
734 wakaba 1.18 level => $self->{level}->{must});
735 wakaba 1.14 $item->{parent_state}->{require_summary} = 1;
736     } else {
737     $item->{parent_state}->{require_summary} = 1;
738 wakaba 1.8 }
739 wakaba 1.1
740 wakaba 1.8 $element_state->{type} = $value;
741     },
742     }, {
743     src => FEATURE_RFC4287,
744     type => FEATURE_RFC4287,
745     }),
746     check_child_element => sub {
747     my ($self, $item, $child_el, $child_nsuri, $child_ln,
748     $child_is_transparent, $element_state) = @_;
749    
750     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
751     $self->{onerror}->(node => $child_el,
752     type => 'element not allowed:minus',
753 wakaba 1.18 level => $self->{level}->{must});
754 wakaba 1.8 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
755     #
756     } else {
757     if ($element_state->{type} eq 'text' or
758     $element_state->{type} eq 'html' or
759     $element_state->{type} eq 'mime_text') {
760     # MUST NOT
761     $self->{onerror}->(node => $child_el,
762     type => 'element not allowed:atom|content',
763 wakaba 1.18 level => $self->{level}->{must});
764 wakaba 1.8 } elsif ($element_state->{type} eq 'xhtml') {
765     if ($element_state->{has_div}) {
766     $self->{onerror}->(node => $child_el,
767     type => 'element not allowed:atom|content',
768 wakaba 1.18 level => $self->{level}->{must});
769 wakaba 1.8 } else {
770     ## TODO: SHOULD be suitable for handling as HTML [XHTML10]
771     $element_state->{has_div} = 1;
772     }
773     } elsif ($element_state->{type} eq 'xml') {
774     ## MAY contain elements
775     if ($element_state->{has_src}) {
776     $self->{onerror}->(node => $child_el,
777     type => 'element not allowed:atom|content',
778 wakaba 1.18 level => $self->{level}->{must});
779 wakaba 1.1 }
780 wakaba 1.8 } else {
781     ## NOTE: Elements are not explicitly disallowed.
782 wakaba 1.1 }
783 wakaba 1.8 }
784     },
785     ## NOTE: If @src, the element MUST be empty. What is "empty"?
786     ## Is |<e><!----></e>| empty? |<e>&e;</e>| where |&e;| has
787     ## empty replacement tree shuld be empty, since Atom is defined
788     ## in terms of XML Information Set where entities are expanded.
789     ## (but what if |&e;| is an unexpanded entity?)
790     check_child_text => sub {
791     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
792     if ($has_significant) {
793     if ($element_state->{has_src}) {
794     $self->{onerror}->(node => $child_node,
795 wakaba 1.18 type => 'character not allowed:empty',
796     level => $self->{level}->{must});
797 wakaba 1.8 } elsif ($element_state->{type} eq 'xhtml' or
798     $element_state->{type} eq 'xml') {
799     $self->{onerror}->(node => $child_node,
800     type => 'character not allowed:atom|content',
801 wakaba 1.18 level => $self->{level}->{must});
802 wakaba 1.1 }
803 wakaba 1.8 }
804 wakaba 1.1
805 wakaba 1.13 $element_state->{value} .= $child_node->data;
806 wakaba 1.1
807 wakaba 1.8 ## NOTE: type=text/* has no further restriction (i.e. the content don't
808     ## have to conform to the definition of the type).
809     },
810     check_end => sub {
811     my ($self, $item, $element_state) = @_;
812 wakaba 1.1
813 wakaba 1.8 if ($element_state->{has_src}) {
814     if (not $element_state->{has_type}) {
815     $self->{onerror}->(node => $item->{node},
816 wakaba 1.18 type => 'attribute missing',
817     text => 'type',
818     level => $self->{level}->{should});
819 wakaba 1.14 } elsif ($element_state->{type} eq 'text' or
820     $element_state->{type} eq 'html' or
821     $element_state->{type} eq 'xhtml') {
822 wakaba 1.8 $self->{onerror}
823     ->(node => $item->{node}->get_attribute_node_ns (undef, 'type'),
824 wakaba 1.18 type => 'not IMT', level => $self->{level}->{must});
825 wakaba 1.1 }
826 wakaba 1.8 }
827 wakaba 1.1
828 wakaba 1.8 if ($element_state->{type} eq 'xhtml') {
829     unless ($element_state->{has_div}) {
830     $self->{onerror}->(node => $item->{node},
831 wakaba 1.21 type => 'child element missing',
832 wakaba 1.18 text => 'div',
833     level => $self->{level}->{must});
834 wakaba 1.1 }
835 wakaba 1.11 } elsif ($element_state->{type} eq 'html') {
836     ## TODO: SHOULD be suitable for handling as HTML [HTML4]
837     # markup MUST be escaped
838 wakaba 1.13 $self->{onsubdoc}->({s => $element_state->{value},
839 wakaba 1.11 container_node => $item->{node},
840     media_type => 'text/html',
841     inner_html_element => 'div',
842     is_char_string => 1});
843 wakaba 1.8 } elsif ($element_state->{type} eq 'xml') {
844 wakaba 1.3 ## NOTE: SHOULD be suitable for handling as $value.
845 wakaba 1.1 ## If no @src, this would normally mean it contains a
846     ## single child element that would serve as the root element.
847 wakaba 1.8 $self->{onerror}->(node => $item->{node},
848     type => 'atom|content not supported',
849 wakaba 1.18 text => $item->{node}->get_attribute_ns
850     (undef, 'type'),
851     level => $self->{level}->{uncertain});
852 wakaba 1.8 } elsif ($element_state->{type} eq 'text' or
853     $element_state->{type} eq 'mime-text') {
854     #
855 wakaba 1.1 } else {
856     ## TODO: $s = valid Base64ed [RFC 3548] where
857     ## MAY leading and following "white space" (what?)
858     ## and lines separated by a single U+000A
859 wakaba 1.3
860     ## NOTE: SHOULD be suitable for the indicated media type.
861 wakaba 1.8 $self->{onerror}->(node => $item->{node},
862     type => 'atom|content not supported',
863 wakaba 1.18 text => $item->{node}->get_attribute_ns
864     (undef, 'type'),
865     level => $self->{level}->{uncertain});
866 wakaba 1.8 }
867 wakaba 1.1
868 wakaba 1.8 $AtomChecker{check_end}->(@_);
869 wakaba 1.1 },
870     };
871 wakaba 1.6 ## TODO: Tests for <html:nest/> in <atom:content/>
872 wakaba 1.1
873 wakaba 1.8 $Element->{$ATOM_NS}->{author} = \%AtomPersonConstruct;
874 wakaba 1.1
875     $Element->{$ATOM_NS}->{category} = {
876 wakaba 1.8 %AtomChecker,
877     check_attrs => $GetAtomAttrsChecker->({
878 wakaba 1.1 label => sub { 1 }, # no value constraint
879 wakaba 1.2 scheme => sub { # NOTE: No MUST.
880     my ($self, $attr) = @_;
881     ## NOTE: There MUST NOT be any white space.
882     Whatpm::URIChecker->check_iri ($attr->value, sub {
883 wakaba 1.18 $self->{onerror}->(@_, node => $attr);
884 wakaba 1.19 }, $self->{level});
885 wakaba 1.2 },
886 wakaba 1.8 term => sub {
887     my ($self, $attr, $item, $element_state) = @_;
888    
889     ## NOTE: No value constraint.
890    
891     $element_state->{has_term} = 1;
892     },
893     }, {
894     label => FEATURE_RFC4287,
895     scheme => FEATURE_RFC4287,
896     term => FEATURE_RFC4287,
897 wakaba 1.1 }),
898 wakaba 1.8 check_end => sub {
899     my ($self, $item, $element_state) = @_;
900     unless ($element_state->{has_term}) {
901     $self->{onerror}->(node => $item->{node},
902 wakaba 1.18 type => 'attribute missing',
903     text => 'term',
904     level => $self->{level}->{must});
905 wakaba 1.1 }
906    
907 wakaba 1.8 $AtomChecker{check_end}->(@_);
908 wakaba 1.1 },
909 wakaba 1.8 ## NOTE: Meaning of content is not defined.
910 wakaba 1.1 };
911    
912 wakaba 1.8 $Element->{$ATOM_NS}->{contributor} = \%AtomPersonConstruct;
913 wakaba 1.6
914     ## TODO: Anything below does not support <html:nest/> yet.
915 wakaba 1.1
916     $Element->{$ATOM_NS}->{generator} = {
917 wakaba 1.8 %AtomChecker,
918     check_attrs => $GetAtomAttrsChecker->({
919 wakaba 1.2 uri => sub { # MUST
920     my ($self, $attr) = @_;
921     ## NOTE: There MUST NOT be any white space.
922     Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
923 wakaba 1.18 $self->{onerror}->(@_, node => $attr);
924 wakaba 1.19 }, $self->{level});
925 wakaba 1.2 ## NOTE: Dereferencing SHOULD produce a representation
926     ## that is relevant to the agent.
927     },
928 wakaba 1.1 version => sub { 1 }, # no value constraint
929 wakaba 1.8 }, {
930     uri => FEATURE_RFC4287,
931     version => FEATURE_RFC4287,
932 wakaba 1.1 }),
933    
934 wakaba 1.8 ## NOTE: Elements are not explicitly disallowed.
935 wakaba 1.1
936 wakaba 1.8 ## NOTE: Content MUST be a string that is a human-readable name for
937     ## the generating agent.
938 wakaba 1.1 };
939    
940     $Element->{$ATOM_NS}->{icon} = {
941 wakaba 1.8 %AtomChecker,
942     check_start => sub {
943     my ($self, $item, $element_state) = @_;
944     $element_state->{value} = '';
945     },
946     ## NOTE: Elements are not explicitly disallowed.
947     check_child_text => sub {
948     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
949     $element_state->{value} .= $child_node->data;
950     },
951     check_end => sub {
952     my ($self, $item, $element_state) = @_;
953 wakaba 1.1
954 wakaba 1.2 ## NOTE: No MUST.
955     ## NOTE: There MUST NOT be any white space.
956 wakaba 1.8 Whatpm::URIChecker->check_iri_reference ($element_state->{value}, sub {
957 wakaba 1.18 $self->{onerror}->(@_, node => $item->{node});
958 wakaba 1.19 }, $self->{level});
959 wakaba 1.2
960 wakaba 1.1 ## NOTE: Image SHOULD be 1:1 and SHOULD be small
961    
962 wakaba 1.8 $AtomChecker{check_end}->(@_);
963 wakaba 1.1 },
964     };
965    
966     $Element->{$ATOM_NS}->{id} = {
967 wakaba 1.8 %AtomChecker,
968     check_start => sub {
969     my ($self, $item, $element_state) = @_;
970     $element_state->{value} = '';
971     },
972     ## NOTE: Elements are not explicitly disallowed.
973     check_child_text => sub {
974     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
975     $element_state->{value} .= $child_node->data;
976     },
977     check_end => sub {
978     my ($self, $item, $element_state) = @_;
979 wakaba 1.2
980     ## NOTE: There MUST NOT be any white space.
981 wakaba 1.8 Whatpm::URIChecker->check_iri ($element_state->{value}, sub {
982 wakaba 1.18 $self->{onerror}->(@_, node => $item->{node});
983 wakaba 1.19 }, $self->{level});
984 wakaba 1.2 ## TODO: SHOULD be normalized
985    
986 wakaba 1.8 $AtomChecker{check_end}->(@_);
987 wakaba 1.2 },
988     };
989    
990 wakaba 1.15 my $AtomIMTAttrChecker = sub {
991 wakaba 1.3 my ($self, $attr) = @_;
992     my $value = $attr->value;
993     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
994     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
995     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
996     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
997     my @type = ($1, $2);
998     my $param = $3;
999     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
1000     if (defined $2) {
1001     push @type, $1 => $2;
1002     } else {
1003     my $n = $1;
1004     my $v = $2;
1005     $v =~ s/\\(.)/$1/gs;
1006     push @type, $n => $v;
1007     }
1008     }
1009     require Whatpm::IMTChecker;
1010 wakaba 1.20 my $ic = Whatpm::IMTChecker->new;
1011     $ic->{level} = $self->{level};
1012     $ic->check_imt (sub {
1013 wakaba 1.18 $self->{onerror}->(@_, node => $attr);
1014 wakaba 1.3 }, @type);
1015     } else {
1016 wakaba 1.18 $self->{onerror}->(node => $attr, type => 'IMT:syntax error',
1017     level => $self->{level}->{must});
1018 wakaba 1.3 }
1019 wakaba 1.15 }; # $AtomIMTAttrChecker
1020    
1021     my $AtomIRIReferenceAttrChecker = sub {
1022     my ($self, $attr) = @_;
1023     ## NOTE: There MUST NOT be any white space.
1024     Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
1025 wakaba 1.18 $self->{onerror}->(@_, node => $attr);
1026 wakaba 1.19 }, $self->{level});
1027 wakaba 1.15 }; # $AtomIRIReferenceAttrChecker
1028    
1029     $Element->{$ATOM_NS}->{link} = {
1030     %AtomChecker,
1031     check_attrs => $GetAtomAttrsChecker->({
1032     href => $AtomIRIReferenceAttrChecker,
1033     hreflang => $AtomLanguageTagAttrChecker,
1034     length => sub { }, # No MUST; in octets.
1035     rel => sub { # MUST
1036     my ($self, $attr) = @_;
1037     my $value = $attr->value;
1038     if ($value =~ /\A(?>[0-9A-Za-z._~!\$&'()*+,;=\x{A0}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFEF}\x{10000}-\x{1FFFD}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}\x{40000}-\x{4FFFD}\x{50000}-\x{5FFFD}\x{60000}-\x{6FFFD}\x{70000}-\x{7FFFD}\x{80000}-\x{8FFFD}\x{90000}-\x{9FFFD}\x{A0000}-\x{AFFFD}\x{B0000}-\x{BFFFD}\x{C0000}-\x{CFFFD}\x{D0000}-\x{DFFFD}\x{E1000}-\x{EFFFD}-]|%[0-9A-Fa-f][0-9A-Fa-f]|\@)+\z/) {
1039     $value = $LINK_REL . $value;
1040     }
1041    
1042     ## NOTE: There MUST NOT be any white space.
1043     Whatpm::URIChecker->check_iri ($value, sub {
1044 wakaba 1.18 $self->{onerror}->(@_, node => $attr);
1045 wakaba 1.19 }, $self->{level});
1046 wakaba 1.15
1047     ## TODO: Warn if unregistered
1048 wakaba 1.16
1049     ## TODO: rel=license [RFC 4946]
1050     ## MUST NOT multiple rel=license with same href="",type="" pairs
1051     ## href="" SHOULD be dereferencable
1052     ## title="" SHOULD be there if multiple rel=license
1053     ## MUST NOT "unspecified" and other rel=license
1054 wakaba 1.3 },
1055 wakaba 1.15 title => sub { }, # No MUST
1056     type => $AtomIMTAttrChecker,
1057     ## NOTE: MUST be a MIME media type. What is "MIME media type"?
1058 wakaba 1.8 }, {
1059     href => FEATURE_RFC4287,
1060     hreflang => FEATURE_RFC4287,
1061     length => FEATURE_RFC4287,
1062     rel => FEATURE_RFC4287,
1063     title => FEATURE_RFC4287,
1064     type => FEATURE_RFC4287,
1065 wakaba 1.16
1066     ## TODO: thr:count
1067     ## TODO: thr:updated
1068 wakaba 1.2 }),
1069 wakaba 1.8 check_start => sub {
1070     my ($self, $item, $element_state) = @_;
1071 wakaba 1.2
1072 wakaba 1.8 unless ($item->{node}->has_attribute_ns (undef, 'href')) { # MUST
1073     $self->{onerror}->(node => $item->{node},
1074 wakaba 1.18 type => 'attribute missing',
1075     text => 'href',
1076     level => $self->{level}->{must});
1077 wakaba 1.2 }
1078    
1079 wakaba 1.8 if ($item->{node}->rel eq $LINK_REL . 'enclosure' and
1080     not $item->{node}->has_attribute_ns (undef, 'length')) {
1081 wakaba 1.18 $self->{onerror}->(node => $item->{node},
1082     type => 'attribute missing',
1083     text => 'length',
1084     level => $self->{level}->{should});
1085 wakaba 1.2 }
1086     },
1087     };
1088    
1089     $Element->{$ATOM_NS}->{logo} = {
1090 wakaba 1.8 %AtomChecker,
1091     ## NOTE: Child elements are not explicitly disallowed
1092     check_start => sub {
1093     my ($self, $item, $element_state) = @_;
1094     $element_state->{value} = '';
1095     },
1096     ## NOTE: Elements are not explicitly disallowed.
1097     check_child_text => sub {
1098     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1099     $element_state->{value} .= $child_node->data;
1100     },
1101     check_end => sub {
1102     my ($self, $item, $element_state) = @_;
1103 wakaba 1.1
1104 wakaba 1.2 ## NOTE: There MUST NOT be any white space.
1105 wakaba 1.8 Whatpm::URIChecker->check_iri_reference ($element_state->{value}, sub {
1106 wakaba 1.18 $self->{onerror}->(@_, node => $item->{node});
1107 wakaba 1.19 }, $self->{level});
1108 wakaba 1.2
1109     ## NOTE: Image SHOULD be 2:1
1110    
1111 wakaba 1.8 $AtomChecker{check_end}->(@_);
1112 wakaba 1.2 },
1113     };
1114    
1115 wakaba 1.8 $Element->{$ATOM_NS}->{published} = \%AtomDateConstruct;
1116 wakaba 1.2
1117 wakaba 1.17 $Element->{$ATOM_NS}->{rights} = \%AtomTextConstruct;
1118 wakaba 1.3 ## NOTE: SHOULD NOT be used to convey machine-readable information.
1119 wakaba 1.2
1120     $Element->{$ATOM_NS}->{source} = {
1121 wakaba 1.8 %AtomChecker,
1122     check_child_element => sub {
1123     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1124     $child_is_transparent, $element_state) = @_;
1125    
1126     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
1127     $self->{onerror}->(node => $child_el,
1128     type => 'element not allowed:minus',
1129 wakaba 1.18 level => $self->{level}->{must});
1130 wakaba 1.8 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1131     #
1132     } elsif ($child_nsuri eq $ATOM_NS) {
1133     my $not_allowed;
1134     if ($child_ln eq 'entry') {
1135     $element_state->{has_element}->{entry} = 1;
1136     } elsif ({
1137     generator => 1,
1138     icon => 1,
1139     id => 1,
1140     logo => 1,
1141     rights => 1,
1142     subtitle => 1,
1143     title => 1,
1144     updated => 1,
1145     }->{$child_ln}) {
1146     unless ($element_state->{has_element}->{$child_ln}) {
1147     $element_state->{has_element}->{$child_ln} = 1;
1148     $not_allowed = $element_state->{has_element}->{entry};
1149     } else {
1150     $not_allowed = 1;
1151     }
1152     } elsif ($child_ln eq 'link') {
1153 wakaba 1.17 if ($child_el->rel eq $LINK_REL . 'alternate') {
1154     my $type = $child_el->get_attribute_ns (undef, 'type');
1155 wakaba 1.8 $type = '' unless defined $type;
1156 wakaba 1.17 my $hreflang = $child_el->get_attribute_ns (undef, 'hreflang');
1157 wakaba 1.8 $hreflang = '' unless defined $hreflang;
1158     my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
1159     (defined $hreflang ? ':'.$hreflang : '');
1160     unless ($element_state->{has_element}->{$key}) {
1161     $element_state->{has_element}->{$key} = 1;
1162 wakaba 1.2 } else {
1163     $not_allowed = 1;
1164     }
1165     }
1166 wakaba 1.8 $not_allowed ||= $element_state->{has_element}->{entry};
1167     } elsif ({
1168     category => 1,
1169     contributor => 1,
1170     }->{$child_ln}) {
1171     $not_allowed = $element_state->{has_element}->{entry};
1172 wakaba 1.9 } elsif ($child_ln eq 'author') {
1173     $not_allowed = $element_state->{has_element}->{entry};
1174     $item->{parent_state}->{has_author} = 1; # parent::atom:entry's flag
1175 wakaba 1.8 } else {
1176     $not_allowed = 1;
1177     }
1178     if ($not_allowed) {
1179 wakaba 1.18 $self->{onerror}->(node => $child_el, type => 'element not allowed',
1180     level => $self->{level}->{must});
1181 wakaba 1.2 }
1182 wakaba 1.8 } else {
1183     ## TODO: extension element
1184 wakaba 1.18 $self->{onerror}->(node => $child_el, type => 'element not allowed',
1185     level => $self->{level}->{must});
1186 wakaba 1.8 }
1187     },
1188     check_child_text => sub {
1189     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1190     if ($has_significant) {
1191     $self->{onerror}->(node => $child_node, type => 'character not allowed',
1192 wakaba 1.18 level => $self->{level}->{must});
1193 wakaba 1.2 }
1194 wakaba 1.1 },
1195     };
1196 wakaba 1.2
1197 wakaba 1.8 $Element->{$ATOM_NS}->{subtitle} = \%AtomTextConstruct;
1198 wakaba 1.2
1199 wakaba 1.8 $Element->{$ATOM_NS}->{summary} = \%AtomTextConstruct;
1200 wakaba 1.2
1201 wakaba 1.8 $Element->{$ATOM_NS}->{title} = \%AtomTextConstruct;
1202 wakaba 1.2
1203 wakaba 1.8 $Element->{$ATOM_NS}->{updated} = \%AtomDateConstruct;
1204 wakaba 1.2
1205     ## TODO: signature element
1206    
1207     ## TODO: simple extension element and structured extension element
1208 wakaba 1.1
1209 wakaba 1.15 ## -- Atom Threading 1.0 [RFC 4685]
1210    
1211     $Element->{$THR_NS}->{''} = {
1212     %AtomChecker,
1213     status => 0,
1214     };
1215    
1216     ## ISSUE: Strictly speaking, thr:* element/attribute,
1217     ## where * is an undefined local name, is not disallowed.
1218    
1219     $Element->{$THR_NS}->{'in-reply-to'} = {
1220     %AtomChecker,
1221     status => FEATURE_RFC4685,
1222     check_attrs => $GetAtomAttrsChecker->({
1223     href => $AtomIRIReferenceAttrChecker,
1224     ## TODO: fact-level.
1225     ## TODO: MUST be dereferencable.
1226     ref => sub {
1227     my ($self, $attr, $item, $element_state) = @_;
1228     $element_state->{has_ref} = 1;
1229    
1230     ## NOTE: Same as |atom:id|.
1231     ## NOTE: There MUST NOT be any white space.
1232     Whatpm::URIChecker->check_iri ($attr->value, sub {
1233 wakaba 1.18 $self->{onerror}->(@_, node => $attr);
1234 wakaba 1.19 }, $self->{level});
1235 wakaba 1.15
1236     ## TODO: Check against ID guideline...
1237     },
1238     source => $AtomIRIReferenceAttrChecker,
1239     ## TODO: fact-level.
1240     ## TODO: MUST be dereferencable.
1241     type => $AtomIMTAttrChecker,
1242     ## TODO: fact-level.
1243     }, {
1244     href => FEATURE_RFC4685,
1245     source => FEATURE_RFC4685,
1246     ref => FEATURE_RFC4685,
1247     type => FEATURE_RFC4685,
1248     }),
1249     check_end => sub {
1250     my ($self, $item, $element_state) = @_;
1251    
1252     unless ($element_state->{has_ref}) {
1253     $self->{onerror}->(node => $item->{node},
1254 wakaba 1.18 type => 'attribute missing',
1255     text => 'ref',
1256     level => $self->{level}->{must});
1257 wakaba 1.15 }
1258    
1259     $AtomChecker{check_end}->(@_);
1260     },
1261     ## NOTE: Content model has no constraint.
1262     };
1263 wakaba 1.16
1264     $Element->{$THR_NS}->{total} = {
1265     %AtomChecker,
1266     check_start => sub {
1267     my ($self, $item, $element_state) = @_;
1268     $element_state->{value} = '';
1269     },
1270     check_child_element => sub {
1271     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1272     $child_is_transparent, $element_state) = @_;
1273    
1274     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
1275     $self->{onerror}->(node => $child_el,
1276     type => 'element not allowed:minus',
1277 wakaba 1.18 level => $self->{level}->{must});
1278 wakaba 1.16 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1279     #
1280     } else {
1281     $self->{onerror}->(node => $child_el,
1282     type => 'element not allowed',
1283 wakaba 1.18 level => $self->{level}->{must});
1284 wakaba 1.16 }
1285     },
1286     check_child_text => sub {
1287     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1288     $element_state->{value} .= $child_node->data;
1289     },
1290     check_end => sub {
1291     my ($self, $item, $element_state) = @_;
1292    
1293     ## NOTE: xsd:nonNegativeInteger
1294     unless ($element_state->{value} =~ /\A(?>[0-9]+|-0+)\z/) {
1295     $self->{onerror}->(node => $item->{node},
1296 wakaba 1.18 type => 'invalid attribute value',
1297     level => $self->{level}->{must});
1298 wakaba 1.16 }
1299    
1300     $AtomChecker{check_end}->(@_);
1301     },
1302     };
1303    
1304     ## TODO: fh:complete
1305    
1306     ## TODO: fh:archive
1307    
1308     ## TODO: Check as archive document, page feed document, ...
1309    
1310     ## TODO: APP [RFC 5023]
1311 wakaba 1.15
1312 wakaba 1.1 $Whatpm::ContentChecker::Namespace->{$ATOM_NS}->{loaded} = 1;
1313 wakaba 1.15 $Whatpm::ContentChecker::Namespace->{$THR_NS}->{loaded} = 1;
1314 wakaba 1.1
1315     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24