/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/Atom.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/Atom.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.22 - (hide annotations) (download)
Sat Sep 20 06:10:18 2008 UTC (16 years, 1 month ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.21: +16 -7 lines
++ whatpm/t/ChangeLog	20 Sep 2008 05:50:38 -0000
	* content-model-1.dat: Test data for interactive contents are
	added (cf. HTML5 revision 2018).

2008-09-20  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	20 Sep 2008 05:46:21 -0000
2008-09-20  Wakaba  <wakaba@suika.fam.cx>

	* ContentChecker.pm ($IsInHTMLInteractiveContent): New.

++ whatpm/Whatpm/ContentChecker/ChangeLog	20 Sep 2008 05:51:55 -0000
	* HTML.pm, Atom.pm: Interactrive content implementation synced
	with the spec (HTML5 revision 2018).

2008-09-20  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5 wakaba 1.2 require Whatpm::URIChecker;
6    
7 wakaba 1.1 my $ATOM_NS = q<http://www.w3.org/2005/Atom>;
8 wakaba 1.15 my $THR_NS = q<http://purl.org/syndication/thread/1.0>;
9 wakaba 1.16 my $FH_NS = q<http://purl.org/syndication/history/1.0>;
10 wakaba 1.3 my $LINK_REL = q<http://www.iana.org/assignments/relation/>;
11 wakaba 1.1
12 wakaba 1.8 sub FEATURE_RFC4287 () {
13     Whatpm::ContentChecker::FEATURE_STATUS_CR |
14     Whatpm::ContentChecker::FEATURE_ALLOWED
15     }
16    
17 wakaba 1.15 sub FEATURE_RFC4685 () {
18     Whatpm::ContentChecker::FEATURE_STATUS_CR |
19     Whatpm::ContentChecker::FEATURE_ALLOWED
20     }
21    
22 wakaba 1.1 ## MUST be well-formed XML (RFC 4287 references XML 1.0 REC 20040204)
23    
24     ## NOTE: Commants and PIs are not explicitly allowed.
25    
26 wakaba 1.22 our $IsInHTMLInteractiveContent; # See Whatpm::ContentChecker.
27    
28 wakaba 1.1 our $AttrChecker;
29    
30     ## Any element MAY have xml:base, xml:lang
31     my $GetAtomAttrsChecker = sub {
32     my $element_specific_checker = shift;
33 wakaba 1.8 my $element_specific_status = shift;
34 wakaba 1.1 return sub {
35 wakaba 1.8 my ($self, $todo, $element_state) = @_;
36 wakaba 1.1 for my $attr (@{$todo->{node}->attributes}) {
37     my $attr_ns = $attr->namespace_uri;
38     $attr_ns = '' unless defined $attr_ns;
39     my $attr_ln = $attr->manakai_local_name;
40     my $checker;
41     if ($attr_ns eq '') {
42     $checker = $element_specific_checker->{$attr_ln};
43     } else {
44     $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
45     || $AttrChecker->{$attr_ns}->{''};
46     }
47     if ($checker) {
48 wakaba 1.8 $checker->($self, $attr, $todo, $element_state);
49     } elsif ($attr_ln eq '') {
50     #
51 wakaba 1.1 } else {
52 wakaba 1.18 $self->{onerror}->(node => $attr,
53     type => 'unknown attribute',
54     level => $self->{level}->{uncertain});
55 wakaba 1.1 ## ISSUE: No comformance createria for unknown attributes in the spec
56     }
57 wakaba 1.8
58     if ($attr_ns eq '') {
59     $self->_attr_status_info ($attr, $element_specific_status->{$attr_ln});
60     }
61     ## TODO: global attribute
62 wakaba 1.1 }
63     };
64     }; # $GetAtomAttrsChecker
65    
66 wakaba 1.4 my $AtomLanguageTagAttrChecker = sub {
67     ## NOTE: See also $HTMLLanguageTagAttrChecker in HTML.pm.
68    
69     my ($self, $attr) = @_;
70     my $value = $attr->value;
71     require Whatpm::LangTag;
72     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
73 wakaba 1.18 $self->{onerror}->(@_, node => $attr);
74 wakaba 1.19 }, $self->{level});
75 wakaba 1.4 ## ISSUE: RFC 4646 (3066bis)?
76     }; # $AtomLanguageTagAttrChecker
77    
78 wakaba 1.8 my %AtomChecker = (
79     %Whatpm::ContentChecker::AnyChecker,
80     status => FEATURE_RFC4287,
81     check_attrs => $GetAtomAttrsChecker->({}, {}),
82     );
83    
84     my %AtomTextConstruct = (
85     %AtomChecker,
86     check_start => sub {
87     my ($self, $item, $element_state) = @_;
88     $element_state->{type} = 'text';
89     $element_state->{value} = '';
90     },
91     check_attrs => $GetAtomAttrsChecker->({
92     type => sub {
93     my ($self, $attr, $item, $element_state) = @_;
94     my $value = $attr->value;
95     if ($value eq 'text' or $value eq 'html' or $value eq 'xhtml') { # MUST
96     $element_state->{type} = $value;
97 wakaba 1.1 } else {
98 wakaba 1.8 ## NOTE: IMT MUST NOT be used here.
99 wakaba 1.18 $self->{onerror}->(node => $attr,
100     type => 'invalid attribute value',
101     level => $self->{level}->{must});
102 wakaba 1.1 }
103 wakaba 1.8 }, # checked in |checker|
104     }, {
105     type => FEATURE_RFC4287,
106     }),
107     check_child_element => sub {
108     my ($self, $item, $child_el, $child_nsuri, $child_ln,
109     $child_is_transparent, $element_state) = @_;
110 wakaba 1.22 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
111     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
112 wakaba 1.8 $self->{onerror}->(node => $child_el,
113     type => 'element not allowed:minus',
114 wakaba 1.18 level => $self->{level}->{must});
115 wakaba 1.8 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
116     #
117     } else {
118     if ($element_state->{type} eq 'text' or
119     $element_state->{type} eq 'html') { # MUST NOT
120     $self->{onerror}->(node => $child_el,
121     type => 'element not allowed:atom|TextConstruct',
122 wakaba 1.18 level => $self->{level}->{must});
123 wakaba 1.8 } elsif ($element_state->{type} eq 'xhtml') {
124     if ($child_nsuri eq q<http://www.w3.org/1999/xhtml> and
125     $child_ln eq 'div') { # MUST
126     if ($element_state->{has_div}) {
127     $self->{onerror}
128     ->(node => $child_el,
129     type => 'element not allowed:atom|TextConstruct',
130 wakaba 1.18 level => $self->{level}->{must});
131 wakaba 1.8 } else {
132     $element_state->{has_div} = 1;
133 wakaba 1.1 ## TODO: SHOULD be suitable for handling as HTML [XHTML10]
134     }
135 wakaba 1.8 } else {
136     $self->{onerror}->(node => $child_el,
137     type => 'element not allowed:atom|TextConstruct',
138 wakaba 1.18 level => $self->{level}->{must});
139 wakaba 1.1 }
140 wakaba 1.8 } else {
141     die "atom:TextConstruct type error: $element_state->{type}";
142 wakaba 1.1 }
143 wakaba 1.8 }
144     },
145     check_child_text => sub {
146     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
147     if ($element_state->{type} eq 'text') {
148     #
149     } elsif ($element_state->{type} eq 'html') {
150     $element_state->{value} .= $child_node->text_content;
151     ## NOTE: Markup MUST be escaped.
152     } elsif ($element_state->{type} eq 'xhtml') {
153     if ($has_significant) {
154     $self->{onerror}->(node => $child_node,
155     type => 'character not allowed:atom|TextConstruct',
156 wakaba 1.18 level => $self->{level}->{must});
157 wakaba 1.1 }
158 wakaba 1.8 } else {
159     die "atom:TextConstruct type error: $element_state->{type}";
160 wakaba 1.1 }
161     },
162 wakaba 1.8 check_end => sub {
163     my ($self, $item, $element_state) = @_;
164 wakaba 1.13 if ($element_state->{type} eq 'xhtml') {
165     unless ($element_state->{has_div}) {
166     $self->{onerror}->(node => $item->{node},
167 wakaba 1.18 type => 'child element missing',
168     text => 'div',
169     level => $self->{level}->{must});
170 wakaba 1.13 }
171     } elsif ($element_state->{type} eq 'html') {
172     ## TODO: SHOULD be suitable for handling as HTML [HTML4]
173     # markup MUST be escaped
174     $self->{onsubdoc}->({s => $element_state->{value},
175     container_node => $item->{node},
176     media_type => 'text/html',
177     inner_html_element => 'div',
178     is_char_string => 1});
179 wakaba 1.8 }
180 wakaba 1.1
181 wakaba 1.8 $AtomChecker{check_end}->(@_);
182     },
183     ); # %AtomTextConstruct
184 wakaba 1.1
185 wakaba 1.8 my %AtomPersonConstruct = (
186     %AtomChecker,
187     check_child_element => sub {
188     my ($self, $item, $child_el, $child_nsuri, $child_ln,
189     $child_is_transparent, $element_state) = @_;
190 wakaba 1.22 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
191     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
192 wakaba 1.8 $self->{onerror}->(node => $child_el,
193     type => 'element not allowed:minus',
194 wakaba 1.18 level => $self->{level}->{must});
195 wakaba 1.8 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
196     #
197     } elsif ($child_nsuri eq $ATOM_NS) {
198     if ($child_ln eq 'name') {
199     if ($element_state->{has_name}) {
200     $self->{onerror}
201     ->(node => $child_el,
202     type => 'element not allowed:atom|PersonConstruct',
203 wakaba 1.18 level => $self->{level}->{must});
204 wakaba 1.8 } else {
205     $element_state->{has_name} = 1;
206     }
207     } elsif ($child_ln eq 'uri') {
208     if ($element_state->{has_uri}) {
209     $self->{onerror}
210     ->(node => $child_el,
211     type => 'element not allowed:atom|PersonConstruct',
212 wakaba 1.18 level => $self->{level}->{must});
213 wakaba 1.1 } else {
214 wakaba 1.8 $element_state->{has_uri} = 1;
215 wakaba 1.1 }
216 wakaba 1.8 } elsif ($child_ln eq 'email') {
217     if ($element_state->{has_email}) {
218     $self->{onerror}
219     ->(node => $child_el,
220     type => 'element not allowed:atom|PersonConstruct',
221 wakaba 1.18 level => $self->{level}->{must});
222 wakaba 1.8 } else {
223     $element_state->{has_email} = 1;
224 wakaba 1.7 }
225 wakaba 1.8 } else {
226     $self->{onerror}
227     ->(node => $child_el,
228     type => 'element not allowed:atom|PersonConstruct',
229 wakaba 1.18 level => $self->{level}->{must});
230 wakaba 1.1 }
231 wakaba 1.8 } else {
232     $self->{onerror}
233     ->(node => $child_el,
234     type => 'element not allowed:atom|PersonConstruct',
235 wakaba 1.18 level => $self->{level}->{must});
236 wakaba 1.1 }
237 wakaba 1.8 ## TODO: extension element
238     },
239     check_child_text => sub {
240     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
241     if ($has_significant) {
242     $self->{onerror}->(node => $child_node,
243     type => 'character not allowed:atom|PersonConstruct',
244 wakaba 1.18 level => $self->{level}->{must});
245 wakaba 1.8 }
246     },
247     check_end => sub {
248     my ($self, $item, $element_state) = @_;
249 wakaba 1.1
250 wakaba 1.8 unless ($element_state->{has_name}) {
251     $self->{onerror}->(node => $item->{node},
252 wakaba 1.18 type => 'child element missing:atom',
253     text => 'name',
254     level => $self->{level}->{must});
255 wakaba 1.1 }
256    
257 wakaba 1.8 $AtomChecker{check_end}->(@_);
258 wakaba 1.1 },
259 wakaba 1.8 ); # %AtomPersonConstruct
260 wakaba 1.1
261 wakaba 1.2 our $Element;
262    
263 wakaba 1.8 $Element->{$ATOM_NS}->{''} = {
264     %AtomChecker,
265     status => 0,
266     };
267    
268 wakaba 1.2 $Element->{$ATOM_NS}->{name} = {
269 wakaba 1.8 %AtomChecker,
270    
271 wakaba 1.2 ## NOTE: Strictly speaking, structure and semantics for atom:name
272     ## element outside of Person construct is not defined.
273    
274 wakaba 1.8 ## NOTE: No constraint.
275 wakaba 1.2 };
276    
277     $Element->{$ATOM_NS}->{uri} = {
278 wakaba 1.8 %AtomChecker,
279    
280 wakaba 1.2 ## NOTE: Strictly speaking, structure and semantics for atom:uri
281     ## element outside of Person construct is not defined.
282 wakaba 1.8
283     ## NOTE: Elements are not explicitly disallowed.
284    
285     check_start => sub {
286     my ($self, $item, $element_state) = @_;
287     $element_state->{value} = '';
288     },
289     check_child_text => sub {
290     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
291     $element_state->{value} .= $child_node->data;
292     },
293     check_end => sub {
294     my ($self, $item, $element_state) = @_;
295 wakaba 1.2
296     ## NOTE: There MUST NOT be any white space.
297 wakaba 1.8 Whatpm::URIChecker->check_iri_reference ($element_state->{value}, sub {
298 wakaba 1.18 $self->{onerror}->(@_, node => $item->{node});
299 wakaba 1.19 }, $self->{level});
300 wakaba 1.2
301 wakaba 1.8 $AtomChecker{check_end}->(@_);
302 wakaba 1.2 },
303     };
304    
305     $Element->{$ATOM_NS}->{email} = {
306 wakaba 1.8 %AtomChecker,
307    
308 wakaba 1.2 ## NOTE: Strictly speaking, structure and semantics for atom:email
309     ## element outside of Person construct is not defined.
310 wakaba 1.8
311     ## NOTE: Elements are not explicitly disallowed.
312    
313     check_end => sub {
314     my ($self, $item, $element_state) = @_;
315 wakaba 1.2
316     ## TODO: addr-spec
317 wakaba 1.8 $self->{onerror}->(node => $item->{node},
318     type => 'addr-spec not supported',
319 wakaba 1.18 level => $self->{level}->{uncertain});
320 wakaba 1.2
321 wakaba 1.8 $AtomChecker{check_end}->(@_);
322 wakaba 1.2 },
323     };
324    
325 wakaba 1.1 ## MUST NOT be any white space
326 wakaba 1.8 my %AtomDateConstruct = (
327     %AtomChecker,
328    
329     ## NOTE: It does not explicitly say that there MUST NOT be any element.
330    
331     check_start => sub {
332     my ($self, $item, $element_state) = @_;
333     $element_state->{value} = '';
334     },
335     check_child_text => sub {
336     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
337     $element_state->{value} .= $child_node->data;
338     },
339     check_end => sub {
340     my ($self, $item, $element_state) = @_;
341 wakaba 1.1
342 wakaba 1.4 ## MUST: RFC 3339 |date-time| with uppercase |T| and |Z|
343 wakaba 1.8 if ($element_state->{value} =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})(?>\.[0-9]+)?(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
344 wakaba 1.4 my ($y, $M, $d, $h, $m, $s, $zh, $zm)
345     = ($1, $2, $3, $4, $5, $6, $7, $8);
346 wakaba 1.8 my $node = $item->{node};
347 wakaba 1.4
348     ## Check additional constraints described or referenced in
349     ## comments of ABNF rules for |date-time|.
350     if (0 < $M and $M < 13) {
351     $self->{onerror}->(node => $node, type => 'datetime:bad day',
352 wakaba 1.18 level => $self->{level}->{must})
353 wakaba 1.4 if $d < 1 or
354     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
355     $self->{onerror}->(node => $node, type => 'datetime:bad day',
356 wakaba 1.18 level => $self->{level}->{must})
357 wakaba 1.4 if $M == 2 and $d == 29 and
358     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
359     } else {
360     $self->{onerror}->(node => $node, type => 'datetime:bad month',
361 wakaba 1.18 level => $self->{level}->{must});
362 wakaba 1.4 }
363     $self->{onerror}->(node => $node, type => 'datetime:bad hour',
364 wakaba 1.18 level => $self->{level}->{must}) if $h > 23;
365 wakaba 1.4 $self->{onerror}->(node => $node, type => 'datetime:bad minute',
366 wakaba 1.18 level => $self->{level}->{must}) if $m > 59;
367 wakaba 1.4 $self->{onerror}->(node => $node, type => 'datetime:bad second',
368 wakaba 1.18 level => $self->{level}->{must})
369 wakaba 1.4 if $s > 60; ## NOTE: Validness of leap seconds are not checked.
370     $self->{onerror}->(node => $node, type => 'datetime:bad timezone hour',
371 wakaba 1.18 level => $self->{level}->{must}) if $zh > 23;
372 wakaba 1.4 $self->{onerror}->(node => $node, type => 'datetime:bad timezone minute',
373 wakaba 1.18 level => $self->{level}->{must}) if $zm > 59;
374 wakaba 1.4 } else {
375 wakaba 1.8 $self->{onerror}->(node => $item->{node},
376 wakaba 1.4 type => 'datetime:syntax error',
377 wakaba 1.18 level => $self->{level}->{must});
378 wakaba 1.4 }
379     ## NOTE: SHOULD be accurate as possible (cannot be checked)
380 wakaba 1.1
381 wakaba 1.8 $AtomChecker{check_end}->(@_);
382 wakaba 1.1 },
383 wakaba 1.8 ); # %AtomDateConstruct
384 wakaba 1.1
385 wakaba 1.2 $Element->{$ATOM_NS}->{entry} = {
386 wakaba 1.8 %AtomChecker,
387 wakaba 1.1 is_root => 1,
388 wakaba 1.8 check_child_element => sub {
389     my ($self, $item, $child_el, $child_nsuri, $child_ln,
390     $child_is_transparent, $element_state) = @_;
391    
392     ## NOTE: metadata elements, followed by atom:entry* (no explicit MAY)
393    
394 wakaba 1.22 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
395     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
396 wakaba 1.8 $self->{onerror}->(node => $child_el,
397     type => 'element not allowed:minus',
398 wakaba 1.18 level => $self->{level}->{must});
399 wakaba 1.8 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
400     #
401     } elsif ($child_nsuri eq $ATOM_NS) {
402     my $not_allowed;
403     if ({ # MUST (0, 1)
404     content => 1,
405     id => 1,
406     published => 1,
407     rights => 1,
408     source => 1,
409     summary => 1,
410     title => 1,
411     updated => 1,
412     }->{$child_ln}) {
413     unless ($element_state->{has_element}->{$child_ln}) {
414     $element_state->{has_element}->{$child_ln} = 1;
415     $not_allowed = $element_state->{has_element}->{entry};
416     } else {
417     $not_allowed = 1;
418     }
419     } elsif ($child_ln eq 'link') { # MAY
420     if ($child_el->rel eq $LINK_REL . 'alternate') {
421     my $type = $child_el->get_attribute_ns (undef, 'type');
422     $type = '' unless defined $type;
423     my $hreflang = $child_el->get_attribute_ns (undef, 'hreflang');
424     $hreflang = '' unless defined $hreflang;
425     my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
426     (defined $hreflang ? ':'.$hreflang : '');
427     unless ($element_state->{has_element}->{$key}) {
428     $element_state->{has_element}->{$key} = 1;
429     $element_state->{has_element}->{'link.alternate'} = 1;
430 wakaba 1.1 } else {
431     $not_allowed = 1;
432     }
433     }
434 wakaba 1.8
435     ## NOTE: MAY
436     $not_allowed ||= $element_state->{has_element}->{entry};
437     } elsif ({ # MAY
438     category => 1,
439     contributor => 1,
440     }->{$child_ln}) {
441     $not_allowed = $element_state->{has_element}->{entry};
442 wakaba 1.9 } elsif ($child_ln eq 'author') { # MAY
443     $not_allowed = $element_state->{has_element}->{entry};
444 wakaba 1.12 $element_state->{has_author} = 1; # ./author | ./source/author
445     $element_state->{has_element}->{$child_ln} = 1; # ./author
446 wakaba 1.8 } else {
447     $not_allowed = 1;
448     }
449     if ($not_allowed) {
450 wakaba 1.18 $self->{onerror}->(node => $child_el, type => 'element not allowed',
451     level => $self->{level}->{must});
452 wakaba 1.1 }
453 wakaba 1.15 } elsif ($child_nsuri eq $THR_NS and $child_ln eq 'in-reply-to') {
454     ## ISSUE: Where |thr:in-reply-to| is allowed is not explicit;y
455     ## defined in RFC 4685.
456     #
457 wakaba 1.16 } elsif ($child_nsuri eq $THR_NS and $child_ln eq 'total') {
458     #
459 wakaba 1.8 } else {
460     ## TODO: extension element
461 wakaba 1.18 $self->{onerror}->(node => $child_el, type => 'element not allowed',
462     level => $self->{level}->{must});
463 wakaba 1.8 }
464     },
465     check_child_text => sub {
466     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
467     if ($has_significant) {
468     $self->{onerror}->(node => $child_node, type => 'character not allowed',
469 wakaba 1.18 level => $self->{level}->{must});
470 wakaba 1.1 }
471 wakaba 1.8 },
472     check_end => sub {
473     my ($self, $item, $element_state) = @_;
474 wakaba 1.1
475 wakaba 1.9 if ($element_state->{has_author}) {
476     ## NOTE: There is either a child atom:author element
477     ## or a child atom:source element which contains an atom:author
478     ## child element.
479     #
480     } else {
481     A: {
482     my $root = $item->{node}->owner_document->document_element;
483     if ($root and $root->manakai_local_name eq 'feed') {
484     my $nsuri = $root->namespace_uri;
485     if (defined $nsuri and $nsuri eq $ATOM_NS) {
486     ## NOTE: An Atom Feed Document.
487     for my $root_child (@{$root->child_nodes}) {
488     ## NOTE: Entity references are not supported.
489     next unless $root_child->node_type == 1; # ELEMENT_NODE
490     next unless $root_child->manakai_local_name eq 'author';
491     my $root_child_nsuri = $root_child->namespace_uri;
492     next unless defined $root_child_nsuri;
493     next unless $root_child_nsuri eq $ATOM_NS;
494     last A;
495     }
496     }
497     }
498    
499     $self->{onerror}->(node => $item->{node},
500 wakaba 1.18 type => 'child element missing:atom',
501     text => 'author',
502     level => $self->{level}->{must});
503 wakaba 1.9 } # A
504     }
505    
506 wakaba 1.12 unless ($element_state->{has_element}->{author}) {
507     $item->{parent_state}->{has_no_author_entry} = 1; # for atom:feed's check
508     }
509    
510 wakaba 1.1 ## TODO: If entry's with same id, then updated SHOULD be different
511    
512 wakaba 1.8 unless ($element_state->{has_element}->{id}) { # MUST
513     $self->{onerror}->(node => $item->{node},
514 wakaba 1.18 type => 'child element missing:atom',
515     text => 'id',
516     level => $self->{level}->{must});
517 wakaba 1.8 }
518     unless ($element_state->{has_element}->{title}) { # MUST
519     $self->{onerror}->(node => $item->{node},
520 wakaba 1.18 type => 'child element missing:atom',
521     text => 'title',
522     level => $self->{level}->{must});
523 wakaba 1.8 }
524     unless ($element_state->{has_element}->{updated}) { # MUST
525     $self->{onerror}->(node => $item->{node},
526 wakaba 1.18 type => 'child element missing:atom',
527     text => 'updated',
528     level => $self->{level}->{must});
529 wakaba 1.8 }
530     if (not $element_state->{has_element}->{content} and
531     not $element_state->{has_element}->{'link.alternate'}) {
532     $self->{onerror}->(node => $item->{node},
533 wakaba 1.18 type => 'child element missing:atom:link:alternate',
534     level => $self->{level}->{must});
535 wakaba 1.1 }
536 wakaba 1.14
537     if ($element_state->{require_summary} and
538     not $element_state->{has_element}->{summary}) {
539     $self->{onerror}->(node => $item->{node},
540 wakaba 1.18 type => 'child element missing:atom',
541     text => 'summary',
542     level => $self->{level}->{must});
543 wakaba 1.14 }
544 wakaba 1.1 },
545     };
546    
547     $Element->{$ATOM_NS}->{feed} = {
548 wakaba 1.8 %AtomChecker,
549 wakaba 1.1 is_root => 1,
550 wakaba 1.8 check_child_element => sub {
551     my ($self, $item, $child_el, $child_nsuri, $child_ln,
552     $child_is_transparent, $element_state) = @_;
553    
554     ## NOTE: metadata elements, followed by atom:entry* (no explicit MAY)
555    
556 wakaba 1.22 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
557     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
558 wakaba 1.8 $self->{onerror}->(node => $child_el,
559     type => 'element not allowed:minus',
560 wakaba 1.18 level => $self->{level}->{must});
561 wakaba 1.8 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
562     #
563     } elsif ($child_nsuri eq $ATOM_NS) {
564     my $not_allowed;
565     if ($child_ln eq 'entry') {
566     $element_state->{has_element}->{entry} = 1;
567     } elsif ({ # MUST (0, 1)
568     generator => 1,
569     icon => 1,
570     id => 1,
571     logo => 1,
572     rights => 1,
573     subtitle => 1,
574     title => 1,
575     updated => 1,
576     }->{$child_ln}) {
577     unless ($element_state->{has_element}->{$child_ln}) {
578     $element_state->{has_element}->{$child_ln} = 1;
579     $not_allowed = $element_state->{has_element}->{entry};
580     } else {
581     $not_allowed = 1;
582     }
583     } elsif ($child_ln eq 'link') {
584     my $rel = $child_el->rel;
585     if ($rel eq $LINK_REL . 'alternate') {
586     my $type = $child_el->get_attribute_ns (undef, 'type');
587     $type = '' unless defined $type;
588     my $hreflang = $child_el->get_attribute_ns (undef, 'hreflang');
589     $hreflang = '' unless defined $hreflang;
590     my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
591     (defined $hreflang ? ':'.$hreflang : '');
592     unless ($element_state->{has_element}->{$key}) {
593     $element_state->{has_element}->{$key} = 1;
594 wakaba 1.1 } else {
595     $not_allowed = 1;
596     }
597 wakaba 1.8 } elsif ($rel eq $LINK_REL . 'self') {
598     $element_state->{has_element}->{'link.self'} = 1;
599 wakaba 1.1 }
600 wakaba 1.8
601     ## NOTE: MAY
602     $not_allowed = $element_state->{has_element}->{entry};
603     } elsif ({ # MAY
604     category => 1,
605     contributor => 1,
606     }->{$child_ln}) {
607     $not_allowed = $element_state->{has_element}->{entry};
608 wakaba 1.12 } elsif ($child_ln eq 'author') { # MAY
609     $not_allowed = $element_state->{has_element}->{entry};
610     $element_state->{has_element}->{author} = 1;
611 wakaba 1.8 } else {
612     $not_allowed = 1;
613 wakaba 1.1 }
614 wakaba 1.18 $self->{onerror}->(node => $child_el, type => 'element not allowed',
615     level => $self->{level}->{must})
616 wakaba 1.8 if $not_allowed;
617     } else {
618     ## TODO: extension element
619 wakaba 1.18 $self->{onerror}->(node => $child_el, type => 'element not allowed',
620     level => $self->{level}->{must});
621 wakaba 1.1 }
622 wakaba 1.8 },
623     check_child_text => sub {
624     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
625     if ($has_significant) {
626     $self->{onerror}->(node => $child_node, type => 'character not allowed',
627 wakaba 1.18 level => $self->{level}->{must});
628 wakaba 1.8 }
629     },
630     check_end => sub {
631     my ($self, $item, $element_state) = @_;
632 wakaba 1.10
633 wakaba 1.12 if ($element_state->{has_no_author_entry} and
634     not $element_state->{has_element}->{author}) {
635 wakaba 1.10 $self->{onerror}->(node => $item->{node},
636 wakaba 1.18 type => 'child element missing:atom',
637     text => 'author',
638     level => $self->{level}->{must});
639 wakaba 1.11 ## ISSUE: If there is no |atom:entry| element,
640     ## there should be an |atom:author| element?
641 wakaba 1.10 }
642 wakaba 1.1
643     ## TODO: If entry's with same id, then updated SHOULD be different
644    
645 wakaba 1.8 unless ($element_state->{has_element}->{id}) { # MUST
646     $self->{onerror}->(node => $item->{node},
647 wakaba 1.18 type => 'child element missing:atom',
648     text => 'id',
649     level => $self->{level}->{must});
650 wakaba 1.1 }
651 wakaba 1.8 unless ($element_state->{has_element}->{title}) { # MUST
652     $self->{onerror}->(node => $item->{node},
653 wakaba 1.18 type => 'child element missing:atom',
654     text => 'title',
655     level => $self->{level}->{must});
656 wakaba 1.1 }
657 wakaba 1.8 unless ($element_state->{has_element}->{updated}) { # MUST
658     $self->{onerror}->(node => $item->{node},
659 wakaba 1.18 type => 'element missing:atom',
660     text => 'updated',
661     level => $self->{level}->{must});
662 wakaba 1.1 }
663 wakaba 1.8 unless ($element_state->{has_element}->{'link.self'}) {
664 wakaba 1.18 $self->{onerror}->(node => $item->{node},
665     type => 'element missing:atom:link:self',
666     level => $self->{level}->{should});
667 wakaba 1.3 }
668 wakaba 1.1
669 wakaba 1.8 $AtomChecker{check_end}->(@_);
670 wakaba 1.1 },
671     };
672    
673     $Element->{$ATOM_NS}->{content} = {
674 wakaba 1.8 %AtomChecker,
675     check_start => sub {
676     my ($self, $item, $element_state) = @_;
677     $element_state->{type} = 'text';
678 wakaba 1.13 $element_state->{value} = '';
679 wakaba 1.8 },
680     check_attrs => $GetAtomAttrsChecker->({
681     src => sub {
682     my ($self, $attr, $item, $element_state) = @_;
683    
684     $element_state->{has_src} = 1;
685 wakaba 1.14 $item->{parent_state}->{require_summary} = 1;
686 wakaba 1.8
687     ## NOTE: There MUST NOT be any white space.
688     Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
689 wakaba 1.18 $self->{onerror}->(@_, node => $item->{node});
690 wakaba 1.19 }, $self->{level});
691 wakaba 1.8 },
692     type => sub {
693     my ($self, $attr, $item, $element_state) = @_;
694    
695     $element_state->{has_type} = 1;
696 wakaba 1.1
697 wakaba 1.8 my $value = $attr->value;
698 wakaba 1.1 if ($value eq 'text' or $value eq 'html' or $value eq 'xhtml') {
699     # MUST
700     } else {
701 wakaba 1.3 ## NOTE: MUST be a MIME media type. What is "MIME media type"?
702     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
703     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
704     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
705     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
706     my @type = ($1, $2);
707     my $param = $3;
708     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
709     if (defined $2) {
710     push @type, $1 => $2;
711     } else {
712     my $n = $1;
713     my $v = $2;
714     $v =~ s/\\(.)/$1/gs;
715     push @type, $n => $v;
716     }
717     }
718     require Whatpm::IMTChecker;
719 wakaba 1.20 my $ic = Whatpm::IMTChecker->new;
720     $ic->{level} = $self->{level};
721     $ic->check_imt (sub {
722 wakaba 1.18 $self->{onerror}->(@_, node => $attr);
723 wakaba 1.3 }, @type);
724     } else {
725 wakaba 1.8 $self->{onerror}->(node => $attr, type => 'IMT:syntax error',
726 wakaba 1.18 level => $self->{level}->{must});
727 wakaba 1.3 }
728 wakaba 1.1 }
729    
730 wakaba 1.17 if ({text => 1, html => 1, xhtml => 1}->{$value}) {
731     #
732     } elsif ($value =~ m![+/][Xx][Mm][Ll]\z!) {
733 wakaba 1.8 ## ISSUE: There is no definition for "XML media type" in RFC 3023.
734     ## Is |application/xml-dtd| an XML media type?
735     $value = 'xml';
736     } elsif ($value =~ m!^[Tt][Ee][Xx][Tt]/!) {
737     $value = 'mime_text';
738     } elsif ($value =~ m!^(?>message|multipart)/!i) {
739     $self->{onerror}->(node => $attr, type => 'IMT:composite',
740 wakaba 1.18 level => $self->{level}->{must});
741 wakaba 1.14 $item->{parent_state}->{require_summary} = 1;
742     } else {
743     $item->{parent_state}->{require_summary} = 1;
744 wakaba 1.8 }
745 wakaba 1.1
746 wakaba 1.8 $element_state->{type} = $value;
747     },
748     }, {
749     src => FEATURE_RFC4287,
750     type => FEATURE_RFC4287,
751     }),
752     check_child_element => sub {
753     my ($self, $item, $child_el, $child_nsuri, $child_ln,
754     $child_is_transparent, $element_state) = @_;
755    
756 wakaba 1.22 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
757     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
758 wakaba 1.8 $self->{onerror}->(node => $child_el,
759     type => 'element not allowed:minus',
760 wakaba 1.18 level => $self->{level}->{must});
761 wakaba 1.8 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
762     #
763     } else {
764     if ($element_state->{type} eq 'text' or
765     $element_state->{type} eq 'html' or
766     $element_state->{type} eq 'mime_text') {
767     # MUST NOT
768     $self->{onerror}->(node => $child_el,
769     type => 'element not allowed:atom|content',
770 wakaba 1.18 level => $self->{level}->{must});
771 wakaba 1.8 } elsif ($element_state->{type} eq 'xhtml') {
772     if ($element_state->{has_div}) {
773     $self->{onerror}->(node => $child_el,
774     type => 'element not allowed:atom|content',
775 wakaba 1.18 level => $self->{level}->{must});
776 wakaba 1.8 } else {
777     ## TODO: SHOULD be suitable for handling as HTML [XHTML10]
778     $element_state->{has_div} = 1;
779     }
780     } elsif ($element_state->{type} eq 'xml') {
781     ## MAY contain elements
782     if ($element_state->{has_src}) {
783     $self->{onerror}->(node => $child_el,
784     type => 'element not allowed:atom|content',
785 wakaba 1.18 level => $self->{level}->{must});
786 wakaba 1.1 }
787 wakaba 1.8 } else {
788     ## NOTE: Elements are not explicitly disallowed.
789 wakaba 1.1 }
790 wakaba 1.8 }
791     },
792     ## NOTE: If @src, the element MUST be empty. What is "empty"?
793     ## Is |<e><!----></e>| empty? |<e>&e;</e>| where |&e;| has
794     ## empty replacement tree shuld be empty, since Atom is defined
795     ## in terms of XML Information Set where entities are expanded.
796     ## (but what if |&e;| is an unexpanded entity?)
797     check_child_text => sub {
798     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
799     if ($has_significant) {
800     if ($element_state->{has_src}) {
801     $self->{onerror}->(node => $child_node,
802 wakaba 1.18 type => 'character not allowed:empty',
803     level => $self->{level}->{must});
804 wakaba 1.8 } elsif ($element_state->{type} eq 'xhtml' or
805     $element_state->{type} eq 'xml') {
806     $self->{onerror}->(node => $child_node,
807     type => 'character not allowed:atom|content',
808 wakaba 1.18 level => $self->{level}->{must});
809 wakaba 1.1 }
810 wakaba 1.8 }
811 wakaba 1.1
812 wakaba 1.13 $element_state->{value} .= $child_node->data;
813 wakaba 1.1
814 wakaba 1.8 ## NOTE: type=text/* has no further restriction (i.e. the content don't
815     ## have to conform to the definition of the type).
816     },
817     check_end => sub {
818     my ($self, $item, $element_state) = @_;
819 wakaba 1.1
820 wakaba 1.8 if ($element_state->{has_src}) {
821     if (not $element_state->{has_type}) {
822     $self->{onerror}->(node => $item->{node},
823 wakaba 1.18 type => 'attribute missing',
824     text => 'type',
825     level => $self->{level}->{should});
826 wakaba 1.14 } elsif ($element_state->{type} eq 'text' or
827     $element_state->{type} eq 'html' or
828     $element_state->{type} eq 'xhtml') {
829 wakaba 1.8 $self->{onerror}
830     ->(node => $item->{node}->get_attribute_node_ns (undef, 'type'),
831 wakaba 1.18 type => 'not IMT', level => $self->{level}->{must});
832 wakaba 1.1 }
833 wakaba 1.8 }
834 wakaba 1.1
835 wakaba 1.8 if ($element_state->{type} eq 'xhtml') {
836     unless ($element_state->{has_div}) {
837     $self->{onerror}->(node => $item->{node},
838 wakaba 1.21 type => 'child element missing',
839 wakaba 1.18 text => 'div',
840     level => $self->{level}->{must});
841 wakaba 1.1 }
842 wakaba 1.11 } elsif ($element_state->{type} eq 'html') {
843     ## TODO: SHOULD be suitable for handling as HTML [HTML4]
844     # markup MUST be escaped
845 wakaba 1.13 $self->{onsubdoc}->({s => $element_state->{value},
846 wakaba 1.11 container_node => $item->{node},
847     media_type => 'text/html',
848     inner_html_element => 'div',
849     is_char_string => 1});
850 wakaba 1.8 } elsif ($element_state->{type} eq 'xml') {
851 wakaba 1.3 ## NOTE: SHOULD be suitable for handling as $value.
852 wakaba 1.1 ## If no @src, this would normally mean it contains a
853     ## single child element that would serve as the root element.
854 wakaba 1.8 $self->{onerror}->(node => $item->{node},
855     type => 'atom|content not supported',
856 wakaba 1.18 text => $item->{node}->get_attribute_ns
857     (undef, 'type'),
858     level => $self->{level}->{uncertain});
859 wakaba 1.8 } elsif ($element_state->{type} eq 'text' or
860     $element_state->{type} eq 'mime-text') {
861     #
862 wakaba 1.1 } else {
863     ## TODO: $s = valid Base64ed [RFC 3548] where
864     ## MAY leading and following "white space" (what?)
865     ## and lines separated by a single U+000A
866 wakaba 1.3
867     ## NOTE: SHOULD be suitable for the indicated media type.
868 wakaba 1.8 $self->{onerror}->(node => $item->{node},
869     type => 'atom|content not supported',
870 wakaba 1.18 text => $item->{node}->get_attribute_ns
871     (undef, 'type'),
872     level => $self->{level}->{uncertain});
873 wakaba 1.8 }
874 wakaba 1.1
875 wakaba 1.8 $AtomChecker{check_end}->(@_);
876 wakaba 1.1 },
877     };
878 wakaba 1.6 ## TODO: Tests for <html:nest/> in <atom:content/>
879 wakaba 1.1
880 wakaba 1.8 $Element->{$ATOM_NS}->{author} = \%AtomPersonConstruct;
881 wakaba 1.1
882     $Element->{$ATOM_NS}->{category} = {
883 wakaba 1.8 %AtomChecker,
884     check_attrs => $GetAtomAttrsChecker->({
885 wakaba 1.1 label => sub { 1 }, # no value constraint
886 wakaba 1.2 scheme => sub { # NOTE: No MUST.
887     my ($self, $attr) = @_;
888     ## NOTE: There MUST NOT be any white space.
889     Whatpm::URIChecker->check_iri ($attr->value, sub {
890 wakaba 1.18 $self->{onerror}->(@_, node => $attr);
891 wakaba 1.19 }, $self->{level});
892 wakaba 1.2 },
893 wakaba 1.8 term => sub {
894     my ($self, $attr, $item, $element_state) = @_;
895    
896     ## NOTE: No value constraint.
897    
898     $element_state->{has_term} = 1;
899     },
900     }, {
901     label => FEATURE_RFC4287,
902     scheme => FEATURE_RFC4287,
903     term => FEATURE_RFC4287,
904 wakaba 1.1 }),
905 wakaba 1.8 check_end => sub {
906     my ($self, $item, $element_state) = @_;
907     unless ($element_state->{has_term}) {
908     $self->{onerror}->(node => $item->{node},
909 wakaba 1.18 type => 'attribute missing',
910     text => 'term',
911     level => $self->{level}->{must});
912 wakaba 1.1 }
913    
914 wakaba 1.8 $AtomChecker{check_end}->(@_);
915 wakaba 1.1 },
916 wakaba 1.8 ## NOTE: Meaning of content is not defined.
917 wakaba 1.1 };
918    
919 wakaba 1.8 $Element->{$ATOM_NS}->{contributor} = \%AtomPersonConstruct;
920 wakaba 1.6
921     ## TODO: Anything below does not support <html:nest/> yet.
922 wakaba 1.1
923     $Element->{$ATOM_NS}->{generator} = {
924 wakaba 1.8 %AtomChecker,
925     check_attrs => $GetAtomAttrsChecker->({
926 wakaba 1.2 uri => sub { # MUST
927     my ($self, $attr) = @_;
928     ## NOTE: There MUST NOT be any white space.
929     Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
930 wakaba 1.18 $self->{onerror}->(@_, node => $attr);
931 wakaba 1.19 }, $self->{level});
932 wakaba 1.2 ## NOTE: Dereferencing SHOULD produce a representation
933     ## that is relevant to the agent.
934     },
935 wakaba 1.1 version => sub { 1 }, # no value constraint
936 wakaba 1.8 }, {
937     uri => FEATURE_RFC4287,
938     version => FEATURE_RFC4287,
939 wakaba 1.1 }),
940    
941 wakaba 1.8 ## NOTE: Elements are not explicitly disallowed.
942 wakaba 1.1
943 wakaba 1.8 ## NOTE: Content MUST be a string that is a human-readable name for
944     ## the generating agent.
945 wakaba 1.1 };
946    
947     $Element->{$ATOM_NS}->{icon} = {
948 wakaba 1.8 %AtomChecker,
949     check_start => sub {
950     my ($self, $item, $element_state) = @_;
951     $element_state->{value} = '';
952     },
953     ## NOTE: Elements are not explicitly disallowed.
954     check_child_text => sub {
955     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
956     $element_state->{value} .= $child_node->data;
957     },
958     check_end => sub {
959     my ($self, $item, $element_state) = @_;
960 wakaba 1.1
961 wakaba 1.2 ## NOTE: No MUST.
962     ## NOTE: There MUST NOT be any white space.
963 wakaba 1.8 Whatpm::URIChecker->check_iri_reference ($element_state->{value}, sub {
964 wakaba 1.18 $self->{onerror}->(@_, node => $item->{node});
965 wakaba 1.19 }, $self->{level});
966 wakaba 1.2
967 wakaba 1.1 ## NOTE: Image SHOULD be 1:1 and SHOULD be small
968    
969 wakaba 1.8 $AtomChecker{check_end}->(@_);
970 wakaba 1.1 },
971     };
972    
973     $Element->{$ATOM_NS}->{id} = {
974 wakaba 1.8 %AtomChecker,
975     check_start => sub {
976     my ($self, $item, $element_state) = @_;
977     $element_state->{value} = '';
978     },
979     ## NOTE: Elements are not explicitly disallowed.
980     check_child_text => sub {
981     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
982     $element_state->{value} .= $child_node->data;
983     },
984     check_end => sub {
985     my ($self, $item, $element_state) = @_;
986 wakaba 1.2
987     ## NOTE: There MUST NOT be any white space.
988 wakaba 1.8 Whatpm::URIChecker->check_iri ($element_state->{value}, sub {
989 wakaba 1.18 $self->{onerror}->(@_, node => $item->{node});
990 wakaba 1.19 }, $self->{level});
991 wakaba 1.2 ## TODO: SHOULD be normalized
992    
993 wakaba 1.8 $AtomChecker{check_end}->(@_);
994 wakaba 1.2 },
995     };
996    
997 wakaba 1.15 my $AtomIMTAttrChecker = sub {
998 wakaba 1.3 my ($self, $attr) = @_;
999     my $value = $attr->value;
1000     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
1001     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
1002     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
1003     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
1004     my @type = ($1, $2);
1005     my $param = $3;
1006     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
1007     if (defined $2) {
1008     push @type, $1 => $2;
1009     } else {
1010     my $n = $1;
1011     my $v = $2;
1012     $v =~ s/\\(.)/$1/gs;
1013     push @type, $n => $v;
1014     }
1015     }
1016     require Whatpm::IMTChecker;
1017 wakaba 1.20 my $ic = Whatpm::IMTChecker->new;
1018     $ic->{level} = $self->{level};
1019     $ic->check_imt (sub {
1020 wakaba 1.18 $self->{onerror}->(@_, node => $attr);
1021 wakaba 1.3 }, @type);
1022     } else {
1023 wakaba 1.18 $self->{onerror}->(node => $attr, type => 'IMT:syntax error',
1024     level => $self->{level}->{must});
1025 wakaba 1.3 }
1026 wakaba 1.15 }; # $AtomIMTAttrChecker
1027    
1028     my $AtomIRIReferenceAttrChecker = sub {
1029     my ($self, $attr) = @_;
1030     ## NOTE: There MUST NOT be any white space.
1031     Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
1032 wakaba 1.18 $self->{onerror}->(@_, node => $attr);
1033 wakaba 1.19 }, $self->{level});
1034 wakaba 1.15 }; # $AtomIRIReferenceAttrChecker
1035    
1036     $Element->{$ATOM_NS}->{link} = {
1037     %AtomChecker,
1038     check_attrs => $GetAtomAttrsChecker->({
1039     href => $AtomIRIReferenceAttrChecker,
1040     hreflang => $AtomLanguageTagAttrChecker,
1041     length => sub { }, # No MUST; in octets.
1042     rel => sub { # MUST
1043     my ($self, $attr) = @_;
1044     my $value = $attr->value;
1045     if ($value =~ /\A(?>[0-9A-Za-z._~!\$&'()*+,;=\x{A0}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFEF}\x{10000}-\x{1FFFD}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}\x{40000}-\x{4FFFD}\x{50000}-\x{5FFFD}\x{60000}-\x{6FFFD}\x{70000}-\x{7FFFD}\x{80000}-\x{8FFFD}\x{90000}-\x{9FFFD}\x{A0000}-\x{AFFFD}\x{B0000}-\x{BFFFD}\x{C0000}-\x{CFFFD}\x{D0000}-\x{DFFFD}\x{E1000}-\x{EFFFD}-]|%[0-9A-Fa-f][0-9A-Fa-f]|\@)+\z/) {
1046     $value = $LINK_REL . $value;
1047     }
1048    
1049     ## NOTE: There MUST NOT be any white space.
1050     Whatpm::URIChecker->check_iri ($value, sub {
1051 wakaba 1.18 $self->{onerror}->(@_, node => $attr);
1052 wakaba 1.19 }, $self->{level});
1053 wakaba 1.15
1054     ## TODO: Warn if unregistered
1055 wakaba 1.16
1056     ## TODO: rel=license [RFC 4946]
1057     ## MUST NOT multiple rel=license with same href="",type="" pairs
1058     ## href="" SHOULD be dereferencable
1059     ## title="" SHOULD be there if multiple rel=license
1060     ## MUST NOT "unspecified" and other rel=license
1061 wakaba 1.3 },
1062 wakaba 1.15 title => sub { }, # No MUST
1063     type => $AtomIMTAttrChecker,
1064     ## NOTE: MUST be a MIME media type. What is "MIME media type"?
1065 wakaba 1.8 }, {
1066     href => FEATURE_RFC4287,
1067     hreflang => FEATURE_RFC4287,
1068     length => FEATURE_RFC4287,
1069     rel => FEATURE_RFC4287,
1070     title => FEATURE_RFC4287,
1071     type => FEATURE_RFC4287,
1072 wakaba 1.16
1073     ## TODO: thr:count
1074     ## TODO: thr:updated
1075 wakaba 1.2 }),
1076 wakaba 1.8 check_start => sub {
1077     my ($self, $item, $element_state) = @_;
1078 wakaba 1.2
1079 wakaba 1.8 unless ($item->{node}->has_attribute_ns (undef, 'href')) { # MUST
1080     $self->{onerror}->(node => $item->{node},
1081 wakaba 1.18 type => 'attribute missing',
1082     text => 'href',
1083     level => $self->{level}->{must});
1084 wakaba 1.2 }
1085    
1086 wakaba 1.8 if ($item->{node}->rel eq $LINK_REL . 'enclosure' and
1087     not $item->{node}->has_attribute_ns (undef, 'length')) {
1088 wakaba 1.18 $self->{onerror}->(node => $item->{node},
1089     type => 'attribute missing',
1090     text => 'length',
1091     level => $self->{level}->{should});
1092 wakaba 1.2 }
1093     },
1094     };
1095    
1096     $Element->{$ATOM_NS}->{logo} = {
1097 wakaba 1.8 %AtomChecker,
1098     ## NOTE: Child elements are not explicitly disallowed
1099     check_start => sub {
1100     my ($self, $item, $element_state) = @_;
1101     $element_state->{value} = '';
1102     },
1103     ## NOTE: Elements are not explicitly disallowed.
1104     check_child_text => sub {
1105     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1106     $element_state->{value} .= $child_node->data;
1107     },
1108     check_end => sub {
1109     my ($self, $item, $element_state) = @_;
1110 wakaba 1.1
1111 wakaba 1.2 ## NOTE: There MUST NOT be any white space.
1112 wakaba 1.8 Whatpm::URIChecker->check_iri_reference ($element_state->{value}, sub {
1113 wakaba 1.18 $self->{onerror}->(@_, node => $item->{node});
1114 wakaba 1.19 }, $self->{level});
1115 wakaba 1.2
1116     ## NOTE: Image SHOULD be 2:1
1117    
1118 wakaba 1.8 $AtomChecker{check_end}->(@_);
1119 wakaba 1.2 },
1120     };
1121    
1122 wakaba 1.8 $Element->{$ATOM_NS}->{published} = \%AtomDateConstruct;
1123 wakaba 1.2
1124 wakaba 1.17 $Element->{$ATOM_NS}->{rights} = \%AtomTextConstruct;
1125 wakaba 1.3 ## NOTE: SHOULD NOT be used to convey machine-readable information.
1126 wakaba 1.2
1127     $Element->{$ATOM_NS}->{source} = {
1128 wakaba 1.8 %AtomChecker,
1129     check_child_element => sub {
1130     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1131     $child_is_transparent, $element_state) = @_;
1132    
1133 wakaba 1.22 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
1134     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
1135 wakaba 1.8 $self->{onerror}->(node => $child_el,
1136     type => 'element not allowed:minus',
1137 wakaba 1.18 level => $self->{level}->{must});
1138 wakaba 1.8 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1139     #
1140     } elsif ($child_nsuri eq $ATOM_NS) {
1141     my $not_allowed;
1142     if ($child_ln eq 'entry') {
1143     $element_state->{has_element}->{entry} = 1;
1144     } elsif ({
1145     generator => 1,
1146     icon => 1,
1147     id => 1,
1148     logo => 1,
1149     rights => 1,
1150     subtitle => 1,
1151     title => 1,
1152     updated => 1,
1153     }->{$child_ln}) {
1154     unless ($element_state->{has_element}->{$child_ln}) {
1155     $element_state->{has_element}->{$child_ln} = 1;
1156     $not_allowed = $element_state->{has_element}->{entry};
1157     } else {
1158     $not_allowed = 1;
1159     }
1160     } elsif ($child_ln eq 'link') {
1161 wakaba 1.17 if ($child_el->rel eq $LINK_REL . 'alternate') {
1162     my $type = $child_el->get_attribute_ns (undef, 'type');
1163 wakaba 1.8 $type = '' unless defined $type;
1164 wakaba 1.17 my $hreflang = $child_el->get_attribute_ns (undef, 'hreflang');
1165 wakaba 1.8 $hreflang = '' unless defined $hreflang;
1166     my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
1167     (defined $hreflang ? ':'.$hreflang : '');
1168     unless ($element_state->{has_element}->{$key}) {
1169     $element_state->{has_element}->{$key} = 1;
1170 wakaba 1.2 } else {
1171     $not_allowed = 1;
1172     }
1173     }
1174 wakaba 1.8 $not_allowed ||= $element_state->{has_element}->{entry};
1175     } elsif ({
1176     category => 1,
1177     contributor => 1,
1178     }->{$child_ln}) {
1179     $not_allowed = $element_state->{has_element}->{entry};
1180 wakaba 1.9 } elsif ($child_ln eq 'author') {
1181     $not_allowed = $element_state->{has_element}->{entry};
1182     $item->{parent_state}->{has_author} = 1; # parent::atom:entry's flag
1183 wakaba 1.8 } else {
1184     $not_allowed = 1;
1185     }
1186     if ($not_allowed) {
1187 wakaba 1.18 $self->{onerror}->(node => $child_el, type => 'element not allowed',
1188     level => $self->{level}->{must});
1189 wakaba 1.2 }
1190 wakaba 1.8 } else {
1191     ## TODO: extension element
1192 wakaba 1.18 $self->{onerror}->(node => $child_el, type => 'element not allowed',
1193     level => $self->{level}->{must});
1194 wakaba 1.8 }
1195     },
1196     check_child_text => sub {
1197     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1198     if ($has_significant) {
1199     $self->{onerror}->(node => $child_node, type => 'character not allowed',
1200 wakaba 1.18 level => $self->{level}->{must});
1201 wakaba 1.2 }
1202 wakaba 1.1 },
1203     };
1204 wakaba 1.2
1205 wakaba 1.8 $Element->{$ATOM_NS}->{subtitle} = \%AtomTextConstruct;
1206 wakaba 1.2
1207 wakaba 1.8 $Element->{$ATOM_NS}->{summary} = \%AtomTextConstruct;
1208 wakaba 1.2
1209 wakaba 1.8 $Element->{$ATOM_NS}->{title} = \%AtomTextConstruct;
1210 wakaba 1.2
1211 wakaba 1.8 $Element->{$ATOM_NS}->{updated} = \%AtomDateConstruct;
1212 wakaba 1.2
1213     ## TODO: signature element
1214    
1215     ## TODO: simple extension element and structured extension element
1216 wakaba 1.1
1217 wakaba 1.15 ## -- Atom Threading 1.0 [RFC 4685]
1218    
1219     $Element->{$THR_NS}->{''} = {
1220     %AtomChecker,
1221     status => 0,
1222     };
1223    
1224     ## ISSUE: Strictly speaking, thr:* element/attribute,
1225     ## where * is an undefined local name, is not disallowed.
1226    
1227     $Element->{$THR_NS}->{'in-reply-to'} = {
1228     %AtomChecker,
1229     status => FEATURE_RFC4685,
1230     check_attrs => $GetAtomAttrsChecker->({
1231     href => $AtomIRIReferenceAttrChecker,
1232     ## TODO: fact-level.
1233     ## TODO: MUST be dereferencable.
1234     ref => sub {
1235     my ($self, $attr, $item, $element_state) = @_;
1236     $element_state->{has_ref} = 1;
1237    
1238     ## NOTE: Same as |atom:id|.
1239     ## NOTE: There MUST NOT be any white space.
1240     Whatpm::URIChecker->check_iri ($attr->value, sub {
1241 wakaba 1.18 $self->{onerror}->(@_, node => $attr);
1242 wakaba 1.19 }, $self->{level});
1243 wakaba 1.15
1244     ## TODO: Check against ID guideline...
1245     },
1246     source => $AtomIRIReferenceAttrChecker,
1247     ## TODO: fact-level.
1248     ## TODO: MUST be dereferencable.
1249     type => $AtomIMTAttrChecker,
1250     ## TODO: fact-level.
1251     }, {
1252     href => FEATURE_RFC4685,
1253     source => FEATURE_RFC4685,
1254     ref => FEATURE_RFC4685,
1255     type => FEATURE_RFC4685,
1256     }),
1257     check_end => sub {
1258     my ($self, $item, $element_state) = @_;
1259    
1260     unless ($element_state->{has_ref}) {
1261     $self->{onerror}->(node => $item->{node},
1262 wakaba 1.18 type => 'attribute missing',
1263     text => 'ref',
1264     level => $self->{level}->{must});
1265 wakaba 1.15 }
1266    
1267     $AtomChecker{check_end}->(@_);
1268     },
1269     ## NOTE: Content model has no constraint.
1270     };
1271 wakaba 1.16
1272     $Element->{$THR_NS}->{total} = {
1273     %AtomChecker,
1274     check_start => sub {
1275     my ($self, $item, $element_state) = @_;
1276     $element_state->{value} = '';
1277     },
1278     check_child_element => sub {
1279     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1280     $child_is_transparent, $element_state) = @_;
1281    
1282 wakaba 1.22 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
1283     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
1284 wakaba 1.16 $self->{onerror}->(node => $child_el,
1285     type => 'element not allowed:minus',
1286 wakaba 1.18 level => $self->{level}->{must});
1287 wakaba 1.16 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1288     #
1289     } else {
1290     $self->{onerror}->(node => $child_el,
1291     type => 'element not allowed',
1292 wakaba 1.18 level => $self->{level}->{must});
1293 wakaba 1.16 }
1294     },
1295     check_child_text => sub {
1296     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1297     $element_state->{value} .= $child_node->data;
1298     },
1299     check_end => sub {
1300     my ($self, $item, $element_state) = @_;
1301    
1302     ## NOTE: xsd:nonNegativeInteger
1303     unless ($element_state->{value} =~ /\A(?>[0-9]+|-0+)\z/) {
1304     $self->{onerror}->(node => $item->{node},
1305 wakaba 1.18 type => 'invalid attribute value',
1306     level => $self->{level}->{must});
1307 wakaba 1.16 }
1308    
1309     $AtomChecker{check_end}->(@_);
1310     },
1311     };
1312    
1313     ## TODO: fh:complete
1314    
1315     ## TODO: fh:archive
1316    
1317     ## TODO: Check as archive document, page feed document, ...
1318    
1319     ## TODO: APP [RFC 5023]
1320 wakaba 1.15
1321 wakaba 1.1 $Whatpm::ContentChecker::Namespace->{$ATOM_NS}->{loaded} = 1;
1322 wakaba 1.15 $Whatpm::ContentChecker::Namespace->{$THR_NS}->{loaded} = 1;
1323 wakaba 1.1
1324     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24