/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/Atom.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/Atom.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.22 - (show annotations) (download)
Sat Sep 20 06:10:18 2008 UTC (16 years, 1 month ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.21: +16 -7 lines
++ whatpm/t/ChangeLog	20 Sep 2008 05:50:38 -0000
	* content-model-1.dat: Test data for interactive contents are
	added (cf. HTML5 revision 2018).

2008-09-20  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	20 Sep 2008 05:46:21 -0000
2008-09-20  Wakaba  <wakaba@suika.fam.cx>

	* ContentChecker.pm ($IsInHTMLInteractiveContent): New.

++ whatpm/Whatpm/ContentChecker/ChangeLog	20 Sep 2008 05:51:55 -0000
	* HTML.pm, Atom.pm: Interactrive content implementation synced
	with the spec (HTML5 revision 2018).

2008-09-20  Wakaba  <wakaba@suika.fam.cx>

1 package Whatpm::ContentChecker;
2 use strict;
3 require Whatpm::ContentChecker;
4
5 require Whatpm::URIChecker;
6
7 my $ATOM_NS = q<http://www.w3.org/2005/Atom>;
8 my $THR_NS = q<http://purl.org/syndication/thread/1.0>;
9 my $FH_NS = q<http://purl.org/syndication/history/1.0>;
10 my $LINK_REL = q<http://www.iana.org/assignments/relation/>;
11
12 sub FEATURE_RFC4287 () {
13 Whatpm::ContentChecker::FEATURE_STATUS_CR |
14 Whatpm::ContentChecker::FEATURE_ALLOWED
15 }
16
17 sub FEATURE_RFC4685 () {
18 Whatpm::ContentChecker::FEATURE_STATUS_CR |
19 Whatpm::ContentChecker::FEATURE_ALLOWED
20 }
21
22 ## MUST be well-formed XML (RFC 4287 references XML 1.0 REC 20040204)
23
24 ## NOTE: Commants and PIs are not explicitly allowed.
25
26 our $IsInHTMLInteractiveContent; # See Whatpm::ContentChecker.
27
28 our $AttrChecker;
29
30 ## Any element MAY have xml:base, xml:lang
31 my $GetAtomAttrsChecker = sub {
32 my $element_specific_checker = shift;
33 my $element_specific_status = shift;
34 return sub {
35 my ($self, $todo, $element_state) = @_;
36 for my $attr (@{$todo->{node}->attributes}) {
37 my $attr_ns = $attr->namespace_uri;
38 $attr_ns = '' unless defined $attr_ns;
39 my $attr_ln = $attr->manakai_local_name;
40 my $checker;
41 if ($attr_ns eq '') {
42 $checker = $element_specific_checker->{$attr_ln};
43 } else {
44 $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
45 || $AttrChecker->{$attr_ns}->{''};
46 }
47 if ($checker) {
48 $checker->($self, $attr, $todo, $element_state);
49 } elsif ($attr_ln eq '') {
50 #
51 } else {
52 $self->{onerror}->(node => $attr,
53 type => 'unknown attribute',
54 level => $self->{level}->{uncertain});
55 ## ISSUE: No comformance createria for unknown attributes in the spec
56 }
57
58 if ($attr_ns eq '') {
59 $self->_attr_status_info ($attr, $element_specific_status->{$attr_ln});
60 }
61 ## TODO: global attribute
62 }
63 };
64 }; # $GetAtomAttrsChecker
65
66 my $AtomLanguageTagAttrChecker = sub {
67 ## NOTE: See also $HTMLLanguageTagAttrChecker in HTML.pm.
68
69 my ($self, $attr) = @_;
70 my $value = $attr->value;
71 require Whatpm::LangTag;
72 Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
73 $self->{onerror}->(@_, node => $attr);
74 }, $self->{level});
75 ## ISSUE: RFC 4646 (3066bis)?
76 }; # $AtomLanguageTagAttrChecker
77
78 my %AtomChecker = (
79 %Whatpm::ContentChecker::AnyChecker,
80 status => FEATURE_RFC4287,
81 check_attrs => $GetAtomAttrsChecker->({}, {}),
82 );
83
84 my %AtomTextConstruct = (
85 %AtomChecker,
86 check_start => sub {
87 my ($self, $item, $element_state) = @_;
88 $element_state->{type} = 'text';
89 $element_state->{value} = '';
90 },
91 check_attrs => $GetAtomAttrsChecker->({
92 type => sub {
93 my ($self, $attr, $item, $element_state) = @_;
94 my $value = $attr->value;
95 if ($value eq 'text' or $value eq 'html' or $value eq 'xhtml') { # MUST
96 $element_state->{type} = $value;
97 } else {
98 ## NOTE: IMT MUST NOT be used here.
99 $self->{onerror}->(node => $attr,
100 type => 'invalid attribute value',
101 level => $self->{level}->{must});
102 }
103 }, # checked in |checker|
104 }, {
105 type => FEATURE_RFC4287,
106 }),
107 check_child_element => sub {
108 my ($self, $item, $child_el, $child_nsuri, $child_ln,
109 $child_is_transparent, $element_state) = @_;
110 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
111 $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
112 $self->{onerror}->(node => $child_el,
113 type => 'element not allowed:minus',
114 level => $self->{level}->{must});
115 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
116 #
117 } else {
118 if ($element_state->{type} eq 'text' or
119 $element_state->{type} eq 'html') { # MUST NOT
120 $self->{onerror}->(node => $child_el,
121 type => 'element not allowed:atom|TextConstruct',
122 level => $self->{level}->{must});
123 } elsif ($element_state->{type} eq 'xhtml') {
124 if ($child_nsuri eq q<http://www.w3.org/1999/xhtml> and
125 $child_ln eq 'div') { # MUST
126 if ($element_state->{has_div}) {
127 $self->{onerror}
128 ->(node => $child_el,
129 type => 'element not allowed:atom|TextConstruct',
130 level => $self->{level}->{must});
131 } else {
132 $element_state->{has_div} = 1;
133 ## TODO: SHOULD be suitable for handling as HTML [XHTML10]
134 }
135 } else {
136 $self->{onerror}->(node => $child_el,
137 type => 'element not allowed:atom|TextConstruct',
138 level => $self->{level}->{must});
139 }
140 } else {
141 die "atom:TextConstruct type error: $element_state->{type}";
142 }
143 }
144 },
145 check_child_text => sub {
146 my ($self, $item, $child_node, $has_significant, $element_state) = @_;
147 if ($element_state->{type} eq 'text') {
148 #
149 } elsif ($element_state->{type} eq 'html') {
150 $element_state->{value} .= $child_node->text_content;
151 ## NOTE: Markup MUST be escaped.
152 } elsif ($element_state->{type} eq 'xhtml') {
153 if ($has_significant) {
154 $self->{onerror}->(node => $child_node,
155 type => 'character not allowed:atom|TextConstruct',
156 level => $self->{level}->{must});
157 }
158 } else {
159 die "atom:TextConstruct type error: $element_state->{type}";
160 }
161 },
162 check_end => sub {
163 my ($self, $item, $element_state) = @_;
164 if ($element_state->{type} eq 'xhtml') {
165 unless ($element_state->{has_div}) {
166 $self->{onerror}->(node => $item->{node},
167 type => 'child element missing',
168 text => 'div',
169 level => $self->{level}->{must});
170 }
171 } elsif ($element_state->{type} eq 'html') {
172 ## TODO: SHOULD be suitable for handling as HTML [HTML4]
173 # markup MUST be escaped
174 $self->{onsubdoc}->({s => $element_state->{value},
175 container_node => $item->{node},
176 media_type => 'text/html',
177 inner_html_element => 'div',
178 is_char_string => 1});
179 }
180
181 $AtomChecker{check_end}->(@_);
182 },
183 ); # %AtomTextConstruct
184
185 my %AtomPersonConstruct = (
186 %AtomChecker,
187 check_child_element => sub {
188 my ($self, $item, $child_el, $child_nsuri, $child_ln,
189 $child_is_transparent, $element_state) = @_;
190 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
191 $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
192 $self->{onerror}->(node => $child_el,
193 type => 'element not allowed:minus',
194 level => $self->{level}->{must});
195 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
196 #
197 } elsif ($child_nsuri eq $ATOM_NS) {
198 if ($child_ln eq 'name') {
199 if ($element_state->{has_name}) {
200 $self->{onerror}
201 ->(node => $child_el,
202 type => 'element not allowed:atom|PersonConstruct',
203 level => $self->{level}->{must});
204 } else {
205 $element_state->{has_name} = 1;
206 }
207 } elsif ($child_ln eq 'uri') {
208 if ($element_state->{has_uri}) {
209 $self->{onerror}
210 ->(node => $child_el,
211 type => 'element not allowed:atom|PersonConstruct',
212 level => $self->{level}->{must});
213 } else {
214 $element_state->{has_uri} = 1;
215 }
216 } elsif ($child_ln eq 'email') {
217 if ($element_state->{has_email}) {
218 $self->{onerror}
219 ->(node => $child_el,
220 type => 'element not allowed:atom|PersonConstruct',
221 level => $self->{level}->{must});
222 } else {
223 $element_state->{has_email} = 1;
224 }
225 } else {
226 $self->{onerror}
227 ->(node => $child_el,
228 type => 'element not allowed:atom|PersonConstruct',
229 level => $self->{level}->{must});
230 }
231 } else {
232 $self->{onerror}
233 ->(node => $child_el,
234 type => 'element not allowed:atom|PersonConstruct',
235 level => $self->{level}->{must});
236 }
237 ## TODO: extension element
238 },
239 check_child_text => sub {
240 my ($self, $item, $child_node, $has_significant, $element_state) = @_;
241 if ($has_significant) {
242 $self->{onerror}->(node => $child_node,
243 type => 'character not allowed:atom|PersonConstruct',
244 level => $self->{level}->{must});
245 }
246 },
247 check_end => sub {
248 my ($self, $item, $element_state) = @_;
249
250 unless ($element_state->{has_name}) {
251 $self->{onerror}->(node => $item->{node},
252 type => 'child element missing:atom',
253 text => 'name',
254 level => $self->{level}->{must});
255 }
256
257 $AtomChecker{check_end}->(@_);
258 },
259 ); # %AtomPersonConstruct
260
261 our $Element;
262
263 $Element->{$ATOM_NS}->{''} = {
264 %AtomChecker,
265 status => 0,
266 };
267
268 $Element->{$ATOM_NS}->{name} = {
269 %AtomChecker,
270
271 ## NOTE: Strictly speaking, structure and semantics for atom:name
272 ## element outside of Person construct is not defined.
273
274 ## NOTE: No constraint.
275 };
276
277 $Element->{$ATOM_NS}->{uri} = {
278 %AtomChecker,
279
280 ## NOTE: Strictly speaking, structure and semantics for atom:uri
281 ## element outside of Person construct is not defined.
282
283 ## NOTE: Elements are not explicitly disallowed.
284
285 check_start => sub {
286 my ($self, $item, $element_state) = @_;
287 $element_state->{value} = '';
288 },
289 check_child_text => sub {
290 my ($self, $item, $child_node, $has_significant, $element_state) = @_;
291 $element_state->{value} .= $child_node->data;
292 },
293 check_end => sub {
294 my ($self, $item, $element_state) = @_;
295
296 ## NOTE: There MUST NOT be any white space.
297 Whatpm::URIChecker->check_iri_reference ($element_state->{value}, sub {
298 $self->{onerror}->(@_, node => $item->{node});
299 }, $self->{level});
300
301 $AtomChecker{check_end}->(@_);
302 },
303 };
304
305 $Element->{$ATOM_NS}->{email} = {
306 %AtomChecker,
307
308 ## NOTE: Strictly speaking, structure and semantics for atom:email
309 ## element outside of Person construct is not defined.
310
311 ## NOTE: Elements are not explicitly disallowed.
312
313 check_end => sub {
314 my ($self, $item, $element_state) = @_;
315
316 ## TODO: addr-spec
317 $self->{onerror}->(node => $item->{node},
318 type => 'addr-spec not supported',
319 level => $self->{level}->{uncertain});
320
321 $AtomChecker{check_end}->(@_);
322 },
323 };
324
325 ## MUST NOT be any white space
326 my %AtomDateConstruct = (
327 %AtomChecker,
328
329 ## NOTE: It does not explicitly say that there MUST NOT be any element.
330
331 check_start => sub {
332 my ($self, $item, $element_state) = @_;
333 $element_state->{value} = '';
334 },
335 check_child_text => sub {
336 my ($self, $item, $child_node, $has_significant, $element_state) = @_;
337 $element_state->{value} .= $child_node->data;
338 },
339 check_end => sub {
340 my ($self, $item, $element_state) = @_;
341
342 ## MUST: RFC 3339 |date-time| with uppercase |T| and |Z|
343 if ($element_state->{value} =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})(?>\.[0-9]+)?(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
344 my ($y, $M, $d, $h, $m, $s, $zh, $zm)
345 = ($1, $2, $3, $4, $5, $6, $7, $8);
346 my $node = $item->{node};
347
348 ## Check additional constraints described or referenced in
349 ## comments of ABNF rules for |date-time|.
350 if (0 < $M and $M < 13) {
351 $self->{onerror}->(node => $node, type => 'datetime:bad day',
352 level => $self->{level}->{must})
353 if $d < 1 or
354 $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
355 $self->{onerror}->(node => $node, type => 'datetime:bad day',
356 level => $self->{level}->{must})
357 if $M == 2 and $d == 29 and
358 not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
359 } else {
360 $self->{onerror}->(node => $node, type => 'datetime:bad month',
361 level => $self->{level}->{must});
362 }
363 $self->{onerror}->(node => $node, type => 'datetime:bad hour',
364 level => $self->{level}->{must}) if $h > 23;
365 $self->{onerror}->(node => $node, type => 'datetime:bad minute',
366 level => $self->{level}->{must}) if $m > 59;
367 $self->{onerror}->(node => $node, type => 'datetime:bad second',
368 level => $self->{level}->{must})
369 if $s > 60; ## NOTE: Validness of leap seconds are not checked.
370 $self->{onerror}->(node => $node, type => 'datetime:bad timezone hour',
371 level => $self->{level}->{must}) if $zh > 23;
372 $self->{onerror}->(node => $node, type => 'datetime:bad timezone minute',
373 level => $self->{level}->{must}) if $zm > 59;
374 } else {
375 $self->{onerror}->(node => $item->{node},
376 type => 'datetime:syntax error',
377 level => $self->{level}->{must});
378 }
379 ## NOTE: SHOULD be accurate as possible (cannot be checked)
380
381 $AtomChecker{check_end}->(@_);
382 },
383 ); # %AtomDateConstruct
384
385 $Element->{$ATOM_NS}->{entry} = {
386 %AtomChecker,
387 is_root => 1,
388 check_child_element => sub {
389 my ($self, $item, $child_el, $child_nsuri, $child_ln,
390 $child_is_transparent, $element_state) = @_;
391
392 ## NOTE: metadata elements, followed by atom:entry* (no explicit MAY)
393
394 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
395 $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
396 $self->{onerror}->(node => $child_el,
397 type => 'element not allowed:minus',
398 level => $self->{level}->{must});
399 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
400 #
401 } elsif ($child_nsuri eq $ATOM_NS) {
402 my $not_allowed;
403 if ({ # MUST (0, 1)
404 content => 1,
405 id => 1,
406 published => 1,
407 rights => 1,
408 source => 1,
409 summary => 1,
410 title => 1,
411 updated => 1,
412 }->{$child_ln}) {
413 unless ($element_state->{has_element}->{$child_ln}) {
414 $element_state->{has_element}->{$child_ln} = 1;
415 $not_allowed = $element_state->{has_element}->{entry};
416 } else {
417 $not_allowed = 1;
418 }
419 } elsif ($child_ln eq 'link') { # MAY
420 if ($child_el->rel eq $LINK_REL . 'alternate') {
421 my $type = $child_el->get_attribute_ns (undef, 'type');
422 $type = '' unless defined $type;
423 my $hreflang = $child_el->get_attribute_ns (undef, 'hreflang');
424 $hreflang = '' unless defined $hreflang;
425 my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
426 (defined $hreflang ? ':'.$hreflang : '');
427 unless ($element_state->{has_element}->{$key}) {
428 $element_state->{has_element}->{$key} = 1;
429 $element_state->{has_element}->{'link.alternate'} = 1;
430 } else {
431 $not_allowed = 1;
432 }
433 }
434
435 ## NOTE: MAY
436 $not_allowed ||= $element_state->{has_element}->{entry};
437 } elsif ({ # MAY
438 category => 1,
439 contributor => 1,
440 }->{$child_ln}) {
441 $not_allowed = $element_state->{has_element}->{entry};
442 } elsif ($child_ln eq 'author') { # MAY
443 $not_allowed = $element_state->{has_element}->{entry};
444 $element_state->{has_author} = 1; # ./author | ./source/author
445 $element_state->{has_element}->{$child_ln} = 1; # ./author
446 } else {
447 $not_allowed = 1;
448 }
449 if ($not_allowed) {
450 $self->{onerror}->(node => $child_el, type => 'element not allowed',
451 level => $self->{level}->{must});
452 }
453 } elsif ($child_nsuri eq $THR_NS and $child_ln eq 'in-reply-to') {
454 ## ISSUE: Where |thr:in-reply-to| is allowed is not explicit;y
455 ## defined in RFC 4685.
456 #
457 } elsif ($child_nsuri eq $THR_NS and $child_ln eq 'total') {
458 #
459 } else {
460 ## TODO: extension element
461 $self->{onerror}->(node => $child_el, type => 'element not allowed',
462 level => $self->{level}->{must});
463 }
464 },
465 check_child_text => sub {
466 my ($self, $item, $child_node, $has_significant, $element_state) = @_;
467 if ($has_significant) {
468 $self->{onerror}->(node => $child_node, type => 'character not allowed',
469 level => $self->{level}->{must});
470 }
471 },
472 check_end => sub {
473 my ($self, $item, $element_state) = @_;
474
475 if ($element_state->{has_author}) {
476 ## NOTE: There is either a child atom:author element
477 ## or a child atom:source element which contains an atom:author
478 ## child element.
479 #
480 } else {
481 A: {
482 my $root = $item->{node}->owner_document->document_element;
483 if ($root and $root->manakai_local_name eq 'feed') {
484 my $nsuri = $root->namespace_uri;
485 if (defined $nsuri and $nsuri eq $ATOM_NS) {
486 ## NOTE: An Atom Feed Document.
487 for my $root_child (@{$root->child_nodes}) {
488 ## NOTE: Entity references are not supported.
489 next unless $root_child->node_type == 1; # ELEMENT_NODE
490 next unless $root_child->manakai_local_name eq 'author';
491 my $root_child_nsuri = $root_child->namespace_uri;
492 next unless defined $root_child_nsuri;
493 next unless $root_child_nsuri eq $ATOM_NS;
494 last A;
495 }
496 }
497 }
498
499 $self->{onerror}->(node => $item->{node},
500 type => 'child element missing:atom',
501 text => 'author',
502 level => $self->{level}->{must});
503 } # A
504 }
505
506 unless ($element_state->{has_element}->{author}) {
507 $item->{parent_state}->{has_no_author_entry} = 1; # for atom:feed's check
508 }
509
510 ## TODO: If entry's with same id, then updated SHOULD be different
511
512 unless ($element_state->{has_element}->{id}) { # MUST
513 $self->{onerror}->(node => $item->{node},
514 type => 'child element missing:atom',
515 text => 'id',
516 level => $self->{level}->{must});
517 }
518 unless ($element_state->{has_element}->{title}) { # MUST
519 $self->{onerror}->(node => $item->{node},
520 type => 'child element missing:atom',
521 text => 'title',
522 level => $self->{level}->{must});
523 }
524 unless ($element_state->{has_element}->{updated}) { # MUST
525 $self->{onerror}->(node => $item->{node},
526 type => 'child element missing:atom',
527 text => 'updated',
528 level => $self->{level}->{must});
529 }
530 if (not $element_state->{has_element}->{content} and
531 not $element_state->{has_element}->{'link.alternate'}) {
532 $self->{onerror}->(node => $item->{node},
533 type => 'child element missing:atom:link:alternate',
534 level => $self->{level}->{must});
535 }
536
537 if ($element_state->{require_summary} and
538 not $element_state->{has_element}->{summary}) {
539 $self->{onerror}->(node => $item->{node},
540 type => 'child element missing:atom',
541 text => 'summary',
542 level => $self->{level}->{must});
543 }
544 },
545 };
546
547 $Element->{$ATOM_NS}->{feed} = {
548 %AtomChecker,
549 is_root => 1,
550 check_child_element => sub {
551 my ($self, $item, $child_el, $child_nsuri, $child_ln,
552 $child_is_transparent, $element_state) = @_;
553
554 ## NOTE: metadata elements, followed by atom:entry* (no explicit MAY)
555
556 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
557 $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
558 $self->{onerror}->(node => $child_el,
559 type => 'element not allowed:minus',
560 level => $self->{level}->{must});
561 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
562 #
563 } elsif ($child_nsuri eq $ATOM_NS) {
564 my $not_allowed;
565 if ($child_ln eq 'entry') {
566 $element_state->{has_element}->{entry} = 1;
567 } elsif ({ # MUST (0, 1)
568 generator => 1,
569 icon => 1,
570 id => 1,
571 logo => 1,
572 rights => 1,
573 subtitle => 1,
574 title => 1,
575 updated => 1,
576 }->{$child_ln}) {
577 unless ($element_state->{has_element}->{$child_ln}) {
578 $element_state->{has_element}->{$child_ln} = 1;
579 $not_allowed = $element_state->{has_element}->{entry};
580 } else {
581 $not_allowed = 1;
582 }
583 } elsif ($child_ln eq 'link') {
584 my $rel = $child_el->rel;
585 if ($rel eq $LINK_REL . 'alternate') {
586 my $type = $child_el->get_attribute_ns (undef, 'type');
587 $type = '' unless defined $type;
588 my $hreflang = $child_el->get_attribute_ns (undef, 'hreflang');
589 $hreflang = '' unless defined $hreflang;
590 my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
591 (defined $hreflang ? ':'.$hreflang : '');
592 unless ($element_state->{has_element}->{$key}) {
593 $element_state->{has_element}->{$key} = 1;
594 } else {
595 $not_allowed = 1;
596 }
597 } elsif ($rel eq $LINK_REL . 'self') {
598 $element_state->{has_element}->{'link.self'} = 1;
599 }
600
601 ## NOTE: MAY
602 $not_allowed = $element_state->{has_element}->{entry};
603 } elsif ({ # MAY
604 category => 1,
605 contributor => 1,
606 }->{$child_ln}) {
607 $not_allowed = $element_state->{has_element}->{entry};
608 } elsif ($child_ln eq 'author') { # MAY
609 $not_allowed = $element_state->{has_element}->{entry};
610 $element_state->{has_element}->{author} = 1;
611 } else {
612 $not_allowed = 1;
613 }
614 $self->{onerror}->(node => $child_el, type => 'element not allowed',
615 level => $self->{level}->{must})
616 if $not_allowed;
617 } else {
618 ## TODO: extension element
619 $self->{onerror}->(node => $child_el, type => 'element not allowed',
620 level => $self->{level}->{must});
621 }
622 },
623 check_child_text => sub {
624 my ($self, $item, $child_node, $has_significant, $element_state) = @_;
625 if ($has_significant) {
626 $self->{onerror}->(node => $child_node, type => 'character not allowed',
627 level => $self->{level}->{must});
628 }
629 },
630 check_end => sub {
631 my ($self, $item, $element_state) = @_;
632
633 if ($element_state->{has_no_author_entry} and
634 not $element_state->{has_element}->{author}) {
635 $self->{onerror}->(node => $item->{node},
636 type => 'child element missing:atom',
637 text => 'author',
638 level => $self->{level}->{must});
639 ## ISSUE: If there is no |atom:entry| element,
640 ## there should be an |atom:author| element?
641 }
642
643 ## TODO: If entry's with same id, then updated SHOULD be different
644
645 unless ($element_state->{has_element}->{id}) { # MUST
646 $self->{onerror}->(node => $item->{node},
647 type => 'child element missing:atom',
648 text => 'id',
649 level => $self->{level}->{must});
650 }
651 unless ($element_state->{has_element}->{title}) { # MUST
652 $self->{onerror}->(node => $item->{node},
653 type => 'child element missing:atom',
654 text => 'title',
655 level => $self->{level}->{must});
656 }
657 unless ($element_state->{has_element}->{updated}) { # MUST
658 $self->{onerror}->(node => $item->{node},
659 type => 'element missing:atom',
660 text => 'updated',
661 level => $self->{level}->{must});
662 }
663 unless ($element_state->{has_element}->{'link.self'}) {
664 $self->{onerror}->(node => $item->{node},
665 type => 'element missing:atom:link:self',
666 level => $self->{level}->{should});
667 }
668
669 $AtomChecker{check_end}->(@_);
670 },
671 };
672
673 $Element->{$ATOM_NS}->{content} = {
674 %AtomChecker,
675 check_start => sub {
676 my ($self, $item, $element_state) = @_;
677 $element_state->{type} = 'text';
678 $element_state->{value} = '';
679 },
680 check_attrs => $GetAtomAttrsChecker->({
681 src => sub {
682 my ($self, $attr, $item, $element_state) = @_;
683
684 $element_state->{has_src} = 1;
685 $item->{parent_state}->{require_summary} = 1;
686
687 ## NOTE: There MUST NOT be any white space.
688 Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
689 $self->{onerror}->(@_, node => $item->{node});
690 }, $self->{level});
691 },
692 type => sub {
693 my ($self, $attr, $item, $element_state) = @_;
694
695 $element_state->{has_type} = 1;
696
697 my $value = $attr->value;
698 if ($value eq 'text' or $value eq 'html' or $value eq 'xhtml') {
699 # MUST
700 } else {
701 ## NOTE: MUST be a MIME media type. What is "MIME media type"?
702 my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
703 my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
704 my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
705 if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
706 my @type = ($1, $2);
707 my $param = $3;
708 while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
709 if (defined $2) {
710 push @type, $1 => $2;
711 } else {
712 my $n = $1;
713 my $v = $2;
714 $v =~ s/\\(.)/$1/gs;
715 push @type, $n => $v;
716 }
717 }
718 require Whatpm::IMTChecker;
719 my $ic = Whatpm::IMTChecker->new;
720 $ic->{level} = $self->{level};
721 $ic->check_imt (sub {
722 $self->{onerror}->(@_, node => $attr);
723 }, @type);
724 } else {
725 $self->{onerror}->(node => $attr, type => 'IMT:syntax error',
726 level => $self->{level}->{must});
727 }
728 }
729
730 if ({text => 1, html => 1, xhtml => 1}->{$value}) {
731 #
732 } elsif ($value =~ m![+/][Xx][Mm][Ll]\z!) {
733 ## ISSUE: There is no definition for "XML media type" in RFC 3023.
734 ## Is |application/xml-dtd| an XML media type?
735 $value = 'xml';
736 } elsif ($value =~ m!^[Tt][Ee][Xx][Tt]/!) {
737 $value = 'mime_text';
738 } elsif ($value =~ m!^(?>message|multipart)/!i) {
739 $self->{onerror}->(node => $attr, type => 'IMT:composite',
740 level => $self->{level}->{must});
741 $item->{parent_state}->{require_summary} = 1;
742 } else {
743 $item->{parent_state}->{require_summary} = 1;
744 }
745
746 $element_state->{type} = $value;
747 },
748 }, {
749 src => FEATURE_RFC4287,
750 type => FEATURE_RFC4287,
751 }),
752 check_child_element => sub {
753 my ($self, $item, $child_el, $child_nsuri, $child_ln,
754 $child_is_transparent, $element_state) = @_;
755
756 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
757 $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
758 $self->{onerror}->(node => $child_el,
759 type => 'element not allowed:minus',
760 level => $self->{level}->{must});
761 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
762 #
763 } else {
764 if ($element_state->{type} eq 'text' or
765 $element_state->{type} eq 'html' or
766 $element_state->{type} eq 'mime_text') {
767 # MUST NOT
768 $self->{onerror}->(node => $child_el,
769 type => 'element not allowed:atom|content',
770 level => $self->{level}->{must});
771 } elsif ($element_state->{type} eq 'xhtml') {
772 if ($element_state->{has_div}) {
773 $self->{onerror}->(node => $child_el,
774 type => 'element not allowed:atom|content',
775 level => $self->{level}->{must});
776 } else {
777 ## TODO: SHOULD be suitable for handling as HTML [XHTML10]
778 $element_state->{has_div} = 1;
779 }
780 } elsif ($element_state->{type} eq 'xml') {
781 ## MAY contain elements
782 if ($element_state->{has_src}) {
783 $self->{onerror}->(node => $child_el,
784 type => 'element not allowed:atom|content',
785 level => $self->{level}->{must});
786 }
787 } else {
788 ## NOTE: Elements are not explicitly disallowed.
789 }
790 }
791 },
792 ## NOTE: If @src, the element MUST be empty. What is "empty"?
793 ## Is |<e><!----></e>| empty? |<e>&e;</e>| where |&e;| has
794 ## empty replacement tree shuld be empty, since Atom is defined
795 ## in terms of XML Information Set where entities are expanded.
796 ## (but what if |&e;| is an unexpanded entity?)
797 check_child_text => sub {
798 my ($self, $item, $child_node, $has_significant, $element_state) = @_;
799 if ($has_significant) {
800 if ($element_state->{has_src}) {
801 $self->{onerror}->(node => $child_node,
802 type => 'character not allowed:empty',
803 level => $self->{level}->{must});
804 } elsif ($element_state->{type} eq 'xhtml' or
805 $element_state->{type} eq 'xml') {
806 $self->{onerror}->(node => $child_node,
807 type => 'character not allowed:atom|content',
808 level => $self->{level}->{must});
809 }
810 }
811
812 $element_state->{value} .= $child_node->data;
813
814 ## NOTE: type=text/* has no further restriction (i.e. the content don't
815 ## have to conform to the definition of the type).
816 },
817 check_end => sub {
818 my ($self, $item, $element_state) = @_;
819
820 if ($element_state->{has_src}) {
821 if (not $element_state->{has_type}) {
822 $self->{onerror}->(node => $item->{node},
823 type => 'attribute missing',
824 text => 'type',
825 level => $self->{level}->{should});
826 } elsif ($element_state->{type} eq 'text' or
827 $element_state->{type} eq 'html' or
828 $element_state->{type} eq 'xhtml') {
829 $self->{onerror}
830 ->(node => $item->{node}->get_attribute_node_ns (undef, 'type'),
831 type => 'not IMT', level => $self->{level}->{must});
832 }
833 }
834
835 if ($element_state->{type} eq 'xhtml') {
836 unless ($element_state->{has_div}) {
837 $self->{onerror}->(node => $item->{node},
838 type => 'child element missing',
839 text => 'div',
840 level => $self->{level}->{must});
841 }
842 } elsif ($element_state->{type} eq 'html') {
843 ## TODO: SHOULD be suitable for handling as HTML [HTML4]
844 # markup MUST be escaped
845 $self->{onsubdoc}->({s => $element_state->{value},
846 container_node => $item->{node},
847 media_type => 'text/html',
848 inner_html_element => 'div',
849 is_char_string => 1});
850 } elsif ($element_state->{type} eq 'xml') {
851 ## NOTE: SHOULD be suitable for handling as $value.
852 ## If no @src, this would normally mean it contains a
853 ## single child element that would serve as the root element.
854 $self->{onerror}->(node => $item->{node},
855 type => 'atom|content not supported',
856 text => $item->{node}->get_attribute_ns
857 (undef, 'type'),
858 level => $self->{level}->{uncertain});
859 } elsif ($element_state->{type} eq 'text' or
860 $element_state->{type} eq 'mime-text') {
861 #
862 } else {
863 ## TODO: $s = valid Base64ed [RFC 3548] where
864 ## MAY leading and following "white space" (what?)
865 ## and lines separated by a single U+000A
866
867 ## NOTE: SHOULD be suitable for the indicated media type.
868 $self->{onerror}->(node => $item->{node},
869 type => 'atom|content not supported',
870 text => $item->{node}->get_attribute_ns
871 (undef, 'type'),
872 level => $self->{level}->{uncertain});
873 }
874
875 $AtomChecker{check_end}->(@_);
876 },
877 };
878 ## TODO: Tests for <html:nest/> in <atom:content/>
879
880 $Element->{$ATOM_NS}->{author} = \%AtomPersonConstruct;
881
882 $Element->{$ATOM_NS}->{category} = {
883 %AtomChecker,
884 check_attrs => $GetAtomAttrsChecker->({
885 label => sub { 1 }, # no value constraint
886 scheme => sub { # NOTE: No MUST.
887 my ($self, $attr) = @_;
888 ## NOTE: There MUST NOT be any white space.
889 Whatpm::URIChecker->check_iri ($attr->value, sub {
890 $self->{onerror}->(@_, node => $attr);
891 }, $self->{level});
892 },
893 term => sub {
894 my ($self, $attr, $item, $element_state) = @_;
895
896 ## NOTE: No value constraint.
897
898 $element_state->{has_term} = 1;
899 },
900 }, {
901 label => FEATURE_RFC4287,
902 scheme => FEATURE_RFC4287,
903 term => FEATURE_RFC4287,
904 }),
905 check_end => sub {
906 my ($self, $item, $element_state) = @_;
907 unless ($element_state->{has_term}) {
908 $self->{onerror}->(node => $item->{node},
909 type => 'attribute missing',
910 text => 'term',
911 level => $self->{level}->{must});
912 }
913
914 $AtomChecker{check_end}->(@_);
915 },
916 ## NOTE: Meaning of content is not defined.
917 };
918
919 $Element->{$ATOM_NS}->{contributor} = \%AtomPersonConstruct;
920
921 ## TODO: Anything below does not support <html:nest/> yet.
922
923 $Element->{$ATOM_NS}->{generator} = {
924 %AtomChecker,
925 check_attrs => $GetAtomAttrsChecker->({
926 uri => sub { # MUST
927 my ($self, $attr) = @_;
928 ## NOTE: There MUST NOT be any white space.
929 Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
930 $self->{onerror}->(@_, node => $attr);
931 }, $self->{level});
932 ## NOTE: Dereferencing SHOULD produce a representation
933 ## that is relevant to the agent.
934 },
935 version => sub { 1 }, # no value constraint
936 }, {
937 uri => FEATURE_RFC4287,
938 version => FEATURE_RFC4287,
939 }),
940
941 ## NOTE: Elements are not explicitly disallowed.
942
943 ## NOTE: Content MUST be a string that is a human-readable name for
944 ## the generating agent.
945 };
946
947 $Element->{$ATOM_NS}->{icon} = {
948 %AtomChecker,
949 check_start => sub {
950 my ($self, $item, $element_state) = @_;
951 $element_state->{value} = '';
952 },
953 ## NOTE: Elements are not explicitly disallowed.
954 check_child_text => sub {
955 my ($self, $item, $child_node, $has_significant, $element_state) = @_;
956 $element_state->{value} .= $child_node->data;
957 },
958 check_end => sub {
959 my ($self, $item, $element_state) = @_;
960
961 ## NOTE: No MUST.
962 ## NOTE: There MUST NOT be any white space.
963 Whatpm::URIChecker->check_iri_reference ($element_state->{value}, sub {
964 $self->{onerror}->(@_, node => $item->{node});
965 }, $self->{level});
966
967 ## NOTE: Image SHOULD be 1:1 and SHOULD be small
968
969 $AtomChecker{check_end}->(@_);
970 },
971 };
972
973 $Element->{$ATOM_NS}->{id} = {
974 %AtomChecker,
975 check_start => sub {
976 my ($self, $item, $element_state) = @_;
977 $element_state->{value} = '';
978 },
979 ## NOTE: Elements are not explicitly disallowed.
980 check_child_text => sub {
981 my ($self, $item, $child_node, $has_significant, $element_state) = @_;
982 $element_state->{value} .= $child_node->data;
983 },
984 check_end => sub {
985 my ($self, $item, $element_state) = @_;
986
987 ## NOTE: There MUST NOT be any white space.
988 Whatpm::URIChecker->check_iri ($element_state->{value}, sub {
989 $self->{onerror}->(@_, node => $item->{node});
990 }, $self->{level});
991 ## TODO: SHOULD be normalized
992
993 $AtomChecker{check_end}->(@_);
994 },
995 };
996
997 my $AtomIMTAttrChecker = sub {
998 my ($self, $attr) = @_;
999 my $value = $attr->value;
1000 my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
1001 my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
1002 my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
1003 if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
1004 my @type = ($1, $2);
1005 my $param = $3;
1006 while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
1007 if (defined $2) {
1008 push @type, $1 => $2;
1009 } else {
1010 my $n = $1;
1011 my $v = $2;
1012 $v =~ s/\\(.)/$1/gs;
1013 push @type, $n => $v;
1014 }
1015 }
1016 require Whatpm::IMTChecker;
1017 my $ic = Whatpm::IMTChecker->new;
1018 $ic->{level} = $self->{level};
1019 $ic->check_imt (sub {
1020 $self->{onerror}->(@_, node => $attr);
1021 }, @type);
1022 } else {
1023 $self->{onerror}->(node => $attr, type => 'IMT:syntax error',
1024 level => $self->{level}->{must});
1025 }
1026 }; # $AtomIMTAttrChecker
1027
1028 my $AtomIRIReferenceAttrChecker = sub {
1029 my ($self, $attr) = @_;
1030 ## NOTE: There MUST NOT be any white space.
1031 Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
1032 $self->{onerror}->(@_, node => $attr);
1033 }, $self->{level});
1034 }; # $AtomIRIReferenceAttrChecker
1035
1036 $Element->{$ATOM_NS}->{link} = {
1037 %AtomChecker,
1038 check_attrs => $GetAtomAttrsChecker->({
1039 href => $AtomIRIReferenceAttrChecker,
1040 hreflang => $AtomLanguageTagAttrChecker,
1041 length => sub { }, # No MUST; in octets.
1042 rel => sub { # MUST
1043 my ($self, $attr) = @_;
1044 my $value = $attr->value;
1045 if ($value =~ /\A(?>[0-9A-Za-z._~!\$&'()*+,;=\x{A0}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFEF}\x{10000}-\x{1FFFD}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}\x{40000}-\x{4FFFD}\x{50000}-\x{5FFFD}\x{60000}-\x{6FFFD}\x{70000}-\x{7FFFD}\x{80000}-\x{8FFFD}\x{90000}-\x{9FFFD}\x{A0000}-\x{AFFFD}\x{B0000}-\x{BFFFD}\x{C0000}-\x{CFFFD}\x{D0000}-\x{DFFFD}\x{E1000}-\x{EFFFD}-]|%[0-9A-Fa-f][0-9A-Fa-f]|\@)+\z/) {
1046 $value = $LINK_REL . $value;
1047 }
1048
1049 ## NOTE: There MUST NOT be any white space.
1050 Whatpm::URIChecker->check_iri ($value, sub {
1051 $self->{onerror}->(@_, node => $attr);
1052 }, $self->{level});
1053
1054 ## TODO: Warn if unregistered
1055
1056 ## TODO: rel=license [RFC 4946]
1057 ## MUST NOT multiple rel=license with same href="",type="" pairs
1058 ## href="" SHOULD be dereferencable
1059 ## title="" SHOULD be there if multiple rel=license
1060 ## MUST NOT "unspecified" and other rel=license
1061 },
1062 title => sub { }, # No MUST
1063 type => $AtomIMTAttrChecker,
1064 ## NOTE: MUST be a MIME media type. What is "MIME media type"?
1065 }, {
1066 href => FEATURE_RFC4287,
1067 hreflang => FEATURE_RFC4287,
1068 length => FEATURE_RFC4287,
1069 rel => FEATURE_RFC4287,
1070 title => FEATURE_RFC4287,
1071 type => FEATURE_RFC4287,
1072
1073 ## TODO: thr:count
1074 ## TODO: thr:updated
1075 }),
1076 check_start => sub {
1077 my ($self, $item, $element_state) = @_;
1078
1079 unless ($item->{node}->has_attribute_ns (undef, 'href')) { # MUST
1080 $self->{onerror}->(node => $item->{node},
1081 type => 'attribute missing',
1082 text => 'href',
1083 level => $self->{level}->{must});
1084 }
1085
1086 if ($item->{node}->rel eq $LINK_REL . 'enclosure' and
1087 not $item->{node}->has_attribute_ns (undef, 'length')) {
1088 $self->{onerror}->(node => $item->{node},
1089 type => 'attribute missing',
1090 text => 'length',
1091 level => $self->{level}->{should});
1092 }
1093 },
1094 };
1095
1096 $Element->{$ATOM_NS}->{logo} = {
1097 %AtomChecker,
1098 ## NOTE: Child elements are not explicitly disallowed
1099 check_start => sub {
1100 my ($self, $item, $element_state) = @_;
1101 $element_state->{value} = '';
1102 },
1103 ## NOTE: Elements are not explicitly disallowed.
1104 check_child_text => sub {
1105 my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1106 $element_state->{value} .= $child_node->data;
1107 },
1108 check_end => sub {
1109 my ($self, $item, $element_state) = @_;
1110
1111 ## NOTE: There MUST NOT be any white space.
1112 Whatpm::URIChecker->check_iri_reference ($element_state->{value}, sub {
1113 $self->{onerror}->(@_, node => $item->{node});
1114 }, $self->{level});
1115
1116 ## NOTE: Image SHOULD be 2:1
1117
1118 $AtomChecker{check_end}->(@_);
1119 },
1120 };
1121
1122 $Element->{$ATOM_NS}->{published} = \%AtomDateConstruct;
1123
1124 $Element->{$ATOM_NS}->{rights} = \%AtomTextConstruct;
1125 ## NOTE: SHOULD NOT be used to convey machine-readable information.
1126
1127 $Element->{$ATOM_NS}->{source} = {
1128 %AtomChecker,
1129 check_child_element => sub {
1130 my ($self, $item, $child_el, $child_nsuri, $child_ln,
1131 $child_is_transparent, $element_state) = @_;
1132
1133 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
1134 $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
1135 $self->{onerror}->(node => $child_el,
1136 type => 'element not allowed:minus',
1137 level => $self->{level}->{must});
1138 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1139 #
1140 } elsif ($child_nsuri eq $ATOM_NS) {
1141 my $not_allowed;
1142 if ($child_ln eq 'entry') {
1143 $element_state->{has_element}->{entry} = 1;
1144 } elsif ({
1145 generator => 1,
1146 icon => 1,
1147 id => 1,
1148 logo => 1,
1149 rights => 1,
1150 subtitle => 1,
1151 title => 1,
1152 updated => 1,
1153 }->{$child_ln}) {
1154 unless ($element_state->{has_element}->{$child_ln}) {
1155 $element_state->{has_element}->{$child_ln} = 1;
1156 $not_allowed = $element_state->{has_element}->{entry};
1157 } else {
1158 $not_allowed = 1;
1159 }
1160 } elsif ($child_ln eq 'link') {
1161 if ($child_el->rel eq $LINK_REL . 'alternate') {
1162 my $type = $child_el->get_attribute_ns (undef, 'type');
1163 $type = '' unless defined $type;
1164 my $hreflang = $child_el->get_attribute_ns (undef, 'hreflang');
1165 $hreflang = '' unless defined $hreflang;
1166 my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
1167 (defined $hreflang ? ':'.$hreflang : '');
1168 unless ($element_state->{has_element}->{$key}) {
1169 $element_state->{has_element}->{$key} = 1;
1170 } else {
1171 $not_allowed = 1;
1172 }
1173 }
1174 $not_allowed ||= $element_state->{has_element}->{entry};
1175 } elsif ({
1176 category => 1,
1177 contributor => 1,
1178 }->{$child_ln}) {
1179 $not_allowed = $element_state->{has_element}->{entry};
1180 } elsif ($child_ln eq 'author') {
1181 $not_allowed = $element_state->{has_element}->{entry};
1182 $item->{parent_state}->{has_author} = 1; # parent::atom:entry's flag
1183 } else {
1184 $not_allowed = 1;
1185 }
1186 if ($not_allowed) {
1187 $self->{onerror}->(node => $child_el, type => 'element not allowed',
1188 level => $self->{level}->{must});
1189 }
1190 } else {
1191 ## TODO: extension element
1192 $self->{onerror}->(node => $child_el, type => 'element not allowed',
1193 level => $self->{level}->{must});
1194 }
1195 },
1196 check_child_text => sub {
1197 my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1198 if ($has_significant) {
1199 $self->{onerror}->(node => $child_node, type => 'character not allowed',
1200 level => $self->{level}->{must});
1201 }
1202 },
1203 };
1204
1205 $Element->{$ATOM_NS}->{subtitle} = \%AtomTextConstruct;
1206
1207 $Element->{$ATOM_NS}->{summary} = \%AtomTextConstruct;
1208
1209 $Element->{$ATOM_NS}->{title} = \%AtomTextConstruct;
1210
1211 $Element->{$ATOM_NS}->{updated} = \%AtomDateConstruct;
1212
1213 ## TODO: signature element
1214
1215 ## TODO: simple extension element and structured extension element
1216
1217 ## -- Atom Threading 1.0 [RFC 4685]
1218
1219 $Element->{$THR_NS}->{''} = {
1220 %AtomChecker,
1221 status => 0,
1222 };
1223
1224 ## ISSUE: Strictly speaking, thr:* element/attribute,
1225 ## where * is an undefined local name, is not disallowed.
1226
1227 $Element->{$THR_NS}->{'in-reply-to'} = {
1228 %AtomChecker,
1229 status => FEATURE_RFC4685,
1230 check_attrs => $GetAtomAttrsChecker->({
1231 href => $AtomIRIReferenceAttrChecker,
1232 ## TODO: fact-level.
1233 ## TODO: MUST be dereferencable.
1234 ref => sub {
1235 my ($self, $attr, $item, $element_state) = @_;
1236 $element_state->{has_ref} = 1;
1237
1238 ## NOTE: Same as |atom:id|.
1239 ## NOTE: There MUST NOT be any white space.
1240 Whatpm::URIChecker->check_iri ($attr->value, sub {
1241 $self->{onerror}->(@_, node => $attr);
1242 }, $self->{level});
1243
1244 ## TODO: Check against ID guideline...
1245 },
1246 source => $AtomIRIReferenceAttrChecker,
1247 ## TODO: fact-level.
1248 ## TODO: MUST be dereferencable.
1249 type => $AtomIMTAttrChecker,
1250 ## TODO: fact-level.
1251 }, {
1252 href => FEATURE_RFC4685,
1253 source => FEATURE_RFC4685,
1254 ref => FEATURE_RFC4685,
1255 type => FEATURE_RFC4685,
1256 }),
1257 check_end => sub {
1258 my ($self, $item, $element_state) = @_;
1259
1260 unless ($element_state->{has_ref}) {
1261 $self->{onerror}->(node => $item->{node},
1262 type => 'attribute missing',
1263 text => 'ref',
1264 level => $self->{level}->{must});
1265 }
1266
1267 $AtomChecker{check_end}->(@_);
1268 },
1269 ## NOTE: Content model has no constraint.
1270 };
1271
1272 $Element->{$THR_NS}->{total} = {
1273 %AtomChecker,
1274 check_start => sub {
1275 my ($self, $item, $element_state) = @_;
1276 $element_state->{value} = '';
1277 },
1278 check_child_element => sub {
1279 my ($self, $item, $child_el, $child_nsuri, $child_ln,
1280 $child_is_transparent, $element_state) = @_;
1281
1282 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
1283 $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
1284 $self->{onerror}->(node => $child_el,
1285 type => 'element not allowed:minus',
1286 level => $self->{level}->{must});
1287 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1288 #
1289 } else {
1290 $self->{onerror}->(node => $child_el,
1291 type => 'element not allowed',
1292 level => $self->{level}->{must});
1293 }
1294 },
1295 check_child_text => sub {
1296 my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1297 $element_state->{value} .= $child_node->data;
1298 },
1299 check_end => sub {
1300 my ($self, $item, $element_state) = @_;
1301
1302 ## NOTE: xsd:nonNegativeInteger
1303 unless ($element_state->{value} =~ /\A(?>[0-9]+|-0+)\z/) {
1304 $self->{onerror}->(node => $item->{node},
1305 type => 'invalid attribute value',
1306 level => $self->{level}->{must});
1307 }
1308
1309 $AtomChecker{check_end}->(@_);
1310 },
1311 };
1312
1313 ## TODO: fh:complete
1314
1315 ## TODO: fh:archive
1316
1317 ## TODO: Check as archive document, page feed document, ...
1318
1319 ## TODO: APP [RFC 5023]
1320
1321 $Whatpm::ContentChecker::Namespace->{$ATOM_NS}->{loaded} = 1;
1322 $Whatpm::ContentChecker::Namespace->{$THR_NS}->{loaded} = 1;
1323
1324 1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24