/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/Atom.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/Atom.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.6 - (show annotations) (download)
Sun Oct 14 09:21:46 2007 UTC (17 years ago) by wakaba
Branch: MAIN
Changes since 1.5: +75 -22 lines
++ whatpm/t/ChangeLog	14 Oct 2007 09:21:32 -0000
2007-10-14  Wakaba  <wakaba@suika.fam.cx>

	* content-model-1.dat, content-model-2.dat: New test
	data for |rule|, |nest|, and |datatemplate| elements.

++ whatpm/Whatpm/ChangeLog	14 Oct 2007 09:20:23 -0000
2007-10-14  Wakaba  <wakaba@suika.fam.cx>

	* ContentChecker.pm (check_document): Support for
	new |is_xml_root| flag.
	(check_element): Support for new |pluses| state.
	(_add_pluses): New method.
	(_remove_minuses): Support for new |minus| item.

++ whatpm/Whatpm/ContentChecker/ChangeLog	14 Oct 2007 09:20:50 -0000
2007-10-14  Wakaba  <wakaba@suika.fam.cx>

	* Atom.pm, HTML.pm: Support for |html:nest|, |html:datatemplate|,
	and |html:rule| elements.

1 package Whatpm::ContentChecker;
2 use strict;
3 require Whatpm::ContentChecker;
4
5 require Whatpm::URIChecker;
6
7 my $ATOM_NS = q<http://www.w3.org/2005/Atom>;
8 my $LINK_REL = q<http://www.iana.org/assignments/relation/>;
9
10 ## MUST be well-formed XML (RFC 4287 references XML 1.0 REC 20040204)
11
12 ## NOTE: Commants and PIs are not explicitly allowed.
13
14 our $AttrChecker;
15
16 ## Any element MAY have xml:base, xml:lang
17 my $GetAtomAttrsChecker = sub {
18 my $element_specific_checker = shift;
19 return sub {
20 my ($self, $todo) = @_;
21 for my $attr (@{$todo->{node}->attributes}) {
22 my $attr_ns = $attr->namespace_uri;
23 $attr_ns = '' unless defined $attr_ns;
24 my $attr_ln = $attr->manakai_local_name;
25 my $checker;
26 if ($attr_ns eq '') {
27 $checker = $element_specific_checker->{$attr_ln};
28 } else {
29 $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
30 || $AttrChecker->{$attr_ns}->{''};
31 }
32 if ($checker) {
33 $checker->($self, $attr, $todo);
34 } else {
35 $self->{onerror}->(node => $attr, level => 'unsupported',
36 type => 'attribute');
37 ## ISSUE: No comformance createria for unknown attributes in the spec
38 }
39 }
40 };
41 }; # $GetAtomAttrsChecker
42
43 my $AtomLanguageTagAttrChecker = sub {
44 ## NOTE: See also $HTMLLanguageTagAttrChecker in HTML.pm.
45
46 my ($self, $attr) = @_;
47 my $value = $attr->value;
48 require Whatpm::LangTag;
49 Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
50 my %opt = @_;
51 my $type = 'LangTag:'.$opt{type};
52 $type .= ':' . $opt{subtag} if defined $opt{subtag};
53 $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
54 level => $opt{level});
55 });
56 ## ISSUE: RFC 4646 (3066bis)?
57 }; # $AtomLanguageTagAttrChecker
58
59 my $AtomTextConstruct = {
60 attrs_checker => $GetAtomAttrsChecker->({
61 type => sub { 1 }, # checked in |checker|
62 }),
63 checker => sub {
64 my ($self, $todo) = @_;
65
66 my $attr = $todo->{node}->get_attribute_node_ns (undef, 'type');
67 my $value = 'text';
68 if ($attr) {
69 $value = $attr->value;
70 if ($value eq 'text' or $value eq 'html' or $value eq 'xhtml') {
71 # MUST
72 } else {
73 $self->{onerror}->(node => $attr, type => 'keyword:invalid');
74 }
75 # IMT MUST NOT be used
76 }
77
78 if ($value eq 'text') {
79 my @nodes = (@{$todo->{node}->child_nodes});
80 my $new_todos = [];
81
82 while (@nodes) {
83 my $node = shift @nodes;
84 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
85
86 my $nt = $node->node_type;
87 if ($nt == 1) {
88 # MUST NOT
89 $self->{onerror}->(node => $node, type => 'element not allowed');
90 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
91 unshift @nodes, @$sib;
92 push @$new_todos, @$ch;
93 } elsif ($nt == 5) {
94 unshift @nodes, @{$node->child_nodes};
95 }
96 }
97
98 return ($new_todos);
99 } elsif ($value eq 'html') {
100 my @nodes = (@{$todo->{node}->child_nodes});
101 my $new_todos = [];
102
103 while (@nodes) {
104 my $node = shift @nodes;
105 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
106
107 my $nt = $node->node_type;
108 if ($nt == 1) {
109 # MUST NOT
110 $self->{onerror}->(node => $node, type => 'element not allowed');
111 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
112 unshift @nodes, @$sib;
113 push @$new_todos, @$ch;
114 } elsif ($nt == 5) {
115 unshift @nodes, @{$node->child_nodes};
116 }
117 }
118
119 ## TODO: SHOULD be suitable for handling as HTML [HTML4]
120 # markup MUST be escaped
121 ## TODO: HTML SHOULD be valid as if within <div>
122
123 return ($new_todos);
124 } elsif ($value eq 'xhtml') {
125 my @nodes = (@{$todo->{node}->child_nodes});
126 my $new_todos = [];
127
128 my $has_div;
129 while (@nodes) {
130 my $node = shift @nodes;
131 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
132
133 my $nt = $node->node_type;
134 if ($nt == 1) {
135 # MUST
136 my $nsuri = $node->namespace_uri;
137 if (defined $nsuri and
138 $nsuri eq q<http://www.w3.org/1999/xhtml> and
139 $node->manakai_local_name eq 'div' and
140 not $has_div) {
141 ## TODO: SHOULD be suitable for handling as HTML [XHTML10]
142 $has_div = 1;
143 } else {
144 $self->{onerror}->(node => $node, type => 'element not allowed');
145 }
146 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
147 unshift @nodes, @$sib;
148 push @$new_todos, @$ch;
149 } elsif ($nt == 3 or $nt == 4) {
150 ## TODO: Are white spaces allowed?
151 $self->{onerror}->(node => $node, type => 'character not allowed');
152 } elsif ($nt == 5) {
153 unshift @nodes, @{$node->child_nodes};
154 }
155 }
156
157 unless ($has_div) {
158 $self->{onerror}->(node => $todo->{node},
159 type => 'element missing:div');
160 }
161
162 return ($new_todos);
163 }
164
165 },
166 }; # $AtomTextConstruct
167
168 my $AtomPersonConstruct = {
169 attrs_checker => $GetAtomAttrsChecker->({}),
170 checker => sub {
171 my ($self, $todo) = @_;
172
173 my @nodes = (@{$todo->{node}->child_nodes});
174 my $new_todos = [];
175
176 my $has_name;
177 my $has_uri;
178 my $has_email;
179 while (@nodes) {
180 my $node = shift @nodes;
181 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
182
183 my $nt = $node->node_type;
184 if ($nt == 1) {
185 # MUST
186 my $nsuri = $node->namespace_uri;
187 $nsuri = '' unless defined $nsuri;
188 my $not_allowed;
189 if ($nsuri eq $ATOM_NS) {
190 my $ln = $node->manakai_local_name;
191 if ($ln eq 'name') {
192 unless ($has_name) {
193 $has_name = 1;
194 } else {
195 $not_allowed = 1;
196 }
197 } elsif ($ln eq 'uri') {
198 unless ($has_uri) {
199 $has_uri = 1;
200 } else {
201 $not_allowed = 1; # MUST NOT
202 }
203 } elsif ($ln eq 'email') {
204 unless ($has_email) {
205 $has_email = 1;
206 } else {
207 $not_allowed = 1; # MUST NOT
208 }
209 } else {
210 $not_allowed = 1;
211 }
212 } else {
213 ## TODO: extension element
214 $not_allowed = 1;
215 }
216 $self->{onerror}->(node => $node, type => 'element not allowed')
217 if $not_allowed;
218 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
219 unshift @nodes, @$sib;
220 push @$new_todos, @$ch;
221 } elsif ($nt == 3 or $nt == 4) {
222 ## TODO: Are white spaces allowed?
223 $self->{onerror}->(node => $node, type => 'character not allowed');
224 } elsif ($nt == 5) {
225 unshift @nodes, @{$node->child_nodes};
226 }
227 }
228
229 unless ($has_name) { # MUST
230 $self->{onerror}->(node => $todo->{node},
231 type => 'element missing:atom.name');
232 }
233
234 return ($new_todos);
235 },
236 }; # $AtomPersonConstruct
237
238 our $Element;
239
240 $Element->{$ATOM_NS}->{name} = {
241 ## NOTE: Strictly speaking, structure and semantics for atom:name
242 ## element outside of Person construct is not defined.
243 attrs_checker => $GetAtomAttrsChecker->({}),
244 checker => sub {
245 my ($self, $todo) = @_;
246
247 my @nodes = (@{$todo->{node}->child_nodes});
248 my $new_todos = [];
249
250 while (@nodes) {
251 my $node = shift @nodes;
252 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
253
254 my $nt = $node->node_type;
255 if ($nt == 1) {
256 ## NOTE: No constraint.
257 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
258 unshift @nodes, @$sib;
259 push @$new_todos, @$ch;
260 } elsif ($nt == 3 or $nt == 4) {
261 #
262 } elsif ($nt == 5) {
263 unshift @nodes, @{$node->child_nodes};
264 }
265 }
266
267 return ($new_todos);
268 },
269 };
270
271 $Element->{$ATOM_NS}->{uri} = {
272 ## NOTE: Strictly speaking, structure and semantics for atom:uri
273 ## element outside of Person construct is not defined.
274 attrs_checker => $GetAtomAttrsChecker->({}),
275 checker => sub {
276 my ($self, $todo) = @_;
277
278 my @nodes = (@{$todo->{node}->child_nodes});
279 my $new_todos = [];
280
281 my $s = '';
282 while (@nodes) {
283 my $node = shift @nodes;
284 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
285
286 my $nt = $node->node_type;
287 if ($nt == 1) {
288 my $node_ns = $node->namespace_uri;
289 $node_ns = '' unless defined $node_ns;
290 my $node_ln = $node->manakai_local_name;
291 unless ($self->{pluses}->{$node_ns}->{$node_ln}) {
292 ## NOTE: Not explicitly disallowed.
293 $self->{onerror}->(node => $node, type => 'element not allowed');
294 }
295 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
296 unshift @nodes, @$sib;
297 push @$new_todos, @$ch;
298 } elsif ($nt == 3 or $nt == 4) {
299 $s .= $node->data;
300 } elsif ($nt == 5) {
301 unshift @nodes, @{$node->child_nodes};
302 }
303 }
304
305 ## NOTE: There MUST NOT be any white space.
306 Whatpm::URIChecker->check_iri_reference ($s, sub {
307 my %opt = @_;
308 $self->{onerror}->(node => $todo->{node}, level => $opt{level},
309 type => 'URI::'.$opt{type}.
310 (defined $opt{position} ? ':'.$opt{position} : ''));
311 });
312
313 return ($new_todos);
314 },
315 };
316
317 $Element->{$ATOM_NS}->{email} = {
318 ## NOTE: Strictly speaking, structure and semantics for atom:email
319 ## element outside of Person construct is not defined.
320 attrs_checker => $GetAtomAttrsChecker->({}),
321 checker => sub {
322 my ($self, $todo) = @_;
323
324 my @nodes = (@{$todo->{node}->child_nodes});
325 my $new_todos = [];
326
327 my $s = '';
328 while (@nodes) {
329 my $node = shift @nodes;
330 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
331
332 my $nt = $node->node_type;
333 if ($nt == 1) {
334 my $node_ns = $node->namespace_uri;
335 $node_ns = '' unless defined $node_ns;
336 my $node_ln = $node->manakai_local_name;
337 unless ($self->{pluses}->{$node_ns}->{$node_ln}) {
338 ## NOTE: Not explicitly disallowed.
339 $self->{onerror}->(node => $node, type => 'element not allowed');
340 }
341 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
342 unshift @nodes, @$sib;
343 push @$new_todos, @$ch;
344 } elsif ($nt == 3 or $nt == 4) {
345 $s .= $node->data;
346 } elsif ($nt == 5) {
347 unshift @nodes, @{$node->child_nodes};
348 }
349 }
350
351 ## TODO: addr-spec
352 $self->{onerror}->(node => $todo->{node}, type => 'addr-spec',
353 level => 'unsupported');
354
355 return ($new_todos);
356 },
357 };
358
359 ## MUST NOT be any white space
360 my $AtomDateConstruct = {
361 attrs_checker => $GetAtomAttrsChecker->({}),
362 checker => sub {
363 my ($self, $todo) = @_;
364
365 my @nodes = (@{$todo->{node}->child_nodes});
366 my $new_todos = [];
367
368 my $s = '';
369 while (@nodes) {
370 my $node = shift @nodes;
371 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
372
373 my $nt = $node->node_type;
374 if ($nt == 1) {
375 my $node_ns = $node->namespace_uri;
376 $node_ns = '' unless defined $node_ns;
377 my $node_ln = $node->manakai_local_name;
378 unless ($self->{pluses}->{$node_ns}->{$node_ln}) {
379 ## NOTE: It does not explicitly say that there MUST NOT be any element.
380 $self->{onerror}->(node => $node, type => 'element not allowed');
381 }
382 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
383 unshift @nodes, @$sib;
384 push @$new_todos, @$ch;
385 } elsif ($nt == 3 or $nt == 4) {
386 $s .= $node->data;
387 } elsif ($nt == 5) {
388 unshift @nodes, @{$node->child_nodes};
389 }
390 }
391
392 ## MUST: RFC 3339 |date-time| with uppercase |T| and |Z|
393 if ($s =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})(?>\.[0-9]+)?(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
394 my ($y, $M, $d, $h, $m, $s, $zh, $zm)
395 = ($1, $2, $3, $4, $5, $6, $7, $8);
396 my $node = $todo->{node};
397
398 ## Check additional constraints described or referenced in
399 ## comments of ABNF rules for |date-time|.
400 my $level = $self->{must_level};
401 if (0 < $M and $M < 13) {
402 $self->{onerror}->(node => $node, type => 'datetime:bad day',
403 level => $level)
404 if $d < 1 or
405 $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
406 $self->{onerror}->(node => $node, type => 'datetime:bad day',
407 level => $level)
408 if $M == 2 and $d == 29 and
409 not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
410 } else {
411 $self->{onerror}->(node => $node, type => 'datetime:bad month',
412 level => $level);
413 }
414 $self->{onerror}->(node => $node, type => 'datetime:bad hour',
415 level => $level) if $h > 23;
416 $self->{onerror}->(node => $node, type => 'datetime:bad minute',
417 level => $level) if $m > 59;
418 $self->{onerror}->(node => $node, type => 'datetime:bad second',
419 level => $level)
420 if $s > 60; ## NOTE: Validness of leap seconds are not checked.
421 $self->{onerror}->(node => $node, type => 'datetime:bad timezone hour',
422 level => $level) if $zh > 23;
423 $self->{onerror}->(node => $node, type => 'datetime:bad timezone minute',
424 level => $level) if $zm > 59;
425 } else {
426 $self->{onerror}->(node => $todo->{node},
427 type => 'datetime:syntax error',
428 level => $self->{must_level});
429 }
430 ## NOTE: SHOULD be accurate as possible (cannot be checked)
431
432 return ($new_todos);
433 },
434 }; # $AtomDateConstruct
435
436 $Element->{$ATOM_NS}->{entry} = {
437 is_root => 1,
438 attrs_checker => $GetAtomAttrsChecker->({}),
439 checker => sub {
440 my ($self, $todo) = @_;
441
442 my @nodes = (@{$todo->{node}->child_nodes});
443 my $new_todos = [];
444
445 ## TODO: MUST author+ unless (child::source/child::author)
446 ## or (parent::feed/child::author)
447
448 my $has_element = {};
449 while (@nodes) {
450 my $node = shift @nodes;
451 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
452
453 my $nt = $node->node_type;
454 if ($nt == 1) {
455 # MUST
456 my $nsuri = $node->namespace_uri;
457 $nsuri = '' unless defined $nsuri;
458 my $ln = $node->manakai_local_name;
459 my $not_allowed;
460 if ($self->{pluses}->{$nsuri}->{$ln}) {
461 #
462 } elsif ($nsuri eq $ATOM_NS) {
463 if ({ # MUST (0, 1)
464 content => 1,
465 id => 1,
466 published => 1,
467 rights => 1,
468 source => 1,
469 summary => 1,
470 ## TODO: MUST if child::content/@src | child::content/@type = IMT, !text/ !/xml !+xml
471 title => 1,
472 updated => 1,
473 }->{$ln}) {
474 unless ($has_element->{$ln}) {
475 $has_element->{$ln} = 1;
476 $not_allowed = $has_element->{entry};
477 } else {
478 $not_allowed = 1;
479 }
480 } elsif ($ln eq 'link') { # MAY
481 if ($node->rel eq $LINK_REL . 'alternate') {
482 my $type = $node->get_attribute_ns (undef, 'type');
483 $type = '' unless defined $type;
484 my $hreflang = $node->get_attribute_ns (undef, 'hreflang');
485 $hreflang = '' unless defined $hreflang;
486 my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
487 (defined $hreflang ? ':'.$hreflang : '');
488 unless ($has_element->{$key}) {
489 $has_element->{$key} = 1;
490 $has_element->{'link.alternate'} = 1;
491 } else {
492 $not_allowed = 1;
493 }
494 }
495
496 ## NOTE: MAY
497 $not_allowed ||= $has_element->{entry};
498 } elsif ({ # MAY
499 author => 1,
500 category => 1,
501 contributor => 1,
502 }->{$ln}) {
503 $not_allowed = $has_element->{entry};
504 } else {
505 $not_allowed = 1;
506 }
507 } else {
508 ## TODO: extension element
509 $not_allowed = 1;
510 }
511 $self->{onerror}->(node => $node, type => 'element not allowed')
512 if $not_allowed;
513 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
514 unshift @nodes, @$sib;
515 push @$new_todos, @$ch;
516 } elsif ($nt == 3 or $nt == 4) {
517 ## TODO: Are white spaces allowed?
518 $self->{onerror}->(node => $node, type => 'character not allowed');
519 } elsif ($nt == 5) {
520 unshift @nodes, @{$node->child_nodes};
521 }
522 }
523
524 ## NOTE: metadata elements, followed by atom:entry* (no explicit MAY)
525
526 ## TODO: If entry's with same id, then updated SHOULD be different
527
528 unless ($has_element->{id}) { # MUST
529 $self->{onerror}->(node => $todo->{node},
530 type => 'element missing:atom.id');
531 }
532 unless ($has_element->{title}) { # MUST
533 $self->{onerror}->(node => $todo->{node},
534 type => 'element missing:atom.title');
535 }
536 unless ($has_element->{updated}) { # MUST
537 $self->{onerror}->(node => $todo->{node},
538 type => 'element missing:atom.updated');
539 }
540 if (not $has_element->{content} and
541 not $has_element->{'link.alternate'}) {
542 $self->{onerror}->(node => $todo->{node},
543 type => 'element missing:atom.link.alternate');
544 }
545
546 return ($new_todos);
547 },
548 };
549
550 $Element->{$ATOM_NS}->{feed} = {
551 is_root => 1,
552 attrs_checker => $GetAtomAttrsChecker->({}),
553 checker => sub {
554 my ($self, $todo) = @_;
555
556 my @nodes = (@{$todo->{node}->child_nodes});
557 my $new_todos = [];
558
559 ## TODO: MUST author+ unless all entry child has author+.
560
561 my $has_element = {};
562 while (@nodes) {
563 my $node = shift @nodes;
564 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
565
566 my $nt = $node->node_type;
567 if ($nt == 1) {
568 my $nsuri = $node->namespace_uri;
569 $nsuri = '' unless defined $nsuri;
570 my $ln = $node->manakai_local_name;
571 my $not_allowed;
572 if ($self->{pluses}->{$nsuri}->{$ln}) {
573 #
574 } elsif ($nsuri eq $ATOM_NS) {
575 if ($ln eq 'entry') {
576 $has_element->{entry} = 1;
577 } elsif ({ # MUST (0, 1)
578 generator => 1,
579 icon => 1,
580 id => 1,
581 logo => 1,
582 rights => 1,
583 subtitle => 1,
584 title => 1,
585 updated => 1,
586 }->{$ln}) {
587 unless ($has_element->{$ln}) {
588 $has_element->{$ln} = 1;
589 $not_allowed = $has_element->{entry};
590 } else {
591 $not_allowed = 1;
592 }
593 } elsif ($ln eq 'link') {
594 my $rel = $node->rel;
595 if ($rel eq $LINK_REL . 'alternate') {
596 my $type = $node->get_attribute_ns (undef, 'type');
597 $type = '' unless defined $type;
598 my $hreflang = $node->get_attribute_ns (undef, 'hreflang');
599 $hreflang = '' unless defined $hreflang;
600 my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
601 (defined $hreflang ? ':'.$hreflang : '');
602 unless ($has_element->{$key}) {
603 $has_element->{$key} = 1;
604 } else {
605 $not_allowed = 1;
606 }
607 } elsif ($rel eq $LINK_REL . 'self') {
608 $has_element->{'link.self'} = 1;
609 }
610
611 ## NOTE: MAY
612 $not_allowed = $has_element->{entry};
613 } elsif ({ # MAY
614 author => 1,
615 category => 1,
616 contributor => 1,
617 }->{$ln}) {
618 $not_allowed = $has_element->{entry};
619 } else {
620 $not_allowed = 1;
621 }
622 } else {
623 ## TODO: extension element
624 $not_allowed = 1;
625 }
626 $self->{onerror}->(node => $node, type => 'element not allowed')
627 if $not_allowed;
628 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
629 unshift @nodes, @$sib;
630 push @$new_todos, @$ch;
631 } elsif ($nt == 3 or $nt == 4) {
632 ## TODO: Are white spaces allowed?
633 $self->{onerror}->(node => $node, type => 'character not allowed');
634 } elsif ($nt == 5) {
635 unshift @nodes, @{$node->child_nodes};
636 }
637 }
638
639 ## NOTE: metadata elements, followed by atom:entry* (no explicit MAY)
640
641 ## TODO: If entry's with same id, then updated SHOULD be different
642
643 unless ($has_element->{id}) { # MUST
644 $self->{onerror}->(node => $todo->{node},
645 type => 'element missing:atom.id');
646 }
647 unless ($has_element->{title}) { # MUST
648 $self->{onerror}->(node => $todo->{node},
649 type => 'element missing:atom.title');
650 }
651 unless ($has_element->{updated}) { # MUST
652 $self->{onerror}->(node => $todo->{node},
653 type => 'element missing:atom.updated');
654 }
655 unless ($has_element->{'link.self'}) {
656 $self->{onerror}->(node => $todo->{node}, level => 's',
657 type => 'child element missing:atom.link.self');
658 }
659
660 return ($new_todos);
661 },
662 };
663
664 $Element->{$ATOM_NS}->{content} = {
665 attrs_checker => $GetAtomAttrsChecker->({
666 src => sub { 1 }, # checked in |checker|
667 type => sub { 1 }, # checked in |checker|
668 }),
669 checker => sub {
670 my ($self, $todo) = @_;
671
672 my $attr = $todo->{node}->get_attribute_node_ns (undef, 'type');
673 my $src_attr = $todo->{node}->get_attribute_node_ns (undef, 'src');
674 my $value;
675 if ($attr) {
676 $value = $attr->value;
677 if ($value eq 'text' or $value eq 'html' or $value eq 'xhtml') {
678 # MUST
679 } else {
680 ## NOTE: MUST be a MIME media type. What is "MIME media type"?
681 my $value = $attr->value;
682 my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
683 my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
684 my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
685 if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
686 my @type = ($1, $2);
687 my $param = $3;
688 while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
689 if (defined $2) {
690 push @type, $1 => $2;
691 } else {
692 my $n = $1;
693 my $v = $2;
694 $v =~ s/\\(.)/$1/gs;
695 push @type, $n => $v;
696 }
697 }
698 require Whatpm::IMTChecker;
699 Whatpm::IMTChecker->check_imt (sub {
700 my %opt = @_;
701 $self->{onerror}->(node => $attr, level => $opt{level},
702 type => 'IMT:'.$opt{type});
703 }, @type);
704 } else {
705 $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
706 }
707 }
708 } elsif ($src_attr) {
709 $value = '';
710 $self->{onerror}->(node => $todo->{node},
711 type => 'attribute missing:type', level => 's');
712 } else {
713 $value = 'text';
714 }
715
716 ## TODO: This implementation is not optimal.
717
718 if ($src_attr) {
719 ## NOTE: There MUST NOT be any white space.
720 Whatpm::URIChecker->check_iri_reference ($src_attr->value, sub {
721 my %opt = @_;
722 $self->{onerror}->(node => $todo->{node}, level => $opt{level},
723 type => 'URI::'.$opt{type}.
724 (defined $opt{position} ? ':'.$opt{position} : ''));
725 });
726
727 ## NOTE: If @src, the element MUST be empty. What is "empty"?
728 ## Is |<e><!----></e>| empty? |<e>&e;</e>| where |&e;| has
729 ## empty replacement tree shuld be empty, since Atom is defined
730 ## in terms of XML Information Set where entities are expanded.
731 ## (but what if |&e;| is an unexpanded entity?)
732 }
733
734 if ($value eq 'text') {
735 $self->{onerror}->(node => $attr, type => 'not IMT') if $src_attr;
736
737 my @nodes = (@{$todo->{node}->child_nodes});
738 my $new_todos = [];
739
740 while (@nodes) {
741 my $node = shift @nodes;
742 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
743
744 my $nt = $node->node_type;
745 if ($nt == 1) {
746 my $node_ns = $node->namespace_uri;
747 $node_ns = '' unless defined $node_ns;
748 my $node_ln = $node->manakai_local_name;
749 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
750 #
751 } else {
752 # MUST NOT
753 $self->{onerror}->(node => $node, type => 'element not allowed');
754 }
755 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
756 unshift @nodes, @$sib;
757 push @$new_todos, @$ch;
758 } elsif ($nt == 3 or $nt == 4) {
759 $self->{onerror}->(node => $node, type => 'character not allowed')
760 if $src_attr;
761 } elsif ($nt == 5) {
762 unshift @nodes, @{$node->child_nodes};
763 }
764 }
765
766 return ($new_todos);
767 } elsif ($value eq 'html') {
768 $self->{onerror}->(node => $attr, type => 'not IMT') if $src_attr;
769
770 my @nodes = (@{$todo->{node}->child_nodes});
771 my $new_todos = [];
772
773 while (@nodes) {
774 my $node = shift @nodes;
775 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
776
777 my $nt = $node->node_type;
778 if ($nt == 1) {
779 my $node_ns = $node->namespace_uri;
780 $node_ns = '' unless defined $node_ns;
781 my $node_ln = $node->manakai_local_name;
782 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
783 #
784 } else {
785 # MUST NOT
786 $self->{onerror}->(node => $node, type => 'element not allowed');
787 }
788 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
789 unshift @nodes, @$sib;
790 push @$new_todos, @$ch;
791 } elsif ($nt == 3 or $nt == 4) {
792 $self->{onerror}->(node => $node, type => 'character not allowed')
793 if $src_attr;
794 } elsif ($nt == 5) {
795 unshift @nodes, @{$node->child_nodes};
796 }
797 }
798
799 ## TODO: SHOULD be suitable for handling as HTML [HTML4]
800 # markup MUST be escaped
801 ## TODO: HTML SHOULD be valid as if within <div>
802
803 return ($new_todos);
804 } elsif ($value eq 'xhtml') {
805 $self->{onerror}->(node => $attr, type => 'not IMT') if $src_attr;
806
807 my @nodes = (@{$todo->{node}->child_nodes});
808 my $new_todos = [];
809
810 my $has_div;
811 while (@nodes) {
812 my $node = shift @nodes;
813 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
814
815 my $nt = $node->node_type;
816 if ($nt == 1) {
817 # MUST
818 my $nsuri = $node->namespace_uri;
819 $nsuri = '' unless defined $nsuri;
820 my $node_ln = $node->manakai_local_name;
821 if ($self->{pluses}->{$nsuri}->{$node_ln}) {
822 #
823 } elsif ($nsuri eq q<http://www.w3.org/1999/xhtml> and
824 $node_ln eq 'div' and not $has_div) {
825 ## TODO: SHOULD be suitable for handling as HTML [XHTML10]
826 $has_div = 1;
827 $self->{onerror}->(node => $node, type => 'element not allowed')
828 if $src_attr;
829 } else {
830 $self->{onerror}->(node => $node, type => 'element not allowed');
831 }
832 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
833 unshift @nodes, @$sib;
834 push @$new_todos, @$ch;
835 } elsif ($nt == 3 or $nt == 4) {
836 ## TODO: Are white spaces allowed?
837 $self->{onerror}->(node => $node, type => 'character not allowed');
838 } elsif ($nt == 5) {
839 unshift @nodes, @{$node->child_nodes};
840 }
841 }
842
843 unless ($has_div) {
844 $self->{onerror}->(node => $todo->{node},
845 type => 'element missing:div');
846 }
847
848 return ($new_todos);
849 } elsif ($value =~ m![+/][Xx][Mm][Ll]\z!) {
850 ## ISSUE: There is no definition for "XML media type" in RFC 3023.
851 ## Is |application/xml-dtd| an XML media type?
852
853 my @nodes = (@{$todo->{node}->child_nodes});
854 my $new_todos = [];
855
856 while (@nodes) {
857 my $node = shift @nodes;
858 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
859
860 my $nt = $node->node_type;
861 if ($nt == 1) {
862 ## MAY contain elements
863 if ($src_attr) {
864 my $node_ns = $node->namespace_uri;
865 $node_ns = '' unless defined $node_ns;
866 my $node_ln = $node->manakai_local_name;
867 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
868 #
869 } else {
870 $self->{onerror}->(node => $node, type => 'element not allowed');
871 }
872 }
873 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
874 unshift @nodes, @$sib;
875 push @$new_todos, @$ch;
876 } elsif ($nt == 3 or $nt == 4) {
877 ## TODO: Are white spaces allowed?
878 $self->{onerror}->(node => $node, type => 'character not allowed');
879 } elsif ($nt == 5) {
880 unshift @nodes, @{$node->child_nodes};
881 }
882 }
883
884 ## NOTE: SHOULD be suitable for handling as $value.
885 ## If no @src, this would normally mean it contains a
886 ## single child element that would serve as the root element.
887 $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
888 type => 'content:'.$value);
889
890 return ($new_todos);
891 } elsif ($value =~ m!^[Tt][Ee][Xx][Tt]/!) {
892 my @nodes = (@{$todo->{node}->child_nodes});
893 my $new_todos = [];
894
895 while (@nodes) {
896 my $node = shift @nodes;
897 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
898
899 my $nt = $node->node_type;
900 if ($nt == 1) {
901 my $node_ns = $node->namespace_uri;
902 $node_ns = '' unless defined $node_ns;
903 my $node_ln = $node->manakai_local_name;
904 if ($self->{pluses}->{$node_ns}->{$node_ln}) {
905 #
906 } else {
907 # MUST NOT
908 $self->{onerror}->(node => $node, type => 'element not allowed');
909 }
910 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
911 unshift @nodes, @$sib;
912 push @$new_todos, @$ch;
913 } elsif ($nt == 3 or $nt == 4) {
914 $self->{onerror}->(node => $node, type => 'character not allowed')
915 if $src_attr;
916 } elsif ($nt == 5) {
917 unshift @nodes, @{$node->child_nodes};
918 }
919 }
920
921 ## NOTE: No further restriction (such as to conform to the type).
922
923 return ($new_todos);
924 } else {
925 my @nodes = (@{$todo->{node}->child_nodes});
926 my $new_todos = [];
927
928 if ($value =~ m!^(?>message|multipart)/!i) { # MUST NOT
929 $self->{onerror}->(node => $attr, type => 'IMT:composite');
930 }
931
932 my $s = '';
933 while (@nodes) {
934 my $node = shift @nodes;
935 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
936
937 my $nt = $node->node_type;
938 if ($nt == 1) {
939 ## not explicitly disallowed
940 $self->{onerror}->(node => $node, type => 'element not allowed');
941 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
942 unshift @nodes, @$sib;
943 push @$new_todos, @$ch;
944 } elsif ($nt == 3 or $nt == 4) {
945 $s .= $node->data;
946 $self->{onerror}->(node => $node, type => 'character not allowed')
947 if $src_attr;
948 } elsif ($nt == 5) {
949 unshift @nodes, @{$node->child_nodes};
950 }
951 }
952
953 ## TODO: $s = valid Base64ed [RFC 3548] where
954 ## MAY leading and following "white space" (what?)
955 ## and lines separated by a single U+000A
956
957 ## NOTE: SHOULD be suitable for the indicated media type.
958 $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
959 type => 'content:'.$value);
960
961 return ($new_todos);
962 }
963 },
964 };
965 ## TODO: Tests for <html:nest/> in <atom:content/>
966
967 $Element->{$ATOM_NS}->{author} = $AtomPersonConstruct;
968
969 $Element->{$ATOM_NS}->{category} = {
970 attrs_checker => $GetAtomAttrsChecker->({
971 label => sub { 1 }, # no value constraint
972 scheme => sub { # NOTE: No MUST.
973 my ($self, $attr) = @_;
974 ## NOTE: There MUST NOT be any white space.
975 Whatpm::URIChecker->check_iri ($attr->value, sub {
976 my %opt = @_;
977 $self->{onerror}->(node => $attr, level => $opt{level},
978 type => 'URI::'.$opt{type}.
979 (defined $opt{position} ? ':'.$opt{position} : ''));
980 });
981 },
982 term => sub { 1 }, # no value constraint
983 }),
984 checker => sub {
985 my ($self, $todo) = @_;
986
987 unless ($todo->{node}->has_attribute_ns (undef, 'term')) {
988 $self->{onerror}->(node => $todo->{node},
989 type => 'attribute missing:term');
990 }
991
992 my @nodes = (@{$todo->{node}->child_nodes});
993 my $new_todos = [];
994
995 while (@nodes) {
996 my $node = shift @nodes;
997 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
998
999 my $nt = $node->node_type;
1000 if ($nt == 1) {
1001 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1002 unshift @nodes, @$sib;
1003 push @$new_todos, @$ch;
1004 } elsif ($nt == 3 or $nt == 4) {
1005 #
1006 } elsif ($nt == 5) {
1007 unshift @nodes, @{$node->child_nodes};
1008 }
1009 }
1010
1011 return ($new_todos);
1012 },
1013 };
1014
1015 $Element->{$ATOM_NS}->{contributor} = $AtomPersonConstruct;
1016
1017 ## TODO: Anything below does not support <html:nest/> yet.
1018
1019 $Element->{$ATOM_NS}->{generator} = {
1020 attrs_checker => $GetAtomAttrsChecker->({
1021 uri => sub { # MUST
1022 my ($self, $attr) = @_;
1023 ## NOTE: There MUST NOT be any white space.
1024 Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
1025 my %opt = @_;
1026 $self->{onerror}->(node => $attr, level => $opt{level},
1027 type => 'URI::'.$opt{type}.
1028 (defined $opt{position} ? ':'.$opt{position} : ''));
1029 });
1030 ## NOTE: Dereferencing SHOULD produce a representation
1031 ## that is relevant to the agent.
1032 },
1033 version => sub { 1 }, # no value constraint
1034 }),
1035 checker => sub {
1036 my ($self, $todo) = @_;
1037
1038 my @nodes = (@{$todo->{node}->child_nodes});
1039 my $new_todos = [];
1040
1041 while (@nodes) {
1042 my $node = shift @nodes;
1043 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1044
1045 my $nt = $node->node_type;
1046 if ($nt == 1) {
1047 ## not explicitly disallowed
1048 $self->{onerror}->(node => $node, type => 'element not allowed');
1049 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1050 unshift @nodes, @$sib;
1051 push @$new_todos, @$ch;
1052 } elsif ($nt == 3 or $nt == 4) {
1053 ## MUST be a string that is a human-readable name for
1054 ## the generating agent
1055 } elsif ($nt == 5) {
1056 unshift @nodes, @{$node->child_nodes};
1057 }
1058 }
1059
1060 return ($new_todos);
1061 },
1062 };
1063
1064 $Element->{$ATOM_NS}->{icon} = {
1065 attrs_checker => $GetAtomAttrsChecker->({}),
1066 checker => sub {
1067 my ($self, $todo) = @_;
1068
1069 my @nodes = (@{$todo->{node}->child_nodes});
1070 my $new_todos = [];
1071
1072 my $s = '';
1073 while (@nodes) {
1074 my $node = shift @nodes;
1075 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1076
1077 my $nt = $node->node_type;
1078 if ($nt == 1) {
1079 ## not explicitly disallowed
1080 $self->{onerror}->(node => $node, type => 'element not allowed');
1081 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1082 unshift @nodes, @$sib;
1083 push @$new_todos, @$ch;
1084 } elsif ($nt == 3 or $nt == 4) {
1085 $s .= $node->data;
1086 } elsif ($nt == 5) {
1087 unshift @nodes, @{$node->child_nodes};
1088 }
1089 }
1090
1091 ## NOTE: No MUST.
1092 ## NOTE: There MUST NOT be any white space.
1093 Whatpm::URIChecker->check_iri_reference ($s, sub {
1094 my %opt = @_;
1095 $self->{onerror}->(node => $todo->{node}, level => $opt{level},
1096 type => 'URI::'.$opt{type}.
1097 (defined $opt{position} ? ':'.$opt{position} : ''));
1098 });
1099
1100 ## NOTE: Image SHOULD be 1:1 and SHOULD be small
1101
1102 return ($new_todos);
1103 },
1104 };
1105
1106 $Element->{$ATOM_NS}->{id} = {
1107 attrs_checker => $GetAtomAttrsChecker->({}),
1108 checker => sub {
1109 my ($self, $todo) = @_;
1110
1111 my @nodes = (@{$todo->{node}->child_nodes});
1112 my $new_todos = [];
1113
1114 my $s = '';
1115 while (@nodes) {
1116 my $node = shift @nodes;
1117 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1118
1119 my $nt = $node->node_type;
1120 if ($nt == 1) {
1121 ## not explicitly disallowed
1122 $self->{onerror}->(node => $node, type => 'element not allowed');
1123 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1124 unshift @nodes, @$sib;
1125 push @$new_todos, @$ch;
1126 } elsif ($nt == 3 or $nt == 4) {
1127 $s .= $node->data;
1128 } elsif ($nt == 5) {
1129 unshift @nodes, @{$node->child_nodes};
1130 }
1131 }
1132
1133 ## NOTE: There MUST NOT be any white space.
1134 Whatpm::URIChecker->check_iri ($s, sub { # MUST
1135 my %opt = @_;
1136 $self->{onerror}->(node => $todo->{node}, level => $opt{level},
1137 type => 'URI::'.$opt{type}.
1138 (defined $opt{position} ? ':'.$opt{position} : ''));
1139 });
1140 ## TODO: SHOULD be normalized
1141
1142 return ($new_todos);
1143 },
1144 };
1145
1146 $Element->{$ATOM_NS}->{link} = {
1147 attrs_checker => $GetAtomAttrsChecker->({
1148 href => sub {
1149 my ($self, $attr) = @_;
1150 ## NOTE: There MUST NOT be any white space.
1151 Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
1152 my %opt = @_;
1153 $self->{onerror}->(node => $attr, level => $opt{level},
1154 type => 'URI::'.$opt{type}.
1155 (defined $opt{position} ? ':'.$opt{position} : ''));
1156 });
1157 },
1158 hreflang => $AtomLanguageTagAttrChecker,
1159 length => sub { }, # No MUST; in octets.
1160 rel => sub { # MUST
1161 my ($self, $attr) = @_;
1162 my $value = $attr->value;
1163 if ($value =~ /\A(?>[0-9A-Za-z._~!\$&'()*+,;=\x{A0}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFEF}\x{10000}-\x{1FFFD}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}\x{40000}-\x{4FFFD}\x{50000}-\x{5FFFD}\x{60000}-\x{6FFFD}\x{70000}-\x{7FFFD}\x{80000}-\x{8FFFD}\x{90000}-\x{9FFFD}\x{A0000}-\x{AFFFD}\x{B0000}-\x{BFFFD}\x{C0000}-\x{CFFFD}\x{D0000}-\x{DFFFD}\x{E1000}-\x{EFFFD}-]|%[0-9A-Fa-f][0-9A-Fa-f]|\@)+\z/) {
1164 $value = $LINK_REL . $value;
1165 }
1166
1167 ## NOTE: There MUST NOT be any white space.
1168 Whatpm::URIChecker->check_iri ($value, sub {
1169 my %opt = @_;
1170 $self->{onerror}->(node => $attr, level => $opt{level},
1171 type => 'URI::'.$opt{type}.
1172 (defined $opt{position} ? ':'.$opt{position} : ''));
1173 });
1174
1175 ## TODO: Warn if unregistered
1176 },
1177 title => sub { }, # No MUST
1178 type => sub {
1179 ## NOTE: MUST be a MIME media type. What is "MIME media type"?
1180 my ($self, $attr) = @_;
1181 my $value = $attr->value;
1182 my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
1183 my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
1184 my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
1185 if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
1186 my @type = ($1, $2);
1187 my $param = $3;
1188 while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
1189 if (defined $2) {
1190 push @type, $1 => $2;
1191 } else {
1192 my $n = $1;
1193 my $v = $2;
1194 $v =~ s/\\(.)/$1/gs;
1195 push @type, $n => $v;
1196 }
1197 }
1198 require Whatpm::IMTChecker;
1199 Whatpm::IMTChecker->check_imt (sub {
1200 my %opt = @_;
1201 $self->{onerror}->(node => $attr, level => $opt{level},
1202 type => 'IMT:'.$opt{type});
1203 }, @type);
1204 } else {
1205 $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
1206 }
1207 },
1208 }),
1209 checker => sub {
1210 my ($self, $todo) = @_;
1211
1212 unless ($todo->{node}->has_attribute_ns (undef, 'href')) { # MUST
1213 $self->{onerror}->(node => $todo->{node},
1214 type => 'attribute missing:href');
1215 }
1216
1217 if ($todo->{node}->rel eq $LINK_REL . 'enclosure' and
1218 not $todo->{node}->has_attribute_ns (undef, 'length')) {
1219 $self->{onerror}->(node => $todo->{node}, level => 's',
1220 type => 'attribute missing:length');
1221 }
1222
1223 my @nodes = (@{$todo->{node}->child_nodes});
1224 my $new_todos = [];
1225
1226 while (@nodes) {
1227 my $node = shift @nodes;
1228 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1229
1230 my $nt = $node->node_type;
1231 if ($nt == 1) {
1232 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1233 unshift @nodes, @$sib;
1234 push @$new_todos, @$ch;
1235 } elsif ($nt == 3 or $nt == 4) {
1236 #
1237 } elsif ($nt == 5) {
1238 unshift @nodes, @{$node->child_nodes};
1239 }
1240 }
1241
1242 return ($new_todos);
1243 },
1244 };
1245
1246 $Element->{$ATOM_NS}->{logo} = {
1247 attrs_checker => $GetAtomAttrsChecker->({}),
1248 checker => sub {
1249 my ($self, $todo) = @_;
1250
1251 my @nodes = (@{$todo->{node}->child_nodes});
1252 my $new_todos = [];
1253
1254 my $s = '';
1255 while (@nodes) {
1256 my $node = shift @nodes;
1257 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1258
1259 my $nt = $node->node_type;
1260 if ($nt == 1) {
1261 ## not explicitly disallowed
1262 $self->{onerror}->(node => $node, type => 'element not allowed');
1263 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1264 unshift @nodes, @$sib;
1265 push @$new_todos, @$ch;
1266 } elsif ($nt == 3 or $nt == 4) {
1267 $s .= $node->data;
1268 } elsif ($nt == 5) {
1269 unshift @nodes, @{$node->child_nodes};
1270 }
1271 }
1272
1273 ## NOTE: There MUST NOT be any white space.
1274 Whatpm::URIChecker->check_iri_reference ($s, sub {
1275 my %opt = @_;
1276 $self->{onerror}->(node => $todo->{node}, level => $opt{level},
1277 type => 'URI::'.$opt{type}.
1278 (defined $opt{position} ? ':'.$opt{position} : ''));
1279 });
1280
1281 ## NOTE: Image SHOULD be 2:1
1282
1283 return ($new_todos);
1284 },
1285 };
1286
1287 $Element->{$ATOM_NS}->{published} = $AtomDateConstruct;
1288
1289 $Element->{$ATOM_NS}->{rights} = $AtomDateConstruct;
1290 ## NOTE: SHOULD NOT be used to convey machine-readable information.
1291
1292 $Element->{$ATOM_NS}->{source} = {
1293 attrs_checker => $GetAtomAttrsChecker->({}),
1294 checker => sub {
1295 my ($self, $todo) = @_;
1296
1297 my @nodes = (@{$todo->{node}->child_nodes});
1298 my $new_todos = [];
1299 my $has_element = {};
1300 while (@nodes) {
1301 my $node = shift @nodes;
1302 $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1303
1304 my $nt = $node->node_type;
1305 if ($nt == 1) {
1306 my $nsuri = $node->namespace_uri;
1307 $nsuri = '' unless defined $nsuri;
1308 my $not_allowed;
1309 if ($nsuri eq $ATOM_NS) {
1310 my $ln = $node->manakai_local_name;
1311 if ($ln eq 'entry') {
1312 $has_element->{entry} = 1;
1313 } elsif ({
1314 generator => 1,
1315 icon => 1,
1316 id => 1,
1317 logo => 1,
1318 rights => 1,
1319 subtitle => 1,
1320 title => 1,
1321 updated => 1,
1322 }->{$ln}) {
1323 unless ($has_element->{$ln}) {
1324 $has_element->{$ln} = 1;
1325 $not_allowed = $has_element->{entry};
1326 } else {
1327 $not_allowed = 1;
1328 }
1329 } elsif ($ln eq 'link') {
1330 if ($node->rel eq $LINK_REL . 'alternate') {
1331 my $type = $node->get_attribute_ns (undef, 'type');
1332 $type = '' unless defined $type;
1333 my $hreflang = $node->get_attribute_ns (undef, 'hreflang');
1334 $hreflang = '' unless defined $hreflang;
1335 my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
1336 (defined $hreflang ? ':'.$hreflang : '');
1337 unless ($has_element->{$key}) {
1338 $has_element->{$key} = 1;
1339 } else {
1340 $not_allowed = 1;
1341 }
1342 }
1343 $not_allowed ||= $has_element->{entry};
1344 } elsif ({
1345 author => 1,
1346 category => 1,
1347 contributor => 1,
1348 }->{$ln}) {
1349 $not_allowed = $has_element->{entry};
1350 } else {
1351 $not_allowed = 1;
1352 }
1353 } else {
1354 ## TODO: extension element
1355 $not_allowed = 1;
1356 }
1357 $self->{onerror}->(node => $node, type => 'element not allowed')
1358 if $not_allowed;
1359 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1360 unshift @nodes, @$sib;
1361 push @$new_todos, @$ch;
1362 } elsif ($nt == 3 or $nt == 4) {
1363 ## TODO: Are white spaces allowed?
1364 $self->{onerror}->(node => $node, type => 'character not allowed');
1365 } elsif ($nt == 5) {
1366 unshift @nodes, @{$node->child_nodes};
1367 }
1368 }
1369
1370 return ($new_todos);
1371 },
1372 };
1373
1374 $Element->{$ATOM_NS}->{subtitle} = $AtomTextConstruct;
1375
1376 $Element->{$ATOM_NS}->{summary} = $AtomTextConstruct;
1377
1378 $Element->{$ATOM_NS}->{title} = $AtomTextConstruct;
1379
1380 $Element->{$ATOM_NS}->{updated} = $AtomDateConstruct;
1381
1382 ## TODO: signature element
1383
1384 ## TODO: simple extension element and structured extension element
1385
1386 $Whatpm::ContentChecker::Namespace->{$ATOM_NS}->{loaded} = 1;
1387
1388 1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24