/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/Atom.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/Atom.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.6 - (hide annotations) (download)
Sun Oct 14 09:21:46 2007 UTC (17 years ago) by wakaba
Branch: MAIN
Changes since 1.5: +75 -22 lines
++ whatpm/t/ChangeLog	14 Oct 2007 09:21:32 -0000
2007-10-14  Wakaba  <wakaba@suika.fam.cx>

	* content-model-1.dat, content-model-2.dat: New test
	data for |rule|, |nest|, and |datatemplate| elements.

++ whatpm/Whatpm/ChangeLog	14 Oct 2007 09:20:23 -0000
2007-10-14  Wakaba  <wakaba@suika.fam.cx>

	* ContentChecker.pm (check_document): Support for
	new |is_xml_root| flag.
	(check_element): Support for new |pluses| state.
	(_add_pluses): New method.
	(_remove_minuses): Support for new |minus| item.

++ whatpm/Whatpm/ContentChecker/ChangeLog	14 Oct 2007 09:20:50 -0000
2007-10-14  Wakaba  <wakaba@suika.fam.cx>

	* Atom.pm, HTML.pm: Support for |html:nest|, |html:datatemplate|,
	and |html:rule| elements.

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5 wakaba 1.2 require Whatpm::URIChecker;
6    
7 wakaba 1.1 my $ATOM_NS = q<http://www.w3.org/2005/Atom>;
8 wakaba 1.3 my $LINK_REL = q<http://www.iana.org/assignments/relation/>;
9 wakaba 1.1
10     ## MUST be well-formed XML (RFC 4287 references XML 1.0 REC 20040204)
11    
12     ## NOTE: Commants and PIs are not explicitly allowed.
13    
14     our $AttrChecker;
15    
16     ## Any element MAY have xml:base, xml:lang
17     my $GetAtomAttrsChecker = sub {
18     my $element_specific_checker = shift;
19     return sub {
20     my ($self, $todo) = @_;
21     for my $attr (@{$todo->{node}->attributes}) {
22     my $attr_ns = $attr->namespace_uri;
23     $attr_ns = '' unless defined $attr_ns;
24     my $attr_ln = $attr->manakai_local_name;
25     my $checker;
26     if ($attr_ns eq '') {
27     $checker = $element_specific_checker->{$attr_ln};
28     } else {
29     $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
30     || $AttrChecker->{$attr_ns}->{''};
31     }
32     if ($checker) {
33     $checker->($self, $attr, $todo);
34     } else {
35     $self->{onerror}->(node => $attr, level => 'unsupported',
36     type => 'attribute');
37     ## ISSUE: No comformance createria for unknown attributes in the spec
38     }
39     }
40     };
41     }; # $GetAtomAttrsChecker
42    
43 wakaba 1.4 my $AtomLanguageTagAttrChecker = sub {
44     ## NOTE: See also $HTMLLanguageTagAttrChecker in HTML.pm.
45    
46     my ($self, $attr) = @_;
47     my $value = $attr->value;
48     require Whatpm::LangTag;
49     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
50     my %opt = @_;
51     my $type = 'LangTag:'.$opt{type};
52     $type .= ':' . $opt{subtag} if defined $opt{subtag};
53     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
54     level => $opt{level});
55     });
56     ## ISSUE: RFC 4646 (3066bis)?
57     }; # $AtomLanguageTagAttrChecker
58    
59 wakaba 1.1 my $AtomTextConstruct = {
60     attrs_checker => $GetAtomAttrsChecker->({
61     type => sub { 1 }, # checked in |checker|
62     }),
63     checker => sub {
64     my ($self, $todo) = @_;
65    
66     my $attr = $todo->{node}->get_attribute_node_ns (undef, 'type');
67     my $value = 'text';
68     if ($attr) {
69     $value = $attr->value;
70     if ($value eq 'text' or $value eq 'html' or $value eq 'xhtml') {
71     # MUST
72     } else {
73     $self->{onerror}->(node => $attr, type => 'keyword:invalid');
74     }
75     # IMT MUST NOT be used
76     }
77    
78     if ($value eq 'text') {
79     my @nodes = (@{$todo->{node}->child_nodes});
80     my $new_todos = [];
81    
82     while (@nodes) {
83     my $node = shift @nodes;
84     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
85    
86     my $nt = $node->node_type;
87     if ($nt == 1) {
88     # MUST NOT
89     $self->{onerror}->(node => $node, type => 'element not allowed');
90     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
91     unshift @nodes, @$sib;
92     push @$new_todos, @$ch;
93     } elsif ($nt == 5) {
94     unshift @nodes, @{$node->child_nodes};
95     }
96     }
97    
98     return ($new_todos);
99     } elsif ($value eq 'html') {
100     my @nodes = (@{$todo->{node}->child_nodes});
101     my $new_todos = [];
102    
103     while (@nodes) {
104     my $node = shift @nodes;
105     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
106    
107     my $nt = $node->node_type;
108     if ($nt == 1) {
109     # MUST NOT
110     $self->{onerror}->(node => $node, type => 'element not allowed');
111     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
112     unshift @nodes, @$sib;
113     push @$new_todos, @$ch;
114     } elsif ($nt == 5) {
115     unshift @nodes, @{$node->child_nodes};
116     }
117     }
118    
119     ## TODO: SHOULD be suitable for handling as HTML [HTML4]
120     # markup MUST be escaped
121     ## TODO: HTML SHOULD be valid as if within <div>
122    
123     return ($new_todos);
124     } elsif ($value eq 'xhtml') {
125     my @nodes = (@{$todo->{node}->child_nodes});
126     my $new_todos = [];
127    
128     my $has_div;
129     while (@nodes) {
130     my $node = shift @nodes;
131     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
132    
133     my $nt = $node->node_type;
134     if ($nt == 1) {
135     # MUST
136     my $nsuri = $node->namespace_uri;
137     if (defined $nsuri and
138     $nsuri eq q<http://www.w3.org/1999/xhtml> and
139     $node->manakai_local_name eq 'div' and
140     not $has_div) {
141     ## TODO: SHOULD be suitable for handling as HTML [XHTML10]
142     $has_div = 1;
143     } else {
144     $self->{onerror}->(node => $node, type => 'element not allowed');
145     }
146     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
147     unshift @nodes, @$sib;
148     push @$new_todos, @$ch;
149     } elsif ($nt == 3 or $nt == 4) {
150     ## TODO: Are white spaces allowed?
151     $self->{onerror}->(node => $node, type => 'character not allowed');
152     } elsif ($nt == 5) {
153     unshift @nodes, @{$node->child_nodes};
154     }
155     }
156    
157     unless ($has_div) {
158     $self->{onerror}->(node => $todo->{node},
159     type => 'element missing:div');
160     }
161    
162     return ($new_todos);
163     }
164    
165     },
166     }; # $AtomTextConstruct
167    
168     my $AtomPersonConstruct = {
169     attrs_checker => $GetAtomAttrsChecker->({}),
170     checker => sub {
171     my ($self, $todo) = @_;
172    
173     my @nodes = (@{$todo->{node}->child_nodes});
174     my $new_todos = [];
175    
176     my $has_name;
177     my $has_uri;
178     my $has_email;
179     while (@nodes) {
180     my $node = shift @nodes;
181     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
182    
183     my $nt = $node->node_type;
184     if ($nt == 1) {
185     # MUST
186     my $nsuri = $node->namespace_uri;
187     $nsuri = '' unless defined $nsuri;
188     my $not_allowed;
189     if ($nsuri eq $ATOM_NS) {
190     my $ln = $node->manakai_local_name;
191     if ($ln eq 'name') {
192     unless ($has_name) {
193     $has_name = 1;
194     } else {
195     $not_allowed = 1;
196     }
197     } elsif ($ln eq 'uri') {
198     unless ($has_uri) {
199     $has_uri = 1;
200     } else {
201     $not_allowed = 1; # MUST NOT
202     }
203     } elsif ($ln eq 'email') {
204     unless ($has_email) {
205     $has_email = 1;
206     } else {
207     $not_allowed = 1; # MUST NOT
208     }
209     } else {
210     $not_allowed = 1;
211     }
212     } else {
213     ## TODO: extension element
214     $not_allowed = 1;
215     }
216     $self->{onerror}->(node => $node, type => 'element not allowed')
217     if $not_allowed;
218     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
219     unshift @nodes, @$sib;
220     push @$new_todos, @$ch;
221     } elsif ($nt == 3 or $nt == 4) {
222     ## TODO: Are white spaces allowed?
223     $self->{onerror}->(node => $node, type => 'character not allowed');
224     } elsif ($nt == 5) {
225     unshift @nodes, @{$node->child_nodes};
226     }
227     }
228    
229     unless ($has_name) { # MUST
230     $self->{onerror}->(node => $todo->{node},
231     type => 'element missing:atom.name');
232     }
233    
234     return ($new_todos);
235     },
236     }; # $AtomPersonConstruct
237    
238 wakaba 1.2 our $Element;
239    
240     $Element->{$ATOM_NS}->{name} = {
241     ## NOTE: Strictly speaking, structure and semantics for atom:name
242     ## element outside of Person construct is not defined.
243     attrs_checker => $GetAtomAttrsChecker->({}),
244     checker => sub {
245     my ($self, $todo) = @_;
246    
247     my @nodes = (@{$todo->{node}->child_nodes});
248     my $new_todos = [];
249    
250     while (@nodes) {
251     my $node = shift @nodes;
252     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
253    
254     my $nt = $node->node_type;
255     if ($nt == 1) {
256     ## NOTE: No constraint.
257     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
258     unshift @nodes, @$sib;
259     push @$new_todos, @$ch;
260     } elsif ($nt == 3 or $nt == 4) {
261     #
262     } elsif ($nt == 5) {
263     unshift @nodes, @{$node->child_nodes};
264     }
265     }
266    
267     return ($new_todos);
268     },
269     };
270    
271     $Element->{$ATOM_NS}->{uri} = {
272     ## NOTE: Strictly speaking, structure and semantics for atom:uri
273     ## element outside of Person construct is not defined.
274     attrs_checker => $GetAtomAttrsChecker->({}),
275     checker => sub {
276     my ($self, $todo) = @_;
277    
278     my @nodes = (@{$todo->{node}->child_nodes});
279     my $new_todos = [];
280    
281     my $s = '';
282     while (@nodes) {
283     my $node = shift @nodes;
284     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
285    
286     my $nt = $node->node_type;
287     if ($nt == 1) {
288 wakaba 1.6 my $node_ns = $node->namespace_uri;
289     $node_ns = '' unless defined $node_ns;
290     my $node_ln = $node->manakai_local_name;
291     unless ($self->{pluses}->{$node_ns}->{$node_ln}) {
292     ## NOTE: Not explicitly disallowed.
293     $self->{onerror}->(node => $node, type => 'element not allowed');
294     }
295 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
296     unshift @nodes, @$sib;
297     push @$new_todos, @$ch;
298     } elsif ($nt == 3 or $nt == 4) {
299     $s .= $node->data;
300     } elsif ($nt == 5) {
301     unshift @nodes, @{$node->child_nodes};
302     }
303     }
304    
305     ## NOTE: There MUST NOT be any white space.
306     Whatpm::URIChecker->check_iri_reference ($s, sub {
307     my %opt = @_;
308     $self->{onerror}->(node => $todo->{node}, level => $opt{level},
309     type => 'URI::'.$opt{type}.
310     (defined $opt{position} ? ':'.$opt{position} : ''));
311     });
312    
313     return ($new_todos);
314     },
315     };
316    
317     $Element->{$ATOM_NS}->{email} = {
318     ## NOTE: Strictly speaking, structure and semantics for atom:email
319     ## element outside of Person construct is not defined.
320     attrs_checker => $GetAtomAttrsChecker->({}),
321     checker => sub {
322     my ($self, $todo) = @_;
323    
324     my @nodes = (@{$todo->{node}->child_nodes});
325     my $new_todos = [];
326    
327     my $s = '';
328     while (@nodes) {
329     my $node = shift @nodes;
330     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
331    
332     my $nt = $node->node_type;
333     if ($nt == 1) {
334 wakaba 1.6 my $node_ns = $node->namespace_uri;
335     $node_ns = '' unless defined $node_ns;
336     my $node_ln = $node->manakai_local_name;
337     unless ($self->{pluses}->{$node_ns}->{$node_ln}) {
338     ## NOTE: Not explicitly disallowed.
339     $self->{onerror}->(node => $node, type => 'element not allowed');
340     }
341 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
342     unshift @nodes, @$sib;
343     push @$new_todos, @$ch;
344     } elsif ($nt == 3 or $nt == 4) {
345     $s .= $node->data;
346     } elsif ($nt == 5) {
347     unshift @nodes, @{$node->child_nodes};
348     }
349     }
350    
351     ## TODO: addr-spec
352     $self->{onerror}->(node => $todo->{node}, type => 'addr-spec',
353     level => 'unsupported');
354    
355     return ($new_todos);
356     },
357     };
358    
359 wakaba 1.1 ## MUST NOT be any white space
360     my $AtomDateConstruct = {
361     attrs_checker => $GetAtomAttrsChecker->({}),
362     checker => sub {
363     my ($self, $todo) = @_;
364    
365     my @nodes = (@{$todo->{node}->child_nodes});
366     my $new_todos = [];
367    
368     my $s = '';
369     while (@nodes) {
370     my $node = shift @nodes;
371     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
372    
373     my $nt = $node->node_type;
374     if ($nt == 1) {
375 wakaba 1.6 my $node_ns = $node->namespace_uri;
376     $node_ns = '' unless defined $node_ns;
377     my $node_ln = $node->manakai_local_name;
378     unless ($self->{pluses}->{$node_ns}->{$node_ln}) {
379     ## NOTE: It does not explicitly say that there MUST NOT be any element.
380     $self->{onerror}->(node => $node, type => 'element not allowed');
381     }
382 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
383     unshift @nodes, @$sib;
384     push @$new_todos, @$ch;
385     } elsif ($nt == 3 or $nt == 4) {
386     $s .= $node->data;
387     } elsif ($nt == 5) {
388     unshift @nodes, @{$node->child_nodes};
389     }
390     }
391    
392 wakaba 1.4 ## MUST: RFC 3339 |date-time| with uppercase |T| and |Z|
393     if ($s =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})(?>\.[0-9]+)?(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
394     my ($y, $M, $d, $h, $m, $s, $zh, $zm)
395     = ($1, $2, $3, $4, $5, $6, $7, $8);
396     my $node = $todo->{node};
397    
398     ## Check additional constraints described or referenced in
399     ## comments of ABNF rules for |date-time|.
400     my $level = $self->{must_level};
401     if (0 < $M and $M < 13) {
402     $self->{onerror}->(node => $node, type => 'datetime:bad day',
403     level => $level)
404     if $d < 1 or
405     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
406     $self->{onerror}->(node => $node, type => 'datetime:bad day',
407     level => $level)
408     if $M == 2 and $d == 29 and
409     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
410     } else {
411     $self->{onerror}->(node => $node, type => 'datetime:bad month',
412     level => $level);
413     }
414     $self->{onerror}->(node => $node, type => 'datetime:bad hour',
415     level => $level) if $h > 23;
416     $self->{onerror}->(node => $node, type => 'datetime:bad minute',
417     level => $level) if $m > 59;
418     $self->{onerror}->(node => $node, type => 'datetime:bad second',
419     level => $level)
420     if $s > 60; ## NOTE: Validness of leap seconds are not checked.
421     $self->{onerror}->(node => $node, type => 'datetime:bad timezone hour',
422     level => $level) if $zh > 23;
423     $self->{onerror}->(node => $node, type => 'datetime:bad timezone minute',
424     level => $level) if $zm > 59;
425     } else {
426     $self->{onerror}->(node => $todo->{node},
427     type => 'datetime:syntax error',
428     level => $self->{must_level});
429     }
430     ## NOTE: SHOULD be accurate as possible (cannot be checked)
431 wakaba 1.1
432     return ($new_todos);
433     },
434     }; # $AtomDateConstruct
435    
436 wakaba 1.2 $Element->{$ATOM_NS}->{entry} = {
437 wakaba 1.1 is_root => 1,
438     attrs_checker => $GetAtomAttrsChecker->({}),
439     checker => sub {
440     my ($self, $todo) = @_;
441    
442     my @nodes = (@{$todo->{node}->child_nodes});
443     my $new_todos = [];
444    
445     ## TODO: MUST author+ unless (child::source/child::author)
446     ## or (parent::feed/child::author)
447    
448     my $has_element = {};
449     while (@nodes) {
450     my $node = shift @nodes;
451     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
452    
453     my $nt = $node->node_type;
454     if ($nt == 1) {
455     # MUST
456     my $nsuri = $node->namespace_uri;
457     $nsuri = '' unless defined $nsuri;
458 wakaba 1.6 my $ln = $node->manakai_local_name;
459 wakaba 1.1 my $not_allowed;
460 wakaba 1.6 if ($self->{pluses}->{$nsuri}->{$ln}) {
461     #
462     } elsif ($nsuri eq $ATOM_NS) {
463 wakaba 1.1 if ({ # MUST (0, 1)
464     content => 1,
465     id => 1,
466     published => 1,
467     rights => 1,
468     source => 1,
469     summary => 1,
470     ## TODO: MUST if child::content/@src | child::content/@type = IMT, !text/ !/xml !+xml
471     title => 1,
472     updated => 1,
473     }->{$ln}) {
474     unless ($has_element->{$ln}) {
475     $has_element->{$ln} = 1;
476     $not_allowed = $has_element->{entry};
477     } else {
478     $not_allowed = 1;
479     }
480     } elsif ($ln eq 'link') { # MAY
481 wakaba 1.3 if ($node->rel eq $LINK_REL . 'alternate') {
482     my $type = $node->get_attribute_ns (undef, 'type');
483     $type = '' unless defined $type;
484     my $hreflang = $node->get_attribute_ns (undef, 'hreflang');
485     $hreflang = '' unless defined $hreflang;
486     my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
487     (defined $hreflang ? ':'.$hreflang : '');
488     unless ($has_element->{$key}) {
489     $has_element->{$key} = 1;
490     $has_element->{'link.alternate'} = 1;
491     } else {
492     $not_allowed = 1;
493     }
494     }
495    
496 wakaba 1.1 ## NOTE: MAY
497 wakaba 1.3 $not_allowed ||= $has_element->{entry};
498 wakaba 1.1 } elsif ({ # MAY
499     author => 1,
500     category => 1,
501     contributor => 1,
502     }->{$ln}) {
503     $not_allowed = $has_element->{entry};
504     } else {
505     $not_allowed = 1;
506     }
507     } else {
508     ## TODO: extension element
509     $not_allowed = 1;
510     }
511     $self->{onerror}->(node => $node, type => 'element not allowed')
512     if $not_allowed;
513     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
514     unshift @nodes, @$sib;
515     push @$new_todos, @$ch;
516     } elsif ($nt == 3 or $nt == 4) {
517     ## TODO: Are white spaces allowed?
518     $self->{onerror}->(node => $node, type => 'character not allowed');
519     } elsif ($nt == 5) {
520     unshift @nodes, @{$node->child_nodes};
521     }
522     }
523    
524     ## NOTE: metadata elements, followed by atom:entry* (no explicit MAY)
525    
526     ## TODO: If entry's with same id, then updated SHOULD be different
527    
528     unless ($has_element->{id}) { # MUST
529     $self->{onerror}->(node => $todo->{node},
530     type => 'element missing:atom.id');
531     }
532     unless ($has_element->{title}) { # MUST
533     $self->{onerror}->(node => $todo->{node},
534     type => 'element missing:atom.title');
535     }
536     unless ($has_element->{updated}) { # MUST
537     $self->{onerror}->(node => $todo->{node},
538     type => 'element missing:atom.updated');
539     }
540 wakaba 1.3 if (not $has_element->{content} and
541     not $has_element->{'link.alternate'}) {
542     $self->{onerror}->(node => $todo->{node},
543     type => 'element missing:atom.link.alternate');
544     }
545 wakaba 1.1
546     return ($new_todos);
547     },
548     };
549    
550     $Element->{$ATOM_NS}->{feed} = {
551     is_root => 1,
552     attrs_checker => $GetAtomAttrsChecker->({}),
553     checker => sub {
554     my ($self, $todo) = @_;
555    
556     my @nodes = (@{$todo->{node}->child_nodes});
557     my $new_todos = [];
558    
559     ## TODO: MUST author+ unless all entry child has author+.
560    
561     my $has_element = {};
562     while (@nodes) {
563     my $node = shift @nodes;
564     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
565    
566     my $nt = $node->node_type;
567     if ($nt == 1) {
568     my $nsuri = $node->namespace_uri;
569     $nsuri = '' unless defined $nsuri;
570 wakaba 1.6 my $ln = $node->manakai_local_name;
571 wakaba 1.1 my $not_allowed;
572 wakaba 1.6 if ($self->{pluses}->{$nsuri}->{$ln}) {
573     #
574     } elsif ($nsuri eq $ATOM_NS) {
575 wakaba 1.1 if ($ln eq 'entry') {
576     $has_element->{entry} = 1;
577     } elsif ({ # MUST (0, 1)
578     generator => 1,
579     icon => 1,
580     id => 1,
581     logo => 1,
582     rights => 1,
583     subtitle => 1,
584     title => 1,
585     updated => 1,
586     }->{$ln}) {
587     unless ($has_element->{$ln}) {
588     $has_element->{$ln} = 1;
589     $not_allowed = $has_element->{entry};
590     } else {
591     $not_allowed = 1;
592     }
593 wakaba 1.3 } elsif ($ln eq 'link') {
594     my $rel = $node->rel;
595     if ($rel eq $LINK_REL . 'alternate') {
596     my $type = $node->get_attribute_ns (undef, 'type');
597     $type = '' unless defined $type;
598     my $hreflang = $node->get_attribute_ns (undef, 'hreflang');
599     $hreflang = '' unless defined $hreflang;
600     my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
601     (defined $hreflang ? ':'.$hreflang : '');
602     unless ($has_element->{$key}) {
603     $has_element->{$key} = 1;
604     } else {
605     $not_allowed = 1;
606     }
607     } elsif ($rel eq $LINK_REL . 'self') {
608     $has_element->{'link.self'} = 1;
609     }
610    
611     ## NOTE: MAY
612 wakaba 1.1 $not_allowed = $has_element->{entry};
613     } elsif ({ # MAY
614     author => 1,
615     category => 1,
616     contributor => 1,
617     }->{$ln}) {
618     $not_allowed = $has_element->{entry};
619     } else {
620     $not_allowed = 1;
621     }
622     } else {
623     ## TODO: extension element
624     $not_allowed = 1;
625     }
626     $self->{onerror}->(node => $node, type => 'element not allowed')
627     if $not_allowed;
628     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
629     unshift @nodes, @$sib;
630     push @$new_todos, @$ch;
631     } elsif ($nt == 3 or $nt == 4) {
632     ## TODO: Are white spaces allowed?
633     $self->{onerror}->(node => $node, type => 'character not allowed');
634     } elsif ($nt == 5) {
635     unshift @nodes, @{$node->child_nodes};
636     }
637     }
638    
639     ## NOTE: metadata elements, followed by atom:entry* (no explicit MAY)
640    
641     ## TODO: If entry's with same id, then updated SHOULD be different
642    
643     unless ($has_element->{id}) { # MUST
644     $self->{onerror}->(node => $todo->{node},
645     type => 'element missing:atom.id');
646     }
647     unless ($has_element->{title}) { # MUST
648     $self->{onerror}->(node => $todo->{node},
649     type => 'element missing:atom.title');
650     }
651     unless ($has_element->{updated}) { # MUST
652     $self->{onerror}->(node => $todo->{node},
653     type => 'element missing:atom.updated');
654     }
655 wakaba 1.3 unless ($has_element->{'link.self'}) {
656     $self->{onerror}->(node => $todo->{node}, level => 's',
657     type => 'child element missing:atom.link.self');
658     }
659 wakaba 1.1
660     return ($new_todos);
661     },
662     };
663    
664     $Element->{$ATOM_NS}->{content} = {
665     attrs_checker => $GetAtomAttrsChecker->({
666     src => sub { 1 }, # checked in |checker|
667     type => sub { 1 }, # checked in |checker|
668     }),
669     checker => sub {
670     my ($self, $todo) = @_;
671    
672     my $attr = $todo->{node}->get_attribute_node_ns (undef, 'type');
673     my $src_attr = $todo->{node}->get_attribute_node_ns (undef, 'src');
674     my $value;
675     if ($attr) {
676     $value = $attr->value;
677     if ($value eq 'text' or $value eq 'html' or $value eq 'xhtml') {
678     # MUST
679     } else {
680 wakaba 1.3 ## NOTE: MUST be a MIME media type. What is "MIME media type"?
681     my $value = $attr->value;
682     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
683     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
684     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
685     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
686     my @type = ($1, $2);
687     my $param = $3;
688     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
689     if (defined $2) {
690     push @type, $1 => $2;
691     } else {
692     my $n = $1;
693     my $v = $2;
694     $v =~ s/\\(.)/$1/gs;
695     push @type, $n => $v;
696     }
697     }
698     require Whatpm::IMTChecker;
699     Whatpm::IMTChecker->check_imt (sub {
700     my %opt = @_;
701     $self->{onerror}->(node => $attr, level => $opt{level},
702     type => 'IMT:'.$opt{type});
703     }, @type);
704     } else {
705     $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
706     }
707 wakaba 1.1 }
708     } elsif ($src_attr) {
709     $value = '';
710     $self->{onerror}->(node => $todo->{node},
711     type => 'attribute missing:type', level => 's');
712     } else {
713     $value = 'text';
714     }
715    
716     ## TODO: This implementation is not optimal.
717    
718     if ($src_attr) {
719 wakaba 1.2 ## NOTE: There MUST NOT be any white space.
720     Whatpm::URIChecker->check_iri_reference ($src_attr->value, sub {
721     my %opt = @_;
722     $self->{onerror}->(node => $todo->{node}, level => $opt{level},
723     type => 'URI::'.$opt{type}.
724     (defined $opt{position} ? ':'.$opt{position} : ''));
725     });
726 wakaba 1.1
727     ## NOTE: If @src, the element MUST be empty. What is "empty"?
728     ## Is |<e><!----></e>| empty? |<e>&e;</e>| where |&e;| has
729     ## empty replacement tree shuld be empty, since Atom is defined
730     ## in terms of XML Information Set where entities are expanded.
731     ## (but what if |&e;| is an unexpanded entity?)
732     }
733    
734     if ($value eq 'text') {
735     $self->{onerror}->(node => $attr, type => 'not IMT') if $src_attr;
736    
737     my @nodes = (@{$todo->{node}->child_nodes});
738     my $new_todos = [];
739    
740     while (@nodes) {
741     my $node = shift @nodes;
742     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
743    
744     my $nt = $node->node_type;
745     if ($nt == 1) {
746 wakaba 1.6 my $node_ns = $node->namespace_uri;
747     $node_ns = '' unless defined $node_ns;
748     my $node_ln = $node->manakai_local_name;
749     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
750     #
751     } else {
752     # MUST NOT
753     $self->{onerror}->(node => $node, type => 'element not allowed');
754     }
755 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
756     unshift @nodes, @$sib;
757     push @$new_todos, @$ch;
758     } elsif ($nt == 3 or $nt == 4) {
759     $self->{onerror}->(node => $node, type => 'character not allowed')
760     if $src_attr;
761     } elsif ($nt == 5) {
762     unshift @nodes, @{$node->child_nodes};
763     }
764     }
765    
766     return ($new_todos);
767     } elsif ($value eq 'html') {
768     $self->{onerror}->(node => $attr, type => 'not IMT') if $src_attr;
769    
770     my @nodes = (@{$todo->{node}->child_nodes});
771     my $new_todos = [];
772    
773     while (@nodes) {
774     my $node = shift @nodes;
775     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
776    
777     my $nt = $node->node_type;
778     if ($nt == 1) {
779 wakaba 1.6 my $node_ns = $node->namespace_uri;
780     $node_ns = '' unless defined $node_ns;
781     my $node_ln = $node->manakai_local_name;
782     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
783     #
784     } else {
785     # MUST NOT
786     $self->{onerror}->(node => $node, type => 'element not allowed');
787     }
788 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
789     unshift @nodes, @$sib;
790     push @$new_todos, @$ch;
791     } elsif ($nt == 3 or $nt == 4) {
792     $self->{onerror}->(node => $node, type => 'character not allowed')
793     if $src_attr;
794     } elsif ($nt == 5) {
795     unshift @nodes, @{$node->child_nodes};
796     }
797     }
798    
799     ## TODO: SHOULD be suitable for handling as HTML [HTML4]
800     # markup MUST be escaped
801     ## TODO: HTML SHOULD be valid as if within <div>
802    
803     return ($new_todos);
804     } elsif ($value eq 'xhtml') {
805     $self->{onerror}->(node => $attr, type => 'not IMT') if $src_attr;
806    
807     my @nodes = (@{$todo->{node}->child_nodes});
808     my $new_todos = [];
809    
810     my $has_div;
811     while (@nodes) {
812     my $node = shift @nodes;
813     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
814    
815     my $nt = $node->node_type;
816     if ($nt == 1) {
817     # MUST
818     my $nsuri = $node->namespace_uri;
819 wakaba 1.6 $nsuri = '' unless defined $nsuri;
820     my $node_ln = $node->manakai_local_name;
821     if ($self->{pluses}->{$nsuri}->{$node_ln}) {
822     #
823     } elsif ($nsuri eq q<http://www.w3.org/1999/xhtml> and
824     $node_ln eq 'div' and not $has_div) {
825 wakaba 1.1 ## TODO: SHOULD be suitable for handling as HTML [XHTML10]
826     $has_div = 1;
827     $self->{onerror}->(node => $node, type => 'element not allowed')
828     if $src_attr;
829     } else {
830     $self->{onerror}->(node => $node, type => 'element not allowed');
831     }
832     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
833     unshift @nodes, @$sib;
834     push @$new_todos, @$ch;
835     } elsif ($nt == 3 or $nt == 4) {
836     ## TODO: Are white spaces allowed?
837     $self->{onerror}->(node => $node, type => 'character not allowed');
838     } elsif ($nt == 5) {
839     unshift @nodes, @{$node->child_nodes};
840     }
841     }
842    
843     unless ($has_div) {
844     $self->{onerror}->(node => $todo->{node},
845     type => 'element missing:div');
846     }
847    
848     return ($new_todos);
849     } elsif ($value =~ m![+/][Xx][Mm][Ll]\z!) {
850     ## ISSUE: There is no definition for "XML media type" in RFC 3023.
851     ## Is |application/xml-dtd| an XML media type?
852    
853     my @nodes = (@{$todo->{node}->child_nodes});
854     my $new_todos = [];
855    
856     while (@nodes) {
857     my $node = shift @nodes;
858     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
859    
860     my $nt = $node->node_type;
861     if ($nt == 1) {
862     ## MAY contain elements
863 wakaba 1.6 if ($src_attr) {
864     my $node_ns = $node->namespace_uri;
865     $node_ns = '' unless defined $node_ns;
866     my $node_ln = $node->manakai_local_name;
867     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
868     #
869     } else {
870     $self->{onerror}->(node => $node, type => 'element not allowed');
871     }
872     }
873 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
874     unshift @nodes, @$sib;
875     push @$new_todos, @$ch;
876     } elsif ($nt == 3 or $nt == 4) {
877     ## TODO: Are white spaces allowed?
878     $self->{onerror}->(node => $node, type => 'character not allowed');
879     } elsif ($nt == 5) {
880     unshift @nodes, @{$node->child_nodes};
881     }
882     }
883    
884 wakaba 1.3 ## NOTE: SHOULD be suitable for handling as $value.
885 wakaba 1.1 ## If no @src, this would normally mean it contains a
886     ## single child element that would serve as the root element.
887 wakaba 1.3 $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
888     type => 'content:'.$value);
889 wakaba 1.1
890     return ($new_todos);
891     } elsif ($value =~ m!^[Tt][Ee][Xx][Tt]/!) {
892     my @nodes = (@{$todo->{node}->child_nodes});
893     my $new_todos = [];
894    
895     while (@nodes) {
896     my $node = shift @nodes;
897     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
898    
899     my $nt = $node->node_type;
900     if ($nt == 1) {
901 wakaba 1.6 my $node_ns = $node->namespace_uri;
902     $node_ns = '' unless defined $node_ns;
903     my $node_ln = $node->manakai_local_name;
904     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
905     #
906     } else {
907     # MUST NOT
908     $self->{onerror}->(node => $node, type => 'element not allowed');
909     }
910 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
911     unshift @nodes, @$sib;
912     push @$new_todos, @$ch;
913     } elsif ($nt == 3 or $nt == 4) {
914     $self->{onerror}->(node => $node, type => 'character not allowed')
915     if $src_attr;
916     } elsif ($nt == 5) {
917     unshift @nodes, @{$node->child_nodes};
918     }
919     }
920    
921     ## NOTE: No further restriction (such as to conform to the type).
922    
923     return ($new_todos);
924     } else {
925     my @nodes = (@{$todo->{node}->child_nodes});
926     my $new_todos = [];
927    
928     if ($value =~ m!^(?>message|multipart)/!i) { # MUST NOT
929     $self->{onerror}->(node => $attr, type => 'IMT:composite');
930     }
931    
932     my $s = '';
933     while (@nodes) {
934     my $node = shift @nodes;
935     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
936    
937     my $nt = $node->node_type;
938     if ($nt == 1) {
939     ## not explicitly disallowed
940     $self->{onerror}->(node => $node, type => 'element not allowed');
941     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
942     unshift @nodes, @$sib;
943     push @$new_todos, @$ch;
944     } elsif ($nt == 3 or $nt == 4) {
945     $s .= $node->data;
946     $self->{onerror}->(node => $node, type => 'character not allowed')
947     if $src_attr;
948     } elsif ($nt == 5) {
949     unshift @nodes, @{$node->child_nodes};
950     }
951     }
952    
953     ## TODO: $s = valid Base64ed [RFC 3548] where
954     ## MAY leading and following "white space" (what?)
955     ## and lines separated by a single U+000A
956 wakaba 1.3
957     ## NOTE: SHOULD be suitable for the indicated media type.
958     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
959     type => 'content:'.$value);
960 wakaba 1.1
961     return ($new_todos);
962     }
963     },
964     };
965 wakaba 1.6 ## TODO: Tests for <html:nest/> in <atom:content/>
966 wakaba 1.1
967     $Element->{$ATOM_NS}->{author} = $AtomPersonConstruct;
968    
969     $Element->{$ATOM_NS}->{category} = {
970     attrs_checker => $GetAtomAttrsChecker->({
971     label => sub { 1 }, # no value constraint
972 wakaba 1.2 scheme => sub { # NOTE: No MUST.
973     my ($self, $attr) = @_;
974     ## NOTE: There MUST NOT be any white space.
975     Whatpm::URIChecker->check_iri ($attr->value, sub {
976     my %opt = @_;
977     $self->{onerror}->(node => $attr, level => $opt{level},
978     type => 'URI::'.$opt{type}.
979     (defined $opt{position} ? ':'.$opt{position} : ''));
980     });
981     },
982 wakaba 1.1 term => sub { 1 }, # no value constraint
983     }),
984     checker => sub {
985     my ($self, $todo) = @_;
986    
987     unless ($todo->{node}->has_attribute_ns (undef, 'term')) {
988     $self->{onerror}->(node => $todo->{node},
989     type => 'attribute missing:term');
990     }
991    
992     my @nodes = (@{$todo->{node}->child_nodes});
993     my $new_todos = [];
994    
995     while (@nodes) {
996     my $node = shift @nodes;
997     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
998    
999     my $nt = $node->node_type;
1000     if ($nt == 1) {
1001     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1002     unshift @nodes, @$sib;
1003     push @$new_todos, @$ch;
1004     } elsif ($nt == 3 or $nt == 4) {
1005     #
1006     } elsif ($nt == 5) {
1007     unshift @nodes, @{$node->child_nodes};
1008     }
1009     }
1010    
1011     return ($new_todos);
1012     },
1013     };
1014    
1015     $Element->{$ATOM_NS}->{contributor} = $AtomPersonConstruct;
1016 wakaba 1.6
1017     ## TODO: Anything below does not support <html:nest/> yet.
1018 wakaba 1.1
1019     $Element->{$ATOM_NS}->{generator} = {
1020     attrs_checker => $GetAtomAttrsChecker->({
1021 wakaba 1.2 uri => sub { # MUST
1022     my ($self, $attr) = @_;
1023     ## NOTE: There MUST NOT be any white space.
1024     Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
1025     my %opt = @_;
1026     $self->{onerror}->(node => $attr, level => $opt{level},
1027     type => 'URI::'.$opt{type}.
1028     (defined $opt{position} ? ':'.$opt{position} : ''));
1029     });
1030     ## NOTE: Dereferencing SHOULD produce a representation
1031     ## that is relevant to the agent.
1032     },
1033 wakaba 1.1 version => sub { 1 }, # no value constraint
1034     }),
1035     checker => sub {
1036     my ($self, $todo) = @_;
1037    
1038     my @nodes = (@{$todo->{node}->child_nodes});
1039     my $new_todos = [];
1040    
1041     while (@nodes) {
1042     my $node = shift @nodes;
1043     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1044    
1045     my $nt = $node->node_type;
1046     if ($nt == 1) {
1047     ## not explicitly disallowed
1048     $self->{onerror}->(node => $node, type => 'element not allowed');
1049     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1050     unshift @nodes, @$sib;
1051     push @$new_todos, @$ch;
1052     } elsif ($nt == 3 or $nt == 4) {
1053     ## MUST be a string that is a human-readable name for
1054     ## the generating agent
1055     } elsif ($nt == 5) {
1056     unshift @nodes, @{$node->child_nodes};
1057     }
1058     }
1059    
1060     return ($new_todos);
1061     },
1062     };
1063    
1064     $Element->{$ATOM_NS}->{icon} = {
1065     attrs_checker => $GetAtomAttrsChecker->({}),
1066     checker => sub {
1067     my ($self, $todo) = @_;
1068    
1069     my @nodes = (@{$todo->{node}->child_nodes});
1070     my $new_todos = [];
1071    
1072 wakaba 1.2 my $s = '';
1073 wakaba 1.1 while (@nodes) {
1074     my $node = shift @nodes;
1075     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1076    
1077     my $nt = $node->node_type;
1078     if ($nt == 1) {
1079     ## not explicitly disallowed
1080     $self->{onerror}->(node => $node, type => 'element not allowed');
1081     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1082     unshift @nodes, @$sib;
1083     push @$new_todos, @$ch;
1084     } elsif ($nt == 3 or $nt == 4) {
1085 wakaba 1.2 $s .= $node->data;
1086 wakaba 1.1 } elsif ($nt == 5) {
1087     unshift @nodes, @{$node->child_nodes};
1088     }
1089     }
1090    
1091 wakaba 1.2 ## NOTE: No MUST.
1092     ## NOTE: There MUST NOT be any white space.
1093     Whatpm::URIChecker->check_iri_reference ($s, sub {
1094     my %opt = @_;
1095     $self->{onerror}->(node => $todo->{node}, level => $opt{level},
1096     type => 'URI::'.$opt{type}.
1097     (defined $opt{position} ? ':'.$opt{position} : ''));
1098     });
1099    
1100 wakaba 1.1 ## NOTE: Image SHOULD be 1:1 and SHOULD be small
1101    
1102     return ($new_todos);
1103     },
1104     };
1105    
1106     $Element->{$ATOM_NS}->{id} = {
1107     attrs_checker => $GetAtomAttrsChecker->({}),
1108     checker => sub {
1109     my ($self, $todo) = @_;
1110    
1111     my @nodes = (@{$todo->{node}->child_nodes});
1112     my $new_todos = [];
1113 wakaba 1.2
1114     my $s = '';
1115     while (@nodes) {
1116     my $node = shift @nodes;
1117     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1118    
1119     my $nt = $node->node_type;
1120     if ($nt == 1) {
1121     ## not explicitly disallowed
1122     $self->{onerror}->(node => $node, type => 'element not allowed');
1123     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1124     unshift @nodes, @$sib;
1125     push @$new_todos, @$ch;
1126     } elsif ($nt == 3 or $nt == 4) {
1127     $s .= $node->data;
1128     } elsif ($nt == 5) {
1129     unshift @nodes, @{$node->child_nodes};
1130     }
1131     }
1132    
1133     ## NOTE: There MUST NOT be any white space.
1134     Whatpm::URIChecker->check_iri ($s, sub { # MUST
1135     my %opt = @_;
1136     $self->{onerror}->(node => $todo->{node}, level => $opt{level},
1137     type => 'URI::'.$opt{type}.
1138     (defined $opt{position} ? ':'.$opt{position} : ''));
1139     });
1140     ## TODO: SHOULD be normalized
1141    
1142     return ($new_todos);
1143     },
1144     };
1145    
1146     $Element->{$ATOM_NS}->{link} = {
1147     attrs_checker => $GetAtomAttrsChecker->({
1148     href => sub {
1149     my ($self, $attr) = @_;
1150     ## NOTE: There MUST NOT be any white space.
1151     Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
1152     my %opt = @_;
1153     $self->{onerror}->(node => $attr, level => $opt{level},
1154     type => 'URI::'.$opt{type}.
1155     (defined $opt{position} ? ':'.$opt{position} : ''));
1156     });
1157     },
1158 wakaba 1.4 hreflang => $AtomLanguageTagAttrChecker,
1159 wakaba 1.2 length => sub { }, # No MUST; in octets.
1160     rel => sub { # MUST
1161     my ($self, $attr) = @_;
1162     my $value = $attr->value;
1163     if ($value =~ /\A(?>[0-9A-Za-z._~!\$&'()*+,;=\x{A0}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFEF}\x{10000}-\x{1FFFD}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}\x{40000}-\x{4FFFD}\x{50000}-\x{5FFFD}\x{60000}-\x{6FFFD}\x{70000}-\x{7FFFD}\x{80000}-\x{8FFFD}\x{90000}-\x{9FFFD}\x{A0000}-\x{AFFFD}\x{B0000}-\x{BFFFD}\x{C0000}-\x{CFFFD}\x{D0000}-\x{DFFFD}\x{E1000}-\x{EFFFD}-]|%[0-9A-Fa-f][0-9A-Fa-f]|\@)+\z/) {
1164 wakaba 1.3 $value = $LINK_REL . $value;
1165 wakaba 1.2 }
1166    
1167     ## NOTE: There MUST NOT be any white space.
1168     Whatpm::URIChecker->check_iri ($value, sub {
1169     my %opt = @_;
1170     $self->{onerror}->(node => $attr, level => $opt{level},
1171     type => 'URI::'.$opt{type}.
1172     (defined $opt{position} ? ':'.$opt{position} : ''));
1173     });
1174    
1175     ## TODO: Warn if unregistered
1176     },
1177 wakaba 1.5 title => sub { }, # No MUST
1178 wakaba 1.3 type => sub {
1179     ## NOTE: MUST be a MIME media type. What is "MIME media type"?
1180     my ($self, $attr) = @_;
1181     my $value = $attr->value;
1182     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
1183     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
1184     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
1185     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
1186     my @type = ($1, $2);
1187     my $param = $3;
1188     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
1189     if (defined $2) {
1190     push @type, $1 => $2;
1191     } else {
1192     my $n = $1;
1193     my $v = $2;
1194     $v =~ s/\\(.)/$1/gs;
1195     push @type, $n => $v;
1196     }
1197     }
1198     require Whatpm::IMTChecker;
1199     Whatpm::IMTChecker->check_imt (sub {
1200     my %opt = @_;
1201     $self->{onerror}->(node => $attr, level => $opt{level},
1202     type => 'IMT:'.$opt{type});
1203     }, @type);
1204     } else {
1205     $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
1206     }
1207     },
1208 wakaba 1.2 }),
1209     checker => sub {
1210     my ($self, $todo) = @_;
1211    
1212     unless ($todo->{node}->has_attribute_ns (undef, 'href')) { # MUST
1213     $self->{onerror}->(node => $todo->{node},
1214     type => 'attribute missing:href');
1215     }
1216    
1217 wakaba 1.3 if ($todo->{node}->rel eq $LINK_REL . 'enclosure' and
1218 wakaba 1.2 not $todo->{node}->has_attribute_ns (undef, 'length')) {
1219     $self->{onerror}->(node => $todo->{node}, level => 's',
1220     type => 'attribute missing:length');
1221     }
1222    
1223     my @nodes = (@{$todo->{node}->child_nodes});
1224     my $new_todos = [];
1225 wakaba 1.1
1226     while (@nodes) {
1227     my $node = shift @nodes;
1228     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1229    
1230     my $nt = $node->node_type;
1231     if ($nt == 1) {
1232 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1233     unshift @nodes, @$sib;
1234     push @$new_todos, @$ch;
1235     } elsif ($nt == 3 or $nt == 4) {
1236     #
1237     } elsif ($nt == 5) {
1238     unshift @nodes, @{$node->child_nodes};
1239     }
1240     }
1241    
1242     return ($new_todos);
1243     },
1244     };
1245    
1246     $Element->{$ATOM_NS}->{logo} = {
1247     attrs_checker => $GetAtomAttrsChecker->({}),
1248     checker => sub {
1249     my ($self, $todo) = @_;
1250    
1251     my @nodes = (@{$todo->{node}->child_nodes});
1252     my $new_todos = [];
1253    
1254     my $s = '';
1255     while (@nodes) {
1256     my $node = shift @nodes;
1257     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1258    
1259     my $nt = $node->node_type;
1260     if ($nt == 1) {
1261 wakaba 1.1 ## not explicitly disallowed
1262     $self->{onerror}->(node => $node, type => 'element not allowed');
1263     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1264     unshift @nodes, @$sib;
1265     push @$new_todos, @$ch;
1266     } elsif ($nt == 3 or $nt == 4) {
1267 wakaba 1.2 $s .= $node->data;
1268 wakaba 1.1 } elsif ($nt == 5) {
1269     unshift @nodes, @{$node->child_nodes};
1270     }
1271     }
1272    
1273 wakaba 1.2 ## NOTE: There MUST NOT be any white space.
1274     Whatpm::URIChecker->check_iri_reference ($s, sub {
1275     my %opt = @_;
1276     $self->{onerror}->(node => $todo->{node}, level => $opt{level},
1277     type => 'URI::'.$opt{type}.
1278     (defined $opt{position} ? ':'.$opt{position} : ''));
1279     });
1280    
1281     ## NOTE: Image SHOULD be 2:1
1282    
1283     return ($new_todos);
1284     },
1285     };
1286    
1287     $Element->{$ATOM_NS}->{published} = $AtomDateConstruct;
1288    
1289     $Element->{$ATOM_NS}->{rights} = $AtomDateConstruct;
1290 wakaba 1.3 ## NOTE: SHOULD NOT be used to convey machine-readable information.
1291 wakaba 1.2
1292     $Element->{$ATOM_NS}->{source} = {
1293     attrs_checker => $GetAtomAttrsChecker->({}),
1294     checker => sub {
1295     my ($self, $todo) = @_;
1296    
1297     my @nodes = (@{$todo->{node}->child_nodes});
1298     my $new_todos = [];
1299     my $has_element = {};
1300     while (@nodes) {
1301     my $node = shift @nodes;
1302     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1303    
1304     my $nt = $node->node_type;
1305     if ($nt == 1) {
1306     my $nsuri = $node->namespace_uri;
1307     $nsuri = '' unless defined $nsuri;
1308     my $not_allowed;
1309     if ($nsuri eq $ATOM_NS) {
1310     my $ln = $node->manakai_local_name;
1311     if ($ln eq 'entry') {
1312     $has_element->{entry} = 1;
1313     } elsif ({
1314     generator => 1,
1315     icon => 1,
1316     id => 1,
1317     logo => 1,
1318     rights => 1,
1319     subtitle => 1,
1320     title => 1,
1321     updated => 1,
1322     }->{$ln}) {
1323     unless ($has_element->{$ln}) {
1324     $has_element->{$ln} = 1;
1325     $not_allowed = $has_element->{entry};
1326     } else {
1327     $not_allowed = 1;
1328     }
1329     } elsif ($ln eq 'link') {
1330 wakaba 1.3 if ($node->rel eq $LINK_REL . 'alternate') {
1331     my $type = $node->get_attribute_ns (undef, 'type');
1332     $type = '' unless defined $type;
1333     my $hreflang = $node->get_attribute_ns (undef, 'hreflang');
1334     $hreflang = '' unless defined $hreflang;
1335     my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
1336     (defined $hreflang ? ':'.$hreflang : '');
1337     unless ($has_element->{$key}) {
1338     $has_element->{$key} = 1;
1339     } else {
1340     $not_allowed = 1;
1341     }
1342     }
1343     $not_allowed ||= $has_element->{entry};
1344 wakaba 1.2 } elsif ({
1345     author => 1,
1346     category => 1,
1347     contributor => 1,
1348     }->{$ln}) {
1349     $not_allowed = $has_element->{entry};
1350     } else {
1351     $not_allowed = 1;
1352     }
1353     } else {
1354     ## TODO: extension element
1355     $not_allowed = 1;
1356     }
1357     $self->{onerror}->(node => $node, type => 'element not allowed')
1358     if $not_allowed;
1359     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1360     unshift @nodes, @$sib;
1361     push @$new_todos, @$ch;
1362     } elsif ($nt == 3 or $nt == 4) {
1363     ## TODO: Are white spaces allowed?
1364     $self->{onerror}->(node => $node, type => 'character not allowed');
1365     } elsif ($nt == 5) {
1366     unshift @nodes, @{$node->child_nodes};
1367     }
1368     }
1369 wakaba 1.1
1370     return ($new_todos);
1371     },
1372     };
1373 wakaba 1.2
1374     $Element->{$ATOM_NS}->{subtitle} = $AtomTextConstruct;
1375    
1376     $Element->{$ATOM_NS}->{summary} = $AtomTextConstruct;
1377    
1378     $Element->{$ATOM_NS}->{title} = $AtomTextConstruct;
1379    
1380     $Element->{$ATOM_NS}->{updated} = $AtomDateConstruct;
1381    
1382     ## TODO: signature element
1383    
1384     ## TODO: simple extension element and structured extension element
1385 wakaba 1.1
1386     $Whatpm::ContentChecker::Namespace->{$ATOM_NS}->{loaded} = 1;
1387    
1388     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24