/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/Atom.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/Atom.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.7 - (hide annotations) (download)
Sun Nov 25 08:04:20 2007 UTC (16 years, 11 months ago) by wakaba
Branch: MAIN
Changes since 1.6: +52 -3 lines
++ whatpm/t/ChangeLog	25 Nov 2007 07:57:28 -0000
2007-11-25  Wakaba  <wakaba@suika.fam.cx>

	* content-model-1.dat, content-model-2.dat, content-model-3.dat,
	content-model-4.dat, table-1.dat: Test data are updated
	for the significant content check.

	* content-model-5.dat: New test data.

	* ContentChecker.t: New test data file is added.

++ whatpm/Whatpm/ChangeLog	25 Nov 2007 07:59:33 -0000
	* ContentChecker.pm ($AnyChecker): Old way to add child elements
	for checking had been used.

2007-11-25  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ContentChecker/ChangeLog	25 Nov 2007 08:00:46 -0000
	* HTML.pm: Support for checking for significant content (HTML5
	revision 1114).  Note that the current implementation has
	an issue on treatment for transparent or semi-transparent
	elements.

	* Atom.pm: Support for significant content checking (for composed
	HTML-Atom documents).

2007-11-25  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5 wakaba 1.2 require Whatpm::URIChecker;
6    
7 wakaba 1.1 my $ATOM_NS = q<http://www.w3.org/2005/Atom>;
8 wakaba 1.3 my $LINK_REL = q<http://www.iana.org/assignments/relation/>;
9 wakaba 1.1
10     ## MUST be well-formed XML (RFC 4287 references XML 1.0 REC 20040204)
11    
12     ## NOTE: Commants and PIs are not explicitly allowed.
13    
14     our $AttrChecker;
15    
16     ## Any element MAY have xml:base, xml:lang
17     my $GetAtomAttrsChecker = sub {
18     my $element_specific_checker = shift;
19     return sub {
20     my ($self, $todo) = @_;
21     for my $attr (@{$todo->{node}->attributes}) {
22     my $attr_ns = $attr->namespace_uri;
23     $attr_ns = '' unless defined $attr_ns;
24     my $attr_ln = $attr->manakai_local_name;
25     my $checker;
26     if ($attr_ns eq '') {
27     $checker = $element_specific_checker->{$attr_ln};
28     } else {
29     $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
30     || $AttrChecker->{$attr_ns}->{''};
31     }
32     if ($checker) {
33     $checker->($self, $attr, $todo);
34     } else {
35     $self->{onerror}->(node => $attr, level => 'unsupported',
36     type => 'attribute');
37     ## ISSUE: No comformance createria for unknown attributes in the spec
38     }
39     }
40     };
41     }; # $GetAtomAttrsChecker
42    
43 wakaba 1.4 my $AtomLanguageTagAttrChecker = sub {
44     ## NOTE: See also $HTMLLanguageTagAttrChecker in HTML.pm.
45    
46     my ($self, $attr) = @_;
47     my $value = $attr->value;
48     require Whatpm::LangTag;
49     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
50     my %opt = @_;
51     my $type = 'LangTag:'.$opt{type};
52     $type .= ':' . $opt{subtag} if defined $opt{subtag};
53     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
54     level => $opt{level});
55     });
56     ## ISSUE: RFC 4646 (3066bis)?
57     }; # $AtomLanguageTagAttrChecker
58    
59 wakaba 1.1 my $AtomTextConstruct = {
60     attrs_checker => $GetAtomAttrsChecker->({
61     type => sub { 1 }, # checked in |checker|
62     }),
63     checker => sub {
64     my ($self, $todo) = @_;
65    
66     my $attr = $todo->{node}->get_attribute_node_ns (undef, 'type');
67     my $value = 'text';
68     if ($attr) {
69     $value = $attr->value;
70     if ($value eq 'text' or $value eq 'html' or $value eq 'xhtml') {
71     # MUST
72     } else {
73     $self->{onerror}->(node => $attr, type => 'keyword:invalid');
74     }
75     # IMT MUST NOT be used
76     }
77    
78     if ($value eq 'text') {
79     my @nodes = (@{$todo->{node}->child_nodes});
80     my $new_todos = [];
81    
82     while (@nodes) {
83     my $node = shift @nodes;
84     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
85    
86     my $nt = $node->node_type;
87     if ($nt == 1) {
88     # MUST NOT
89     $self->{onerror}->(node => $node, type => 'element not allowed');
90     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
91     unshift @nodes, @$sib;
92     push @$new_todos, @$ch;
93     } elsif ($nt == 5) {
94     unshift @nodes, @{$node->child_nodes};
95     }
96     }
97    
98     return ($new_todos);
99     } elsif ($value eq 'html') {
100     my @nodes = (@{$todo->{node}->child_nodes});
101     my $new_todos = [];
102    
103     while (@nodes) {
104     my $node = shift @nodes;
105     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
106    
107     my $nt = $node->node_type;
108     if ($nt == 1) {
109     # MUST NOT
110     $self->{onerror}->(node => $node, type => 'element not allowed');
111     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
112     unshift @nodes, @$sib;
113     push @$new_todos, @$ch;
114     } elsif ($nt == 5) {
115     unshift @nodes, @{$node->child_nodes};
116     }
117     }
118    
119     ## TODO: SHOULD be suitable for handling as HTML [HTML4]
120     # markup MUST be escaped
121     ## TODO: HTML SHOULD be valid as if within <div>
122    
123     return ($new_todos);
124     } elsif ($value eq 'xhtml') {
125     my @nodes = (@{$todo->{node}->child_nodes});
126     my $new_todos = [];
127    
128     my $has_div;
129     while (@nodes) {
130     my $node = shift @nodes;
131     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
132    
133     my $nt = $node->node_type;
134     if ($nt == 1) {
135     # MUST
136     my $nsuri = $node->namespace_uri;
137     if (defined $nsuri and
138     $nsuri eq q<http://www.w3.org/1999/xhtml> and
139     $node->manakai_local_name eq 'div' and
140     not $has_div) {
141     ## TODO: SHOULD be suitable for handling as HTML [XHTML10]
142     $has_div = 1;
143     } else {
144     $self->{onerror}->(node => $node, type => 'element not allowed');
145     }
146     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
147     unshift @nodes, @$sib;
148     push @$new_todos, @$ch;
149     } elsif ($nt == 3 or $nt == 4) {
150     ## TODO: Are white spaces allowed?
151     $self->{onerror}->(node => $node, type => 'character not allowed');
152     } elsif ($nt == 5) {
153     unshift @nodes, @{$node->child_nodes};
154     }
155     }
156    
157     unless ($has_div) {
158     $self->{onerror}->(node => $todo->{node},
159     type => 'element missing:div');
160     }
161    
162     return ($new_todos);
163     }
164    
165     },
166     }; # $AtomTextConstruct
167    
168     my $AtomPersonConstruct = {
169     attrs_checker => $GetAtomAttrsChecker->({}),
170     checker => sub {
171     my ($self, $todo) = @_;
172    
173     my @nodes = (@{$todo->{node}->child_nodes});
174     my $new_todos = [];
175    
176     my $has_name;
177     my $has_uri;
178     my $has_email;
179     while (@nodes) {
180     my $node = shift @nodes;
181     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
182    
183     my $nt = $node->node_type;
184     if ($nt == 1) {
185     # MUST
186     my $nsuri = $node->namespace_uri;
187     $nsuri = '' unless defined $nsuri;
188     my $not_allowed;
189     if ($nsuri eq $ATOM_NS) {
190     my $ln = $node->manakai_local_name;
191     if ($ln eq 'name') {
192     unless ($has_name) {
193     $has_name = 1;
194     } else {
195     $not_allowed = 1;
196     }
197     } elsif ($ln eq 'uri') {
198     unless ($has_uri) {
199     $has_uri = 1;
200     } else {
201     $not_allowed = 1; # MUST NOT
202     }
203     } elsif ($ln eq 'email') {
204     unless ($has_email) {
205     $has_email = 1;
206     } else {
207     $not_allowed = 1; # MUST NOT
208     }
209     } else {
210     $not_allowed = 1;
211     }
212     } else {
213     ## TODO: extension element
214     $not_allowed = 1;
215     }
216     $self->{onerror}->(node => $node, type => 'element not allowed')
217     if $not_allowed;
218     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
219     unshift @nodes, @$sib;
220     push @$new_todos, @$ch;
221     } elsif ($nt == 3 or $nt == 4) {
222     ## TODO: Are white spaces allowed?
223     $self->{onerror}->(node => $node, type => 'character not allowed');
224 wakaba 1.7 if ($node->data =~ /[^\x09-\x0D\x20]/) {
225     $todo->{flag}->{has_descendant}->{significant} = 1;
226     }
227 wakaba 1.1 } elsif ($nt == 5) {
228     unshift @nodes, @{$node->child_nodes};
229     }
230     }
231    
232     unless ($has_name) { # MUST
233     $self->{onerror}->(node => $todo->{node},
234     type => 'element missing:atom.name');
235     }
236    
237     return ($new_todos);
238     },
239     }; # $AtomPersonConstruct
240    
241 wakaba 1.2 our $Element;
242    
243     $Element->{$ATOM_NS}->{name} = {
244     ## NOTE: Strictly speaking, structure and semantics for atom:name
245     ## element outside of Person construct is not defined.
246     attrs_checker => $GetAtomAttrsChecker->({}),
247     checker => sub {
248     my ($self, $todo) = @_;
249    
250     my @nodes = (@{$todo->{node}->child_nodes});
251     my $new_todos = [];
252    
253     while (@nodes) {
254     my $node = shift @nodes;
255     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
256    
257     my $nt = $node->node_type;
258     if ($nt == 1) {
259     ## NOTE: No constraint.
260     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
261     unshift @nodes, @$sib;
262     push @$new_todos, @$ch;
263     } elsif ($nt == 3 or $nt == 4) {
264 wakaba 1.7 $todo->{flag}->{has_descendant}->{significant} = 1;
265 wakaba 1.2 } elsif ($nt == 5) {
266     unshift @nodes, @{$node->child_nodes};
267     }
268     }
269    
270     return ($new_todos);
271     },
272     };
273    
274     $Element->{$ATOM_NS}->{uri} = {
275     ## NOTE: Strictly speaking, structure and semantics for atom:uri
276     ## element outside of Person construct is not defined.
277     attrs_checker => $GetAtomAttrsChecker->({}),
278     checker => sub {
279     my ($self, $todo) = @_;
280    
281     my @nodes = (@{$todo->{node}->child_nodes});
282     my $new_todos = [];
283    
284     my $s = '';
285     while (@nodes) {
286     my $node = shift @nodes;
287     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
288    
289     my $nt = $node->node_type;
290     if ($nt == 1) {
291 wakaba 1.6 my $node_ns = $node->namespace_uri;
292     $node_ns = '' unless defined $node_ns;
293     my $node_ln = $node->manakai_local_name;
294     unless ($self->{pluses}->{$node_ns}->{$node_ln}) {
295     ## NOTE: Not explicitly disallowed.
296     $self->{onerror}->(node => $node, type => 'element not allowed');
297     }
298 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
299     unshift @nodes, @$sib;
300     push @$new_todos, @$ch;
301     } elsif ($nt == 3 or $nt == 4) {
302     $s .= $node->data;
303 wakaba 1.7 $todo->{flag}->{has_descendant}->{significant} = 1;
304 wakaba 1.2 } elsif ($nt == 5) {
305     unshift @nodes, @{$node->child_nodes};
306     }
307     }
308    
309     ## NOTE: There MUST NOT be any white space.
310     Whatpm::URIChecker->check_iri_reference ($s, sub {
311     my %opt = @_;
312     $self->{onerror}->(node => $todo->{node}, level => $opt{level},
313     type => 'URI::'.$opt{type}.
314     (defined $opt{position} ? ':'.$opt{position} : ''));
315     });
316    
317     return ($new_todos);
318     },
319     };
320    
321     $Element->{$ATOM_NS}->{email} = {
322     ## NOTE: Strictly speaking, structure and semantics for atom:email
323     ## element outside of Person construct is not defined.
324     attrs_checker => $GetAtomAttrsChecker->({}),
325     checker => sub {
326     my ($self, $todo) = @_;
327    
328     my @nodes = (@{$todo->{node}->child_nodes});
329     my $new_todos = [];
330    
331     my $s = '';
332     while (@nodes) {
333     my $node = shift @nodes;
334     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
335    
336     my $nt = $node->node_type;
337     if ($nt == 1) {
338 wakaba 1.6 my $node_ns = $node->namespace_uri;
339     $node_ns = '' unless defined $node_ns;
340     my $node_ln = $node->manakai_local_name;
341     unless ($self->{pluses}->{$node_ns}->{$node_ln}) {
342     ## NOTE: Not explicitly disallowed.
343     $self->{onerror}->(node => $node, type => 'element not allowed');
344     }
345 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
346     unshift @nodes, @$sib;
347     push @$new_todos, @$ch;
348     } elsif ($nt == 3 or $nt == 4) {
349     $s .= $node->data;
350 wakaba 1.7 $todo->{flag}->{has_descendant}->{significant} = 1;
351 wakaba 1.2 } elsif ($nt == 5) {
352     unshift @nodes, @{$node->child_nodes};
353     }
354     }
355    
356     ## TODO: addr-spec
357     $self->{onerror}->(node => $todo->{node}, type => 'addr-spec',
358     level => 'unsupported');
359    
360     return ($new_todos);
361     },
362     };
363    
364 wakaba 1.1 ## MUST NOT be any white space
365     my $AtomDateConstruct = {
366     attrs_checker => $GetAtomAttrsChecker->({}),
367     checker => sub {
368     my ($self, $todo) = @_;
369    
370     my @nodes = (@{$todo->{node}->child_nodes});
371     my $new_todos = [];
372    
373     my $s = '';
374     while (@nodes) {
375     my $node = shift @nodes;
376     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
377    
378     my $nt = $node->node_type;
379     if ($nt == 1) {
380 wakaba 1.6 my $node_ns = $node->namespace_uri;
381     $node_ns = '' unless defined $node_ns;
382     my $node_ln = $node->manakai_local_name;
383     unless ($self->{pluses}->{$node_ns}->{$node_ln}) {
384     ## NOTE: It does not explicitly say that there MUST NOT be any element.
385     $self->{onerror}->(node => $node, type => 'element not allowed');
386     }
387 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
388     unshift @nodes, @$sib;
389     push @$new_todos, @$ch;
390     } elsif ($nt == 3 or $nt == 4) {
391     $s .= $node->data;
392 wakaba 1.7 $todo->{flag}->{has_descendant}->{significant} = 1;
393 wakaba 1.1 } elsif ($nt == 5) {
394     unshift @nodes, @{$node->child_nodes};
395     }
396     }
397    
398 wakaba 1.4 ## MUST: RFC 3339 |date-time| with uppercase |T| and |Z|
399     if ($s =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})(?>\.[0-9]+)?(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
400     my ($y, $M, $d, $h, $m, $s, $zh, $zm)
401     = ($1, $2, $3, $4, $5, $6, $7, $8);
402     my $node = $todo->{node};
403    
404     ## Check additional constraints described or referenced in
405     ## comments of ABNF rules for |date-time|.
406     my $level = $self->{must_level};
407     if (0 < $M and $M < 13) {
408     $self->{onerror}->(node => $node, type => 'datetime:bad day',
409     level => $level)
410     if $d < 1 or
411     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
412     $self->{onerror}->(node => $node, type => 'datetime:bad day',
413     level => $level)
414     if $M == 2 and $d == 29 and
415     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
416     } else {
417     $self->{onerror}->(node => $node, type => 'datetime:bad month',
418     level => $level);
419     }
420     $self->{onerror}->(node => $node, type => 'datetime:bad hour',
421     level => $level) if $h > 23;
422     $self->{onerror}->(node => $node, type => 'datetime:bad minute',
423     level => $level) if $m > 59;
424     $self->{onerror}->(node => $node, type => 'datetime:bad second',
425     level => $level)
426     if $s > 60; ## NOTE: Validness of leap seconds are not checked.
427     $self->{onerror}->(node => $node, type => 'datetime:bad timezone hour',
428     level => $level) if $zh > 23;
429     $self->{onerror}->(node => $node, type => 'datetime:bad timezone minute',
430     level => $level) if $zm > 59;
431     } else {
432     $self->{onerror}->(node => $todo->{node},
433     type => 'datetime:syntax error',
434     level => $self->{must_level});
435     }
436     ## NOTE: SHOULD be accurate as possible (cannot be checked)
437 wakaba 1.1
438     return ($new_todos);
439     },
440     }; # $AtomDateConstruct
441    
442 wakaba 1.2 $Element->{$ATOM_NS}->{entry} = {
443 wakaba 1.1 is_root => 1,
444     attrs_checker => $GetAtomAttrsChecker->({}),
445     checker => sub {
446     my ($self, $todo) = @_;
447    
448     my @nodes = (@{$todo->{node}->child_nodes});
449     my $new_todos = [];
450    
451     ## TODO: MUST author+ unless (child::source/child::author)
452     ## or (parent::feed/child::author)
453    
454     my $has_element = {};
455     while (@nodes) {
456     my $node = shift @nodes;
457     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
458    
459     my $nt = $node->node_type;
460     if ($nt == 1) {
461     # MUST
462     my $nsuri = $node->namespace_uri;
463     $nsuri = '' unless defined $nsuri;
464 wakaba 1.6 my $ln = $node->manakai_local_name;
465 wakaba 1.1 my $not_allowed;
466 wakaba 1.6 if ($self->{pluses}->{$nsuri}->{$ln}) {
467     #
468     } elsif ($nsuri eq $ATOM_NS) {
469 wakaba 1.1 if ({ # MUST (0, 1)
470     content => 1,
471     id => 1,
472     published => 1,
473     rights => 1,
474     source => 1,
475     summary => 1,
476     ## TODO: MUST if child::content/@src | child::content/@type = IMT, !text/ !/xml !+xml
477     title => 1,
478     updated => 1,
479     }->{$ln}) {
480     unless ($has_element->{$ln}) {
481     $has_element->{$ln} = 1;
482     $not_allowed = $has_element->{entry};
483     } else {
484     $not_allowed = 1;
485     }
486     } elsif ($ln eq 'link') { # MAY
487 wakaba 1.3 if ($node->rel eq $LINK_REL . 'alternate') {
488     my $type = $node->get_attribute_ns (undef, 'type');
489     $type = '' unless defined $type;
490     my $hreflang = $node->get_attribute_ns (undef, 'hreflang');
491     $hreflang = '' unless defined $hreflang;
492     my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
493     (defined $hreflang ? ':'.$hreflang : '');
494     unless ($has_element->{$key}) {
495     $has_element->{$key} = 1;
496     $has_element->{'link.alternate'} = 1;
497     } else {
498     $not_allowed = 1;
499     }
500     }
501    
502 wakaba 1.1 ## NOTE: MAY
503 wakaba 1.3 $not_allowed ||= $has_element->{entry};
504 wakaba 1.1 } elsif ({ # MAY
505     author => 1,
506     category => 1,
507     contributor => 1,
508     }->{$ln}) {
509     $not_allowed = $has_element->{entry};
510     } else {
511     $not_allowed = 1;
512     }
513     } else {
514     ## TODO: extension element
515     $not_allowed = 1;
516     }
517     $self->{onerror}->(node => $node, type => 'element not allowed')
518     if $not_allowed;
519     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
520     unshift @nodes, @$sib;
521     push @$new_todos, @$ch;
522     } elsif ($nt == 3 or $nt == 4) {
523     ## TODO: Are white spaces allowed?
524     $self->{onerror}->(node => $node, type => 'character not allowed');
525 wakaba 1.7 if ($node->data =~ /[^\x09-\x0D\x20]/) {
526     $todo->{flag}->{has_descendant}->{significant} = 1;
527     }
528 wakaba 1.1 } elsif ($nt == 5) {
529     unshift @nodes, @{$node->child_nodes};
530     }
531     }
532    
533     ## NOTE: metadata elements, followed by atom:entry* (no explicit MAY)
534    
535     ## TODO: If entry's with same id, then updated SHOULD be different
536    
537     unless ($has_element->{id}) { # MUST
538     $self->{onerror}->(node => $todo->{node},
539     type => 'element missing:atom.id');
540     }
541     unless ($has_element->{title}) { # MUST
542     $self->{onerror}->(node => $todo->{node},
543     type => 'element missing:atom.title');
544     }
545     unless ($has_element->{updated}) { # MUST
546     $self->{onerror}->(node => $todo->{node},
547     type => 'element missing:atom.updated');
548     }
549 wakaba 1.3 if (not $has_element->{content} and
550     not $has_element->{'link.alternate'}) {
551     $self->{onerror}->(node => $todo->{node},
552     type => 'element missing:atom.link.alternate');
553     }
554 wakaba 1.1
555     return ($new_todos);
556     },
557     };
558    
559     $Element->{$ATOM_NS}->{feed} = {
560     is_root => 1,
561     attrs_checker => $GetAtomAttrsChecker->({}),
562     checker => sub {
563     my ($self, $todo) = @_;
564    
565     my @nodes = (@{$todo->{node}->child_nodes});
566     my $new_todos = [];
567    
568     ## TODO: MUST author+ unless all entry child has author+.
569    
570     my $has_element = {};
571     while (@nodes) {
572     my $node = shift @nodes;
573     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
574    
575     my $nt = $node->node_type;
576     if ($nt == 1) {
577     my $nsuri = $node->namespace_uri;
578     $nsuri = '' unless defined $nsuri;
579 wakaba 1.6 my $ln = $node->manakai_local_name;
580 wakaba 1.1 my $not_allowed;
581 wakaba 1.6 if ($self->{pluses}->{$nsuri}->{$ln}) {
582     #
583     } elsif ($nsuri eq $ATOM_NS) {
584 wakaba 1.1 if ($ln eq 'entry') {
585     $has_element->{entry} = 1;
586     } elsif ({ # MUST (0, 1)
587     generator => 1,
588     icon => 1,
589     id => 1,
590     logo => 1,
591     rights => 1,
592     subtitle => 1,
593     title => 1,
594     updated => 1,
595     }->{$ln}) {
596     unless ($has_element->{$ln}) {
597     $has_element->{$ln} = 1;
598     $not_allowed = $has_element->{entry};
599     } else {
600     $not_allowed = 1;
601     }
602 wakaba 1.3 } elsif ($ln eq 'link') {
603     my $rel = $node->rel;
604     if ($rel eq $LINK_REL . 'alternate') {
605     my $type = $node->get_attribute_ns (undef, 'type');
606     $type = '' unless defined $type;
607     my $hreflang = $node->get_attribute_ns (undef, 'hreflang');
608     $hreflang = '' unless defined $hreflang;
609     my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
610     (defined $hreflang ? ':'.$hreflang : '');
611     unless ($has_element->{$key}) {
612     $has_element->{$key} = 1;
613     } else {
614     $not_allowed = 1;
615     }
616     } elsif ($rel eq $LINK_REL . 'self') {
617     $has_element->{'link.self'} = 1;
618     }
619    
620     ## NOTE: MAY
621 wakaba 1.1 $not_allowed = $has_element->{entry};
622     } elsif ({ # MAY
623     author => 1,
624     category => 1,
625     contributor => 1,
626     }->{$ln}) {
627     $not_allowed = $has_element->{entry};
628     } else {
629     $not_allowed = 1;
630     }
631     } else {
632     ## TODO: extension element
633     $not_allowed = 1;
634     }
635     $self->{onerror}->(node => $node, type => 'element not allowed')
636     if $not_allowed;
637     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
638     unshift @nodes, @$sib;
639     push @$new_todos, @$ch;
640     } elsif ($nt == 3 or $nt == 4) {
641     ## TODO: Are white spaces allowed?
642     $self->{onerror}->(node => $node, type => 'character not allowed');
643 wakaba 1.7 if ($node->data =~ /[^\x09-\x0D\x20]/) {
644     $todo->{flag}->{has_descendant}->{significant} = 1;
645     }
646 wakaba 1.1 } elsif ($nt == 5) {
647     unshift @nodes, @{$node->child_nodes};
648     }
649     }
650    
651     ## NOTE: metadata elements, followed by atom:entry* (no explicit MAY)
652    
653     ## TODO: If entry's with same id, then updated SHOULD be different
654    
655     unless ($has_element->{id}) { # MUST
656     $self->{onerror}->(node => $todo->{node},
657     type => 'element missing:atom.id');
658     }
659     unless ($has_element->{title}) { # MUST
660     $self->{onerror}->(node => $todo->{node},
661     type => 'element missing:atom.title');
662     }
663     unless ($has_element->{updated}) { # MUST
664     $self->{onerror}->(node => $todo->{node},
665     type => 'element missing:atom.updated');
666     }
667 wakaba 1.3 unless ($has_element->{'link.self'}) {
668     $self->{onerror}->(node => $todo->{node}, level => 's',
669     type => 'child element missing:atom.link.self');
670     }
671 wakaba 1.1
672     return ($new_todos);
673     },
674     };
675    
676     $Element->{$ATOM_NS}->{content} = {
677     attrs_checker => $GetAtomAttrsChecker->({
678     src => sub { 1 }, # checked in |checker|
679     type => sub { 1 }, # checked in |checker|
680     }),
681     checker => sub {
682     my ($self, $todo) = @_;
683    
684     my $attr = $todo->{node}->get_attribute_node_ns (undef, 'type');
685     my $src_attr = $todo->{node}->get_attribute_node_ns (undef, 'src');
686     my $value;
687     if ($attr) {
688     $value = $attr->value;
689     if ($value eq 'text' or $value eq 'html' or $value eq 'xhtml') {
690     # MUST
691     } else {
692 wakaba 1.3 ## NOTE: MUST be a MIME media type. What is "MIME media type"?
693     my $value = $attr->value;
694     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
695     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
696     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
697     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
698     my @type = ($1, $2);
699     my $param = $3;
700     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
701     if (defined $2) {
702     push @type, $1 => $2;
703     } else {
704     my $n = $1;
705     my $v = $2;
706     $v =~ s/\\(.)/$1/gs;
707     push @type, $n => $v;
708     }
709     }
710     require Whatpm::IMTChecker;
711     Whatpm::IMTChecker->check_imt (sub {
712     my %opt = @_;
713     $self->{onerror}->(node => $attr, level => $opt{level},
714     type => 'IMT:'.$opt{type});
715     }, @type);
716     } else {
717     $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
718     }
719 wakaba 1.1 }
720     } elsif ($src_attr) {
721     $value = '';
722     $self->{onerror}->(node => $todo->{node},
723     type => 'attribute missing:type', level => 's');
724     } else {
725     $value = 'text';
726     }
727    
728     ## TODO: This implementation is not optimal.
729    
730     if ($src_attr) {
731 wakaba 1.2 ## NOTE: There MUST NOT be any white space.
732     Whatpm::URIChecker->check_iri_reference ($src_attr->value, sub {
733     my %opt = @_;
734     $self->{onerror}->(node => $todo->{node}, level => $opt{level},
735     type => 'URI::'.$opt{type}.
736     (defined $opt{position} ? ':'.$opt{position} : ''));
737     });
738 wakaba 1.1
739     ## NOTE: If @src, the element MUST be empty. What is "empty"?
740     ## Is |<e><!----></e>| empty? |<e>&e;</e>| where |&e;| has
741     ## empty replacement tree shuld be empty, since Atom is defined
742     ## in terms of XML Information Set where entities are expanded.
743     ## (but what if |&e;| is an unexpanded entity?)
744     }
745    
746     if ($value eq 'text') {
747     $self->{onerror}->(node => $attr, type => 'not IMT') if $src_attr;
748    
749     my @nodes = (@{$todo->{node}->child_nodes});
750     my $new_todos = [];
751    
752     while (@nodes) {
753     my $node = shift @nodes;
754     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
755    
756     my $nt = $node->node_type;
757     if ($nt == 1) {
758 wakaba 1.6 my $node_ns = $node->namespace_uri;
759     $node_ns = '' unless defined $node_ns;
760     my $node_ln = $node->manakai_local_name;
761     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
762     #
763     } else {
764     # MUST NOT
765     $self->{onerror}->(node => $node, type => 'element not allowed');
766     }
767 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
768     unshift @nodes, @$sib;
769     push @$new_todos, @$ch;
770     } elsif ($nt == 3 or $nt == 4) {
771     $self->{onerror}->(node => $node, type => 'character not allowed')
772     if $src_attr;
773 wakaba 1.7 if ($node->data =~ /[^\x09-\x0D\x20]/) {
774     $todo->{flag}->{has_descendant}->{significant} = 1;
775     }
776 wakaba 1.1 } elsif ($nt == 5) {
777     unshift @nodes, @{$node->child_nodes};
778     }
779     }
780    
781     return ($new_todos);
782     } elsif ($value eq 'html') {
783     $self->{onerror}->(node => $attr, type => 'not IMT') if $src_attr;
784    
785     my @nodes = (@{$todo->{node}->child_nodes});
786     my $new_todos = [];
787    
788     while (@nodes) {
789     my $node = shift @nodes;
790     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
791    
792     my $nt = $node->node_type;
793     if ($nt == 1) {
794 wakaba 1.6 my $node_ns = $node->namespace_uri;
795     $node_ns = '' unless defined $node_ns;
796     my $node_ln = $node->manakai_local_name;
797     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
798     #
799     } else {
800     # MUST NOT
801     $self->{onerror}->(node => $node, type => 'element not allowed');
802     }
803 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
804     unshift @nodes, @$sib;
805     push @$new_todos, @$ch;
806     } elsif ($nt == 3 or $nt == 4) {
807     $self->{onerror}->(node => $node, type => 'character not allowed')
808     if $src_attr;
809 wakaba 1.7 if ($node->data =~ /[^\x09-\x0D\x20]/) {
810     $todo->{flag}->{has_descendant}->{significant} = 1;
811     }
812 wakaba 1.1 } elsif ($nt == 5) {
813     unshift @nodes, @{$node->child_nodes};
814     }
815     }
816    
817     ## TODO: SHOULD be suitable for handling as HTML [HTML4]
818     # markup MUST be escaped
819     ## TODO: HTML SHOULD be valid as if within <div>
820    
821     return ($new_todos);
822     } elsif ($value eq 'xhtml') {
823     $self->{onerror}->(node => $attr, type => 'not IMT') if $src_attr;
824    
825     my @nodes = (@{$todo->{node}->child_nodes});
826     my $new_todos = [];
827    
828     my $has_div;
829     while (@nodes) {
830     my $node = shift @nodes;
831     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
832    
833     my $nt = $node->node_type;
834     if ($nt == 1) {
835     # MUST
836     my $nsuri = $node->namespace_uri;
837 wakaba 1.6 $nsuri = '' unless defined $nsuri;
838     my $node_ln = $node->manakai_local_name;
839     if ($self->{pluses}->{$nsuri}->{$node_ln}) {
840     #
841     } elsif ($nsuri eq q<http://www.w3.org/1999/xhtml> and
842     $node_ln eq 'div' and not $has_div) {
843 wakaba 1.1 ## TODO: SHOULD be suitable for handling as HTML [XHTML10]
844     $has_div = 1;
845     $self->{onerror}->(node => $node, type => 'element not allowed')
846     if $src_attr;
847     } else {
848     $self->{onerror}->(node => $node, type => 'element not allowed');
849     }
850     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
851     unshift @nodes, @$sib;
852     push @$new_todos, @$ch;
853     } elsif ($nt == 3 or $nt == 4) {
854     ## TODO: Are white spaces allowed?
855     $self->{onerror}->(node => $node, type => 'character not allowed');
856 wakaba 1.7 if ($node->data =~ /[^\x09-\x0D\x20]/) {
857     $todo->{flag}->{has_descendant}->{significant} = 1;
858     }
859 wakaba 1.1 } elsif ($nt == 5) {
860     unshift @nodes, @{$node->child_nodes};
861     }
862     }
863    
864     unless ($has_div) {
865     $self->{onerror}->(node => $todo->{node},
866     type => 'element missing:div');
867     }
868    
869     return ($new_todos);
870     } elsif ($value =~ m![+/][Xx][Mm][Ll]\z!) {
871     ## ISSUE: There is no definition for "XML media type" in RFC 3023.
872     ## Is |application/xml-dtd| an XML media type?
873    
874     my @nodes = (@{$todo->{node}->child_nodes});
875     my $new_todos = [];
876    
877     while (@nodes) {
878     my $node = shift @nodes;
879     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
880    
881     my $nt = $node->node_type;
882     if ($nt == 1) {
883     ## MAY contain elements
884 wakaba 1.6 if ($src_attr) {
885     my $node_ns = $node->namespace_uri;
886     $node_ns = '' unless defined $node_ns;
887     my $node_ln = $node->manakai_local_name;
888     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
889     #
890     } else {
891     $self->{onerror}->(node => $node, type => 'element not allowed');
892     }
893     }
894 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
895     unshift @nodes, @$sib;
896     push @$new_todos, @$ch;
897     } elsif ($nt == 3 or $nt == 4) {
898     ## TODO: Are white spaces allowed?
899     $self->{onerror}->(node => $node, type => 'character not allowed');
900 wakaba 1.7 if ($node->data =~ /[^\x09-\x0D\x20]/) {
901     $todo->{flag}->{has_descendant}->{significant} = 1;
902     }
903 wakaba 1.1 } elsif ($nt == 5) {
904     unshift @nodes, @{$node->child_nodes};
905     }
906     }
907    
908 wakaba 1.3 ## NOTE: SHOULD be suitable for handling as $value.
909 wakaba 1.1 ## If no @src, this would normally mean it contains a
910     ## single child element that would serve as the root element.
911 wakaba 1.3 $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
912     type => 'content:'.$value);
913 wakaba 1.1
914     return ($new_todos);
915     } elsif ($value =~ m!^[Tt][Ee][Xx][Tt]/!) {
916     my @nodes = (@{$todo->{node}->child_nodes});
917     my $new_todos = [];
918    
919     while (@nodes) {
920     my $node = shift @nodes;
921     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
922    
923     my $nt = $node->node_type;
924     if ($nt == 1) {
925 wakaba 1.6 my $node_ns = $node->namespace_uri;
926     $node_ns = '' unless defined $node_ns;
927     my $node_ln = $node->manakai_local_name;
928     if ($self->{pluses}->{$node_ns}->{$node_ln}) {
929     #
930     } else {
931     # MUST NOT
932     $self->{onerror}->(node => $node, type => 'element not allowed');
933     }
934 wakaba 1.1 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
935     unshift @nodes, @$sib;
936     push @$new_todos, @$ch;
937     } elsif ($nt == 3 or $nt == 4) {
938     $self->{onerror}->(node => $node, type => 'character not allowed')
939     if $src_attr;
940 wakaba 1.7 if ($node->data =~ /[^\x09-\x0D\x20]/) {
941     $todo->{flag}->{has_descendant}->{significant} = 1;
942     }
943 wakaba 1.1 } elsif ($nt == 5) {
944     unshift @nodes, @{$node->child_nodes};
945     }
946     }
947    
948     ## NOTE: No further restriction (such as to conform to the type).
949    
950     return ($new_todos);
951     } else {
952     my @nodes = (@{$todo->{node}->child_nodes});
953     my $new_todos = [];
954    
955     if ($value =~ m!^(?>message|multipart)/!i) { # MUST NOT
956     $self->{onerror}->(node => $attr, type => 'IMT:composite');
957     }
958    
959     my $s = '';
960     while (@nodes) {
961     my $node = shift @nodes;
962     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
963    
964     my $nt = $node->node_type;
965     if ($nt == 1) {
966     ## not explicitly disallowed
967     $self->{onerror}->(node => $node, type => 'element not allowed');
968     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
969     unshift @nodes, @$sib;
970     push @$new_todos, @$ch;
971     } elsif ($nt == 3 or $nt == 4) {
972     $s .= $node->data;
973     $self->{onerror}->(node => $node, type => 'character not allowed')
974     if $src_attr;
975 wakaba 1.7 if ($node->data =~ /[^\x09-\x0D\x20]/) {
976     $todo->{flag}->{has_descendant}->{significant} = 1;
977     }
978 wakaba 1.1 } elsif ($nt == 5) {
979     unshift @nodes, @{$node->child_nodes};
980     }
981     }
982    
983     ## TODO: $s = valid Base64ed [RFC 3548] where
984     ## MAY leading and following "white space" (what?)
985     ## and lines separated by a single U+000A
986 wakaba 1.3
987     ## NOTE: SHOULD be suitable for the indicated media type.
988     $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
989     type => 'content:'.$value);
990 wakaba 1.1
991     return ($new_todos);
992     }
993     },
994     };
995 wakaba 1.6 ## TODO: Tests for <html:nest/> in <atom:content/>
996 wakaba 1.1
997     $Element->{$ATOM_NS}->{author} = $AtomPersonConstruct;
998    
999     $Element->{$ATOM_NS}->{category} = {
1000     attrs_checker => $GetAtomAttrsChecker->({
1001     label => sub { 1 }, # no value constraint
1002 wakaba 1.2 scheme => sub { # NOTE: No MUST.
1003     my ($self, $attr) = @_;
1004     ## NOTE: There MUST NOT be any white space.
1005     Whatpm::URIChecker->check_iri ($attr->value, sub {
1006     my %opt = @_;
1007     $self->{onerror}->(node => $attr, level => $opt{level},
1008     type => 'URI::'.$opt{type}.
1009     (defined $opt{position} ? ':'.$opt{position} : ''));
1010     });
1011     },
1012 wakaba 1.1 term => sub { 1 }, # no value constraint
1013     }),
1014     checker => sub {
1015     my ($self, $todo) = @_;
1016    
1017     unless ($todo->{node}->has_attribute_ns (undef, 'term')) {
1018     $self->{onerror}->(node => $todo->{node},
1019     type => 'attribute missing:term');
1020     }
1021    
1022     my @nodes = (@{$todo->{node}->child_nodes});
1023     my $new_todos = [];
1024    
1025     while (@nodes) {
1026     my $node = shift @nodes;
1027     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1028    
1029     my $nt = $node->node_type;
1030     if ($nt == 1) {
1031     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1032     unshift @nodes, @$sib;
1033     push @$new_todos, @$ch;
1034     } elsif ($nt == 3 or $nt == 4) {
1035 wakaba 1.7 if ($node->data =~ /[^\x09-\x0D\x20]/) {
1036     $todo->{flag}->{has_descendant}->{significant} = 1;
1037     }
1038 wakaba 1.1 } elsif ($nt == 5) {
1039     unshift @nodes, @{$node->child_nodes};
1040     }
1041     }
1042    
1043     return ($new_todos);
1044     },
1045     };
1046    
1047     $Element->{$ATOM_NS}->{contributor} = $AtomPersonConstruct;
1048 wakaba 1.6
1049     ## TODO: Anything below does not support <html:nest/> yet.
1050 wakaba 1.1
1051     $Element->{$ATOM_NS}->{generator} = {
1052     attrs_checker => $GetAtomAttrsChecker->({
1053 wakaba 1.2 uri => sub { # MUST
1054     my ($self, $attr) = @_;
1055     ## NOTE: There MUST NOT be any white space.
1056     Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
1057     my %opt = @_;
1058     $self->{onerror}->(node => $attr, level => $opt{level},
1059     type => 'URI::'.$opt{type}.
1060     (defined $opt{position} ? ':'.$opt{position} : ''));
1061     });
1062     ## NOTE: Dereferencing SHOULD produce a representation
1063     ## that is relevant to the agent.
1064     },
1065 wakaba 1.1 version => sub { 1 }, # no value constraint
1066     }),
1067     checker => sub {
1068     my ($self, $todo) = @_;
1069    
1070     my @nodes = (@{$todo->{node}->child_nodes});
1071     my $new_todos = [];
1072    
1073     while (@nodes) {
1074     my $node = shift @nodes;
1075     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1076    
1077     my $nt = $node->node_type;
1078     if ($nt == 1) {
1079     ## not explicitly disallowed
1080     $self->{onerror}->(node => $node, type => 'element not allowed');
1081     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1082     unshift @nodes, @$sib;
1083     push @$new_todos, @$ch;
1084     } elsif ($nt == 3 or $nt == 4) {
1085     ## MUST be a string that is a human-readable name for
1086     ## the generating agent
1087 wakaba 1.7 if ($node->data =~ /[^\x09-\x0D\x20]/) {
1088     $todo->{flag}->{has_descendant}->{significant} = 1;
1089     }
1090 wakaba 1.1 } elsif ($nt == 5) {
1091     unshift @nodes, @{$node->child_nodes};
1092     }
1093     }
1094    
1095     return ($new_todos);
1096     },
1097     };
1098    
1099     $Element->{$ATOM_NS}->{icon} = {
1100     attrs_checker => $GetAtomAttrsChecker->({}),
1101     checker => sub {
1102     my ($self, $todo) = @_;
1103    
1104     my @nodes = (@{$todo->{node}->child_nodes});
1105     my $new_todos = [];
1106    
1107 wakaba 1.2 my $s = '';
1108 wakaba 1.1 while (@nodes) {
1109     my $node = shift @nodes;
1110     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1111    
1112     my $nt = $node->node_type;
1113     if ($nt == 1) {
1114     ## not explicitly disallowed
1115     $self->{onerror}->(node => $node, type => 'element not allowed');
1116     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1117     unshift @nodes, @$sib;
1118     push @$new_todos, @$ch;
1119     } elsif ($nt == 3 or $nt == 4) {
1120 wakaba 1.2 $s .= $node->data;
1121 wakaba 1.7 if ($node->data =~ /[^\x09-\x0D\x20]/) {
1122     $todo->{flag}->{has_descendant}->{significant} = 1;
1123     }
1124 wakaba 1.1 } elsif ($nt == 5) {
1125     unshift @nodes, @{$node->child_nodes};
1126     }
1127     }
1128    
1129 wakaba 1.2 ## NOTE: No MUST.
1130     ## NOTE: There MUST NOT be any white space.
1131     Whatpm::URIChecker->check_iri_reference ($s, sub {
1132     my %opt = @_;
1133     $self->{onerror}->(node => $todo->{node}, level => $opt{level},
1134     type => 'URI::'.$opt{type}.
1135     (defined $opt{position} ? ':'.$opt{position} : ''));
1136     });
1137    
1138 wakaba 1.1 ## NOTE: Image SHOULD be 1:1 and SHOULD be small
1139    
1140     return ($new_todos);
1141     },
1142     };
1143    
1144     $Element->{$ATOM_NS}->{id} = {
1145     attrs_checker => $GetAtomAttrsChecker->({}),
1146     checker => sub {
1147     my ($self, $todo) = @_;
1148    
1149     my @nodes = (@{$todo->{node}->child_nodes});
1150     my $new_todos = [];
1151 wakaba 1.2
1152     my $s = '';
1153     while (@nodes) {
1154     my $node = shift @nodes;
1155     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1156    
1157     my $nt = $node->node_type;
1158     if ($nt == 1) {
1159     ## not explicitly disallowed
1160     $self->{onerror}->(node => $node, type => 'element not allowed');
1161     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1162     unshift @nodes, @$sib;
1163     push @$new_todos, @$ch;
1164     } elsif ($nt == 3 or $nt == 4) {
1165     $s .= $node->data;
1166 wakaba 1.7 if ($node->data =~ /[^\x09-\x0D\x20]/) {
1167     $todo->{flag}->{has_descendant}->{significant} = 1;
1168     }
1169 wakaba 1.2 } elsif ($nt == 5) {
1170     unshift @nodes, @{$node->child_nodes};
1171     }
1172     }
1173    
1174     ## NOTE: There MUST NOT be any white space.
1175     Whatpm::URIChecker->check_iri ($s, sub { # MUST
1176     my %opt = @_;
1177     $self->{onerror}->(node => $todo->{node}, level => $opt{level},
1178     type => 'URI::'.$opt{type}.
1179     (defined $opt{position} ? ':'.$opt{position} : ''));
1180     });
1181     ## TODO: SHOULD be normalized
1182    
1183     return ($new_todos);
1184     },
1185     };
1186    
1187     $Element->{$ATOM_NS}->{link} = {
1188     attrs_checker => $GetAtomAttrsChecker->({
1189     href => sub {
1190     my ($self, $attr) = @_;
1191     ## NOTE: There MUST NOT be any white space.
1192     Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
1193     my %opt = @_;
1194     $self->{onerror}->(node => $attr, level => $opt{level},
1195     type => 'URI::'.$opt{type}.
1196     (defined $opt{position} ? ':'.$opt{position} : ''));
1197     });
1198     },
1199 wakaba 1.4 hreflang => $AtomLanguageTagAttrChecker,
1200 wakaba 1.2 length => sub { }, # No MUST; in octets.
1201     rel => sub { # MUST
1202     my ($self, $attr) = @_;
1203     my $value = $attr->value;
1204     if ($value =~ /\A(?>[0-9A-Za-z._~!\$&'()*+,;=\x{A0}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFEF}\x{10000}-\x{1FFFD}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}\x{40000}-\x{4FFFD}\x{50000}-\x{5FFFD}\x{60000}-\x{6FFFD}\x{70000}-\x{7FFFD}\x{80000}-\x{8FFFD}\x{90000}-\x{9FFFD}\x{A0000}-\x{AFFFD}\x{B0000}-\x{BFFFD}\x{C0000}-\x{CFFFD}\x{D0000}-\x{DFFFD}\x{E1000}-\x{EFFFD}-]|%[0-9A-Fa-f][0-9A-Fa-f]|\@)+\z/) {
1205 wakaba 1.3 $value = $LINK_REL . $value;
1206 wakaba 1.2 }
1207    
1208     ## NOTE: There MUST NOT be any white space.
1209     Whatpm::URIChecker->check_iri ($value, sub {
1210     my %opt = @_;
1211     $self->{onerror}->(node => $attr, level => $opt{level},
1212     type => 'URI::'.$opt{type}.
1213     (defined $opt{position} ? ':'.$opt{position} : ''));
1214     });
1215    
1216     ## TODO: Warn if unregistered
1217     },
1218 wakaba 1.5 title => sub { }, # No MUST
1219 wakaba 1.3 type => sub {
1220     ## NOTE: MUST be a MIME media type. What is "MIME media type"?
1221     my ($self, $attr) = @_;
1222     my $value = $attr->value;
1223     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
1224     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
1225     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
1226     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
1227     my @type = ($1, $2);
1228     my $param = $3;
1229     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
1230     if (defined $2) {
1231     push @type, $1 => $2;
1232     } else {
1233     my $n = $1;
1234     my $v = $2;
1235     $v =~ s/\\(.)/$1/gs;
1236     push @type, $n => $v;
1237     }
1238     }
1239     require Whatpm::IMTChecker;
1240     Whatpm::IMTChecker->check_imt (sub {
1241     my %opt = @_;
1242     $self->{onerror}->(node => $attr, level => $opt{level},
1243     type => 'IMT:'.$opt{type});
1244     }, @type);
1245     } else {
1246     $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
1247     }
1248     },
1249 wakaba 1.2 }),
1250     checker => sub {
1251     my ($self, $todo) = @_;
1252    
1253     unless ($todo->{node}->has_attribute_ns (undef, 'href')) { # MUST
1254     $self->{onerror}->(node => $todo->{node},
1255     type => 'attribute missing:href');
1256     }
1257    
1258 wakaba 1.3 if ($todo->{node}->rel eq $LINK_REL . 'enclosure' and
1259 wakaba 1.2 not $todo->{node}->has_attribute_ns (undef, 'length')) {
1260     $self->{onerror}->(node => $todo->{node}, level => 's',
1261     type => 'attribute missing:length');
1262     }
1263    
1264     my @nodes = (@{$todo->{node}->child_nodes});
1265     my $new_todos = [];
1266 wakaba 1.1
1267     while (@nodes) {
1268     my $node = shift @nodes;
1269     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1270    
1271     my $nt = $node->node_type;
1272     if ($nt == 1) {
1273 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1274     unshift @nodes, @$sib;
1275     push @$new_todos, @$ch;
1276     } elsif ($nt == 3 or $nt == 4) {
1277 wakaba 1.7 if ($node->data =~ /[^\x09-\x0D\x20]/) {
1278     $todo->{flag}->{has_descendant}->{significant} = 1;
1279     }
1280 wakaba 1.2 } elsif ($nt == 5) {
1281     unshift @nodes, @{$node->child_nodes};
1282     }
1283     }
1284    
1285     return ($new_todos);
1286     },
1287     };
1288    
1289     $Element->{$ATOM_NS}->{logo} = {
1290     attrs_checker => $GetAtomAttrsChecker->({}),
1291     checker => sub {
1292     my ($self, $todo) = @_;
1293    
1294     my @nodes = (@{$todo->{node}->child_nodes});
1295     my $new_todos = [];
1296    
1297     my $s = '';
1298     while (@nodes) {
1299     my $node = shift @nodes;
1300     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1301    
1302     my $nt = $node->node_type;
1303     if ($nt == 1) {
1304 wakaba 1.1 ## not explicitly disallowed
1305     $self->{onerror}->(node => $node, type => 'element not allowed');
1306     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1307     unshift @nodes, @$sib;
1308     push @$new_todos, @$ch;
1309     } elsif ($nt == 3 or $nt == 4) {
1310 wakaba 1.2 $s .= $node->data;
1311 wakaba 1.7 if ($node->data =~ /[^\x09-\x0D\x20]/) {
1312     $todo->{flag}->{has_descendant}->{significant} = 1;
1313     }
1314 wakaba 1.1 } elsif ($nt == 5) {
1315     unshift @nodes, @{$node->child_nodes};
1316     }
1317     }
1318    
1319 wakaba 1.2 ## NOTE: There MUST NOT be any white space.
1320     Whatpm::URIChecker->check_iri_reference ($s, sub {
1321     my %opt = @_;
1322     $self->{onerror}->(node => $todo->{node}, level => $opt{level},
1323     type => 'URI::'.$opt{type}.
1324     (defined $opt{position} ? ':'.$opt{position} : ''));
1325     });
1326    
1327     ## NOTE: Image SHOULD be 2:1
1328    
1329     return ($new_todos);
1330     },
1331     };
1332    
1333     $Element->{$ATOM_NS}->{published} = $AtomDateConstruct;
1334    
1335     $Element->{$ATOM_NS}->{rights} = $AtomDateConstruct;
1336 wakaba 1.3 ## NOTE: SHOULD NOT be used to convey machine-readable information.
1337 wakaba 1.2
1338     $Element->{$ATOM_NS}->{source} = {
1339     attrs_checker => $GetAtomAttrsChecker->({}),
1340     checker => sub {
1341     my ($self, $todo) = @_;
1342    
1343     my @nodes = (@{$todo->{node}->child_nodes});
1344     my $new_todos = [];
1345     my $has_element = {};
1346     while (@nodes) {
1347     my $node = shift @nodes;
1348     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1349    
1350     my $nt = $node->node_type;
1351     if ($nt == 1) {
1352     my $nsuri = $node->namespace_uri;
1353     $nsuri = '' unless defined $nsuri;
1354     my $not_allowed;
1355     if ($nsuri eq $ATOM_NS) {
1356     my $ln = $node->manakai_local_name;
1357     if ($ln eq 'entry') {
1358     $has_element->{entry} = 1;
1359     } elsif ({
1360     generator => 1,
1361     icon => 1,
1362     id => 1,
1363     logo => 1,
1364     rights => 1,
1365     subtitle => 1,
1366     title => 1,
1367     updated => 1,
1368     }->{$ln}) {
1369     unless ($has_element->{$ln}) {
1370     $has_element->{$ln} = 1;
1371     $not_allowed = $has_element->{entry};
1372     } else {
1373     $not_allowed = 1;
1374     }
1375     } elsif ($ln eq 'link') {
1376 wakaba 1.3 if ($node->rel eq $LINK_REL . 'alternate') {
1377     my $type = $node->get_attribute_ns (undef, 'type');
1378     $type = '' unless defined $type;
1379     my $hreflang = $node->get_attribute_ns (undef, 'hreflang');
1380     $hreflang = '' unless defined $hreflang;
1381     my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
1382     (defined $hreflang ? ':'.$hreflang : '');
1383     unless ($has_element->{$key}) {
1384     $has_element->{$key} = 1;
1385     } else {
1386     $not_allowed = 1;
1387     }
1388     }
1389     $not_allowed ||= $has_element->{entry};
1390 wakaba 1.2 } elsif ({
1391     author => 1,
1392     category => 1,
1393     contributor => 1,
1394     }->{$ln}) {
1395     $not_allowed = $has_element->{entry};
1396     } else {
1397     $not_allowed = 1;
1398     }
1399     } else {
1400     ## TODO: extension element
1401     $not_allowed = 1;
1402     }
1403     $self->{onerror}->(node => $node, type => 'element not allowed')
1404     if $not_allowed;
1405     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1406     unshift @nodes, @$sib;
1407     push @$new_todos, @$ch;
1408     } elsif ($nt == 3 or $nt == 4) {
1409     ## TODO: Are white spaces allowed?
1410     $self->{onerror}->(node => $node, type => 'character not allowed');
1411 wakaba 1.7 if ($node->data =~ /[^\x09-\x0D\x20]/) {
1412     $todo->{flag}->{has_descendant}->{significant} = 1;
1413     }
1414 wakaba 1.2 } elsif ($nt == 5) {
1415     unshift @nodes, @{$node->child_nodes};
1416     }
1417     }
1418 wakaba 1.1
1419     return ($new_todos);
1420     },
1421     };
1422 wakaba 1.2
1423     $Element->{$ATOM_NS}->{subtitle} = $AtomTextConstruct;
1424    
1425     $Element->{$ATOM_NS}->{summary} = $AtomTextConstruct;
1426    
1427     $Element->{$ATOM_NS}->{title} = $AtomTextConstruct;
1428    
1429     $Element->{$ATOM_NS}->{updated} = $AtomDateConstruct;
1430    
1431     ## TODO: signature element
1432    
1433     ## TODO: simple extension element and structured extension element
1434 wakaba 1.1
1435     $Whatpm::ContentChecker::Namespace->{$ATOM_NS}->{loaded} = 1;
1436    
1437     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24