/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/Atom.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/Atom.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (hide annotations) (download)
Sun Aug 5 07:12:45 2007 UTC (17 years, 3 months ago) by wakaba
Branch: MAIN
Changes since 1.1: +364 -22 lines
++ whatpm/Whatpm/ChangeLog	5 Aug 2007 07:12:00 -0000
2007-08-05  Wakaba  <wakaba@suika.fam.cx>

	* H2H.pm: |samp| is added to the list of allowed
	HTML elements.

	* URIChecker.pm (check_iri): New.
	(check_iri_reference): Error type for IRI reference
	syntax error is changed.

++ whatpm/Whatpm/ContentChecker/ChangeLog	5 Aug 2007 07:12:26 -0000
	* Atom.pm: Checker for all elements are defined (with
	a number of "TODO"s).

2007-08-05  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5 wakaba 1.2 require Whatpm::URIChecker;
6    
7 wakaba 1.1 my $ATOM_NS = q<http://www.w3.org/2005/Atom>;
8    
9     ## MUST be well-formed XML (RFC 4287 references XML 1.0 REC 20040204)
10    
11     ## NOTE: Commants and PIs are not explicitly allowed.
12    
13     our $AttrChecker;
14    
15     ## Any element MAY have xml:base, xml:lang
16     my $GetAtomAttrsChecker = sub {
17     my $element_specific_checker = shift;
18     return sub {
19     my ($self, $todo) = @_;
20     for my $attr (@{$todo->{node}->attributes}) {
21     my $attr_ns = $attr->namespace_uri;
22     $attr_ns = '' unless defined $attr_ns;
23     my $attr_ln = $attr->manakai_local_name;
24     my $checker;
25     if ($attr_ns eq '') {
26     $checker = $element_specific_checker->{$attr_ln};
27     } else {
28     $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
29     || $AttrChecker->{$attr_ns}->{''};
30     }
31     if ($checker) {
32     $checker->($self, $attr, $todo);
33     } else {
34     $self->{onerror}->(node => $attr, level => 'unsupported',
35     type => 'attribute');
36     ## ISSUE: No comformance createria for unknown attributes in the spec
37     }
38     }
39     };
40     }; # $GetAtomAttrsChecker
41    
42     my $AtomTextConstruct = {
43     attrs_checker => $GetAtomAttrsChecker->({
44     type => sub { 1 }, # checked in |checker|
45     }),
46     checker => sub {
47     my ($self, $todo) = @_;
48    
49     my $attr = $todo->{node}->get_attribute_node_ns (undef, 'type');
50     my $value = 'text';
51     if ($attr) {
52     $value = $attr->value;
53     if ($value eq 'text' or $value eq 'html' or $value eq 'xhtml') {
54     # MUST
55     } else {
56     $self->{onerror}->(node => $attr, type => 'keyword:invalid');
57     }
58     # IMT MUST NOT be used
59     }
60    
61     if ($value eq 'text') {
62     my @nodes = (@{$todo->{node}->child_nodes});
63     my $new_todos = [];
64    
65     while (@nodes) {
66     my $node = shift @nodes;
67     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
68    
69     my $nt = $node->node_type;
70     if ($nt == 1) {
71     # MUST NOT
72     $self->{onerror}->(node => $node, type => 'element not allowed');
73     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
74     unshift @nodes, @$sib;
75     push @$new_todos, @$ch;
76     } elsif ($nt == 5) {
77     unshift @nodes, @{$node->child_nodes};
78     }
79     }
80    
81     return ($new_todos);
82     } elsif ($value eq 'html') {
83     my @nodes = (@{$todo->{node}->child_nodes});
84     my $new_todos = [];
85    
86     while (@nodes) {
87     my $node = shift @nodes;
88     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
89    
90     my $nt = $node->node_type;
91     if ($nt == 1) {
92     # MUST NOT
93     $self->{onerror}->(node => $node, type => 'element not allowed');
94     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
95     unshift @nodes, @$sib;
96     push @$new_todos, @$ch;
97     } elsif ($nt == 5) {
98     unshift @nodes, @{$node->child_nodes};
99     }
100     }
101    
102     ## TODO: SHOULD be suitable for handling as HTML [HTML4]
103     # markup MUST be escaped
104     ## TODO: HTML SHOULD be valid as if within <div>
105    
106     return ($new_todos);
107     } elsif ($value eq 'xhtml') {
108     my @nodes = (@{$todo->{node}->child_nodes});
109     my $new_todos = [];
110    
111     my $has_div;
112     while (@nodes) {
113     my $node = shift @nodes;
114     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
115    
116     my $nt = $node->node_type;
117     if ($nt == 1) {
118     # MUST
119     my $nsuri = $node->namespace_uri;
120     if (defined $nsuri and
121     $nsuri eq q<http://www.w3.org/1999/xhtml> and
122     $node->manakai_local_name eq 'div' and
123     not $has_div) {
124     ## TODO: SHOULD be suitable for handling as HTML [XHTML10]
125     $has_div = 1;
126     } else {
127     $self->{onerror}->(node => $node, type => 'element not allowed');
128     }
129     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
130     unshift @nodes, @$sib;
131     push @$new_todos, @$ch;
132     } elsif ($nt == 3 or $nt == 4) {
133     ## TODO: Are white spaces allowed?
134     $self->{onerror}->(node => $node, type => 'character not allowed');
135     } elsif ($nt == 5) {
136     unshift @nodes, @{$node->child_nodes};
137     }
138     }
139    
140     unless ($has_div) {
141     $self->{onerror}->(node => $todo->{node},
142     type => 'element missing:div');
143     }
144    
145     return ($new_todos);
146     }
147    
148     },
149     }; # $AtomTextConstruct
150    
151     my $AtomPersonConstruct = {
152     attrs_checker => $GetAtomAttrsChecker->({}),
153     checker => sub {
154     my ($self, $todo) = @_;
155    
156     my @nodes = (@{$todo->{node}->child_nodes});
157     my $new_todos = [];
158    
159     my $has_name;
160     my $has_uri;
161     my $has_email;
162     while (@nodes) {
163     my $node = shift @nodes;
164     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
165    
166     my $nt = $node->node_type;
167     if ($nt == 1) {
168     # MUST
169     my $nsuri = $node->namespace_uri;
170     $nsuri = '' unless defined $nsuri;
171     my $not_allowed;
172     if ($nsuri eq $ATOM_NS) {
173     my $ln = $node->manakai_local_name;
174     if ($ln eq 'name') {
175     unless ($has_name) {
176     $has_name = 1;
177     } else {
178     $not_allowed = 1;
179     }
180     } elsif ($ln eq 'uri') {
181     unless ($has_uri) {
182     $has_uri = 1;
183     } else {
184     $not_allowed = 1; # MUST NOT
185     }
186     } elsif ($ln eq 'email') {
187     unless ($has_email) {
188     $has_email = 1;
189     } else {
190     $not_allowed = 1; # MUST NOT
191     }
192     } else {
193     $not_allowed = 1;
194     }
195     } else {
196     ## TODO: extension element
197     $not_allowed = 1;
198     }
199     $self->{onerror}->(node => $node, type => 'element not allowed')
200     if $not_allowed;
201     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
202     unshift @nodes, @$sib;
203     push @$new_todos, @$ch;
204     } elsif ($nt == 3 or $nt == 4) {
205     ## TODO: Are white spaces allowed?
206     $self->{onerror}->(node => $node, type => 'character not allowed');
207     } elsif ($nt == 5) {
208     unshift @nodes, @{$node->child_nodes};
209     }
210     }
211    
212     unless ($has_name) { # MUST
213     $self->{onerror}->(node => $todo->{node},
214     type => 'element missing:atom.name');
215     }
216    
217     return ($new_todos);
218     },
219     }; # $AtomPersonConstruct
220    
221 wakaba 1.2 our $Element;
222    
223     $Element->{$ATOM_NS}->{name} = {
224     ## NOTE: Strictly speaking, structure and semantics for atom:name
225     ## element outside of Person construct is not defined.
226     attrs_checker => $GetAtomAttrsChecker->({}),
227     checker => sub {
228     my ($self, $todo) = @_;
229    
230     my @nodes = (@{$todo->{node}->child_nodes});
231     my $new_todos = [];
232    
233     while (@nodes) {
234     my $node = shift @nodes;
235     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
236    
237     my $nt = $node->node_type;
238     if ($nt == 1) {
239     ## NOTE: No constraint.
240     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
241     unshift @nodes, @$sib;
242     push @$new_todos, @$ch;
243     } elsif ($nt == 3 or $nt == 4) {
244     #
245     } elsif ($nt == 5) {
246     unshift @nodes, @{$node->child_nodes};
247     }
248     }
249    
250     return ($new_todos);
251     },
252     };
253    
254     $Element->{$ATOM_NS}->{uri} = {
255     ## NOTE: Strictly speaking, structure and semantics for atom:uri
256     ## element outside of Person construct is not defined.
257     attrs_checker => $GetAtomAttrsChecker->({}),
258     checker => sub {
259     my ($self, $todo) = @_;
260    
261     my @nodes = (@{$todo->{node}->child_nodes});
262     my $new_todos = [];
263    
264     my $s = '';
265     while (@nodes) {
266     my $node = shift @nodes;
267     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
268    
269     my $nt = $node->node_type;
270     if ($nt == 1) {
271     ## NOTE: Not explicitly disallowed.
272     $self->{onerror}->(node => $node, type => 'element not allowed');
273     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
274     unshift @nodes, @$sib;
275     push @$new_todos, @$ch;
276     } elsif ($nt == 3 or $nt == 4) {
277     $s .= $node->data;
278     } elsif ($nt == 5) {
279     unshift @nodes, @{$node->child_nodes};
280     }
281     }
282    
283     ## NOTE: There MUST NOT be any white space.
284     Whatpm::URIChecker->check_iri_reference ($s, sub {
285     my %opt = @_;
286     $self->{onerror}->(node => $todo->{node}, level => $opt{level},
287     type => 'URI::'.$opt{type}.
288     (defined $opt{position} ? ':'.$opt{position} : ''));
289     });
290    
291     return ($new_todos);
292     },
293     };
294    
295     $Element->{$ATOM_NS}->{email} = {
296     ## NOTE: Strictly speaking, structure and semantics for atom:email
297     ## element outside of Person construct is not defined.
298     attrs_checker => $GetAtomAttrsChecker->({}),
299     checker => sub {
300     my ($self, $todo) = @_;
301    
302     my @nodes = (@{$todo->{node}->child_nodes});
303     my $new_todos = [];
304    
305     my $s = '';
306     while (@nodes) {
307     my $node = shift @nodes;
308     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
309    
310     my $nt = $node->node_type;
311     if ($nt == 1) {
312     ## NOTE: Not explicitly disallowed.
313     $self->{onerror}->(node => $node, type => 'element not allowed');
314     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
315     unshift @nodes, @$sib;
316     push @$new_todos, @$ch;
317     } elsif ($nt == 3 or $nt == 4) {
318     $s .= $node->data;
319     } elsif ($nt == 5) {
320     unshift @nodes, @{$node->child_nodes};
321     }
322     }
323    
324     ## TODO: addr-spec
325     $self->{onerror}->(node => $todo->{node}, type => 'addr-spec',
326     level => 'unsupported');
327    
328     return ($new_todos);
329     },
330     };
331    
332 wakaba 1.1 ## MUST NOT be any white space
333     my $AtomDateConstruct = {
334     attrs_checker => $GetAtomAttrsChecker->({}),
335     checker => sub {
336     my ($self, $todo) = @_;
337    
338     my @nodes = (@{$todo->{node}->child_nodes});
339     my $new_todos = [];
340    
341     my $s = '';
342     while (@nodes) {
343     my $node = shift @nodes;
344     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
345    
346     my $nt = $node->node_type;
347     if ($nt == 1) {
348     ## NOTE: It does not explicitly say that there MUST NOT be any element.
349     $self->{onerror}->(node => $node, type => 'element not allowed');
350     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
351     unshift @nodes, @$sib;
352     push @$new_todos, @$ch;
353     } elsif ($nt == 3 or $nt == 4) {
354     $s .= $node->data;
355     } elsif ($nt == 5) {
356     unshift @nodes, @{$node->child_nodes};
357     }
358     }
359    
360     ## TODO: $s =~ MUST RFC 3339 date-time, uppercase T, Z
361     # SHOULD be accurate as possible
362    
363     return ($new_todos);
364     },
365     }; # $AtomDateConstruct
366    
367 wakaba 1.2 $Element->{$ATOM_NS}->{entry} = {
368 wakaba 1.1 is_root => 1,
369     attrs_checker => $GetAtomAttrsChecker->({}),
370     checker => sub {
371     my ($self, $todo) = @_;
372    
373     my @nodes = (@{$todo->{node}->child_nodes});
374     my $new_todos = [];
375    
376     ## TODO: MUST author+ unless (child::source/child::author)
377     ## or (parent::feed/child::author)
378    
379     my $has_element = {};
380     while (@nodes) {
381     my $node = shift @nodes;
382     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
383    
384     my $nt = $node->node_type;
385     if ($nt == 1) {
386     # MUST
387     my $nsuri = $node->namespace_uri;
388     $nsuri = '' unless defined $nsuri;
389     my $not_allowed;
390     if ($nsuri eq $ATOM_NS) {
391     my $ln = $node->manakai_local_name;
392     if ({ # MUST (0, 1)
393     content => 1,
394     id => 1,
395     published => 1,
396     rights => 1,
397     source => 1,
398     summary => 1,
399     ## TODO: MUST if child::content/@src | child::content/@type = IMT, !text/ !/xml !+xml
400     title => 1,
401     updated => 1,
402     }->{$ln}) {
403     unless ($has_element->{$ln}) {
404     $has_element->{$ln} = 1;
405     $not_allowed = $has_element->{entry};
406     } else {
407     $not_allowed = 1;
408     }
409     } elsif ($ln eq 'link') { # MAY
410     ## TODO: MUST link rel=alternate + unless child::content
411     ## TODO: MUST NOT rel=alternate with same (type, hreflang) +
412     ## NOTE: MAY
413     #
414     $not_allowed = $has_element->{entry};
415     } elsif ({ # MAY
416     author => 1,
417     category => 1,
418     contributor => 1,
419     }->{$ln}) {
420     $not_allowed = $has_element->{entry};
421     } else {
422     $not_allowed = 1;
423     }
424     } else {
425     ## TODO: extension element
426     $not_allowed = 1;
427     }
428     $self->{onerror}->(node => $node, type => 'element not allowed')
429     if $not_allowed;
430     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
431     unshift @nodes, @$sib;
432     push @$new_todos, @$ch;
433     } elsif ($nt == 3 or $nt == 4) {
434     ## TODO: Are white spaces allowed?
435     $self->{onerror}->(node => $node, type => 'character not allowed');
436     } elsif ($nt == 5) {
437     unshift @nodes, @{$node->child_nodes};
438     }
439     }
440    
441     ## NOTE: metadata elements, followed by atom:entry* (no explicit MAY)
442    
443     ## TODO: If entry's with same id, then updated SHOULD be different
444    
445     unless ($has_element->{id}) { # MUST
446     $self->{onerror}->(node => $todo->{node},
447     type => 'element missing:atom.id');
448     }
449     unless ($has_element->{title}) { # MUST
450     $self->{onerror}->(node => $todo->{node},
451     type => 'element missing:atom.title');
452     }
453     unless ($has_element->{updated}) { # MUST
454     $self->{onerror}->(node => $todo->{node},
455     type => 'element missing:atom.updated');
456     }
457    
458     return ($new_todos);
459     },
460     };
461    
462     $Element->{$ATOM_NS}->{feed} = {
463     is_root => 1,
464     attrs_checker => $GetAtomAttrsChecker->({}),
465     checker => sub {
466     my ($self, $todo) = @_;
467    
468     my @nodes = (@{$todo->{node}->child_nodes});
469     my $new_todos = [];
470    
471     ## TODO: MUST author+ unless all entry child has author+.
472    
473     my $has_element = {};
474     while (@nodes) {
475     my $node = shift @nodes;
476     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
477    
478     my $nt = $node->node_type;
479     if ($nt == 1) {
480     my $nsuri = $node->namespace_uri;
481     $nsuri = '' unless defined $nsuri;
482     my $not_allowed;
483     if ($nsuri eq $ATOM_NS) {
484     my $ln = $node->manakai_local_name;
485     if ($ln eq 'entry') {
486     $has_element->{entry} = 1;
487     } elsif ({ # MUST (0, 1)
488     generator => 1,
489     icon => 1,
490     id => 1,
491     logo => 1,
492     rights => 1,
493     subtitle => 1,
494     title => 1,
495     updated => 1,
496     }->{$ln}) {
497     unless ($has_element->{$ln}) {
498     $has_element->{$ln} = 1;
499     $not_allowed = $has_element->{entry};
500     } else {
501     $not_allowed = 1;
502     }
503     } elsif ($ln eq 'link') { # MAY
504     ## TODO: SHOULD rel=self
505     ## TODO: MUST NOT rel=alternate with same (type, hreflang)
506     #
507     $not_allowed = $has_element->{entry};
508     } elsif ({ # MAY
509     author => 1,
510     category => 1,
511     contributor => 1,
512     }->{$ln}) {
513     $not_allowed = $has_element->{entry};
514     } else {
515     $not_allowed = 1;
516     }
517     } else {
518     ## TODO: extension element
519     $not_allowed = 1;
520     }
521     $self->{onerror}->(node => $node, type => 'element not allowed')
522     if $not_allowed;
523     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
524     unshift @nodes, @$sib;
525     push @$new_todos, @$ch;
526     } elsif ($nt == 3 or $nt == 4) {
527     ## TODO: Are white spaces allowed?
528     $self->{onerror}->(node => $node, type => 'character not allowed');
529     } elsif ($nt == 5) {
530     unshift @nodes, @{$node->child_nodes};
531     }
532     }
533    
534     ## NOTE: metadata elements, followed by atom:entry* (no explicit MAY)
535    
536     ## TODO: If entry's with same id, then updated SHOULD be different
537    
538     unless ($has_element->{id}) { # MUST
539     $self->{onerror}->(node => $todo->{node},
540     type => 'element missing:atom.id');
541     }
542     unless ($has_element->{title}) { # MUST
543     $self->{onerror}->(node => $todo->{node},
544     type => 'element missing:atom.title');
545     }
546     unless ($has_element->{updated}) { # MUST
547     $self->{onerror}->(node => $todo->{node},
548     type => 'element missing:atom.updated');
549     }
550    
551     return ($new_todos);
552     },
553     };
554    
555     $Element->{$ATOM_NS}->{content} = {
556     attrs_checker => $GetAtomAttrsChecker->({
557     src => sub { 1 }, # checked in |checker|
558     type => sub { 1 }, # checked in |checker|
559     }),
560     checker => sub {
561     my ($self, $todo) = @_;
562    
563     my $attr = $todo->{node}->get_attribute_node_ns (undef, 'type');
564     my $src_attr = $todo->{node}->get_attribute_node_ns (undef, 'src');
565     my $value;
566     if ($attr) {
567     $value = $attr->value;
568     if ($value eq 'text' or $value eq 'html' or $value eq 'xhtml') {
569     # MUST
570     } else {
571    
572     }
573     # IMT MUST NOT be used
574     } elsif ($src_attr) {
575     $value = '';
576     $self->{onerror}->(node => $todo->{node},
577     type => 'attribute missing:type', level => 's');
578     } else {
579     $value = 'text';
580     }
581    
582     ## TODO: type MUST be text/html/xhtml or MIME media type
583    
584     ## TODO: This implementation is not optimal.
585    
586     if ($src_attr) {
587 wakaba 1.2 ## NOTE: There MUST NOT be any white space.
588     Whatpm::URIChecker->check_iri_reference ($src_attr->value, sub {
589     my %opt = @_;
590     $self->{onerror}->(node => $todo->{node}, level => $opt{level},
591     type => 'URI::'.$opt{type}.
592     (defined $opt{position} ? ':'.$opt{position} : ''));
593     });
594 wakaba 1.1
595     ## NOTE: If @src, the element MUST be empty. What is "empty"?
596     ## Is |<e><!----></e>| empty? |<e>&e;</e>| where |&e;| has
597     ## empty replacement tree shuld be empty, since Atom is defined
598     ## in terms of XML Information Set where entities are expanded.
599     ## (but what if |&e;| is an unexpanded entity?)
600     }
601    
602     if ($value eq 'text') {
603     $self->{onerror}->(node => $attr, type => 'not IMT') if $src_attr;
604    
605     my @nodes = (@{$todo->{node}->child_nodes});
606     my $new_todos = [];
607    
608     while (@nodes) {
609     my $node = shift @nodes;
610     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
611    
612     my $nt = $node->node_type;
613     if ($nt == 1) {
614     # MUST NOT
615     $self->{onerror}->(node => $node, type => 'element not allowed');
616     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
617     unshift @nodes, @$sib;
618     push @$new_todos, @$ch;
619     } elsif ($nt == 3 or $nt == 4) {
620     $self->{onerror}->(node => $node, type => 'character not allowed')
621     if $src_attr;
622     } elsif ($nt == 5) {
623     unshift @nodes, @{$node->child_nodes};
624     }
625     }
626    
627     return ($new_todos);
628     } elsif ($value eq 'html') {
629     $self->{onerror}->(node => $attr, type => 'not IMT') if $src_attr;
630    
631     my @nodes = (@{$todo->{node}->child_nodes});
632     my $new_todos = [];
633    
634     while (@nodes) {
635     my $node = shift @nodes;
636     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
637    
638     my $nt = $node->node_type;
639     if ($nt == 1) {
640     # MUST NOT
641     $self->{onerror}->(node => $node, type => 'element not allowed');
642     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
643     unshift @nodes, @$sib;
644     push @$new_todos, @$ch;
645     } elsif ($nt == 3 or $nt == 4) {
646     $self->{onerror}->(node => $node, type => 'character not allowed')
647     if $src_attr;
648     } elsif ($nt == 5) {
649     unshift @nodes, @{$node->child_nodes};
650     }
651     }
652    
653     ## TODO: SHOULD be suitable for handling as HTML [HTML4]
654     # markup MUST be escaped
655     ## TODO: HTML SHOULD be valid as if within <div>
656    
657     return ($new_todos);
658     } elsif ($value eq 'xhtml') {
659     $self->{onerror}->(node => $attr, type => 'not IMT') if $src_attr;
660    
661     my @nodes = (@{$todo->{node}->child_nodes});
662     my $new_todos = [];
663    
664     my $has_div;
665     while (@nodes) {
666     my $node = shift @nodes;
667     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
668    
669     my $nt = $node->node_type;
670     if ($nt == 1) {
671     # MUST
672     my $nsuri = $node->namespace_uri;
673     if (defined $nsuri and
674     $nsuri eq q<http://www.w3.org/1999/xhtml> and
675     $node->manakai_local_name eq 'div' and
676     not $has_div) {
677     ## TODO: SHOULD be suitable for handling as HTML [XHTML10]
678     $has_div = 1;
679     $self->{onerror}->(node => $node, type => 'element not allowed')
680     if $src_attr;
681     } else {
682     $self->{onerror}->(node => $node, type => 'element not allowed');
683     }
684     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
685     unshift @nodes, @$sib;
686     push @$new_todos, @$ch;
687     } elsif ($nt == 3 or $nt == 4) {
688     ## TODO: Are white spaces allowed?
689     $self->{onerror}->(node => $node, type => 'character not allowed');
690     } elsif ($nt == 5) {
691     unshift @nodes, @{$node->child_nodes};
692     }
693     }
694    
695     unless ($has_div) {
696     $self->{onerror}->(node => $todo->{node},
697     type => 'element missing:div');
698     }
699    
700     return ($new_todos);
701     } elsif ($value =~ m![+/][Xx][Mm][Ll]\z!) {
702     ## ISSUE: There is no definition for "XML media type" in RFC 3023.
703     ## Is |application/xml-dtd| an XML media type?
704    
705     my @nodes = (@{$todo->{node}->child_nodes});
706     my $new_todos = [];
707    
708     while (@nodes) {
709     my $node = shift @nodes;
710     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
711    
712     my $nt = $node->node_type;
713     if ($nt == 1) {
714     ## MAY contain elements
715     $self->{onerror}->(node => $node, type => 'element not allowed')
716     if $src_attr;
717     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
718     unshift @nodes, @$sib;
719     push @$new_todos, @$ch;
720     } elsif ($nt == 3 or $nt == 4) {
721     ## TODO: Are white spaces allowed?
722     $self->{onerror}->(node => $node, type => 'character not allowed');
723     } elsif ($nt == 5) {
724     unshift @nodes, @{$node->child_nodes};
725     }
726     }
727    
728     ## TODO: SHOULD be suitable for handling as $value.
729     ## If no @src, this would normally mean it contains a
730     ## single child element that would serve as the root element.
731    
732     return ($new_todos);
733     } elsif ($value =~ m!^[Tt][Ee][Xx][Tt]/!) {
734     my @nodes = (@{$todo->{node}->child_nodes});
735     my $new_todos = [];
736    
737     while (@nodes) {
738     my $node = shift @nodes;
739     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
740    
741     my $nt = $node->node_type;
742     if ($nt == 1) {
743     # MUST NOT
744     $self->{onerror}->(node => $node, type => 'element not allowed');
745     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
746     unshift @nodes, @$sib;
747     push @$new_todos, @$ch;
748     } elsif ($nt == 3 or $nt == 4) {
749     $self->{onerror}->(node => $node, type => 'character not allowed')
750     if $src_attr;
751     } elsif ($nt == 5) {
752     unshift @nodes, @{$node->child_nodes};
753     }
754     }
755    
756     ## NOTE: No further restriction (such as to conform to the type).
757    
758     return ($new_todos);
759     } else {
760     my @nodes = (@{$todo->{node}->child_nodes});
761     my $new_todos = [];
762    
763     if ($value =~ m!^(?>message|multipart)/!i) { # MUST NOT
764     $self->{onerror}->(node => $attr, type => 'IMT:composite');
765     }
766    
767     my $s = '';
768     while (@nodes) {
769     my $node = shift @nodes;
770     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
771    
772     my $nt = $node->node_type;
773     if ($nt == 1) {
774     ## not explicitly disallowed
775     $self->{onerror}->(node => $node, type => 'element not allowed');
776     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
777     unshift @nodes, @$sib;
778     push @$new_todos, @$ch;
779     } elsif ($nt == 3 or $nt == 4) {
780     $s .= $node->data;
781     $self->{onerror}->(node => $node, type => 'character not allowed')
782     if $src_attr;
783     } elsif ($nt == 5) {
784     unshift @nodes, @{$node->child_nodes};
785     }
786     }
787    
788     ## TODO: $s = valid Base64ed [RFC 3548] where
789     ## MAY leading and following "white space" (what?)
790     ## and lines separated by a single U+000A
791     ## SHOULD be suitable for the indicated media type
792    
793     return ($new_todos);
794     }
795     },
796     };
797    
798     $Element->{$ATOM_NS}->{author} = $AtomPersonConstruct;
799    
800     $Element->{$ATOM_NS}->{category} = {
801     attrs_checker => $GetAtomAttrsChecker->({
802     label => sub { 1 }, # no value constraint
803 wakaba 1.2 scheme => sub { # NOTE: No MUST.
804     my ($self, $attr) = @_;
805     ## NOTE: There MUST NOT be any white space.
806     Whatpm::URIChecker->check_iri ($attr->value, sub {
807     my %opt = @_;
808     $self->{onerror}->(node => $attr, level => $opt{level},
809     type => 'URI::'.$opt{type}.
810     (defined $opt{position} ? ':'.$opt{position} : ''));
811     });
812     },
813 wakaba 1.1 term => sub { 1 }, # no value constraint
814     }),
815     checker => sub {
816     my ($self, $todo) = @_;
817    
818     unless ($todo->{node}->has_attribute_ns (undef, 'term')) {
819     $self->{onerror}->(node => $todo->{node},
820     type => 'attribute missing:term');
821     }
822    
823     my @nodes = (@{$todo->{node}->child_nodes});
824     my $new_todos = [];
825    
826     while (@nodes) {
827     my $node = shift @nodes;
828     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
829    
830     my $nt = $node->node_type;
831     if ($nt == 1) {
832     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
833     unshift @nodes, @$sib;
834     push @$new_todos, @$ch;
835     } elsif ($nt == 3 or $nt == 4) {
836     #
837     } elsif ($nt == 5) {
838     unshift @nodes, @{$node->child_nodes};
839     }
840     }
841    
842     return ($new_todos);
843     },
844     };
845    
846     $Element->{$ATOM_NS}->{contributor} = $AtomPersonConstruct;
847    
848     $Element->{$ATOM_NS}->{generator} = {
849     attrs_checker => $GetAtomAttrsChecker->({
850 wakaba 1.2 uri => sub { # MUST
851     my ($self, $attr) = @_;
852     ## NOTE: There MUST NOT be any white space.
853     Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
854     my %opt = @_;
855     $self->{onerror}->(node => $attr, level => $opt{level},
856     type => 'URI::'.$opt{type}.
857     (defined $opt{position} ? ':'.$opt{position} : ''));
858     });
859     ## NOTE: Dereferencing SHOULD produce a representation
860     ## that is relevant to the agent.
861     },
862 wakaba 1.1 version => sub { 1 }, # no value constraint
863     }),
864     checker => sub {
865     my ($self, $todo) = @_;
866    
867     my @nodes = (@{$todo->{node}->child_nodes});
868     my $new_todos = [];
869    
870     while (@nodes) {
871     my $node = shift @nodes;
872     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
873    
874     my $nt = $node->node_type;
875     if ($nt == 1) {
876     ## not explicitly disallowed
877     $self->{onerror}->(node => $node, type => 'element not allowed');
878     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
879     unshift @nodes, @$sib;
880     push @$new_todos, @$ch;
881     } elsif ($nt == 3 or $nt == 4) {
882     ## MUST be a string that is a human-readable name for
883     ## the generating agent
884     } elsif ($nt == 5) {
885     unshift @nodes, @{$node->child_nodes};
886     }
887     }
888    
889     return ($new_todos);
890     },
891     };
892    
893     $Element->{$ATOM_NS}->{icon} = {
894     attrs_checker => $GetAtomAttrsChecker->({}),
895     checker => sub {
896     my ($self, $todo) = @_;
897    
898     my @nodes = (@{$todo->{node}->child_nodes});
899     my $new_todos = [];
900    
901 wakaba 1.2 my $s = '';
902 wakaba 1.1 while (@nodes) {
903     my $node = shift @nodes;
904     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
905    
906     my $nt = $node->node_type;
907     if ($nt == 1) {
908     ## not explicitly disallowed
909     $self->{onerror}->(node => $node, type => 'element not allowed');
910     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
911     unshift @nodes, @$sib;
912     push @$new_todos, @$ch;
913     } elsif ($nt == 3 or $nt == 4) {
914 wakaba 1.2 $s .= $node->data;
915 wakaba 1.1 } elsif ($nt == 5) {
916     unshift @nodes, @{$node->child_nodes};
917     }
918     }
919    
920 wakaba 1.2 ## NOTE: No MUST.
921     ## NOTE: There MUST NOT be any white space.
922     Whatpm::URIChecker->check_iri_reference ($s, sub {
923     my %opt = @_;
924     $self->{onerror}->(node => $todo->{node}, level => $opt{level},
925     type => 'URI::'.$opt{type}.
926     (defined $opt{position} ? ':'.$opt{position} : ''));
927     });
928    
929 wakaba 1.1 ## NOTE: Image SHOULD be 1:1 and SHOULD be small
930    
931     return ($new_todos);
932     },
933     };
934    
935     $Element->{$ATOM_NS}->{id} = {
936     attrs_checker => $GetAtomAttrsChecker->({}),
937     checker => sub {
938     my ($self, $todo) = @_;
939    
940     my @nodes = (@{$todo->{node}->child_nodes});
941     my $new_todos = [];
942 wakaba 1.2
943     my $s = '';
944     while (@nodes) {
945     my $node = shift @nodes;
946     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
947    
948     my $nt = $node->node_type;
949     if ($nt == 1) {
950     ## not explicitly disallowed
951     $self->{onerror}->(node => $node, type => 'element not allowed');
952     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
953     unshift @nodes, @$sib;
954     push @$new_todos, @$ch;
955     } elsif ($nt == 3 or $nt == 4) {
956     $s .= $node->data;
957     } elsif ($nt == 5) {
958     unshift @nodes, @{$node->child_nodes};
959     }
960     }
961    
962     ## NOTE: There MUST NOT be any white space.
963     Whatpm::URIChecker->check_iri ($s, sub { # MUST
964     my %opt = @_;
965     $self->{onerror}->(node => $todo->{node}, level => $opt{level},
966     type => 'URI::'.$opt{type}.
967     (defined $opt{position} ? ':'.$opt{position} : ''));
968     });
969     ## TODO: SHOULD be normalized
970    
971     return ($new_todos);
972     },
973     };
974    
975     $Element->{$ATOM_NS}->{link} = {
976     attrs_checker => $GetAtomAttrsChecker->({
977     href => sub {
978     my ($self, $attr) = @_;
979     ## NOTE: There MUST NOT be any white space.
980     Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
981     my %opt = @_;
982     $self->{onerror}->(node => $attr, level => $opt{level},
983     type => 'URI::'.$opt{type}.
984     (defined $opt{position} ? ':'.$opt{position} : ''));
985     });
986     },
987     hreflang => sub { }, # TODO: MUST be an RFC 3066 language tag
988     length => sub { }, # No MUST; in octets.
989     rel => sub { # MUST
990     my ($self, $attr) = @_;
991     my $value = $attr->value;
992     if ($value =~ /\A(?>[0-9A-Za-z._~!\$&'()*+,;=\x{A0}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFEF}\x{10000}-\x{1FFFD}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}\x{40000}-\x{4FFFD}\x{50000}-\x{5FFFD}\x{60000}-\x{6FFFD}\x{70000}-\x{7FFFD}\x{80000}-\x{8FFFD}\x{90000}-\x{9FFFD}\x{A0000}-\x{AFFFD}\x{B0000}-\x{BFFFD}\x{C0000}-\x{CFFFD}\x{D0000}-\x{DFFFD}\x{E1000}-\x{EFFFD}-]|%[0-9A-Fa-f][0-9A-Fa-f]|\@)+\z/) {
993     $value = q<http://www.iana.org/assignments/relation/> . $value;
994     }
995    
996     ## NOTE: There MUST NOT be any white space.
997     Whatpm::URIChecker->check_iri ($value, sub {
998     my %opt = @_;
999     $self->{onerror}->(node => $attr, level => $opt{level},
1000     type => 'URI::'.$opt{type}.
1001     (defined $opt{position} ? ':'.$opt{position} : ''));
1002     });
1003    
1004     ## TODO: Warn if unregistered
1005     },
1006     type => sub { }, # TODO: MUST be a MIME media type
1007     }),
1008     checker => sub {
1009     my ($self, $todo) = @_;
1010    
1011     unless ($todo->{node}->has_attribute_ns (undef, 'href')) { # MUST
1012     $self->{onerror}->(node => $todo->{node},
1013     type => 'attribute missing:href');
1014     }
1015    
1016     if ($todo->{node}->rel eq
1017     q<http://www.iana.org/assignments/relation/enclosure> and
1018     not $todo->{node}->has_attribute_ns (undef, 'length')) {
1019     $self->{onerror}->(node => $todo->{node}, level => 's',
1020     type => 'attribute missing:length');
1021     }
1022    
1023     my @nodes = (@{$todo->{node}->child_nodes});
1024     my $new_todos = [];
1025 wakaba 1.1
1026     while (@nodes) {
1027     my $node = shift @nodes;
1028     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1029    
1030     my $nt = $node->node_type;
1031     if ($nt == 1) {
1032 wakaba 1.2 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1033     unshift @nodes, @$sib;
1034     push @$new_todos, @$ch;
1035     } elsif ($nt == 3 or $nt == 4) {
1036     #
1037     } elsif ($nt == 5) {
1038     unshift @nodes, @{$node->child_nodes};
1039     }
1040     }
1041    
1042     return ($new_todos);
1043     },
1044     };
1045    
1046     $Element->{$ATOM_NS}->{logo} = {
1047     attrs_checker => $GetAtomAttrsChecker->({}),
1048     checker => sub {
1049     my ($self, $todo) = @_;
1050    
1051     my @nodes = (@{$todo->{node}->child_nodes});
1052     my $new_todos = [];
1053    
1054     my $s = '';
1055     while (@nodes) {
1056     my $node = shift @nodes;
1057     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1058    
1059     my $nt = $node->node_type;
1060     if ($nt == 1) {
1061 wakaba 1.1 ## not explicitly disallowed
1062     $self->{onerror}->(node => $node, type => 'element not allowed');
1063     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1064     unshift @nodes, @$sib;
1065     push @$new_todos, @$ch;
1066     } elsif ($nt == 3 or $nt == 4) {
1067 wakaba 1.2 $s .= $node->data;
1068 wakaba 1.1 } elsif ($nt == 5) {
1069     unshift @nodes, @{$node->child_nodes};
1070     }
1071     }
1072    
1073 wakaba 1.2 ## NOTE: There MUST NOT be any white space.
1074     Whatpm::URIChecker->check_iri_reference ($s, sub {
1075     my %opt = @_;
1076     $self->{onerror}->(node => $todo->{node}, level => $opt{level},
1077     type => 'URI::'.$opt{type}.
1078     (defined $opt{position} ? ':'.$opt{position} : ''));
1079     });
1080    
1081     ## NOTE: Image SHOULD be 2:1
1082    
1083     return ($new_todos);
1084     },
1085     };
1086    
1087     $Element->{$ATOM_NS}->{published} = $AtomDateConstruct;
1088    
1089     $Element->{$ATOM_NS}->{rights} = $AtomDateConstruct;
1090     ## SHOULD NOT be used to convey machine-readable information.
1091    
1092     $Element->{$ATOM_NS}->{source} = {
1093     attrs_checker => $GetAtomAttrsChecker->({}),
1094     checker => sub {
1095     my ($self, $todo) = @_;
1096    
1097     my @nodes = (@{$todo->{node}->child_nodes});
1098     my $new_todos = [];
1099     my $has_element = {};
1100     while (@nodes) {
1101     my $node = shift @nodes;
1102     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
1103    
1104     my $nt = $node->node_type;
1105     if ($nt == 1) {
1106     my $nsuri = $node->namespace_uri;
1107     $nsuri = '' unless defined $nsuri;
1108     my $not_allowed;
1109     if ($nsuri eq $ATOM_NS) {
1110     my $ln = $node->manakai_local_name;
1111     if ($ln eq 'entry') {
1112     $has_element->{entry} = 1;
1113     } elsif ({
1114     generator => 1,
1115     icon => 1,
1116     id => 1,
1117     logo => 1,
1118     rights => 1,
1119     subtitle => 1,
1120     title => 1,
1121     updated => 1,
1122     }->{$ln}) {
1123     unless ($has_element->{$ln}) {
1124     $has_element->{$ln} = 1;
1125     $not_allowed = $has_element->{entry};
1126     } else {
1127     $not_allowed = 1;
1128     }
1129     } elsif ($ln eq 'link') {
1130     ## TODO: MUST NOT rel=alternate with same (type, hreflang)
1131     #
1132     $not_allowed = $has_element->{entry};
1133     } elsif ({
1134     author => 1,
1135     category => 1,
1136     contributor => 1,
1137     }->{$ln}) {
1138     $not_allowed = $has_element->{entry};
1139     } else {
1140     $not_allowed = 1;
1141     }
1142     } else {
1143     ## TODO: extension element
1144     $not_allowed = 1;
1145     }
1146     $self->{onerror}->(node => $node, type => 'element not allowed')
1147     if $not_allowed;
1148     my ($sib, $ch) = $self->_check_get_children ($node, $todo);
1149     unshift @nodes, @$sib;
1150     push @$new_todos, @$ch;
1151     } elsif ($nt == 3 or $nt == 4) {
1152     ## TODO: Are white spaces allowed?
1153     $self->{onerror}->(node => $node, type => 'character not allowed');
1154     } elsif ($nt == 5) {
1155     unshift @nodes, @{$node->child_nodes};
1156     }
1157     }
1158 wakaba 1.1
1159     return ($new_todos);
1160     },
1161     };
1162 wakaba 1.2
1163     $Element->{$ATOM_NS}->{subtitle} = $AtomTextConstruct;
1164    
1165     $Element->{$ATOM_NS}->{summary} = $AtomTextConstruct;
1166    
1167     $Element->{$ATOM_NS}->{title} = $AtomTextConstruct;
1168    
1169     $Element->{$ATOM_NS}->{updated} = $AtomDateConstruct;
1170    
1171     ## TODO: signature element
1172    
1173     ## TODO: simple extension element and structured extension element
1174 wakaba 1.1
1175     $Whatpm::ContentChecker::Namespace->{$ATOM_NS}->{loaded} = 1;
1176    
1177     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24