/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/Atom.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/Atom.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.12 - (show annotations) (download)
Thu Mar 20 08:23:42 2008 UTC (16 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.11: +11 -4 lines
++ whatpm/t/ChangeLog	20 Mar 2008 08:23:06 -0000
	* content-model-1.dat: Some test results were incorrect, again... orz

2008-03-20  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ContentChecker/ChangeLog	20 Mar 2008 08:23:36 -0000
	* Atom.pm: Bug fix for validation of |feed| on |author| child.

2008-03-20  Wakaba  <wakaba@suika.fam.cx>

1 package Whatpm::ContentChecker;
2 use strict;
3 require Whatpm::ContentChecker;
4
5 require Whatpm::URIChecker;
6
7 my $ATOM_NS = q<http://www.w3.org/2005/Atom>;
8 my $LINK_REL = q<http://www.iana.org/assignments/relation/>;
9
10 sub FEATURE_RFC4287 () {
11 Whatpm::ContentChecker::FEATURE_STATUS_CR |
12 Whatpm::ContentChecker::FEATURE_ALLOWED
13 }
14
15 ## MUST be well-formed XML (RFC 4287 references XML 1.0 REC 20040204)
16
17 ## NOTE: Commants and PIs are not explicitly allowed.
18
19 our $AttrChecker;
20
21 ## Any element MAY have xml:base, xml:lang
22 my $GetAtomAttrsChecker = sub {
23 my $element_specific_checker = shift;
24 my $element_specific_status = shift;
25 return sub {
26 my ($self, $todo, $element_state) = @_;
27 for my $attr (@{$todo->{node}->attributes}) {
28 my $attr_ns = $attr->namespace_uri;
29 $attr_ns = '' unless defined $attr_ns;
30 my $attr_ln = $attr->manakai_local_name;
31 my $checker;
32 if ($attr_ns eq '') {
33 $checker = $element_specific_checker->{$attr_ln};
34 } else {
35 $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
36 || $AttrChecker->{$attr_ns}->{''};
37 }
38 if ($checker) {
39 $checker->($self, $attr, $todo, $element_state);
40 } elsif ($attr_ln eq '') {
41 #
42 } else {
43 $self->{onerror}->(node => $attr, level => 'unsupported',
44 type => 'attribute');
45 ## ISSUE: No comformance createria for unknown attributes in the spec
46 }
47
48 if ($attr_ns eq '') {
49 $self->_attr_status_info ($attr, $element_specific_status->{$attr_ln});
50 }
51 ## TODO: global attribute
52 }
53 };
54 }; # $GetAtomAttrsChecker
55
56 my $AtomLanguageTagAttrChecker = sub {
57 ## NOTE: See also $HTMLLanguageTagAttrChecker in HTML.pm.
58
59 my ($self, $attr) = @_;
60 my $value = $attr->value;
61 require Whatpm::LangTag;
62 Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
63 my %opt = @_;
64 my $type = 'LangTag:'.$opt{type};
65 $type .= ':' . $opt{subtag} if defined $opt{subtag};
66 $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
67 level => $opt{level});
68 });
69 ## ISSUE: RFC 4646 (3066bis)?
70 }; # $AtomLanguageTagAttrChecker
71
72 my %AtomChecker = (
73 %Whatpm::ContentChecker::AnyChecker,
74 status => FEATURE_RFC4287,
75 check_attrs => $GetAtomAttrsChecker->({}, {}),
76 );
77
78 my %AtomTextConstruct = (
79 %AtomChecker,
80 check_start => sub {
81 my ($self, $item, $element_state) = @_;
82 $element_state->{type} = 'text';
83 $element_state->{value} = '';
84 },
85 check_attrs => $GetAtomAttrsChecker->({
86 type => sub {
87 my ($self, $attr, $item, $element_state) = @_;
88 my $value = $attr->value;
89 if ($value eq 'text' or $value eq 'html' or $value eq 'xhtml') { # MUST
90 $element_state->{type} = $value;
91 } else {
92 ## NOTE: IMT MUST NOT be used here.
93 $self->{onerror}->(node => $attr, type => 'keyword:invalid');
94 }
95 }, # checked in |checker|
96 }, {
97 type => FEATURE_RFC4287,
98 }),
99 check_child_element => sub {
100 my ($self, $item, $child_el, $child_nsuri, $child_ln,
101 $child_is_transparent, $element_state) = @_;
102 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
103 $self->{onerror}->(node => $child_el,
104 type => 'element not allowed:minus',
105 level => $self->{must_level});
106 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
107 #
108 } else {
109 if ($element_state->{type} eq 'text' or
110 $element_state->{type} eq 'html') { # MUST NOT
111 $self->{onerror}->(node => $child_el,
112 type => 'element not allowed:atom|TextConstruct',
113 level => $self->{must_level});
114 } elsif ($element_state->{type} eq 'xhtml') {
115 if ($child_nsuri eq q<http://www.w3.org/1999/xhtml> and
116 $child_ln eq 'div') { # MUST
117 if ($element_state->{has_div}) {
118 $self->{onerror}
119 ->(node => $child_el,
120 type => 'element not allowed:atom|TextConstruct',
121 level => $self->{must_level});
122 } else {
123 $element_state->{has_div} = 1;
124 ## TODO: SHOULD be suitable for handling as HTML [XHTML10]
125 }
126 } else {
127 $self->{onerror}->(node => $child_el,
128 type => 'element not allowed:atom|TextConstruct',
129 level => $self->{must_level});
130 }
131 } else {
132 die "atom:TextConstruct type error: $element_state->{type}";
133 }
134 }
135 },
136 check_child_text => sub {
137 my ($self, $item, $child_node, $has_significant, $element_state) = @_;
138 if ($element_state->{type} eq 'text') {
139 #
140 } elsif ($element_state->{type} eq 'html') {
141 $element_state->{value} .= $child_node->text_content;
142 ## NOTE: Markup MUST be escaped.
143 } elsif ($element_state->{type} eq 'xhtml') {
144 if ($has_significant) {
145 $self->{onerror}->(node => $child_node,
146 type => 'character not allowed:atom|TextConstruct',
147 level => $self->{must_level});
148 }
149 } else {
150 die "atom:TextConstruct type error: $element_state->{type}";
151 }
152 },
153 ## type=html
154 ## TODO: SHOULD be suitable for handling as HTML [HTML4]
155 ## TODO: HTML SHOULD be valid as if within <div>
156 check_end => sub {
157 my ($self, $item, $element_state) = @_;
158 if ($element_state->{type} eq 'xhtml' and
159 not $element_state->{has_div}) {
160 $self->{onerror}->(node => $item->{node},
161 type => 'element missing:div',
162 level => $self->{must_level});
163 }
164
165 $AtomChecker{check_end}->(@_);
166 },
167 ); # %AtomTextConstruct
168
169 my %AtomPersonConstruct = (
170 %AtomChecker,
171 check_child_element => sub {
172 my ($self, $item, $child_el, $child_nsuri, $child_ln,
173 $child_is_transparent, $element_state) = @_;
174 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
175 $self->{onerror}->(node => $child_el,
176 type => 'element not allowed:minus',
177 level => $self->{must_level});
178 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
179 #
180 } elsif ($child_nsuri eq $ATOM_NS) {
181 if ($child_ln eq 'name') {
182 if ($element_state->{has_name}) {
183 $self->{onerror}
184 ->(node => $child_el,
185 type => 'element not allowed:atom|PersonConstruct',
186 level => $self->{must_level});
187 } else {
188 $element_state->{has_name} = 1;
189 }
190 } elsif ($child_ln eq 'uri') {
191 if ($element_state->{has_uri}) {
192 $self->{onerror}
193 ->(node => $child_el,
194 type => 'element not allowed:atom|PersonConstruct',
195 level => $self->{must_level});
196 } else {
197 $element_state->{has_uri} = 1;
198 }
199 } elsif ($child_ln eq 'email') {
200 if ($element_state->{has_email}) {
201 $self->{onerror}
202 ->(node => $child_el,
203 type => 'element not allowed:atom|PersonConstruct',
204 level => $self->{must_level});
205 } else {
206 $element_state->{has_email} = 1;
207 }
208 } else {
209 $self->{onerror}
210 ->(node => $child_el,
211 type => 'element not allowed:atom|PersonConstruct',
212 level => $self->{must_level});
213 }
214 } else {
215 $self->{onerror}
216 ->(node => $child_el,
217 type => 'element not allowed:atom|PersonConstruct',
218 level => $self->{must_level});
219 }
220 ## TODO: extension element
221 },
222 check_child_text => sub {
223 my ($self, $item, $child_node, $has_significant, $element_state) = @_;
224 if ($has_significant) {
225 $self->{onerror}->(node => $child_node,
226 type => 'character not allowed:atom|PersonConstruct',
227 level => $self->{must_level});
228 }
229 },
230 check_end => sub {
231 my ($self, $item, $element_state) = @_;
232
233 unless ($element_state->{has_name}) {
234 $self->{onerror}->(node => $item->{node},
235 type => 'element missing:atom|name',
236 level => $self->{must_level});
237 }
238
239 $AtomChecker{check_end}->(@_);
240 },
241 ); # %AtomPersonConstruct
242
243 our $Element;
244
245 $Element->{$ATOM_NS}->{''} = {
246 %AtomChecker,
247 status => 0,
248 };
249
250 $Element->{$ATOM_NS}->{name} = {
251 %AtomChecker,
252
253 ## NOTE: Strictly speaking, structure and semantics for atom:name
254 ## element outside of Person construct is not defined.
255
256 ## NOTE: No constraint.
257 };
258
259 $Element->{$ATOM_NS}->{uri} = {
260 %AtomChecker,
261
262 ## NOTE: Strictly speaking, structure and semantics for atom:uri
263 ## element outside of Person construct is not defined.
264
265 ## NOTE: Elements are not explicitly disallowed.
266
267 check_start => sub {
268 my ($self, $item, $element_state) = @_;
269 $element_state->{value} = '';
270 },
271 check_child_text => sub {
272 my ($self, $item, $child_node, $has_significant, $element_state) = @_;
273 $element_state->{value} .= $child_node->data;
274 },
275 check_end => sub {
276 my ($self, $item, $element_state) = @_;
277
278 ## NOTE: There MUST NOT be any white space.
279 Whatpm::URIChecker->check_iri_reference ($element_state->{value}, sub {
280 my %opt = @_;
281 $self->{onerror}->(node => $item->{node}, level => $opt{level},
282 type => 'URI::'.$opt{type}.
283 (defined $opt{position} ? ':'.$opt{position} : ''));
284 });
285
286 $AtomChecker{check_end}->(@_);
287 },
288 };
289
290 $Element->{$ATOM_NS}->{email} = {
291 %AtomChecker,
292
293 ## NOTE: Strictly speaking, structure and semantics for atom:email
294 ## element outside of Person construct is not defined.
295
296 ## NOTE: Elements are not explicitly disallowed.
297
298 check_end => sub {
299 my ($self, $item, $element_state) = @_;
300
301 ## TODO: addr-spec
302 $self->{onerror}->(node => $item->{node},
303 type => 'addr-spec not supported',
304 level => $self->{unsupported_level});
305
306 $AtomChecker{check_end}->(@_);
307 },
308 };
309
310 ## MUST NOT be any white space
311 my %AtomDateConstruct = (
312 %AtomChecker,
313
314 ## NOTE: It does not explicitly say that there MUST NOT be any element.
315
316 check_start => sub {
317 my ($self, $item, $element_state) = @_;
318 $element_state->{value} = '';
319 },
320 check_child_text => sub {
321 my ($self, $item, $child_node, $has_significant, $element_state) = @_;
322 $element_state->{value} .= $child_node->data;
323 },
324 check_end => sub {
325 my ($self, $item, $element_state) = @_;
326
327 ## MUST: RFC 3339 |date-time| with uppercase |T| and |Z|
328 if ($element_state->{value} =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})(?>\.[0-9]+)?(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
329 my ($y, $M, $d, $h, $m, $s, $zh, $zm)
330 = ($1, $2, $3, $4, $5, $6, $7, $8);
331 my $node = $item->{node};
332
333 ## Check additional constraints described or referenced in
334 ## comments of ABNF rules for |date-time|.
335 my $level = $self->{must_level};
336 if (0 < $M and $M < 13) {
337 $self->{onerror}->(node => $node, type => 'datetime:bad day',
338 level => $level)
339 if $d < 1 or
340 $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
341 $self->{onerror}->(node => $node, type => 'datetime:bad day',
342 level => $level)
343 if $M == 2 and $d == 29 and
344 not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
345 } else {
346 $self->{onerror}->(node => $node, type => 'datetime:bad month',
347 level => $level);
348 }
349 $self->{onerror}->(node => $node, type => 'datetime:bad hour',
350 level => $level) if $h > 23;
351 $self->{onerror}->(node => $node, type => 'datetime:bad minute',
352 level => $level) if $m > 59;
353 $self->{onerror}->(node => $node, type => 'datetime:bad second',
354 level => $level)
355 if $s > 60; ## NOTE: Validness of leap seconds are not checked.
356 $self->{onerror}->(node => $node, type => 'datetime:bad timezone hour',
357 level => $level) if $zh > 23;
358 $self->{onerror}->(node => $node, type => 'datetime:bad timezone minute',
359 level => $level) if $zm > 59;
360 } else {
361 $self->{onerror}->(node => $item->{node},
362 type => 'datetime:syntax error',
363 level => $self->{must_level});
364 }
365 ## NOTE: SHOULD be accurate as possible (cannot be checked)
366
367 $AtomChecker{check_end}->(@_);
368 },
369 ); # %AtomDateConstruct
370
371 $Element->{$ATOM_NS}->{entry} = {
372 %AtomChecker,
373 is_root => 1,
374 check_child_element => sub {
375 my ($self, $item, $child_el, $child_nsuri, $child_ln,
376 $child_is_transparent, $element_state) = @_;
377
378 ## NOTE: metadata elements, followed by atom:entry* (no explicit MAY)
379
380 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
381 $self->{onerror}->(node => $child_el,
382 type => 'element not allowed:minus',
383 level => $self->{must_level});
384 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
385 #
386 } elsif ($child_nsuri eq $ATOM_NS) {
387 my $not_allowed;
388 if ({ # MUST (0, 1)
389 content => 1,
390 id => 1,
391 published => 1,
392 rights => 1,
393 source => 1,
394 summary => 1,
395 ## TODO: MUST if child::content/@src | child::content/@type = IMT, !text/ !/xml !+xml
396 title => 1,
397 updated => 1,
398 }->{$child_ln}) {
399 unless ($element_state->{has_element}->{$child_ln}) {
400 $element_state->{has_element}->{$child_ln} = 1;
401 $not_allowed = $element_state->{has_element}->{entry};
402 } else {
403 $not_allowed = 1;
404 }
405 } elsif ($child_ln eq 'link') { # MAY
406 if ($child_el->rel eq $LINK_REL . 'alternate') {
407 my $type = $child_el->get_attribute_ns (undef, 'type');
408 $type = '' unless defined $type;
409 my $hreflang = $child_el->get_attribute_ns (undef, 'hreflang');
410 $hreflang = '' unless defined $hreflang;
411 my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
412 (defined $hreflang ? ':'.$hreflang : '');
413 unless ($element_state->{has_element}->{$key}) {
414 $element_state->{has_element}->{$key} = 1;
415 $element_state->{has_element}->{'link.alternate'} = 1;
416 } else {
417 $not_allowed = 1;
418 }
419 }
420
421 ## NOTE: MAY
422 $not_allowed ||= $element_state->{has_element}->{entry};
423 } elsif ({ # MAY
424 category => 1,
425 contributor => 1,
426 }->{$child_ln}) {
427 $not_allowed = $element_state->{has_element}->{entry};
428 } elsif ($child_ln eq 'author') { # MAY
429 $not_allowed = $element_state->{has_element}->{entry};
430 $element_state->{has_author} = 1; # ./author | ./source/author
431 $element_state->{has_element}->{$child_ln} = 1; # ./author
432 } else {
433 $not_allowed = 1;
434 }
435 if ($not_allowed) {
436 $self->{onerror}->(node => $child_el, type => 'element not allowed');
437 }
438 } else {
439 ## TODO: extension element
440 $self->{onerror}->(node => $child_el, type => 'element not allowed');
441 }
442 },
443 check_child_text => sub {
444 my ($self, $item, $child_node, $has_significant, $element_state) = @_;
445 if ($has_significant) {
446 $self->{onerror}->(node => $child_node, type => 'character not allowed',
447 level => $self->{must_level});
448 }
449 },
450 check_end => sub {
451 my ($self, $item, $element_state) = @_;
452
453 if ($element_state->{has_author}) {
454 ## NOTE: There is either a child atom:author element
455 ## or a child atom:source element which contains an atom:author
456 ## child element.
457 #
458 } else {
459 A: {
460 my $root = $item->{node}->owner_document->document_element;
461 if ($root and $root->manakai_local_name eq 'feed') {
462 my $nsuri = $root->namespace_uri;
463 if (defined $nsuri and $nsuri eq $ATOM_NS) {
464 ## NOTE: An Atom Feed Document.
465 for my $root_child (@{$root->child_nodes}) {
466 ## NOTE: Entity references are not supported.
467 next unless $root_child->node_type == 1; # ELEMENT_NODE
468 next unless $root_child->manakai_local_name eq 'author';
469 my $root_child_nsuri = $root_child->namespace_uri;
470 next unless defined $root_child_nsuri;
471 next unless $root_child_nsuri eq $ATOM_NS;
472 last A;
473 }
474 }
475 }
476
477 $self->{onerror}->(node => $item->{node},
478 type => 'element missing:atom|author',
479 level => $self->{must_level});
480 } # A
481 }
482
483 unless ($element_state->{has_element}->{author}) {
484 $item->{parent_state}->{has_no_author_entry} = 1; # for atom:feed's check
485 }
486
487 ## TODO: If entry's with same id, then updated SHOULD be different
488
489 unless ($element_state->{has_element}->{id}) { # MUST
490 $self->{onerror}->(node => $item->{node},
491 type => 'element missing:atom|id');
492 }
493 unless ($element_state->{has_element}->{title}) { # MUST
494 $self->{onerror}->(node => $item->{node},
495 type => 'element missing:atom|title');
496 }
497 unless ($element_state->{has_element}->{updated}) { # MUST
498 $self->{onerror}->(node => $item->{node},
499 type => 'element missing:atom|updated');
500 }
501 if (not $element_state->{has_element}->{content} and
502 not $element_state->{has_element}->{'link.alternate'}) {
503 $self->{onerror}->(node => $item->{node},
504 type => 'element missing:atom|link|alternate');
505 }
506 },
507 };
508
509 $Element->{$ATOM_NS}->{feed} = {
510 %AtomChecker,
511 is_root => 1,
512 check_child_element => sub {
513 my ($self, $item, $child_el, $child_nsuri, $child_ln,
514 $child_is_transparent, $element_state) = @_;
515
516 ## NOTE: metadata elements, followed by atom:entry* (no explicit MAY)
517
518 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
519 $self->{onerror}->(node => $child_el,
520 type => 'element not allowed:minus',
521 level => $self->{must_level});
522 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
523 #
524 } elsif ($child_nsuri eq $ATOM_NS) {
525 my $not_allowed;
526 if ($child_ln eq 'entry') {
527 $element_state->{has_element}->{entry} = 1;
528 } elsif ({ # MUST (0, 1)
529 generator => 1,
530 icon => 1,
531 id => 1,
532 logo => 1,
533 rights => 1,
534 subtitle => 1,
535 title => 1,
536 updated => 1,
537 }->{$child_ln}) {
538 unless ($element_state->{has_element}->{$child_ln}) {
539 $element_state->{has_element}->{$child_ln} = 1;
540 $not_allowed = $element_state->{has_element}->{entry};
541 } else {
542 $not_allowed = 1;
543 }
544 } elsif ($child_ln eq 'link') {
545 my $rel = $child_el->rel;
546 if ($rel eq $LINK_REL . 'alternate') {
547 my $type = $child_el->get_attribute_ns (undef, 'type');
548 $type = '' unless defined $type;
549 my $hreflang = $child_el->get_attribute_ns (undef, 'hreflang');
550 $hreflang = '' unless defined $hreflang;
551 my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
552 (defined $hreflang ? ':'.$hreflang : '');
553 unless ($element_state->{has_element}->{$key}) {
554 $element_state->{has_element}->{$key} = 1;
555 } else {
556 $not_allowed = 1;
557 }
558 } elsif ($rel eq $LINK_REL . 'self') {
559 $element_state->{has_element}->{'link.self'} = 1;
560 }
561
562 ## NOTE: MAY
563 $not_allowed = $element_state->{has_element}->{entry};
564 } elsif ({ # MAY
565 category => 1,
566 contributor => 1,
567 }->{$child_ln}) {
568 $not_allowed = $element_state->{has_element}->{entry};
569 } elsif ($child_ln eq 'author') { # MAY
570 $not_allowed = $element_state->{has_element}->{entry};
571 $element_state->{has_element}->{author} = 1;
572 } else {
573 $not_allowed = 1;
574 }
575 $self->{onerror}->(node => $child_el, type => 'element not allowed')
576 if $not_allowed;
577 } else {
578 ## TODO: extension element
579 $self->{onerror}->(node => $child_el, type => 'element not allowed');
580 }
581 },
582 check_child_text => sub {
583 my ($self, $item, $child_node, $has_significant, $element_state) = @_;
584 if ($has_significant) {
585 $self->{onerror}->(node => $child_node, type => 'character not allowed',
586 level => $self->{must_level});
587 }
588 },
589 check_end => sub {
590 my ($self, $item, $element_state) = @_;
591
592 if ($element_state->{has_no_author_entry} and
593 not $element_state->{has_element}->{author}) {
594 $self->{onerror}->(node => $item->{node},
595 type => 'element missing:atom|author',
596 level => $self->{must_level});
597 ## ISSUE: If there is no |atom:entry| element,
598 ## there should be an |atom:author| element?
599 }
600
601 ## TODO: If entry's with same id, then updated SHOULD be different
602
603 unless ($element_state->{has_element}->{id}) { # MUST
604 $self->{onerror}->(node => $item->{node},
605 type => 'element missing:atom|id');
606 }
607 unless ($element_state->{has_element}->{title}) { # MUST
608 $self->{onerror}->(node => $item->{node},
609 type => 'element missing:atom|title');
610 }
611 unless ($element_state->{has_element}->{updated}) { # MUST
612 $self->{onerror}->(node => $item->{node},
613 type => 'element missing:atom|updated');
614 }
615 unless ($element_state->{has_element}->{'link.self'}) {
616 $self->{onerror}->(node => $item->{node}, level => 's',
617 type => 'element missing:atom|link|self');
618 }
619
620 $AtomChecker{check_end}->(@_);
621 },
622 };
623
624 $Element->{$ATOM_NS}->{content} = {
625 %AtomChecker,
626 check_start => sub {
627 my ($self, $item, $element_state) = @_;
628 $element_state->{type} = 'text';
629 $element_state->{text} = '';
630 },
631 check_attrs => $GetAtomAttrsChecker->({
632 src => sub {
633 my ($self, $attr, $item, $element_state) = @_;
634
635 $element_state->{has_src} = 1;
636
637 ## NOTE: There MUST NOT be any white space.
638 Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
639 my %opt = @_;
640 $self->{onerror}->(node => $item->{node}, level => $opt{level},
641 type => 'URI::'.$opt{type}.
642 (defined $opt{position} ? ':'.$opt{position} : ''));
643 });
644 },
645 type => sub {
646 my ($self, $attr, $item, $element_state) = @_;
647
648 $element_state->{has_type} = 1;
649
650 my $value = $attr->value;
651 if ($value eq 'text' or $value eq 'html' or $value eq 'xhtml') {
652 # MUST
653 } else {
654 ## NOTE: MUST be a MIME media type. What is "MIME media type"?
655 my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
656 my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
657 my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
658 if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
659 my @type = ($1, $2);
660 my $param = $3;
661 while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
662 if (defined $2) {
663 push @type, $1 => $2;
664 } else {
665 my $n = $1;
666 my $v = $2;
667 $v =~ s/\\(.)/$1/gs;
668 push @type, $n => $v;
669 }
670 }
671 require Whatpm::IMTChecker;
672 Whatpm::IMTChecker->check_imt (sub {
673 my %opt = @_;
674 $self->{onerror}->(node => $attr, level => $opt{level},
675 type => 'IMT:'.$opt{type});
676 }, @type);
677 } else {
678 $self->{onerror}->(node => $attr, type => 'IMT:syntax error',
679 level => $self->{must_level});
680 }
681 }
682
683 if ($value =~ m![+/][Xx][Mm][Ll]\z!) {
684 ## ISSUE: There is no definition for "XML media type" in RFC 3023.
685 ## Is |application/xml-dtd| an XML media type?
686 $value = 'xml';
687 } elsif ($value =~ m!^[Tt][Ee][Xx][Tt]/!) {
688 $value = 'mime_text';
689 } elsif ($value =~ m!^(?>message|multipart)/!i) {
690 $self->{onerror}->(node => $attr, type => 'IMT:composite',
691 level => $self->{must_level});
692 }
693
694 $element_state->{type} = $value;
695 },
696 }, {
697 src => FEATURE_RFC4287,
698 type => FEATURE_RFC4287,
699 }),
700 check_child_element => sub {
701 my ($self, $item, $child_el, $child_nsuri, $child_ln,
702 $child_is_transparent, $element_state) = @_;
703
704 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
705 $self->{onerror}->(node => $child_el,
706 type => 'element not allowed:minus',
707 level => $self->{must_level});
708 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
709 #
710 } else {
711 if ($element_state->{type} eq 'text' or
712 $element_state->{type} eq 'html' or
713 $element_state->{type} eq 'mime_text') {
714 # MUST NOT
715 $self->{onerror}->(node => $child_el,
716 type => 'element not allowed:atom|content',
717 level => $self->{must_level});
718 } elsif ($element_state->{type} eq 'xhtml') {
719 if ($element_state->{has_div}) {
720 $self->{onerror}->(node => $child_el,
721 type => 'element not allowed:atom|content',
722 level => $self->{must_level});
723 } else {
724 ## TODO: SHOULD be suitable for handling as HTML [XHTML10]
725 $element_state->{has_div} = 1;
726 }
727 } elsif ($element_state->{type} eq 'xml') {
728 ## MAY contain elements
729 if ($element_state->{has_src}) {
730 $self->{onerror}->(node => $child_el,
731 type => 'element not allowed:atom|content',
732 level => $self->{must_level});
733 }
734 } else {
735 ## NOTE: Elements are not explicitly disallowed.
736 }
737 }
738 },
739 ## NOTE: If @src, the element MUST be empty. What is "empty"?
740 ## Is |<e><!----></e>| empty? |<e>&e;</e>| where |&e;| has
741 ## empty replacement tree shuld be empty, since Atom is defined
742 ## in terms of XML Information Set where entities are expanded.
743 ## (but what if |&e;| is an unexpanded entity?)
744 check_child_text => sub {
745 my ($self, $item, $child_node, $has_significant, $element_state) = @_;
746 if ($has_significant) {
747 if ($element_state->{has_src}) {
748 $self->{onerror}->(node => $child_node,
749 type => 'character not allowed',
750 level => $self->{must_level});
751 } elsif ($element_state->{type} eq 'xhtml' or
752 $element_state->{type} eq 'xml') {
753 $self->{onerror}->(node => $child_node,
754 type => 'character not allowed:atom|content',
755 level => $self->{must_level});
756 }
757 }
758
759 $element_state->{text} .= $child_node->data;
760
761 ## NOTE: type=text/* has no further restriction (i.e. the content don't
762 ## have to conform to the definition of the type).
763 },
764 check_end => sub {
765 my ($self, $item, $element_state) = @_;
766
767 if ($element_state->{has_src}) {
768 if (not $element_state->{has_type}) {
769 $self->{onerror}->(node => $item->{node},
770 type => 'attribute missing:type',
771 level => $self->{should_level});
772 }
773 if ($element_state->{type} eq 'text' or
774 $element_state->{type} eq 'html' or
775 $element_state->{type} eq 'xhtml') {
776 $self->{onerror}
777 ->(node => $item->{node}->get_attribute_node_ns (undef, 'type'),
778 type => 'not IMT', level => $self->{must_level});
779 }
780 }
781
782 if ($element_state->{type} eq 'xhtml') {
783 unless ($element_state->{has_div}) {
784 $self->{onerror}->(node => $item->{node},
785 type => 'element missing:div',
786 level => $self->{must_level});
787 }
788 } elsif ($element_state->{type} eq 'html') {
789 ## TODO: SHOULD be suitable for handling as HTML [HTML4]
790 # markup MUST be escaped
791 $self->{onsubdoc}->({s => $element_state->{text},
792 container_node => $item->{node},
793 media_type => 'text/html',
794 inner_html_element => 'div',
795 is_char_string => 1});
796 } elsif ($element_state->{type} eq 'xml') {
797 ## NOTE: SHOULD be suitable for handling as $value.
798 ## If no @src, this would normally mean it contains a
799 ## single child element that would serve as the root element.
800 $self->{onerror}->(node => $item->{node},
801 level => $self->{unsupported_level},
802 type => 'atom|content not supported',
803 value => $item->{node}->get_attribute_ns
804 (undef, 'type'));
805 } elsif ($element_state->{type} eq 'text' or
806 $element_state->{type} eq 'mime-text') {
807 #
808 } else {
809 ## TODO: $s = valid Base64ed [RFC 3548] where
810 ## MAY leading and following "white space" (what?)
811 ## and lines separated by a single U+000A
812
813 ## NOTE: SHOULD be suitable for the indicated media type.
814 $self->{onerror}->(node => $item->{node},
815 level => $self->{unsupported_level},
816 type => 'atom|content not supported',
817 value => $item->{node}->get_attribute_ns
818 (undef, 'type'));
819 }
820
821 $AtomChecker{check_end}->(@_);
822 },
823 };
824 ## TODO: Tests for <html:nest/> in <atom:content/>
825
826 $Element->{$ATOM_NS}->{author} = \%AtomPersonConstruct;
827
828 $Element->{$ATOM_NS}->{category} = {
829 %AtomChecker,
830 check_attrs => $GetAtomAttrsChecker->({
831 label => sub { 1 }, # no value constraint
832 scheme => sub { # NOTE: No MUST.
833 my ($self, $attr) = @_;
834 ## NOTE: There MUST NOT be any white space.
835 Whatpm::URIChecker->check_iri ($attr->value, sub {
836 my %opt = @_;
837 $self->{onerror}->(node => $attr, level => $opt{level},
838 type => 'URI::'.$opt{type}.
839 (defined $opt{position} ? ':'.$opt{position} : ''));
840 });
841 },
842 term => sub {
843 my ($self, $attr, $item, $element_state) = @_;
844
845 ## NOTE: No value constraint.
846
847 $element_state->{has_term} = 1;
848 },
849 }, {
850 label => FEATURE_RFC4287,
851 scheme => FEATURE_RFC4287,
852 term => FEATURE_RFC4287,
853 }),
854 check_end => sub {
855 my ($self, $item, $element_state) = @_;
856 unless ($element_state->{has_term}) {
857 $self->{onerror}->(node => $item->{node},
858 type => 'attribute missing:term');
859 }
860
861 $AtomChecker{check_end}->(@_);
862 },
863 ## NOTE: Meaning of content is not defined.
864 };
865
866 $Element->{$ATOM_NS}->{contributor} = \%AtomPersonConstruct;
867
868 ## TODO: Anything below does not support <html:nest/> yet.
869
870 $Element->{$ATOM_NS}->{generator} = {
871 %AtomChecker,
872 check_attrs => $GetAtomAttrsChecker->({
873 uri => sub { # MUST
874 my ($self, $attr) = @_;
875 ## NOTE: There MUST NOT be any white space.
876 Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
877 my %opt = @_;
878 $self->{onerror}->(node => $attr, level => $opt{level},
879 type => 'URI::'.$opt{type}.
880 (defined $opt{position} ? ':'.$opt{position} : ''));
881 });
882 ## NOTE: Dereferencing SHOULD produce a representation
883 ## that is relevant to the agent.
884 },
885 version => sub { 1 }, # no value constraint
886 }, {
887 uri => FEATURE_RFC4287,
888 version => FEATURE_RFC4287,
889 }),
890
891 ## NOTE: Elements are not explicitly disallowed.
892
893 ## NOTE: Content MUST be a string that is a human-readable name for
894 ## the generating agent.
895 };
896
897 $Element->{$ATOM_NS}->{icon} = {
898 %AtomChecker,
899 check_start => sub {
900 my ($self, $item, $element_state) = @_;
901 $element_state->{value} = '';
902 },
903 ## NOTE: Elements are not explicitly disallowed.
904 check_child_text => sub {
905 my ($self, $item, $child_node, $has_significant, $element_state) = @_;
906 $element_state->{value} .= $child_node->data;
907 },
908 check_end => sub {
909 my ($self, $item, $element_state) = @_;
910
911 ## NOTE: No MUST.
912 ## NOTE: There MUST NOT be any white space.
913 Whatpm::URIChecker->check_iri_reference ($element_state->{value}, sub {
914 my %opt = @_;
915 $self->{onerror}->(node => $item->{node}, level => $opt{level},
916 type => 'URI::'.$opt{type}.
917 (defined $opt{position} ? ':'.$opt{position} : ''));
918 });
919
920 ## NOTE: Image SHOULD be 1:1 and SHOULD be small
921
922 $AtomChecker{check_end}->(@_);
923 },
924 };
925
926 $Element->{$ATOM_NS}->{id} = {
927 %AtomChecker,
928 check_start => sub {
929 my ($self, $item, $element_state) = @_;
930 $element_state->{value} = '';
931 },
932 ## NOTE: Elements are not explicitly disallowed.
933 check_child_text => sub {
934 my ($self, $item, $child_node, $has_significant, $element_state) = @_;
935 $element_state->{value} .= $child_node->data;
936 },
937 check_end => sub {
938 my ($self, $item, $element_state) = @_;
939
940 ## NOTE: There MUST NOT be any white space.
941 Whatpm::URIChecker->check_iri ($element_state->{value}, sub {
942 my %opt = @_;
943 $self->{onerror}->(node => $item->{node}, level => $opt{level},
944 type => 'URI::'.$opt{type}.
945 (defined $opt{position} ? ':'.$opt{position} : ''));
946 });
947 ## TODO: SHOULD be normalized
948
949 $AtomChecker{check_end}->(@_);
950 },
951 };
952
953 $Element->{$ATOM_NS}->{link} = {
954 %AtomChecker,
955 check_attrs => $GetAtomAttrsChecker->({
956 href => sub {
957 my ($self, $attr) = @_;
958 ## NOTE: There MUST NOT be any white space.
959 Whatpm::URIChecker->check_iri_reference ($attr->value, sub {
960 my %opt = @_;
961 $self->{onerror}->(node => $attr, level => $opt{level},
962 type => 'URI::'.$opt{type}.
963 (defined $opt{position} ? ':'.$opt{position} : ''));
964 });
965 },
966 hreflang => $AtomLanguageTagAttrChecker,
967 length => sub { }, # No MUST; in octets.
968 rel => sub { # MUST
969 my ($self, $attr) = @_;
970 my $value = $attr->value;
971 if ($value =~ /\A(?>[0-9A-Za-z._~!\$&'()*+,;=\x{A0}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFEF}\x{10000}-\x{1FFFD}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}\x{40000}-\x{4FFFD}\x{50000}-\x{5FFFD}\x{60000}-\x{6FFFD}\x{70000}-\x{7FFFD}\x{80000}-\x{8FFFD}\x{90000}-\x{9FFFD}\x{A0000}-\x{AFFFD}\x{B0000}-\x{BFFFD}\x{C0000}-\x{CFFFD}\x{D0000}-\x{DFFFD}\x{E1000}-\x{EFFFD}-]|%[0-9A-Fa-f][0-9A-Fa-f]|\@)+\z/) {
972 $value = $LINK_REL . $value;
973 }
974
975 ## NOTE: There MUST NOT be any white space.
976 Whatpm::URIChecker->check_iri ($value, sub {
977 my %opt = @_;
978 $self->{onerror}->(node => $attr, level => $opt{level},
979 type => 'URI::'.$opt{type}.
980 (defined $opt{position} ? ':'.$opt{position} : ''));
981 });
982
983 ## TODO: Warn if unregistered
984 },
985 title => sub { }, # No MUST
986 type => sub {
987 ## NOTE: MUST be a MIME media type. What is "MIME media type"?
988 my ($self, $attr) = @_;
989 my $value = $attr->value;
990 my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
991 my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
992 my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
993 if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
994 my @type = ($1, $2);
995 my $param = $3;
996 while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
997 if (defined $2) {
998 push @type, $1 => $2;
999 } else {
1000 my $n = $1;
1001 my $v = $2;
1002 $v =~ s/\\(.)/$1/gs;
1003 push @type, $n => $v;
1004 }
1005 }
1006 require Whatpm::IMTChecker;
1007 Whatpm::IMTChecker->check_imt (sub {
1008 my %opt = @_;
1009 $self->{onerror}->(node => $attr, level => $opt{level},
1010 type => 'IMT:'.$opt{type});
1011 }, @type);
1012 } else {
1013 $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
1014 }
1015 },
1016 }, {
1017 href => FEATURE_RFC4287,
1018 hreflang => FEATURE_RFC4287,
1019 length => FEATURE_RFC4287,
1020 rel => FEATURE_RFC4287,
1021 title => FEATURE_RFC4287,
1022 type => FEATURE_RFC4287,
1023 }),
1024 check_start => sub {
1025 my ($self, $item, $element_state) = @_;
1026
1027 unless ($item->{node}->has_attribute_ns (undef, 'href')) { # MUST
1028 $self->{onerror}->(node => $item->{node},
1029 type => 'attribute missing:href');
1030 }
1031
1032 if ($item->{node}->rel eq $LINK_REL . 'enclosure' and
1033 not $item->{node}->has_attribute_ns (undef, 'length')) {
1034 $self->{onerror}->(node => $item->{node}, level => 's',
1035 type => 'attribute missing:length');
1036 }
1037 },
1038 };
1039
1040 $Element->{$ATOM_NS}->{logo} = {
1041 %AtomChecker,
1042 ## NOTE: Child elements are not explicitly disallowed
1043 check_start => sub {
1044 my ($self, $item, $element_state) = @_;
1045 $element_state->{value} = '';
1046 },
1047 ## NOTE: Elements are not explicitly disallowed.
1048 check_child_text => sub {
1049 my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1050 $element_state->{value} .= $child_node->data;
1051 },
1052 check_end => sub {
1053 my ($self, $item, $element_state) = @_;
1054
1055 ## NOTE: There MUST NOT be any white space.
1056 Whatpm::URIChecker->check_iri_reference ($element_state->{value}, sub {
1057 my %opt = @_;
1058 $self->{onerror}->(node => $item->{node}, level => $opt{level},
1059 type => 'URI::'.$opt{type}.
1060 (defined $opt{position} ? ':'.$opt{position} : ''));
1061 });
1062
1063 ## NOTE: Image SHOULD be 2:1
1064
1065 $AtomChecker{check_end}->(@_);
1066 },
1067 };
1068
1069 $Element->{$ATOM_NS}->{published} = \%AtomDateConstruct;
1070
1071 $Element->{$ATOM_NS}->{rights} = \%AtomDateConstruct;
1072 ## NOTE: SHOULD NOT be used to convey machine-readable information.
1073
1074 $Element->{$ATOM_NS}->{source} = {
1075 %AtomChecker,
1076 check_child_element => sub {
1077 my ($self, $item, $child_el, $child_nsuri, $child_ln,
1078 $child_is_transparent, $element_state) = @_;
1079
1080 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
1081 $self->{onerror}->(node => $child_el,
1082 type => 'element not allowed:minus',
1083 level => $self->{must_level});
1084 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1085 #
1086 } elsif ($child_nsuri eq $ATOM_NS) {
1087 my $not_allowed;
1088 if ($child_ln eq 'entry') {
1089 $element_state->{has_element}->{entry} = 1;
1090 } elsif ({
1091 generator => 1,
1092 icon => 1,
1093 id => 1,
1094 logo => 1,
1095 rights => 1,
1096 subtitle => 1,
1097 title => 1,
1098 updated => 1,
1099 }->{$child_ln}) {
1100 unless ($element_state->{has_element}->{$child_ln}) {
1101 $element_state->{has_element}->{$child_ln} = 1;
1102 $not_allowed = $element_state->{has_element}->{entry};
1103 } else {
1104 $not_allowed = 1;
1105 }
1106 } elsif ($child_ln eq 'link') {
1107 if ($child_ln->rel eq $LINK_REL . 'alternate') {
1108 my $type = $child_ln->get_attribute_ns (undef, 'type');
1109 $type = '' unless defined $type;
1110 my $hreflang = $child_ln->get_attribute_ns (undef, 'hreflang');
1111 $hreflang = '' unless defined $hreflang;
1112 my $key = 'link:'.(defined $type ? ':'.$type : '').':'.
1113 (defined $hreflang ? ':'.$hreflang : '');
1114 unless ($element_state->{has_element}->{$key}) {
1115 $element_state->{has_element}->{$key} = 1;
1116 } else {
1117 $not_allowed = 1;
1118 }
1119 }
1120 $not_allowed ||= $element_state->{has_element}->{entry};
1121 } elsif ({
1122 category => 1,
1123 contributor => 1,
1124 }->{$child_ln}) {
1125 $not_allowed = $element_state->{has_element}->{entry};
1126 } elsif ($child_ln eq 'author') {
1127 $not_allowed = $element_state->{has_element}->{entry};
1128 $item->{parent_state}->{has_author} = 1; # parent::atom:entry's flag
1129 } else {
1130 $not_allowed = 1;
1131 }
1132 if ($not_allowed) {
1133 $self->{onerror}->(node => $child_el, type => 'element not allowed');
1134 }
1135 } else {
1136 ## TODO: extension element
1137 $self->{onerror}->(node => $child_el, type => 'element not allowed');
1138 }
1139 },
1140 check_child_text => sub {
1141 my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1142 if ($has_significant) {
1143 $self->{onerror}->(node => $child_node, type => 'character not allowed',
1144 level => $self->{must_level});
1145 }
1146 },
1147 };
1148
1149 $Element->{$ATOM_NS}->{subtitle} = \%AtomTextConstruct;
1150
1151 $Element->{$ATOM_NS}->{summary} = \%AtomTextConstruct;
1152
1153 $Element->{$ATOM_NS}->{title} = \%AtomTextConstruct;
1154
1155 $Element->{$ATOM_NS}->{updated} = \%AtomDateConstruct;
1156
1157 ## TODO: signature element
1158
1159 ## TODO: simple extension element and structured extension element
1160
1161 $Whatpm::ContentChecker::Namespace->{$ATOM_NS}->{loaded} = 1;
1162
1163 1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24