/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.97 - (show annotations) (download)
Sun Sep 21 09:45:02 2008 UTC (16 years, 1 month ago) by wakaba
Branch: MAIN
Changes since 1.96: +4 -2 lines
++ whatpm/t/ChangeLog	21 Sep 2008 09:32:48 -0000
2008-09-21  Wakaba  <wakaba@suika.fam.cx>

	* content-model-2.dat: Test data for |form| |name| are added.

++ whatpm/Whatpm/ChangeLog	21 Sep 2008 09:33:18 -0000
	* ContentChecker.pm: Prepare for |form| |name| attribute's
	duplication checking.

2008-09-21  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ContentChecker/ChangeLog	21 Sep 2008 09:32:26 -0000
2008-09-21  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm: |form| element's |name| attribute is implemented.

1 package Whatpm::ContentChecker;
2 use strict;
3 our $VERSION=do{my @r=(q$Revision: 1.96 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4
5 require Whatpm::URIChecker;
6
7 ## ISSUE: How XML and XML Namespaces conformance can (or cannot)
8 ## be applied to an in-memory representation (i.e. DOM)?
9
10 ## TODO: Conformance of an HTML document with non-html root element.
11
12 ## Stability
13 sub FEATURE_STATUS_REC () { 0b1 } ## Interoperable standard
14 sub FEATURE_STATUS_CR () { 0b10 } ## Call for implementation
15 sub FEATURE_STATUS_LC () { 0b100 } ## Last call for comments
16 sub FEATURE_STATUS_WD () { 0b1000 } ## Working or editor's draft
17
18 ## Deprecated
19 sub FEATURE_DEPRECATED_SHOULD () { 0b100000 } ## SHOULD-level
20 sub FEATURE_DEPRECATED_INFO () { 0b1000000 } ## Does not affect conformance
21
22 ## Conformance
23 sub FEATURE_ALLOWED () { 0b10000 }
24
25 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
26 my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
27 my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
28
29 my $Namespace = {
30 '' => {loaded => 1},
31 q<http://www.w3.org/2005/Atom> => {module => 'Whatpm::ContentChecker::Atom'},
32 q<http://purl.org/syndication/history/1.0>
33 => {module => 'Whatpm::ContentChecker::Atom'},
34 q<http://purl.org/syndication/threading/1.0>
35 => {module => 'Whatpm::ContentChecker::Atom'},
36 $HTML_NS => {module => 'Whatpm::ContentChecker::HTML'},
37 $XML_NS => {loaded => 1},
38 $XMLNS_NS => {loaded => 1},
39 q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {loaded => 1},
40 };
41
42 sub load_ns_module ($) {
43 my $nsuri = shift; # namespace URI or ''
44 unless ($Namespace->{$nsuri}->{loaded}) {
45 if ($Namespace->{$nsuri}->{module}) {
46 eval qq{ require $Namespace->{$nsuri}->{module} } or die $@;
47 } else {
48 $Namespace->{$nsuri}->{loaded} = 1;
49 }
50 }
51 } # load_ns_module
52
53 our $AttrChecker = {
54 $XML_NS => {
55 space => sub {
56 my ($self, $attr) = @_;
57 my $value = $attr->value;
58 if ($value eq 'default' or $value eq 'preserve') {
59 #
60 } else {
61 ## NOTE: An XML "error"
62 $self->{onerror}->(node => $attr, level => $self->{level}->{xml_error},
63 type => 'invalid attribute value');
64 }
65 },
66 lang => sub {
67 my ($self, $attr) = @_;
68 my $value = $attr->value;
69 if ($value eq '') {
70 #
71 } else {
72 require Whatpm::LangTag;
73 Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
74 $self->{onerror}->(@_, node => $attr);
75 }, $self->{level});
76 }
77
78 ## NOTE: "The values of the attribute are language identifiers
79 ## as defined by [IETF RFC 3066], Tags for the Identification
80 ## of Languages, or its successor; in addition, the empty string
81 ## may be specified." ("may" in lower case)
82 ## NOTE: Is an RFC 3066-valid (but RFC 4646-invalid) language tag
83 ## allowed today?
84
85 ## TODO: test data
86
87 my $nsuri = $attr->owner_element->namespace_uri;
88 if (defined $nsuri and $nsuri eq $HTML_NS) {
89 my $lang_attr = $attr->owner_element->get_attribute_node_ns
90 (undef, 'lang');
91 if ($lang_attr) {
92 my $lang_attr_value = $lang_attr->value;
93 $lang_attr_value =~ tr/A-Z/a-z/; ## ASCII case-insensitive
94 my $value = $value;
95 $value =~ tr/A-Z/a-z/; ## ASCII case-insensitive
96 if ($lang_attr_value ne $value) {
97 ## NOTE: HTML5 Section "The |lang| and |xml:lang| attributes"
98 $self->{onerror}->(node => $attr,
99 type => 'xml:lang ne lang',
100 level => $self->{level}->{must});
101 }
102 }
103 }
104
105 if ($attr->owner_document->manakai_is_html) { # MUST NOT
106 $self->{onerror}->(node => $attr, type => 'in HTML:xml:lang',
107 level => $self->{level}->{must});
108 ## TODO: Test data...
109 }
110 },
111 base => sub {
112 my ($self, $attr) = @_;
113 my $value = $attr->value;
114 if ($value =~ /[^\x{0000}-\x{10FFFF}]/) { ## ISSUE: Should we disallow noncharacters?
115 $self->{onerror}->(node => $attr,
116 type => 'invalid attribute value',
117 level => $self->{level}->{fact}, ## TODO: correct?
118 );
119 }
120 ## NOTE: Conformance to URI standard is not checked since there is
121 ## no author requirement on conformance in the XML Base specification.
122 },
123 id => sub {
124 my ($self, $attr) = @_;
125 my $value = $attr->value;
126 $value =~ s/[\x09\x0A\x0D\x20]+/ /g;
127 $value =~ s/^\x20//;
128 $value =~ s/\x20$//;
129 ## TODO: NCName in XML 1.0 or 1.1
130 ## TODO: declared type is ID?
131 if ($self->{id}->{$value}) {
132 $self->{onerror}->(node => $attr,
133 type => 'duplicate ID',
134 level => $self->{level}->{xml_id_error});
135 push @{$self->{id}->{$value}}, $attr;
136 } else {
137 $self->{id}->{$value} = [$attr];
138 }
139 },
140 },
141 $XMLNS_NS => {
142 '' => sub {
143 my ($self, $attr) = @_;
144 my $ln = $attr->manakai_local_name;
145 my $value = $attr->value;
146 if ($value eq $XML_NS and $ln ne 'xml') {
147 $self->{onerror}
148 ->(node => $attr,
149 type => 'Reserved Prefixes and Namespace Names:Name',
150 text => $value,
151 level => $self->{level}->{nc});
152 } elsif ($value eq $XMLNS_NS) {
153 $self->{onerror}
154 ->(node => $attr,
155 type => 'Reserved Prefixes and Namespace Names:Name',
156 text => $value,
157 level => $self->{level}->{nc});
158 }
159 if ($ln eq 'xml' and $value ne $XML_NS) {
160 $self->{onerror}
161 ->(node => $attr,
162 type => 'Reserved Prefixes and Namespace Names:Prefix',
163 text => $ln,
164 level => $self->{level}->{nc});
165 } elsif ($ln eq 'xmlns') {
166 $self->{onerror}
167 ->(node => $attr,
168 type => 'Reserved Prefixes and Namespace Names:Prefix',
169 text => $ln,
170 level => $self->{level}->{nc});
171 }
172 ## TODO: If XML 1.0 and empty
173 },
174 xmlns => sub {
175 my ($self, $attr) = @_;
176 ## TODO: In XML 1.0, URI reference [RFC 3986] or an empty string
177 ## TODO: In XML 1.1, IRI reference [RFC 3987] or an empty string
178 ## TODO: relative references are deprecated
179 my $value = $attr->value;
180 if ($value eq $XML_NS) {
181 $self->{onerror}
182 ->(node => $attr,
183 type => 'Reserved Prefixes and Namespace Names:Name',
184 text => $value,
185 level => $self->{level}->{nc});
186 } elsif ($value eq $XMLNS_NS) {
187 $self->{onerror}
188 ->(node => $attr,
189 type => 'Reserved Prefixes and Namespace Names:Name',
190 text => $value,
191 level => $self->{level}->{nc});
192 }
193 },
194 },
195 };
196
197 ## ISSUE: Should we really allow these attributes?
198 $AttrChecker->{''}->{'xml:space'} = $AttrChecker->{$XML_NS}->{space};
199 $AttrChecker->{''}->{'xml:lang'} = $AttrChecker->{$XML_NS}->{lang};
200 ## NOTE: Checker for (null, "xml:lang") attribute is shadowed for
201 ## HTML elements in Whatpm::ContentChecker::HTML.
202 $AttrChecker->{''}->{'xml:base'} = $AttrChecker->{$XML_NS}->{base};
203 $AttrChecker->{''}->{'xml:id'} = $AttrChecker->{$XML_NS}->{id};
204
205 our $AttrStatus;
206
207 for (qw/space lang base id/) {
208 $AttrStatus->{$XML_NS}->{$_} = FEATURE_STATUS_REC | FEATURE_ALLOWED;
209 $AttrStatus->{''}->{"xml:$_"} = FEATURE_STATUS_REC | FEATURE_ALLOWED;
210 ## XML 1.0: FEATURE_STATUS_CR
211 ## XML 1.1: FEATURE_STATUS_REC
212 ## XML Namespaces 1.0: FEATURE_STATUS_CR
213 ## XML Namespaces 1.1: FEATURE_STATUS_REC
214 ## XML Base: FEATURE_STATUS_REC
215 ## xml:id: FEATURE_STATUS_REC
216 }
217
218 $AttrStatus->{$XMLNS_NS}->{''} = FEATURE_STATUS_REC | FEATURE_ALLOWED;
219
220 ## TODO: xsi:schemaLocation for XHTML2 support (very, very low priority)
221
222 our %AnyChecker = (
223 check_start => sub { },
224 check_attrs => sub {
225 my ($self, $item, $element_state) = @_;
226 for my $attr (@{$item->{node}->attributes}) {
227 my $attr_ns = $attr->namespace_uri;
228 if (defined $attr_ns) {
229 load_ns_module ($attr_ns);
230 } else {
231 $attr_ns = '';
232 }
233 my $attr_ln = $attr->manakai_local_name;
234
235 my $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
236 || $AttrChecker->{$attr_ns}->{''};
237 my $status = $AttrStatus->{$attr_ns}->{$attr_ln}
238 || $AttrStatus->{$attr_ns}->{''};
239 if (not defined $status) {
240 $status = FEATURE_ALLOWED;
241 ## NOTE: FEATURE_ALLOWED for all attributes, since the element
242 ## is not supported and therefore "attribute not defined" error
243 ## should not raised (too verbose) and global attributes should be
244 ## allowed anyway (if a global attribute has its specified creteria
245 ## for where it may be specified, then it should be checked in it's
246 ## checker function).
247 }
248 if ($checker) {
249 $checker->($self, $attr);
250 } else {
251 $self->{onerror}->(node => $attr,
252 type => 'unknown attribute',
253 level => $self->{level}->{uncertain});
254 }
255 $self->_attr_status_info ($attr, $status);
256 }
257 },
258 check_child_element => sub {
259 my ($self, $item, $child_el, $child_nsuri, $child_ln,
260 $child_is_transparent, $element_state) = @_;
261 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
262 $self->{onerror}->(node => $child_el,
263 type => 'element not allowed:minus',
264 level => $self->{level}->{must});
265 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
266 #
267 } else {
268 #
269 }
270 },
271 check_child_text => sub { },
272 check_end => sub {
273 my ($self, $item, $element_state) = @_;
274 ## NOTE: There is a modified copy of the code below for |html:ruby|.
275 if ($element_state->{has_significant}) {
276 $item->{real_parent_state}->{has_significant} = 1;
277 }
278 },
279 );
280
281 our $ElementDefault = {
282 %AnyChecker,
283 status => FEATURE_ALLOWED,
284 ## NOTE: No "element not defined" error - it is not supported anyway.
285 check_start => sub {
286 my ($self, $item, $element_state) = @_;
287 $self->{onerror}->(node => $item->{node},
288 type => 'unknown element',
289 level => $self->{level}->{uncertain});
290 },
291 };
292
293 our $HTMLEmbeddedContent = {
294 ## NOTE: All embedded content is also phrasing content.
295 $HTML_NS => {
296 img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
297 canvas => 1,
298 },
299 q<http://www.w3.org/1998/Math/MathML> => {math => 1},
300 q<http://www.w3.org/2000/svg> => {svg => 1},
301 ## NOTE: Foreign elements with content (but no metadata) are
302 ## embedded content.
303 };
304
305 our $IsInHTMLInteractiveContent = sub {
306 my ($el, $nsuri, $ln) = @_;
307
308 ## NOTE: This CODE returns whether an element that is conditionally
309 ## categorizzed as an interactive content is currently in that
310 ## condition or not. See $HTMLInteractiveContent list defined in
311 ## Whatpm::ContentChecler::HTML for the list of all (conditionally
312 ## or permanently) interactive content.
313
314 if ($nsuri eq $HTML_NS and ($ln eq 'video' or $ln eq 'audio')) {
315 return $el->has_attribute ('controls');
316 } elsif ($nsuri eq $HTML_NS and $ln eq 'menu') {
317 my $value = $el->get_attribute ('type');
318 $value =~ tr/A-Z/a-z/; # ASCII case-insensitive
319 return ($value eq 'toolbar');
320 } else {
321 return 1;
322 }
323 }; # $IsInHTMLInteractiveContent
324
325 my $HTMLTransparentElements = {
326 $HTML_NS => {qw/ins 1 del 1 font 1 noscript 1 canvas 1 a 1/},
327 ## NOTE: |html:noscript| is transparent if scripting is disabled
328 ## and not in |head|.
329 };
330
331 my $HTMLSemiTransparentElements = {
332 $HTML_NS => {object => 1, video => 1, audio => 1},
333 };
334
335 our $Element = {};
336
337 $Element->{q<http://www.w3.org/1999/02/22-rdf-syntax-ns#>}->{RDF} = {
338 %AnyChecker,
339 status => FEATURE_STATUS_REC | FEATURE_ALLOWED,
340 is_root => 1, ## ISSUE: Not explicitly allowed for non application/rdf+xml
341 check_start => sub {
342 my ($self, $item, $element_state) = @_;
343 my $triple = [];
344 push @{$self->{return}->{rdf}}, [$item->{node}, $triple];
345 require Whatpm::RDFXML;
346 my $rdf = Whatpm::RDFXML->new;
347 ## TODO: Should we make bnodeid unique in a document?
348 $rdf->{onerror} = $self->{onerror};
349 $rdf->{level} = $self->{level};
350 $rdf->{ontriple} = sub {
351 my %opt = @_;
352 push @$triple,
353 [$opt{node}, $opt{subject}, $opt{predicate}, $opt{object}];
354 if (defined $opt{id}) {
355 push @$triple,
356 [$opt{node},
357 $opt{id},
358 {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#subject>},
359 $opt{subject}];
360 push @$triple,
361 [$opt{node},
362 $opt{id},
363 {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate>},
364 $opt{predicate}];
365 push @$triple,
366 [$opt{node},
367 $opt{id},
368 {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#object>},
369 $opt{object}];
370 push @$triple,
371 [$opt{node},
372 $opt{id},
373 {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>},
374 {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement>}];
375 }
376 };
377 $rdf->convert_rdf_element ($item->{node});
378 },
379 };
380
381 my $default_error_level = {
382 must => 'm',
383 should => 's',
384 warn => 'w',
385 good => 'w',
386 undefined => 'w',
387 info => 'i',
388
389 uncertain => 'u',
390
391 html4_fact => 'm',
392 html5_no_may => 'm',
393
394 xml_error => 'm', ## TODO: correct?
395 xml_id_error => 'm', ## TODO: ?
396 nc => 'm', ## XML Namespace Constraints ## TODO: correct?
397
398 ## |Whatpm::URIChecker|
399 uri_syntax => 'm',
400 uri_fact => 'm',
401 uri_lc_must => 'm',
402 uri_lc_should => 'w',
403
404 ## |Whatpm::IMTChecker|
405 mime_must => 'm', # lowercase "must"
406 mime_fact => 'm',
407 mime_strongly_discouraged => 'w',
408 mime_discouraged => 'w',
409
410 ## |Whatpm::LangTag|
411 langtag_fact => 'm',
412
413 ## |Whatpm::RDFXML|
414 rdf_fact => 'm',
415 rdf_grammer => 'm',
416 rdf_lc_must => 'm',
417
418 ## |Message::Charset::Info| and |Whatpm::Charset::DecodeHandle|
419 charset_variant => 'm',
420 ## An error caused by use of a variant charset that is not conforming
421 ## to the original charset (e.g. use of 0x80 in an ISO-8859-1 document
422 ## which is interpreted as a Windows-1252 document instead).
423 charset_fact => 'm',
424 iso_shall => 'm',
425 };
426
427 sub check_document ($$$;$) {
428 my ($self, $doc, $onerror, $onsubdoc) = @_;
429 $self = bless {}, $self unless ref $self;
430 $self->{onerror} = $onerror;
431 $self->{onsubdoc} = $onsubdoc || sub {
432 warn "A subdocument is not conformance-checked";
433 };
434
435 $self->{level} ||= $default_error_level;
436
437 ## TODO: If application/rdf+xml, RDF/XML mode should be invoked.
438
439 my $docel = $doc->document_element;
440 unless (defined $docel) {
441 ## ISSUE: Should we check content of Document node?
442 $onerror->(node => $doc, type => 'no document element',
443 level => $self->{level}->{must});
444 ## ISSUE: Is this non-conforming (to what spec)? Or just a warning?
445 return {
446 class => {},
447 id => {}, table => [], term => {},
448 };
449 }
450
451 ## ISSUE: Unexpanded entity references and HTML5 conformance
452
453 my $docel_nsuri = $docel->namespace_uri;
454 if (defined $docel_nsuri) {
455 load_ns_module ($docel_nsuri);
456 } else {
457 $docel_nsuri = '';
458 }
459 my $docel_def = $Element->{$docel_nsuri}->{$docel->manakai_local_name} ||
460 $Element->{$docel_nsuri}->{''} ||
461 $ElementDefault;
462 if ($docel_def->{is_root}) {
463 #
464 } elsif ($docel_def->{is_xml_root}) {
465 unless ($doc->manakai_is_html) {
466 #
467 } else {
468 $onerror->(node => $docel, type => 'element not allowed:root:xml',
469 level => $self->{level}->{must});
470 }
471 } else {
472 $onerror->(node => $docel, type => 'element not allowed:root',
473 level => $self->{level}->{must});
474 }
475
476 ## TODO: Check for other items other than document element
477 ## (second (errorous) element, text nodes, PI nodes, doctype nodes)
478
479 my $return = $self->check_element ($docel, $onerror, $onsubdoc);
480
481 ## TODO: Test for these checks are necessary.
482 my $charset_name = $doc->input_encoding;
483 if (defined $charset_name) {
484 require Message::Charset::Info;
485 my $charset = $Message::Charset::Info::IANACharset->{$charset_name};
486
487 if ($doc->manakai_is_html) {
488 if (not $doc->manakai_has_bom and
489 not defined $doc->manakai_charset) {
490 unless ($charset->{is_html_ascii_superset}) {
491 $onerror->(node => $doc,
492 level => $self->{level}->{must},
493 type => 'non ascii superset',
494 text => $charset_name);
495 }
496
497 if (not $self->{has_charset} and ## TODO: This does not work now.
498 not $charset->{iana_names}->{'us-ascii'}) {
499 $onerror->(node => $doc,
500 level => $self->{level}->{must},
501 type => 'no character encoding declaration',
502 text => $charset_name);
503 }
504 }
505
506 if ($charset->{iana_names}->{'utf-8'}) {
507 #
508 } elsif ($charset->{iana_names}->{'jis_x0212-1990'} or
509 $charset->{iana_names}->{'x-jis0208'} or
510 $charset->{iana_names}->{'utf-32'} or ## ISSUE: UTF-32BE? UTF-32LE?
511 ($charset->{category} & Message::Charset::Info::CHARSET_CATEGORY_EBCDIC ())) {
512 $onerror->(node => $doc,
513 type => 'bad character encoding',
514 text => $charset_name,
515 level => $self->{level}->{should},
516 layer => 'encode');
517 } elsif ($charset->{iana_names}->{'cesu-8'} or
518 $charset->{iana_names}->{'utf-8'} or ## ISSUE: UNICODE-1-1-UTF-7?
519 $charset->{iana_names}->{'bocu-1'} or
520 $charset->{iana_names}->{'scsu'}) {
521 $onerror->(node => $doc,
522 type => 'disallowed character encoding',
523 text => $charset_name,
524 level => $self->{level}->{must},
525 layer => 'encode');
526 } else {
527 $onerror->(node => $doc,
528 type => 'non-utf-8 character encoding',
529 text => $charset_name,
530 level => $self->{level}->{good},
531 layer => 'encode');
532 }
533 }
534 } elsif ($doc->manakai_is_html) {
535 ## NOTE: MUST and SHOULD requirements above cannot be tested,
536 ## since the document has no input charset encoding information.
537 $onerror->(node => $doc,
538 type => 'character encoding unchecked',
539 level => $self->{level}->{info},
540 layer => 'encode');
541 }
542
543 return $return;
544 } # check_document
545
546 ## Check an element. The element is checked as if it is an orphan node (i.e.
547 ## an element without a parent node).
548 sub check_element ($$$;$) {
549 my ($self, $el, $onerror, $onsubdoc) = @_;
550 $self = bless {}, $self unless ref $self;
551 $self->{onerror} = $onerror;
552 $self->{onsubdoc} = $onsubdoc || sub {
553 warn "A subdocument is not conformance-checked";
554 };
555
556 $self->{level} ||= $default_error_level;
557
558 $self->{plus_elements} = {};
559 $self->{minus_elements} = {};
560 $self->{id} = {};
561 $self->{form} = {};
562 $self->{term} = {};
563 $self->{usemap} = [];
564 $self->{ref} = []; # datetemplate data references
565 $self->{template} = []; # datatemplate template references
566 $self->{contextmenu} = [];
567 $self->{map} = {};
568 $self->{menu} = {};
569 $self->{has_link_type} = {};
570 $self->{flag} = {};
571 #$self->{has_uri_attr};
572 #$self->{has_hyperlink_element};
573 #$self->{has_charset};
574 #$self->{has_base};
575 $self->{return} = {
576 class => {},
577 id => $self->{id},
578 table => [], # table objects returned by Whatpm::HTMLTable
579 term => $self->{term},
580 uri => {}, # URIs other than those in RDF triples
581 ## TODO: xmlns="", SYSTEM "", atom:* src="", xml:base=""
582 rdf => [],
583 };
584
585 my @item = ({type => 'element', node => $el, parent_state => {}});
586 $item[-1]->{real_parent_state} = $item[-1]->{parent_state};
587 while (@item) {
588 my $item = shift @item;
589 if (ref $item eq 'ARRAY') {
590 my $code = shift @$item;
591 next unless $code;## TODO: temp.
592 $code->(@$item);
593 } elsif ($item->{type} eq 'element') {
594 my $el_nsuri = $item->{node}->namespace_uri;
595 if (defined $el_nsuri) {
596 load_ns_module ($el_nsuri);
597 } else {
598 $el_nsuri = '';
599 }
600 my $el_ln = $item->{node}->manakai_local_name;
601
602 my $element_state = {};
603 my $eldef = $Element->{$el_nsuri}->{$el_ln} ||
604 $Element->{$el_nsuri}->{''} ||
605 $ElementDefault;
606 my $content_def = $item->{transparent}
607 ? $item->{parent_def} || $eldef : $eldef;
608 my $content_state = $item->{transparent}
609 ? $item->{parent_def}
610 ? $item->{parent_state} || $element_state : $element_state
611 : $element_state;
612
613 unless ($eldef->{status} & FEATURE_STATUS_REC) {
614 my $status = $eldef->{status} & FEATURE_STATUS_CR ? 'cr' :
615 $eldef->{status} & FEATURE_STATUS_LC ? 'lc' :
616 $eldef->{status} & FEATURE_STATUS_WD ? 'wd' : 'non-standard';
617 $self->{onerror}->(node => $item->{node},
618 type => 'status:'.$status.':element',
619 level => $self->{level}->{info});
620 }
621 if (not ($eldef->{status} & FEATURE_ALLOWED)) {
622 $self->{onerror}->(node => $item->{node},
623 type => 'element not defined',
624 level => $self->{level}->{must});
625 } elsif ($eldef->{status} & FEATURE_DEPRECATED_SHOULD) {
626 $self->{onerror}->(node => $item->{node},
627 type => 'deprecated:element',
628 level => $self->{level}->{should});
629 } elsif ($eldef->{status} & FEATURE_DEPRECATED_INFO) {
630 $self->{onerror}->(node => $item->{node},
631 type => 'deprecated:element',
632 level => $self->{level}->{info});
633 }
634
635 my @new_item;
636 push @new_item, [$eldef->{check_start}, $self, $item, $element_state];
637 push @new_item, [$eldef->{check_attrs}, $self, $item, $element_state];
638
639 my @child = @{$item->{node}->child_nodes};
640 while (@child) {
641 my $child = shift @child;
642 my $child_nt = $child->node_type;
643 if ($child_nt == 1) { # ELEMENT_NODE
644 my $child_nsuri = $child->namespace_uri;
645 $child_nsuri = '' unless defined $child_nsuri;
646 my $child_ln = $child->manakai_local_name;
647 if ($HTMLTransparentElements->{$child_nsuri}->{$child_ln} and
648 not (($self->{flag}->{in_head} or
649 ($el_nsuri eq $HTML_NS and $el_ln eq 'head')) and
650 $child_nsuri eq $HTML_NS and $child_ln eq 'noscript')) {
651 push @new_item, [$content_def->{check_child_element},
652 $self, $item, $child,
653 $child_nsuri, $child_ln, 1,
654 $content_state, $element_state];
655 push @new_item, {type => 'element', node => $child,
656 parent_state => $content_state,
657 parent_def => $content_def,
658 real_parent_state => $element_state,
659 transparent => 1};
660 } else {
661 if ($item->{parent_def} and # has parent
662 $el_nsuri eq $HTML_NS) { ## $HTMLSemiTransparentElements
663 if ($el_ln eq 'object') {
664 if ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
665 #
666 } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
667 #
668 } else {
669 $content_def = $item->{parent_def} || $content_def;
670 $content_state = $item->{parent_state} || $content_state;
671 }
672 } elsif ($el_ln eq 'video' or $el_ln eq 'audio') {
673 if ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
674 #
675 } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'source') {
676 $element_state->{has_source} = 1;
677 } else {
678 $content_def = $item->{parent_def} || $content_def;
679 $content_state = $item->{parent_state} || $content_state;
680 }
681 }
682 }
683
684 push @new_item, [$content_def->{check_child_element},
685 $self, $item, $child,
686 $child_nsuri, $child_ln,
687 $HTMLSemiTransparentElements
688 ->{$child_nsuri}->{$child_ln},
689 $content_state, $element_state];
690 push @new_item, {type => 'element', node => $child,
691 parent_def => $content_def,
692 real_parent_state => $element_state,
693 parent_state => $content_state};
694 }
695
696 if ($HTMLEmbeddedContent->{$child_nsuri}->{$child_ln}) {
697 $element_state->{has_significant} = 1;
698 }
699 } elsif ($child_nt == 3 or # TEXT_NODE
700 $child_nt == 4) { # CDATA_SECTION_NODE
701 my $has_significant = ($child->data =~ /[^\x09\x0A\x0C\x0D\x20]/);
702 push @new_item, [$content_def->{check_child_text},
703 $self, $item, $child, $has_significant,
704 $content_state, $element_state];
705 $element_state->{has_significant} ||= $has_significant;
706 if ($has_significant and
707 $HTMLSemiTransparentElements->{$el_nsuri}->{$el_ln}) {
708 $content_def = $item->{parent_def} || $content_def;
709 }
710 } elsif ($child_nt == 5) { # ENTITY_REFERENCE_NODE
711 push @child, @{$child->child_nodes};
712 }
713 ## TODO: PI_NODE
714 ## TODO: Unknown node type
715 }
716
717 push @new_item, [$eldef->{check_end}, $self, $item, $element_state];
718
719 unshift @item, @new_item;
720 } else {
721 die "$0: Internal error: Unsupported checking action type |$item->{type}|";
722 }
723 }
724
725 for (@{$self->{template}}) {
726 ## TODO: If the document is an XML document, ...
727 ## NOTE: If the document is an HTML document:
728 ## ISSUE: We need to percent-decode?
729 F: {
730 if ($self->{id}->{$_->[0]}) {
731 my $el = $self->{id}->{$_->[0]}->[0]->owner_element;
732 if ($el->node_type == 1 and # ELEMENT_NODE
733 $el->manakai_local_name eq 'datatemplate') {
734 my $nsuri = $el->namespace_uri;
735 if (defined $nsuri and $nsuri eq $HTML_NS) {
736 if ($el eq $_->[1]->owner_element) {
737 $self->{onerror}->(node => $_->[1],
738 type => 'fragment points itself',
739 level => $self->{level}->{must});
740 }
741
742 last F;
743 }
744 }
745 }
746 ## TODO: Should we raise a "fragment points nothing" error instead
747 ## if the fragment identifier identifies no element?
748
749 $self->{onerror}->(node => $_->[1], type => 'template:not template',
750 level => $self->{level}->{must});
751 } # F
752 }
753
754 for (@{$self->{ref}}) {
755 ## TOOD: If XML
756 ## NOTE: If it is an HTML document:
757 if ($_->[0] eq '') {
758 ## NOTE: It points the top of the document.
759 } elsif ($self->{id}->{$_->[0]}) {
760 if ($self->{id}->{$_->[0]}->[0]->owner_element
761 eq $_->[1]->owner_element) {
762 $self->{onerror}->(node => $_->[1], type => 'fragment points itself',
763 level => $self->{level}->{must});
764 }
765 } else {
766 $self->{onerror}->(node => $_->[1], type => 'fragment points nothing',
767 level => $self->{level}->{must});
768 }
769 }
770
771 ## TODO: Maybe we should have $document->manakai_get_by_fragment or something
772
773 for (@{$self->{usemap}}) {
774 unless ($self->{map}->{$_->[0]}) {
775 $self->{onerror}->(node => $_->[1], type => 'no referenced map',
776 level => $self->{level}->{must});
777 }
778 }
779
780 for (@{$self->{contextmenu}}) {
781 unless ($self->{menu}->{$_->[0]}) {
782 $self->{onerror}->(node => $_->[1], type => 'no referenced menu',
783 level => $self->{level}->{must});
784 }
785 }
786
787 delete $self->{plus_elements};
788 delete $self->{minus_elements};
789 delete $self->{onerror};
790 delete $self->{id};
791 delete $self->{form};
792 delete $self->{usemap};
793 delete $self->{ref};
794 delete $self->{template};
795 delete $self->{map};
796 return $self->{return};
797 } # check_element
798
799 sub _add_minus_elements ($$@) {
800 my $self = shift;
801 my $element_state = shift;
802 for my $elements (@_) {
803 for my $nsuri (keys %$elements) {
804 for my $ln (keys %{$elements->{$nsuri}}) {
805 unless ($self->{minus_elements}->{$nsuri}->{$ln}) {
806 $element_state->{minus_elements_original}->{$nsuri}->{$ln} = 0;
807 $self->{minus_elements}->{$nsuri}->{$ln} = 1;
808 }
809 }
810 }
811 }
812 } # _add_minus_elements
813
814 sub _remove_minus_elements ($$) {
815 my $self = shift;
816 my $element_state = shift;
817 for my $nsuri (keys %{$element_state->{minus_elements_original}}) {
818 for my $ln (keys %{$element_state->{minus_elements_original}->{$nsuri}}) {
819 delete $self->{minus_elements}->{$nsuri}->{$ln};
820 }
821 }
822 } # _remove_minus_elements
823
824 sub _add_plus_elements ($$@) {
825 my $self = shift;
826 my $element_state = shift;
827 for my $elements (@_) {
828 for my $nsuri (keys %$elements) {
829 for my $ln (keys %{$elements->{$nsuri}}) {
830 unless ($self->{plus_elements}->{$nsuri}->{$ln}) {
831 $element_state->{plus_elements_original}->{$nsuri}->{$ln} = 0;
832 $self->{plus_elements}->{$nsuri}->{$ln} = 1;
833 }
834 }
835 }
836 }
837 } # _add_plus_elements
838
839 sub _remove_plus_elements ($$) {
840 my $self = shift;
841 my $element_state = shift;
842 for my $nsuri (keys %{$element_state->{plus_elements_original}}) {
843 for my $ln (keys %{$element_state->{plus_elements_original}->{$nsuri}}) {
844 delete $self->{plus_elements}->{$nsuri}->{$ln};
845 }
846 }
847 } # _remove_plus_elements
848
849 sub _attr_status_info ($$$) {
850 my ($self, $attr, $status_code) = @_;
851
852 if (not ($status_code & FEATURE_ALLOWED)) {
853 $self->{onerror}->(node => $attr,
854 type => 'attribute not defined',
855 level => $self->{level}->{must});
856 } elsif ($status_code & FEATURE_DEPRECATED_SHOULD) {
857 $self->{onerror}->(node => $attr,
858 type => 'deprecated:attr',
859 level => $self->{level}->{should});
860 } elsif ($status_code & FEATURE_DEPRECATED_INFO) {
861 $self->{onerror}->(node => $attr,
862 type => 'deprecated:attr',
863 level => $self->{level}->{info});
864 }
865
866 my $status;
867 if ($status_code & FEATURE_STATUS_REC) {
868 return;
869 } elsif ($status_code & FEATURE_STATUS_CR) {
870 $status = 'cr';
871 } elsif ($status_code & FEATURE_STATUS_LC) {
872 $status = 'lc';
873 } elsif ($status_code & FEATURE_STATUS_WD) {
874 $status = 'wd';
875 } else {
876 $status = 'non-standard';
877 }
878 $self->{onerror}->(node => $attr,
879 type => 'status:'.$status.':attr',
880 level => $self->{level}->{info});
881 } # _attr_status_info
882
883 sub _add_minuses ($@) {
884 my $self = shift;
885 my $r = {};
886 for my $list (@_) {
887 for my $ns (keys %$list) {
888 for my $ln (keys %{$list->{$ns}}) {
889 unless ($self->{minuses}->{$ns}->{$ln}) {
890 $self->{minuses}->{$ns}->{$ln} = 1;
891 $r->{$ns}->{$ln} = 1;
892 }
893 }
894 }
895 }
896 return {type => 'plus', list => $r};
897 } # _add_minuses
898
899 sub _add_pluses ($@) {
900 my $self = shift;
901 my $r = {};
902 for my $list (@_) {
903 for my $ns (keys %$list) {
904 for my $ln (keys %{$list->{$ns}}) {
905 unless ($self->{pluses}->{$ns}->{$ln}) {
906 $self->{pluses}->{$ns}->{$ln} = 1;
907 $r->{$ns}->{$ln} = 1;
908 }
909 }
910 }
911 }
912 return {type => 'minus', list => $r};
913 } # _add_pluses
914
915 sub _remove_minuses ($$) {
916 my ($self, $todo) = @_;
917 if ($todo->{type} eq 'minus') {
918 for my $ns (keys %{$todo->{list}}) {
919 for my $ln (keys %{$todo->{list}->{$ns}}) {
920 delete $self->{pluses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
921 }
922 }
923 } elsif ($todo->{type} eq 'plus') {
924 for my $ns (keys %{$todo->{list}}) {
925 for my $ln (keys %{$todo->{list}->{$ns}}) {
926 delete $self->{minuses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
927 }
928 }
929 } else {
930 die "$0: Unknown +- type: $todo->{type}";
931 }
932 1;
933 } # _remove_minuses
934
935 ## NOTE: Priority for "minuses" and "pluses" are currently left
936 ## undefined and implemented inconsistently; it is not a problem for
937 ## now, since no element belongs to both lists.
938
939 sub _check_get_children ($$$) {
940 my ($self, $node, $parent_todo) = @_;
941 my $new_todos = [];
942 my $sib = [];
943 TP: {
944 my $node_ns = $node->namespace_uri;
945 $node_ns = '' unless defined $node_ns;
946 my $node_ln = $node->manakai_local_name;
947 if ($HTMLTransparentElements->{$node_ns}->{$node_ln}) {
948 if ($node_ns eq $HTML_NS and $node_ln eq 'noscript') {
949 if ($parent_todo->{flag}->{in_head}) {
950 #
951 } else {
952 my $end = $self->_add_minuses ({$HTML_NS, {noscript => 1}});
953 push @$sib, $end;
954
955 unshift @$sib, @{$node->child_nodes};
956 push @$new_todos, {type => 'element-attributes', node => $node};
957 last TP;
958 }
959 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'del') {
960 my $sig_flag = $parent_todo->{flag}->{has_descendant}->{significant};
961 unshift @$sib, @{$node->child_nodes};
962 push @$new_todos, {type => 'element-attributes', node => $node};
963 push @$new_todos,
964 {type => 'code',
965 code => sub {
966 $parent_todo->{flag}->{has_descendant}->{significant} = 0
967 if not $sig_flag;
968 }};
969 last TP;
970 } else {
971 unshift @$sib, @{$node->child_nodes};
972 push @$new_todos, {type => 'element-attributes', node => $node};
973 last TP;
974 }
975 }
976 if ($node_ns eq $HTML_NS and ($node_ln eq 'video' or $node_ln eq 'audio')) {
977 if ($node->has_attribute_ns (undef, 'src')) {
978 unshift @$sib, @{$node->child_nodes};
979 push @$new_todos, {type => 'element-attributes', node => $node};
980 last TP;
981 } else {
982 my @cn = @{$node->child_nodes};
983 CN: while (@cn) {
984 my $cn = shift @cn;
985 my $cnt = $cn->node_type;
986 if ($cnt == 1) {
987 my $cn_nsuri = $cn->namespace_uri;
988 $cn_nsuri = '' unless defined $cn_nsuri;
989 if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'source') {
990 #
991 } else {
992 last CN;
993 }
994 } elsif ($cnt == 3 or $cnt == 4) {
995 if ($cn->data =~ /[^\x09\x0A\x0C\x0D\x20]/) {
996 last CN;
997 }
998 }
999 } # CN
1000 unshift @$sib, @cn;
1001 }
1002 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'object') {
1003 my @cn = @{$node->child_nodes};
1004 CN: while (@cn) {
1005 my $cn = shift @cn;
1006 my $cnt = $cn->node_type;
1007 if ($cnt == 1) {
1008 my $cn_nsuri = $cn->namespace_uri;
1009 $cn_nsuri = '' unless defined $cn_nsuri;
1010 if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'param') {
1011 #
1012 } else {
1013 last CN;
1014 }
1015 } elsif ($cnt == 3 or $cnt == 4) {
1016 if ($cn->data =~ /[^\x09\x0A\x0C\x0D\x20]/) {
1017 last CN;
1018 }
1019 }
1020 } # CN
1021 unshift @$sib, @cn;
1022 }
1023 push @$new_todos, {type => 'element', node => $node};
1024 } # TP
1025
1026 for my $new_todo (@$new_todos) {
1027 $new_todo->{flag} = {%{$parent_todo->{flag} or {}}};
1028 }
1029
1030 return ($sib, $new_todos);
1031 } # _check_get_children
1032
1033 =head1 LICENSE
1034
1035 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
1036
1037 This library is free software; you can redistribute it
1038 and/or modify it under the same terms as Perl itself.
1039
1040 =cut
1041
1042 1;
1043 # $Date: 2008/09/20 11:25:56 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24