/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.95 - (show annotations) (download)
Sat Sep 20 06:10:18 2008 UTC (16 years, 1 month ago) by wakaba
Branch: MAIN
Changes since 1.94: +22 -2 lines
++ whatpm/t/ChangeLog	20 Sep 2008 05:50:38 -0000
	* content-model-1.dat: Test data for interactive contents are
	added (cf. HTML5 revision 2018).

2008-09-20  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	20 Sep 2008 05:46:21 -0000
2008-09-20  Wakaba  <wakaba@suika.fam.cx>

	* ContentChecker.pm ($IsInHTMLInteractiveContent): New.

++ whatpm/Whatpm/ContentChecker/ChangeLog	20 Sep 2008 05:51:55 -0000
	* HTML.pm, Atom.pm: Interactrive content implementation synced
	with the spec (HTML5 revision 2018).

2008-09-20  Wakaba  <wakaba@suika.fam.cx>

1 package Whatpm::ContentChecker;
2 use strict;
3 our $VERSION=do{my @r=(q$Revision: 1.94 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4
5 require Whatpm::URIChecker;
6
7 ## ISSUE: How XML and XML Namespaces conformance can (or cannot)
8 ## be applied to an in-memory representation (i.e. DOM)?
9
10 ## TODO: Conformance of an HTML document with non-html root element.
11
12 ## Stability
13 sub FEATURE_STATUS_REC () { 0b1 } ## Interoperable standard
14 sub FEATURE_STATUS_CR () { 0b10 } ## Call for implementation
15 sub FEATURE_STATUS_LC () { 0b100 } ## Last call for comments
16 sub FEATURE_STATUS_WD () { 0b1000 } ## Working or editor's draft
17
18 ## Deprecated
19 sub FEATURE_DEPRECATED_SHOULD () { 0b100000 } ## SHOULD-level
20 sub FEATURE_DEPRECATED_INFO () { 0b1000000 } ## Does not affect conformance
21
22 ## Conformance
23 sub FEATURE_ALLOWED () { 0b10000 }
24
25 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
26 my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
27 my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
28
29 my $Namespace = {
30 '' => {loaded => 1},
31 q<http://www.w3.org/2005/Atom> => {module => 'Whatpm::ContentChecker::Atom'},
32 q<http://purl.org/syndication/history/1.0>
33 => {module => 'Whatpm::ContentChecker::Atom'},
34 q<http://purl.org/syndication/threading/1.0>
35 => {module => 'Whatpm::ContentChecker::Atom'},
36 $HTML_NS => {module => 'Whatpm::ContentChecker::HTML'},
37 $XML_NS => {loaded => 1},
38 $XMLNS_NS => {loaded => 1},
39 q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {loaded => 1},
40 };
41
42 sub load_ns_module ($) {
43 my $nsuri = shift; # namespace URI or ''
44 unless ($Namespace->{$nsuri}->{loaded}) {
45 if ($Namespace->{$nsuri}->{module}) {
46 eval qq{ require $Namespace->{$nsuri}->{module} } or die $@;
47 } else {
48 $Namespace->{$nsuri}->{loaded} = 1;
49 }
50 }
51 } # load_ns_module
52
53 our $AttrChecker = {
54 $XML_NS => {
55 space => sub {
56 my ($self, $attr) = @_;
57 my $value = $attr->value;
58 if ($value eq 'default' or $value eq 'preserve') {
59 #
60 } else {
61 ## NOTE: An XML "error"
62 $self->{onerror}->(node => $attr, level => $self->{level}->{xml_error},
63 type => 'invalid attribute value');
64 }
65 },
66 lang => sub {
67 my ($self, $attr) = @_;
68 my $value = $attr->value;
69 if ($value eq '') {
70 #
71 } else {
72 require Whatpm::LangTag;
73 Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
74 $self->{onerror}->(@_, node => $attr);
75 }, $self->{level});
76 }
77
78 ## NOTE: "The values of the attribute are language identifiers
79 ## as defined by [IETF RFC 3066], Tags for the Identification
80 ## of Languages, or its successor; in addition, the empty string
81 ## may be specified." ("may" in lower case)
82 ## NOTE: Is an RFC 3066-valid (but RFC 4646-invalid) language tag
83 ## allowed today?
84
85 ## TODO: test data
86
87 my $nsuri = $attr->owner_element->namespace_uri;
88 if (defined $nsuri and $nsuri eq $HTML_NS) {
89 my $lang_attr = $attr->owner_element->get_attribute_node_ns
90 (undef, 'lang');
91 if ($lang_attr) {
92 my $lang_attr_value = $lang_attr->value;
93 $lang_attr_value =~ tr/A-Z/a-z/; ## ASCII case-insensitive
94 my $value = $value;
95 $value =~ tr/A-Z/a-z/; ## ASCII case-insensitive
96 if ($lang_attr_value ne $value) {
97 ## NOTE: HTML5 Section "The |lang| and |xml:lang| attributes"
98 $self->{onerror}->(node => $attr,
99 type => 'xml:lang ne lang',
100 level => $self->{level}->{must});
101 }
102 }
103 }
104
105 if ($attr->owner_document->manakai_is_html) { # MUST NOT
106 $self->{onerror}->(node => $attr, type => 'in HTML:xml:lang',
107 level => $self->{level}->{must});
108 ## TODO: Test data...
109 }
110 },
111 base => sub {
112 my ($self, $attr) = @_;
113 my $value = $attr->value;
114 if ($value =~ /[^\x{0000}-\x{10FFFF}]/) { ## ISSUE: Should we disallow noncharacters?
115 $self->{onerror}->(node => $attr,
116 type => 'invalid attribute value',
117 level => $self->{level}->{fact}, ## TODO: correct?
118 );
119 }
120 ## NOTE: Conformance to URI standard is not checked since there is
121 ## no author requirement on conformance in the XML Base specification.
122 },
123 id => sub {
124 my ($self, $attr) = @_;
125 my $value = $attr->value;
126 $value =~ s/[\x09\x0A\x0D\x20]+/ /g;
127 $value =~ s/^\x20//;
128 $value =~ s/\x20$//;
129 ## TODO: NCName in XML 1.0 or 1.1
130 ## TODO: declared type is ID?
131 if ($self->{id}->{$value}) {
132 $self->{onerror}->(node => $attr,
133 type => 'duplicate ID',
134 level => $self->{level}->{xml_id_error});
135 push @{$self->{id}->{$value}}, $attr;
136 } else {
137 $self->{id}->{$value} = [$attr];
138 }
139 },
140 },
141 $XMLNS_NS => {
142 '' => sub {
143 my ($self, $attr) = @_;
144 my $ln = $attr->manakai_local_name;
145 my $value = $attr->value;
146 if ($value eq $XML_NS and $ln ne 'xml') {
147 $self->{onerror}
148 ->(node => $attr,
149 type => 'Reserved Prefixes and Namespace Names:Name',
150 text => $value,
151 level => $self->{level}->{nc});
152 } elsif ($value eq $XMLNS_NS) {
153 $self->{onerror}
154 ->(node => $attr,
155 type => 'Reserved Prefixes and Namespace Names:Name',
156 text => $value,
157 level => $self->{level}->{nc});
158 }
159 if ($ln eq 'xml' and $value ne $XML_NS) {
160 $self->{onerror}
161 ->(node => $attr,
162 type => 'Reserved Prefixes and Namespace Names:Prefix',
163 text => $ln,
164 level => $self->{level}->{nc});
165 } elsif ($ln eq 'xmlns') {
166 $self->{onerror}
167 ->(node => $attr,
168 type => 'Reserved Prefixes and Namespace Names:Prefix',
169 text => $ln,
170 level => $self->{level}->{nc});
171 }
172 ## TODO: If XML 1.0 and empty
173 },
174 xmlns => sub {
175 my ($self, $attr) = @_;
176 ## TODO: In XML 1.0, URI reference [RFC 3986] or an empty string
177 ## TODO: In XML 1.1, IRI reference [RFC 3987] or an empty string
178 ## TODO: relative references are deprecated
179 my $value = $attr->value;
180 if ($value eq $XML_NS) {
181 $self->{onerror}
182 ->(node => $attr,
183 type => 'Reserved Prefixes and Namespace Names:Name',
184 text => $value,
185 level => $self->{level}->{nc});
186 } elsif ($value eq $XMLNS_NS) {
187 $self->{onerror}
188 ->(node => $attr,
189 type => 'Reserved Prefixes and Namespace Names:Name',
190 text => $value,
191 level => $self->{level}->{nc});
192 }
193 },
194 },
195 };
196
197 ## ISSUE: Should we really allow these attributes?
198 $AttrChecker->{''}->{'xml:space'} = $AttrChecker->{$XML_NS}->{space};
199 $AttrChecker->{''}->{'xml:lang'} = $AttrChecker->{$XML_NS}->{lang};
200 ## NOTE: Checker for (null, "xml:lang") attribute is shadowed for
201 ## HTML elements in Whatpm::ContentChecker::HTML.
202 $AttrChecker->{''}->{'xml:base'} = $AttrChecker->{$XML_NS}->{base};
203 $AttrChecker->{''}->{'xml:id'} = $AttrChecker->{$XML_NS}->{id};
204
205 our $AttrStatus;
206
207 for (qw/space lang base id/) {
208 $AttrStatus->{$XML_NS}->{$_} = FEATURE_STATUS_REC | FEATURE_ALLOWED;
209 $AttrStatus->{''}->{"xml:$_"} = FEATURE_STATUS_REC | FEATURE_ALLOWED;
210 ## XML 1.0: FEATURE_STATUS_CR
211 ## XML 1.1: FEATURE_STATUS_REC
212 ## XML Namespaces 1.0: FEATURE_STATUS_CR
213 ## XML Namespaces 1.1: FEATURE_STATUS_REC
214 ## XML Base: FEATURE_STATUS_REC
215 ## xml:id: FEATURE_STATUS_REC
216 }
217
218 $AttrStatus->{$XMLNS_NS}->{''} = FEATURE_STATUS_REC | FEATURE_ALLOWED;
219
220 ## TODO: xsi:schemaLocation for XHTML2 support (very, very low priority)
221
222 our %AnyChecker = (
223 check_start => sub { },
224 check_attrs => sub {
225 my ($self, $item, $element_state) = @_;
226 for my $attr (@{$item->{node}->attributes}) {
227 my $attr_ns = $attr->namespace_uri;
228 if (defined $attr_ns) {
229 load_ns_module ($attr_ns);
230 } else {
231 $attr_ns = '';
232 }
233 my $attr_ln = $attr->manakai_local_name;
234
235 my $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
236 || $AttrChecker->{$attr_ns}->{''};
237 my $status = $AttrStatus->{$attr_ns}->{$attr_ln}
238 || $AttrStatus->{$attr_ns}->{''};
239 if (not defined $status) {
240 $status = FEATURE_ALLOWED;
241 ## NOTE: FEATURE_ALLOWED for all attributes, since the element
242 ## is not supported and therefore "attribute not defined" error
243 ## should not raised (too verbose) and global attributes should be
244 ## allowed anyway (if a global attribute has its specified creteria
245 ## for where it may be specified, then it should be checked in it's
246 ## checker function).
247 }
248 if ($checker) {
249 $checker->($self, $attr);
250 } else {
251 $self->{onerror}->(node => $attr,
252 type => 'unknown attribute',
253 level => $self->{level}->{uncertain});
254 }
255 $self->_attr_status_info ($attr, $status);
256 }
257 },
258 check_child_element => sub {
259 my ($self, $item, $child_el, $child_nsuri, $child_ln,
260 $child_is_transparent, $element_state) = @_;
261 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
262 $self->{onerror}->(node => $child_el,
263 type => 'element not allowed:minus',
264 level => $self->{level}->{must});
265 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
266 #
267 } else {
268 #
269 }
270 },
271 check_child_text => sub { },
272 check_end => sub {
273 my ($self, $item, $element_state) = @_;
274 ## NOTE: There is a modified copy of the code below for |html:ruby|.
275 if ($element_state->{has_significant}) {
276 $item->{real_parent_state}->{has_significant} = 1;
277 }
278 },
279 );
280
281 our $ElementDefault = {
282 %AnyChecker,
283 status => FEATURE_ALLOWED,
284 ## NOTE: No "element not defined" error - it is not supported anyway.
285 check_start => sub {
286 my ($self, $item, $element_state) = @_;
287 $self->{onerror}->(node => $item->{node},
288 type => 'unknown element',
289 level => $self->{level}->{uncertain});
290 },
291 };
292
293 our $HTMLEmbeddedContent = {
294 ## NOTE: All embedded content is also phrasing content.
295 $HTML_NS => {
296 img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
297 canvas => 1,
298 },
299 q<http://www.w3.org/1998/Math/MathML> => {math => 1},
300 q<http://www.w3.org/2000/svg> => {svg => 1},
301 ## NOTE: Foreign elements with content (but no metadata) are
302 ## embedded content.
303 };
304
305 our $IsInHTMLInteractiveContent = sub {
306 my ($el, $nsuri, $ln) = @_;
307
308 ## NOTE: This CODE returns whether an element that is conditionally
309 ## categorizzed as an interactive content is currently in that
310 ## condition or not. See $HTMLInteractiveContent list defined in
311 ## Whatpm::ContentChecler::HTML for the list of all (conditionally
312 ## or permanently) interactive content.
313
314 if ($nsuri eq $HTML_NS and ($ln eq 'video' or $ln eq 'audio')) {
315 return $el->has_attribute ('controls');
316 } elsif ($nsuri eq $HTML_NS and $ln eq 'menu') {
317 my $value = $el->get_attribute ('type');
318 $value =~ tr/A-Z/a-z/; # ASCII case-insensitive
319 return ($value eq 'toolbar');
320 } else {
321 return 1;
322 }
323 }; # $IsInHTMLInteractiveContent
324
325 my $HTMLTransparentElements = {
326 $HTML_NS => {qw/ins 1 del 1 font 1 noscript 1 canvas 1 a 1/},
327 ## NOTE: |html:noscript| is transparent if scripting is disabled
328 ## and not in |head|.
329 };
330
331 my $HTMLSemiTransparentElements = {
332 $HTML_NS => {object => 1, video => 1, audio => 1},
333 };
334
335 our $Element = {};
336
337 $Element->{q<http://www.w3.org/1999/02/22-rdf-syntax-ns#>}->{RDF} = {
338 %AnyChecker,
339 status => FEATURE_STATUS_REC | FEATURE_ALLOWED,
340 is_root => 1, ## ISSUE: Not explicitly allowed for non application/rdf+xml
341 check_start => sub {
342 my ($self, $item, $element_state) = @_;
343 my $triple = [];
344 push @{$self->{return}->{rdf}}, [$item->{node}, $triple];
345 require Whatpm::RDFXML;
346 my $rdf = Whatpm::RDFXML->new;
347 ## TODO: Should we make bnodeid unique in a document?
348 $rdf->{onerror} = $self->{onerror};
349 $rdf->{level} = $self->{level};
350 $rdf->{ontriple} = sub {
351 my %opt = @_;
352 push @$triple,
353 [$opt{node}, $opt{subject}, $opt{predicate}, $opt{object}];
354 if (defined $opt{id}) {
355 push @$triple,
356 [$opt{node},
357 $opt{id},
358 {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#subject>},
359 $opt{subject}];
360 push @$triple,
361 [$opt{node},
362 $opt{id},
363 {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate>},
364 $opt{predicate}];
365 push @$triple,
366 [$opt{node},
367 $opt{id},
368 {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#object>},
369 $opt{object}];
370 push @$triple,
371 [$opt{node},
372 $opt{id},
373 {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>},
374 {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement>}];
375 }
376 };
377 $rdf->convert_rdf_element ($item->{node});
378 },
379 };
380
381 my $default_error_level = {
382 must => 'm',
383 should => 's',
384 warn => 'w',
385 good => 'w',
386 undefined => 'w',
387 info => 'i',
388
389 uncertain => 'u',
390
391 html4_fact => 'm',
392 html5_no_may => 'm',
393
394 xml_error => 'm', ## TODO: correct?
395 xml_id_error => 'm', ## TODO: ?
396 nc => 'm', ## XML Namespace Constraints ## TODO: correct?
397
398 ## |Whatpm::URIChecker|
399 uri_syntax => 'm',
400 uri_fact => 'm',
401 uri_lc_must => 'm',
402 uri_lc_should => 'w',
403
404 ## |Whatpm::IMTChecker|
405 mime_must => 'm', # lowercase "must"
406 mime_fact => 'm',
407 mime_strongly_discouraged => 'w',
408 mime_discouraged => 'w',
409
410 ## |Whatpm::LangTag|
411 langtag_fact => 'm',
412
413 ## |Whatpm::RDFXML|
414 rdf_fact => 'm',
415 rdf_grammer => 'm',
416 rdf_lc_must => 'm',
417
418 ## |Message::Charset::Info| and |Whatpm::Charset::DecodeHandle|
419 charset_variant => 'm',
420 ## An error caused by use of a variant charset that is not conforming
421 ## to the original charset (e.g. use of 0x80 in an ISO-8859-1 document
422 ## which is interpreted as a Windows-1252 document instead).
423 charset_fact => 'm',
424 iso_shall => 'm',
425 };
426
427 sub check_document ($$$;$) {
428 my ($self, $doc, $onerror, $onsubdoc) = @_;
429 $self = bless {}, $self unless ref $self;
430 $self->{onerror} = $onerror;
431 $self->{onsubdoc} = $onsubdoc || sub {
432 warn "A subdocument is not conformance-checked";
433 };
434
435 $self->{level} ||= $default_error_level;
436
437 ## TODO: If application/rdf+xml, RDF/XML mode should be invoked.
438
439 my $docel = $doc->document_element;
440 unless (defined $docel) {
441 ## ISSUE: Should we check content of Document node?
442 $onerror->(node => $doc, type => 'no document element',
443 level => $self->{level}->{must});
444 ## ISSUE: Is this non-conforming (to what spec)? Or just a warning?
445 return {
446 class => {},
447 id => {}, table => [], term => {},
448 };
449 }
450
451 ## ISSUE: Unexpanded entity references and HTML5 conformance
452
453 my $docel_nsuri = $docel->namespace_uri;
454 if (defined $docel_nsuri) {
455 load_ns_module ($docel_nsuri);
456 } else {
457 $docel_nsuri = '';
458 }
459 my $docel_def = $Element->{$docel_nsuri}->{$docel->manakai_local_name} ||
460 $Element->{$docel_nsuri}->{''} ||
461 $ElementDefault;
462 if ($docel_def->{is_root}) {
463 #
464 } elsif ($docel_def->{is_xml_root}) {
465 unless ($doc->manakai_is_html) {
466 #
467 } else {
468 $onerror->(node => $docel, type => 'element not allowed:root:xml',
469 level => $self->{level}->{must});
470 }
471 } else {
472 $onerror->(node => $docel, type => 'element not allowed:root',
473 level => $self->{level}->{must});
474 }
475
476 ## TODO: Check for other items other than document element
477 ## (second (errorous) element, text nodes, PI nodes, doctype nodes)
478
479 my $return = $self->check_element ($docel, $onerror, $onsubdoc);
480
481 ## TODO: Test for these checks are necessary.
482 my $charset_name = $doc->input_encoding;
483 if (defined $charset_name) {
484 require Message::Charset::Info;
485 my $charset = $Message::Charset::Info::IANACharset->{$charset_name};
486
487 if ($doc->manakai_is_html) {
488 if (not $doc->manakai_has_bom and
489 not defined $doc->manakai_charset) {
490 unless ($charset->{is_html_ascii_superset}) {
491 $onerror->(node => $doc,
492 level => $self->{level}->{must},
493 type => 'non ascii superset',
494 text => $charset_name);
495 }
496
497 if (not $self->{has_charset} and ## TODO: This does not work now.
498 not $charset->{iana_names}->{'us-ascii'}) {
499 $onerror->(node => $doc,
500 level => $self->{level}->{must},
501 type => 'no character encoding declaration',
502 text => $charset_name);
503 }
504 }
505
506 if ($charset->{iana_names}->{'utf-8'}) {
507 #
508 } elsif ($charset->{iana_names}->{'jis_x0212-1990'} or
509 $charset->{iana_names}->{'x-jis0208'} or
510 $charset->{iana_names}->{'utf-32'} or ## ISSUE: UTF-32BE? UTF-32LE?
511 ($charset->{category} & Message::Charset::Info::CHARSET_CATEGORY_EBCDIC ())) {
512 $onerror->(node => $doc,
513 type => 'bad character encoding',
514 text => $charset_name,
515 level => $self->{level}->{should},
516 layer => 'encode');
517 } elsif ($charset->{iana_names}->{'cesu-8'} or
518 $charset->{iana_names}->{'utf-8'} or ## ISSUE: UNICODE-1-1-UTF-7?
519 $charset->{iana_names}->{'bocu-1'} or
520 $charset->{iana_names}->{'scsu'}) {
521 $onerror->(node => $doc,
522 type => 'disallowed character encoding',
523 text => $charset_name,
524 level => $self->{level}->{must},
525 layer => 'encode');
526 } else {
527 $onerror->(node => $doc,
528 type => 'non-utf-8 character encoding',
529 text => $charset_name,
530 level => $self->{level}->{good},
531 layer => 'encode');
532 }
533 }
534 } elsif ($doc->manakai_is_html) {
535 ## NOTE: MUST and SHOULD requirements above cannot be tested,
536 ## since the document has no input charset encoding information.
537 $onerror->(node => $doc,
538 type => 'character encoding unchecked',
539 level => $self->{level}->{info},
540 layer => 'encode');
541 }
542
543 return $return;
544 } # check_document
545
546 ## Check an element. The element is checked as if it is an orphan node (i.e.
547 ## an element without a parent node).
548 sub check_element ($$$;$) {
549 my ($self, $el, $onerror, $onsubdoc) = @_;
550 $self = bless {}, $self unless ref $self;
551 $self->{onerror} = $onerror;
552 $self->{onsubdoc} = $onsubdoc || sub {
553 warn "A subdocument is not conformance-checked";
554 };
555
556 $self->{level} ||= $default_error_level;
557
558 $self->{plus_elements} = {};
559 $self->{minus_elements} = {};
560 $self->{id} = {};
561 $self->{term} = {};
562 $self->{usemap} = [];
563 $self->{ref} = []; # datetemplate data references
564 $self->{template} = []; # datatemplate template references
565 $self->{contextmenu} = [];
566 $self->{map} = {};
567 $self->{menu} = {};
568 $self->{has_link_type} = {};
569 $self->{flag} = {};
570 #$self->{has_uri_attr};
571 #$self->{has_hyperlink_element};
572 #$self->{has_charset};
573 #$self->{has_base};
574 $self->{return} = {
575 class => {},
576 id => $self->{id},
577 table => [], # table objects returned by Whatpm::HTMLTable
578 term => $self->{term},
579 uri => {}, # URIs other than those in RDF triples
580 ## TODO: xmlns="", SYSTEM "", atom:* src="", xml:base=""
581 rdf => [],
582 };
583
584 my @item = ({type => 'element', node => $el, parent_state => {}});
585 $item[-1]->{real_parent_state} = $item[-1]->{parent_state};
586 while (@item) {
587 my $item = shift @item;
588 if (ref $item eq 'ARRAY') {
589 my $code = shift @$item;
590 next unless $code;## TODO: temp.
591 $code->(@$item);
592 } elsif ($item->{type} eq 'element') {
593 my $el_nsuri = $item->{node}->namespace_uri;
594 if (defined $el_nsuri) {
595 load_ns_module ($el_nsuri);
596 } else {
597 $el_nsuri = '';
598 }
599 my $el_ln = $item->{node}->manakai_local_name;
600
601 my $element_state = {};
602 my $eldef = $Element->{$el_nsuri}->{$el_ln} ||
603 $Element->{$el_nsuri}->{''} ||
604 $ElementDefault;
605 my $content_def = $item->{transparent}
606 ? $item->{parent_def} || $eldef : $eldef;
607 my $content_state = $item->{transparent}
608 ? $item->{parent_def}
609 ? $item->{parent_state} || $element_state : $element_state
610 : $element_state;
611
612 unless ($eldef->{status} & FEATURE_STATUS_REC) {
613 my $status = $eldef->{status} & FEATURE_STATUS_CR ? 'cr' :
614 $eldef->{status} & FEATURE_STATUS_LC ? 'lc' :
615 $eldef->{status} & FEATURE_STATUS_WD ? 'wd' : 'non-standard';
616 $self->{onerror}->(node => $item->{node},
617 type => 'status:'.$status.':element',
618 level => $self->{level}->{info});
619 }
620 if (not ($eldef->{status} & FEATURE_ALLOWED)) {
621 $self->{onerror}->(node => $item->{node},
622 type => 'element not defined',
623 level => $self->{level}->{must});
624 } elsif ($eldef->{status} & FEATURE_DEPRECATED_SHOULD) {
625 $self->{onerror}->(node => $item->{node},
626 type => 'deprecated:element',
627 level => $self->{level}->{should});
628 } elsif ($eldef->{status} & FEATURE_DEPRECATED_INFO) {
629 $self->{onerror}->(node => $item->{node},
630 type => 'deprecated:element',
631 level => $self->{level}->{info});
632 }
633
634 my @new_item;
635 push @new_item, [$eldef->{check_start}, $self, $item, $element_state];
636 push @new_item, [$eldef->{check_attrs}, $self, $item, $element_state];
637
638 my @child = @{$item->{node}->child_nodes};
639 while (@child) {
640 my $child = shift @child;
641 my $child_nt = $child->node_type;
642 if ($child_nt == 1) { # ELEMENT_NODE
643 my $child_nsuri = $child->namespace_uri;
644 $child_nsuri = '' unless defined $child_nsuri;
645 my $child_ln = $child->manakai_local_name;
646 if ($HTMLTransparentElements->{$child_nsuri}->{$child_ln} and
647 not (($self->{flag}->{in_head} or
648 ($el_nsuri eq $HTML_NS and $el_ln eq 'head')) and
649 $child_nsuri eq $HTML_NS and $child_ln eq 'noscript')) {
650 push @new_item, [$content_def->{check_child_element},
651 $self, $item, $child,
652 $child_nsuri, $child_ln, 1,
653 $content_state, $element_state];
654 push @new_item, {type => 'element', node => $child,
655 parent_state => $content_state,
656 parent_def => $content_def,
657 real_parent_state => $element_state,
658 transparent => 1};
659 } else {
660 if ($item->{parent_def} and # has parent
661 $el_nsuri eq $HTML_NS) { ## $HTMLSemiTransparentElements
662 if ($el_ln eq 'object') {
663 if ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
664 #
665 } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
666 #
667 } else {
668 $content_def = $item->{parent_def} || $content_def;
669 $content_state = $item->{parent_state} || $content_state;
670 }
671 } elsif ($el_ln eq 'video' or $el_ln eq 'audio') {
672 if ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
673 #
674 } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'source') {
675 $element_state->{has_source} = 1;
676 } else {
677 $content_def = $item->{parent_def} || $content_def;
678 $content_state = $item->{parent_state} || $content_state;
679 }
680 }
681 }
682
683 push @new_item, [$content_def->{check_child_element},
684 $self, $item, $child,
685 $child_nsuri, $child_ln,
686 $HTMLSemiTransparentElements
687 ->{$child_nsuri}->{$child_ln},
688 $content_state, $element_state];
689 push @new_item, {type => 'element', node => $child,
690 parent_def => $content_def,
691 real_parent_state => $element_state,
692 parent_state => $content_state};
693 }
694
695 if ($HTMLEmbeddedContent->{$child_nsuri}->{$child_ln}) {
696 $element_state->{has_significant} = 1;
697 }
698 } elsif ($child_nt == 3 or # TEXT_NODE
699 $child_nt == 4) { # CDATA_SECTION_NODE
700 my $has_significant = ($child->data =~ /[^\x09-\x0D\x20]/);
701 push @new_item, [$content_def->{check_child_text},
702 $self, $item, $child, $has_significant,
703 $content_state, $element_state];
704 $element_state->{has_significant} ||= $has_significant;
705 if ($has_significant and
706 $HTMLSemiTransparentElements->{$el_nsuri}->{$el_ln}) {
707 $content_def = $item->{parent_def} || $content_def;
708 }
709 } elsif ($child_nt == 5) { # ENTITY_REFERENCE_NODE
710 push @child, @{$child->child_nodes};
711 }
712 ## TODO: PI_NODE
713 ## TODO: Unknown node type
714 }
715
716 push @new_item, [$eldef->{check_end}, $self, $item, $element_state];
717
718 unshift @item, @new_item;
719 } else {
720 die "$0: Internal error: Unsupported checking action type |$item->{type}|";
721 }
722 }
723
724 for (@{$self->{template}}) {
725 ## TODO: If the document is an XML document, ...
726 ## NOTE: If the document is an HTML document:
727 ## ISSUE: We need to percent-decode?
728 F: {
729 if ($self->{id}->{$_->[0]}) {
730 my $el = $self->{id}->{$_->[0]}->[0]->owner_element;
731 if ($el->node_type == 1 and # ELEMENT_NODE
732 $el->manakai_local_name eq 'datatemplate') {
733 my $nsuri = $el->namespace_uri;
734 if (defined $nsuri and $nsuri eq $HTML_NS) {
735 if ($el eq $_->[1]->owner_element) {
736 $self->{onerror}->(node => $_->[1],
737 type => 'fragment points itself',
738 level => $self->{level}->{must});
739 }
740
741 last F;
742 }
743 }
744 }
745 ## TODO: Should we raise a "fragment points nothing" error instead
746 ## if the fragment identifier identifies no element?
747
748 $self->{onerror}->(node => $_->[1], type => 'template:not template',
749 level => $self->{level}->{must});
750 } # F
751 }
752
753 for (@{$self->{ref}}) {
754 ## TOOD: If XML
755 ## NOTE: If it is an HTML document:
756 if ($_->[0] eq '') {
757 ## NOTE: It points the top of the document.
758 } elsif ($self->{id}->{$_->[0]}) {
759 if ($self->{id}->{$_->[0]}->[0]->owner_element
760 eq $_->[1]->owner_element) {
761 $self->{onerror}->(node => $_->[1], type => 'fragment points itself',
762 level => $self->{level}->{must});
763 }
764 } else {
765 $self->{onerror}->(node => $_->[1], type => 'fragment points nothing',
766 level => $self->{level}->{must});
767 }
768 }
769
770 ## TODO: Maybe we should have $document->manakai_get_by_fragment or something
771
772 for (@{$self->{usemap}}) {
773 unless ($self->{map}->{$_->[0]}) {
774 $self->{onerror}->(node => $_->[1], type => 'no referenced map',
775 level => $self->{level}->{must});
776 }
777 }
778
779 for (@{$self->{contextmenu}}) {
780 unless ($self->{menu}->{$_->[0]}) {
781 $self->{onerror}->(node => $_->[1], type => 'no referenced menu',
782 level => $self->{level}->{must});
783 }
784 }
785
786 delete $self->{plus_elements};
787 delete $self->{minus_elements};
788 delete $self->{onerror};
789 delete $self->{id};
790 delete $self->{usemap};
791 delete $self->{ref};
792 delete $self->{template};
793 delete $self->{map};
794 return $self->{return};
795 } # check_element
796
797 sub _add_minus_elements ($$@) {
798 my $self = shift;
799 my $element_state = shift;
800 for my $elements (@_) {
801 for my $nsuri (keys %$elements) {
802 for my $ln (keys %{$elements->{$nsuri}}) {
803 unless ($self->{minus_elements}->{$nsuri}->{$ln}) {
804 $element_state->{minus_elements_original}->{$nsuri}->{$ln} = 0;
805 $self->{minus_elements}->{$nsuri}->{$ln} = 1;
806 }
807 }
808 }
809 }
810 } # _add_minus_elements
811
812 sub _remove_minus_elements ($$) {
813 my $self = shift;
814 my $element_state = shift;
815 for my $nsuri (keys %{$element_state->{minus_elements_original}}) {
816 for my $ln (keys %{$element_state->{minus_elements_original}->{$nsuri}}) {
817 delete $self->{minus_elements}->{$nsuri}->{$ln};
818 }
819 }
820 } # _remove_minus_elements
821
822 sub _add_plus_elements ($$@) {
823 my $self = shift;
824 my $element_state = shift;
825 for my $elements (@_) {
826 for my $nsuri (keys %$elements) {
827 for my $ln (keys %{$elements->{$nsuri}}) {
828 unless ($self->{plus_elements}->{$nsuri}->{$ln}) {
829 $element_state->{plus_elements_original}->{$nsuri}->{$ln} = 0;
830 $self->{plus_elements}->{$nsuri}->{$ln} = 1;
831 }
832 }
833 }
834 }
835 } # _add_plus_elements
836
837 sub _remove_plus_elements ($$) {
838 my $self = shift;
839 my $element_state = shift;
840 for my $nsuri (keys %{$element_state->{plus_elements_original}}) {
841 for my $ln (keys %{$element_state->{plus_elements_original}->{$nsuri}}) {
842 delete $self->{plus_elements}->{$nsuri}->{$ln};
843 }
844 }
845 } # _remove_plus_elements
846
847 sub _attr_status_info ($$$) {
848 my ($self, $attr, $status_code) = @_;
849
850 if (not ($status_code & FEATURE_ALLOWED)) {
851 $self->{onerror}->(node => $attr,
852 type => 'attribute not defined',
853 level => $self->{level}->{must});
854 } elsif ($status_code & FEATURE_DEPRECATED_SHOULD) {
855 $self->{onerror}->(node => $attr,
856 type => 'deprecated:attr',
857 level => $self->{level}->{should});
858 } elsif ($status_code & FEATURE_DEPRECATED_INFO) {
859 $self->{onerror}->(node => $attr,
860 type => 'deprecated:attr',
861 level => $self->{level}->{info});
862 }
863
864 my $status;
865 if ($status_code & FEATURE_STATUS_REC) {
866 return;
867 } elsif ($status_code & FEATURE_STATUS_CR) {
868 $status = 'cr';
869 } elsif ($status_code & FEATURE_STATUS_LC) {
870 $status = 'lc';
871 } elsif ($status_code & FEATURE_STATUS_WD) {
872 $status = 'wd';
873 } else {
874 $status = 'non-standard';
875 }
876 $self->{onerror}->(node => $attr,
877 type => 'status:'.$status.':attr',
878 level => $self->{level}->{info});
879 } # _attr_status_info
880
881 sub _add_minuses ($@) {
882 my $self = shift;
883 my $r = {};
884 for my $list (@_) {
885 for my $ns (keys %$list) {
886 for my $ln (keys %{$list->{$ns}}) {
887 unless ($self->{minuses}->{$ns}->{$ln}) {
888 $self->{minuses}->{$ns}->{$ln} = 1;
889 $r->{$ns}->{$ln} = 1;
890 }
891 }
892 }
893 }
894 return {type => 'plus', list => $r};
895 } # _add_minuses
896
897 sub _add_pluses ($@) {
898 my $self = shift;
899 my $r = {};
900 for my $list (@_) {
901 for my $ns (keys %$list) {
902 for my $ln (keys %{$list->{$ns}}) {
903 unless ($self->{pluses}->{$ns}->{$ln}) {
904 $self->{pluses}->{$ns}->{$ln} = 1;
905 $r->{$ns}->{$ln} = 1;
906 }
907 }
908 }
909 }
910 return {type => 'minus', list => $r};
911 } # _add_pluses
912
913 sub _remove_minuses ($$) {
914 my ($self, $todo) = @_;
915 if ($todo->{type} eq 'minus') {
916 for my $ns (keys %{$todo->{list}}) {
917 for my $ln (keys %{$todo->{list}->{$ns}}) {
918 delete $self->{pluses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
919 }
920 }
921 } elsif ($todo->{type} eq 'plus') {
922 for my $ns (keys %{$todo->{list}}) {
923 for my $ln (keys %{$todo->{list}->{$ns}}) {
924 delete $self->{minuses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
925 }
926 }
927 } else {
928 die "$0: Unknown +- type: $todo->{type}";
929 }
930 1;
931 } # _remove_minuses
932
933 ## NOTE: Priority for "minuses" and "pluses" are currently left
934 ## undefined and implemented inconsistently; it is not a problem for
935 ## now, since no element belongs to both lists.
936
937 sub _check_get_children ($$$) {
938 my ($self, $node, $parent_todo) = @_;
939 my $new_todos = [];
940 my $sib = [];
941 TP: {
942 my $node_ns = $node->namespace_uri;
943 $node_ns = '' unless defined $node_ns;
944 my $node_ln = $node->manakai_local_name;
945 if ($HTMLTransparentElements->{$node_ns}->{$node_ln}) {
946 if ($node_ns eq $HTML_NS and $node_ln eq 'noscript') {
947 if ($parent_todo->{flag}->{in_head}) {
948 #
949 } else {
950 my $end = $self->_add_minuses ({$HTML_NS, {noscript => 1}});
951 push @$sib, $end;
952
953 unshift @$sib, @{$node->child_nodes};
954 push @$new_todos, {type => 'element-attributes', node => $node};
955 last TP;
956 }
957 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'del') {
958 my $sig_flag = $parent_todo->{flag}->{has_descendant}->{significant};
959 unshift @$sib, @{$node->child_nodes};
960 push @$new_todos, {type => 'element-attributes', node => $node};
961 push @$new_todos,
962 {type => 'code',
963 code => sub {
964 $parent_todo->{flag}->{has_descendant}->{significant} = 0
965 if not $sig_flag;
966 }};
967 last TP;
968 } else {
969 unshift @$sib, @{$node->child_nodes};
970 push @$new_todos, {type => 'element-attributes', node => $node};
971 last TP;
972 }
973 }
974 if ($node_ns eq $HTML_NS and ($node_ln eq 'video' or $node_ln eq 'audio')) {
975 if ($node->has_attribute_ns (undef, 'src')) {
976 unshift @$sib, @{$node->child_nodes};
977 push @$new_todos, {type => 'element-attributes', node => $node};
978 last TP;
979 } else {
980 my @cn = @{$node->child_nodes};
981 CN: while (@cn) {
982 my $cn = shift @cn;
983 my $cnt = $cn->node_type;
984 if ($cnt == 1) {
985 my $cn_nsuri = $cn->namespace_uri;
986 $cn_nsuri = '' unless defined $cn_nsuri;
987 if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'source') {
988 #
989 } else {
990 last CN;
991 }
992 } elsif ($cnt == 3 or $cnt == 4) {
993 if ($cn->data =~ /[^\x09-\x0D\x20]/) {
994 last CN;
995 }
996 }
997 } # CN
998 unshift @$sib, @cn;
999 }
1000 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'object') {
1001 my @cn = @{$node->child_nodes};
1002 CN: while (@cn) {
1003 my $cn = shift @cn;
1004 my $cnt = $cn->node_type;
1005 if ($cnt == 1) {
1006 my $cn_nsuri = $cn->namespace_uri;
1007 $cn_nsuri = '' unless defined $cn_nsuri;
1008 if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'param') {
1009 #
1010 } else {
1011 last CN;
1012 }
1013 } elsif ($cnt == 3 or $cnt == 4) {
1014 if ($cn->data =~ /[^\x09-\x0D\x20]/) {
1015 last CN;
1016 }
1017 }
1018 } # CN
1019 unshift @$sib, @cn;
1020 }
1021 push @$new_todos, {type => 'element', node => $node};
1022 } # TP
1023
1024 for my $new_todo (@$new_todos) {
1025 $new_todo->{flag} = {%{$parent_todo->{flag} or {}}};
1026 }
1027
1028 return ($sib, $new_todos);
1029 } # _check_get_children
1030
1031 =head1 LICENSE
1032
1033 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
1034
1035 This library is free software; you can redistribute it
1036 and/or modify it under the same terms as Perl itself.
1037
1038 =cut
1039
1040 1;
1041 # $Date: 2008/09/18 07:42:57 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24