/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.80 - (show annotations) (download)
Tue May 6 08:59:09 2008 UTC (16 years, 6 months ago) by wakaba
Branch: MAIN
Changes since 1.79: +5 -3 lines
++ whatpm/t/ChangeLog	6 May 2008 08:59:04 -0000
2008-05-06  Wakaba  <wakaba@suika.fam.cx>

	* content-model-2.dat: Test data for td/@headers are added.

++ whatpm/Whatpm/ChangeLog	6 May 2008 08:57:07 -0000
	* ContentChecker.pm: Noted that those returned in |table| are
	no longer table elements, but table objects returned
	by Whatpm::HTMLTable.

	* HTMLTable.pm (form_table): Return table element node
	as |$table->{element}|.
	(assign_header): Support for the |headers=""| attribute.

2008-05-06  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ContentChecker/ChangeLog	6 May 2008 08:58:42 -0000
2008-05-06  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm: Invoke |Whatpm::HTMLTable->assign_header| for each
	table object.  Return the table object, not table element.
	The |headers=""| checker for |td| elements are now noop.
	Set the status of |headers=""| attribute as HTML5's one.

1 package Whatpm::ContentChecker;
2 use strict;
3 our $VERSION=do{my @r=(q$Revision: 1.79 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4
5 require Whatpm::URIChecker;
6
7 ## ISSUE: How XML and XML Namespaces conformance can (or cannot)
8 ## be applied to an in-memory representation (i.e. DOM)?
9
10 ## TODO: Conformance of an HTML document with non-html root element.
11
12 ## Stability
13 sub FEATURE_STATUS_REC () { 0b1 } ## Interoperable standard
14 sub FEATURE_STATUS_CR () { 0b10 } ## Call for implementation
15 sub FEATURE_STATUS_LC () { 0b100 } ## Last call for comments
16 sub FEATURE_STATUS_WD () { 0b1000 } ## Working or editor's draft
17
18 ## Deprecated
19 sub FEATURE_DEPRECATED_SHOULD () { 0b100000 } ## SHOULD-level
20 sub FEATURE_DEPRECATED_INFO () { 0b1000000 } ## Does not affect conformance
21
22 ## Conformance
23 sub FEATURE_ALLOWED () { 0b10000 }
24
25 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
26 my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
27 my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
28
29 my $Namespace = {
30 '' => {loaded => 1},
31 q<http://www.w3.org/2005/Atom> => {module => 'Whatpm::ContentChecker::Atom'},
32 q<http://purl.org/syndication/history/1.0>
33 => {module => 'Whatpm::ContentChecker::Atom'},
34 q<http://purl.org/syndication/threading/1.0>
35 => {module => 'Whatpm::ContentChecker::Atom'},
36 $HTML_NS => {module => 'Whatpm::ContentChecker::HTML'},
37 $XML_NS => {loaded => 1},
38 $XMLNS_NS => {loaded => 1},
39 q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {loaded => 1},
40 };
41
42 sub load_ns_module ($) {
43 my $nsuri = shift; # namespace URI or ''
44 unless ($Namespace->{$nsuri}->{loaded}) {
45 if ($Namespace->{$nsuri}->{module}) {
46 eval qq{ require $Namespace->{$nsuri}->{module} } or die $@;
47 } else {
48 $Namespace->{$nsuri}->{loaded} = 1;
49 }
50 }
51 } # load_ns_module
52
53 our $AttrChecker = {
54 $XML_NS => {
55 space => sub {
56 my ($self, $attr) = @_;
57 my $value = $attr->value;
58 if ($value eq 'default' or $value eq 'preserve') {
59 #
60 } else {
61 ## NOTE: An XML "error"
62 $self->{onerror}->(node => $attr, level => 'error',
63 type => 'invalid attribute value');
64 }
65 },
66 lang => sub {
67 my ($self, $attr) = @_;
68 my $value = $attr->value;
69 if ($value eq '') {
70 #
71 } else {
72 require Whatpm::LangTag;
73 Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
74 my %opt = @_;
75 my $type = 'LangTag:'.$opt{type};
76 $type .= ':' . $opt{subtag} if defined $opt{subtag};
77 $self->{onerror}->(node => $attr, type => $type,
78 value => $opt{value}, level => $opt{level});
79 });
80 }
81
82 ## NOTE: "The values of the attribute are language identifiers
83 ## as defined by [IETF RFC 3066], Tags for the Identification
84 ## of Languages, or its successor; in addition, the empty string
85 ## may be specified." ("may" in lower case)
86 ## NOTE: Is an RFC 3066-valid (but RFC 4647-invalid) language tag
87 ## allowed today?
88
89 ## TODO: test data
90
91 if ($attr->owner_document->manakai_is_html) { # MUST NOT
92 $self->{onerror}->(node => $attr, type => 'in HTML:xml:lang');
93 ## TODO: Test data...
94 }
95 },
96 base => sub {
97 my ($self, $attr) = @_;
98 my $value = $attr->value;
99 if ($value =~ /[^\x{0000}-\x{10FFFF}]/) { ## ISSUE: Should we disallow noncharacters?
100 $self->{onerror}->(node => $attr,
101 type => 'invalid attribute value');
102 }
103 ## NOTE: Conformance to URI standard is not checked since there is
104 ## no author requirement on conformance in the XML Base specification.
105 },
106 id => sub {
107 my ($self, $attr) = @_;
108 my $value = $attr->value;
109 $value =~ s/[\x09\x0A\x0D\x20]+/ /g;
110 $value =~ s/^\x20//;
111 $value =~ s/\x20$//;
112 ## TODO: NCName in XML 1.0 or 1.1
113 ## TODO: declared type is ID?
114 if ($self->{id}->{$value}) { ## NOTE: An xml:id error
115 $self->{onerror}->(node => $attr, level => 'error',
116 type => 'duplicate ID');
117 push @{$self->{id}->{$value}}, $attr;
118 } else {
119 $self->{id}->{$value} = [$attr];
120 }
121 },
122 },
123 $XMLNS_NS => {
124 '' => sub {
125 my ($self, $attr) = @_;
126 my $ln = $attr->manakai_local_name;
127 my $value = $attr->value;
128 if ($value eq $XML_NS and $ln ne 'xml') {
129 $self->{onerror}
130 ->(node => $attr, level => 'NC',
131 type => 'Reserved Prefixes and Namespace Names:=xml');
132 } elsif ($value eq $XMLNS_NS) {
133 $self->{onerror}
134 ->(node => $attr, level => 'NC',
135 type => 'Reserved Prefixes and Namespace Names:=xmlns');
136 }
137 if ($ln eq 'xml' and $value ne $XML_NS) {
138 $self->{onerror}
139 ->(node => $attr, level => 'NC',
140 type => 'Reserved Prefixes and Namespace Names:xmlns:xml=');
141 } elsif ($ln eq 'xmlns') {
142 $self->{onerror}
143 ->(node => $attr, level => 'NC',
144 type => 'Reserved Prefixes and Namespace Names:xmlns:xmlns=');
145 }
146 ## TODO: If XML 1.0 and empty
147 },
148 xmlns => sub {
149 my ($self, $attr) = @_;
150 ## TODO: In XML 1.0, URI reference [RFC 3986] or an empty string
151 ## TODO: In XML 1.1, IRI reference [RFC 3987] or an empty string
152 ## TODO: relative references are deprecated
153 my $value = $attr->value;
154 if ($value eq $XML_NS) {
155 $self->{onerror}
156 ->(node => $attr, level => 'NC',
157 type => 'Reserved Prefixes and Namespace Names:=xml');
158 } elsif ($value eq $XMLNS_NS) {
159 $self->{onerror}
160 ->(node => $attr, level => 'NC',
161 type => 'Reserved Prefixes and Namespace Names:=xmlns');
162 }
163 },
164 },
165 };
166
167 ## ISSUE: Should we really allow these attributes?
168 $AttrChecker->{''}->{'xml:space'} = $AttrChecker->{$XML_NS}->{space};
169 $AttrChecker->{''}->{'xml:lang'} = $AttrChecker->{$XML_NS}->{lang};
170 $AttrChecker->{''}->{'xml:base'} = $AttrChecker->{$XML_NS}->{base};
171 $AttrChecker->{''}->{'xml:id'} = $AttrChecker->{$XML_NS}->{id};
172
173 our $AttrStatus;
174
175 for (qw/space lang base id/) {
176 $AttrStatus->{$XML_NS}->{$_} = FEATURE_STATUS_REC | FEATURE_ALLOWED;
177 $AttrStatus->{''}->{"xml:$_"} = FEATURE_STATUS_REC | FEATURE_ALLOWED;
178 ## XML 1.0: FEATURE_STATUS_CR
179 ## XML 1.1: FEATURE_STATUS_REC
180 ## XML Namespaces 1.0: FEATURE_STATUS_CR
181 ## XML Namespaces 1.1: FEATURE_STATUS_REC
182 ## XML Base: FEATURE_STATUS_REC
183 ## xml:id: FEATURE_STATUS_REC
184 }
185
186 $AttrStatus->{$XMLNS_NS}->{''} = FEATURE_STATUS_REC | FEATURE_ALLOWED;
187
188 ## TODO: xsi:schemaLocation for XHTML2 support (very, very low priority)
189
190 our %AnyChecker = (
191 check_start => sub { },
192 check_attrs => sub {
193 my ($self, $item, $element_state) = @_;
194 for my $attr (@{$item->{node}->attributes}) {
195 my $attr_ns = $attr->namespace_uri;
196 $attr_ns = '' unless defined $attr_ns;
197 my $attr_ln = $attr->manakai_local_name;
198
199 load_ns_module ($attr_ns);
200
201 my $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
202 || $AttrChecker->{$attr_ns}->{''};
203 my $status = $AttrStatus->{$attr_ns}->{$attr_ln}
204 || $AttrStatus->{$attr_ns}->{''};
205 if (not defined $status) {
206 $status = FEATURE_ALLOWED;
207 ## NOTE: FEATURE_ALLOWED for all attributes, since the element
208 ## is not supported and therefore "attribute not defined" error
209 ## should not raised (too verbose) and global attributes should be
210 ## allowed anyway (if a global attribute has its specified creteria
211 ## for where it may be specified, then it should be checked in it's
212 ## checker function).
213 }
214 if ($checker) {
215 $checker->($self, $attr);
216 } else {
217 $self->{onerror}->(node => $attr, level => 'unsupported',
218 type => 'attribute');
219 }
220 $self->_attr_status_info ($attr, $status);
221 }
222 },
223 check_child_element => sub {
224 my ($self, $item, $child_el, $child_nsuri, $child_ln,
225 $child_is_transparent, $element_state) = @_;
226 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
227 $self->{onerror}->(node => $child_el,
228 type => 'element not allowed:minus',
229 level => $self->{must_level});
230 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
231 #
232 } else {
233 #
234 }
235 },
236 check_child_text => sub { },
237 check_end => sub {
238 my ($self, $item, $element_state) = @_;
239 if ($element_state->{has_significant}) {
240 $item->{real_parent_state}->{has_significant} = 1;
241 }
242 },
243 );
244
245 our $ElementDefault = {
246 %AnyChecker,
247 status => FEATURE_ALLOWED,
248 ## NOTE: No "element not defined" error - it is not supported anyway.
249 check_start => sub {
250 my ($self, $item, $element_state) = @_;
251 $self->{onerror}->(node => $item->{node}, level => 'unsupported',
252 type => 'element');
253 },
254 };
255
256 our $HTMLEmbeddedContent = {
257 ## NOTE: All embedded content is also phrasing content.
258 $HTML_NS => {
259 img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
260 canvas => 1,
261 },
262 q<http://www.w3.org/1998/Math/MathML> => {math => 1},
263 q<http://www.w3.org/2000/svg> => {svg => 1},
264 ## NOTE: Foreign elements with content (but no metadata) are
265 ## embedded content.
266 };
267
268 my $HTMLTransparentElements = {
269 $HTML_NS => {qw/ins 1 del 1 font 1 noscript 1 canvas 1/},
270 ## NOTE: |html:noscript| is transparent if scripting is disabled
271 ## and not in |head|.
272 };
273
274 my $HTMLSemiTransparentElements = {
275 $HTML_NS => {object => 1, video => 1, audio => 1},
276 };
277
278 our $Element = {};
279
280 $Element->{q<http://www.w3.org/1999/02/22-rdf-syntax-ns#>}->{RDF} = {
281 %AnyChecker,
282 status => FEATURE_STATUS_REC | FEATURE_ALLOWED,
283 is_root => 1, ## ISSUE: Not explicitly allowed for non application/rdf+xml
284 check_start => sub {
285 my ($self, $item, $element_state) = @_;
286 my $triple = [];
287 push @{$self->{return}->{rdf}}, [$item->{node}, $triple];
288 require Whatpm::RDFXML;
289 my $rdf = Whatpm::RDFXML->new;
290 ## TODO: Should we make bnodeid unique in a document?
291 $rdf->{onerror} = $self->{onerror};
292 $rdf->{ontriple} = sub {
293 my %opt = @_;
294 push @$triple,
295 [$opt{node}, $opt{subject}, $opt{predicate}, $opt{object}];
296 if (defined $opt{id}) {
297 push @$triple,
298 [$opt{node},
299 $opt{id},
300 {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#subject>},
301 $opt{subject}];
302 push @$triple,
303 [$opt{node},
304 $opt{id},
305 {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate>},
306 $opt{predicate}];
307 push @$triple,
308 [$opt{node},
309 $opt{id},
310 {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#object>},
311 $opt{object}];
312 push @$triple,
313 [$opt{node},
314 $opt{id},
315 {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>},
316 {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement>}];
317 }
318 };
319 $rdf->convert_rdf_element ($item->{node});
320 },
321 };
322
323 sub check_document ($$$;$) {
324 my ($self, $doc, $onerror, $onsubdoc) = @_;
325 $self = bless {}, $self unless ref $self;
326 $self->{onerror} = $onerror;
327 $self->{onsubdoc} = $onsubdoc || sub {
328 warn "A subdocument is not conformance-checked";
329 };
330
331 $self->{must_level} = 'm';
332 $self->{fact_level} = 'm';
333 $self->{should_level} = 's';
334 $self->{good_level} = 'w';
335 $self->{info_level} = 'i';
336 $self->{unsupported_level} = 'u';
337
338 ## TODO: If application/rdf+xml, RDF/XML mode should be invoked.
339
340 my $docel = $doc->document_element;
341 unless (defined $docel) {
342 ## ISSUE: Should we check content of Document node?
343 $onerror->(node => $doc, type => 'no document element');
344 ## ISSUE: Is this non-conforming (to what spec)? Or just a warning?
345 return {
346 class => {},
347 id => {}, table => [], term => {},
348 };
349 }
350
351 ## ISSUE: Unexpanded entity references and HTML5 conformance
352
353 my $docel_nsuri = $docel->namespace_uri;
354 $docel_nsuri = '' unless defined $docel_nsuri;
355 load_ns_module ($docel_nsuri);
356 my $docel_def = $Element->{$docel_nsuri}->{$docel->manakai_local_name} ||
357 $Element->{$docel_nsuri}->{''} ||
358 $ElementDefault;
359 if ($docel_def->{is_root}) {
360 #
361 } elsif ($docel_def->{is_xml_root}) {
362 unless ($doc->manakai_is_html) {
363 #
364 } else {
365 $onerror->(node => $docel, type => 'element not allowed:root:xml');
366 }
367 } else {
368 $onerror->(node => $docel, type => 'element not allowed:root');
369 }
370
371 ## TODO: Check for other items other than document element
372 ## (second (errorous) element, text nodes, PI nodes, doctype nodes)
373
374 my $return = $self->check_element ($docel, $onerror, $onsubdoc);
375
376 ## TODO: Test for these checks are necessary.
377 my $charset_name = $doc->input_encoding;
378 if (defined $charset_name) {
379 require Message::Charset::Info;
380 my $charset = $Message::Charset::Info::IANACharset->{$charset_name};
381
382 if ($doc->manakai_is_html) {
383 if (not $doc->manakai_has_bom and
384 not defined $doc->manakai_charset) {
385 unless ($charset->{is_html_ascii_superset}) {
386 $onerror->(node => $doc, level => $self->{must_level},
387 type => 'non ascii superset:'.$charset_name);
388 }
389
390 if (not $self->{has_charset} and ## TODO: This does not work now.
391 not $charset->{iana_names}->{'us-ascii'}) {
392 $onerror->(node => $doc, level => $self->{must_level},
393 type => 'no character encoding declaration:'.$charset_name);
394 }
395 }
396
397 if ($charset->{iana_names}->{'utf-8'}) {
398 #
399 } elsif ($charset->{iana_names}->{'jis_x0212-1990'} or
400 $charset->{iana_names}->{'x-jis0208'} or
401 $charset->{iana_names}->{'utf-32'} or ## ISSUE: UTF-32BE? UTF-32LE?
402 $charset->{is_ebcdic_based}) {
403 $onerror->(node => $doc,
404 type => 'character encoding:'.$charset_name,
405 level => $self->{should_level});
406 } elsif ($charset->{iana_names}->{'cesu-8'} or
407 $charset->{iana_names}->{'utf-8'} or ## ISSUE: UNICODE-1-1-UTF-7?
408 $charset->{iana_names}->{'bocu-1'} or
409 $charset->{iana_names}->{'scsu'}) {
410 $onerror->(node => $doc,
411 type => 'character encoding:'.$charset_name,
412 level => $self->{must_level});
413 } else {
414 $onerror->(node => $doc,
415 type => 'character encoding:'.$charset_name,
416 level => $self->{good_level});
417 }
418 }
419 } elsif ($doc->manakai_is_html) {
420 ## NOTE: MUST and SHOULD requirements above cannot be tested,
421 ## since the document has no input charset encoding information.
422 $onerror->(node => $doc,
423 type => 'character encoding:',
424 level => 'unsupported');
425 }
426
427 return $return;
428 } # check_document
429
430 sub check_element ($$$;$) {
431 my ($self, $el, $onerror, $onsubdoc) = @_;
432 $self = bless {}, $self unless ref $self;
433 $self->{onerror} = $onerror;
434 $self->{onsubdoc} = $onsubdoc || sub {
435 warn "A subdocument is not conformance-checked";
436 };
437
438 $self->{must_level} = 'm';
439 $self->{fact_level} = 'm';
440 $self->{should_level} = 's';
441 $self->{good_level} = 'w';
442 $self->{info_level} = 'i';
443 $self->{unsupported_level} = 'u';
444
445 $self->{plus_elements} = {};
446 $self->{minus_elements} = {};
447 $self->{id} = {};
448 $self->{term} = {};
449 $self->{usemap} = [];
450 $self->{ref} = []; # datetemplate data references
451 $self->{template} = []; # datatemplate template references
452 $self->{contextmenu} = [];
453 $self->{map} = {};
454 $self->{menu} = {};
455 $self->{has_link_type} = {};
456 $self->{flag} = {};
457 #$self->{has_uri_attr};
458 #$self->{has_hyperlink_element};
459 #$self->{has_charset};
460 #$self->{has_base};
461 $self->{return} = {
462 class => {},
463 id => $self->{id},
464 table => [], # table objects returned by Whatpm::HTMLTable
465 term => $self->{term},
466 uri => {}, # URIs other than those in RDF triples
467 ## TODO: xmlns="", SYSTEM "", atom:* src="", xml:base=""
468 rdf => [],
469 };
470
471 my @item = ({type => 'element', node => $el, parent_state => {}});
472 $item[-1]->{real_parent_state} = $item[-1]->{parent_state};
473 while (@item) {
474 my $item = shift @item;
475 if (ref $item eq 'ARRAY') {
476 my $code = shift @$item;
477 next unless $code;## TODO: temp.
478 $code->(@$item);
479 } elsif ($item->{type} eq 'element') {
480 my $el_nsuri = $item->{node}->namespace_uri;
481 $el_nsuri = '' unless defined $el_nsuri;
482 my $el_ln = $item->{node}->manakai_local_name;
483
484 load_ns_module ($el_nsuri);
485
486 my $element_state = {};
487 my $eldef = $Element->{$el_nsuri}->{$el_ln} ||
488 $Element->{$el_nsuri}->{''} ||
489 $ElementDefault;
490 my $content_def = $item->{transparent}
491 ? $item->{parent_def} || $eldef : $eldef;
492 my $content_state = $item->{transparent}
493 ? $item->{parent_def}
494 ? $item->{parent_state} || $element_state : $element_state
495 : $element_state;
496
497 unless ($eldef->{status} & FEATURE_STATUS_REC) {
498 my $status = $eldef->{status} & FEATURE_STATUS_CR ? 'cr' :
499 $eldef->{status} & FEATURE_STATUS_LC ? 'lc' :
500 $eldef->{status} & FEATURE_STATUS_WD ? 'wd' : 'non-standard';
501 $self->{onerror}->(node => $item->{node},
502 type => 'status:'.$status.':element',
503 level => $self->{info_level});
504 }
505 if (not ($eldef->{status} & FEATURE_ALLOWED)) {
506 $self->{onerror}->(node => $item->{node},
507 type => 'element not defined',
508 level => $self->{must_level});
509 } elsif ($eldef->{status} & FEATURE_DEPRECATED_SHOULD) {
510 $self->{onerror}->(node => $item->{node},
511 type => 'deprecated:element',
512 level => $self->{should_level});
513 } elsif ($eldef->{status} & FEATURE_DEPRECATED_INFO) {
514 $self->{onerror}->(node => $item->{node},
515 type => 'deprecated:element',
516 level => $self->{info_level});
517 }
518
519 my @new_item;
520 push @new_item, [$eldef->{check_start}, $self, $item, $element_state];
521 push @new_item, [$eldef->{check_attrs}, $self, $item, $element_state];
522
523 my @child = @{$item->{node}->child_nodes};
524 while (@child) {
525 my $child = shift @child;
526 my $child_nt = $child->node_type;
527 if ($child_nt == 1) { # ELEMENT_NODE
528 my $child_nsuri = $child->namespace_uri;
529 $child_nsuri = '' unless defined $child_nsuri;
530 my $child_ln = $child->manakai_local_name;
531 if ($HTMLTransparentElements->{$child_nsuri}->{$child_ln} and
532 not (($self->{flag}->{in_head} or
533 ($el_nsuri eq $HTML_NS and $el_ln eq 'head')) and
534 $child_nsuri eq $HTML_NS and $child_ln eq 'noscript')) {
535 push @new_item, [$content_def->{check_child_element},
536 $self, $item, $child,
537 $child_nsuri, $child_ln, 1,
538 $content_state, $element_state];
539 push @new_item, {type => 'element', node => $child,
540 parent_state => $content_state,
541 parent_def => $content_def,
542 real_parent_state => $element_state,
543 transparent => 1};
544 } else {
545 if ($item->{parent_def} and # has parent
546 $el_nsuri eq $HTML_NS) { ## $HTMLSemiTransparentElements
547 if ($el_ln eq 'object') {
548 if ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
549 #
550 } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
551 #
552 } else {
553 $content_def = $item->{parent_def} || $content_def;
554 $content_state = $item->{parent_state} || $content_state;
555 }
556 } elsif ($el_ln eq 'video' or $el_ln eq 'audio') {
557 if ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
558 #
559 } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'source') {
560 $element_state->{has_source} = 1;
561 } else {
562 $content_def = $item->{parent_def} || $content_def;
563 $content_state = $item->{parent_state} || $content_state;
564 }
565 }
566 }
567
568 push @new_item, [$content_def->{check_child_element},
569 $self, $item, $child,
570 $child_nsuri, $child_ln,
571 $HTMLSemiTransparentElements
572 ->{$child_nsuri}->{$child_ln},
573 $content_state, $element_state];
574 push @new_item, {type => 'element', node => $child,
575 parent_def => $content_def,
576 real_parent_state => $element_state,
577 parent_state => $content_state};
578 }
579
580 if ($HTMLEmbeddedContent->{$child_nsuri}->{$child_ln}) {
581 $element_state->{has_significant} = 1;
582 }
583 } elsif ($child_nt == 3 or # TEXT_NODE
584 $child_nt == 4) { # CDATA_SECTION_NODE
585 my $has_significant = ($child->data =~ /[^\x09-\x0D\x20]/);
586 push @new_item, [$content_def->{check_child_text},
587 $self, $item, $child, $has_significant,
588 $content_state, $element_state];
589 $element_state->{has_significant} ||= $has_significant;
590 if ($has_significant and
591 $HTMLSemiTransparentElements->{$el_nsuri}->{$el_ln}) {
592 $content_def = $item->{parent_def} || $content_def;
593 }
594 } elsif ($child_nt == 5) { # ENTITY_REFERENCE_NODE
595 push @child, @{$child->child_nodes};
596 }
597 ## TODO: PI_NODE
598 ## TODO: Unknown node type
599 }
600
601 push @new_item, [$eldef->{check_end}, $self, $item, $element_state];
602
603 unshift @item, @new_item;
604 } else {
605 die "$0: Internal error: Unsupported checking action type |$item->{type}|";
606 }
607 }
608
609 for (@{$self->{template}}) {
610 ## TODO: If the document is an XML document, ...
611 ## NOTE: If the document is an HTML document:
612 ## ISSUE: We need to percent-decode?
613 F: {
614 if ($self->{id}->{$_->[0]}) {
615 my $el = $self->{id}->{$_->[0]}->[0]->owner_element;
616 if ($el->node_type == 1 and # ELEMENT_NODE
617 $el->manakai_local_name eq 'datatemplate') {
618 my $nsuri = $el->namespace_uri;
619 if (defined $nsuri and $nsuri eq $HTML_NS) {
620 if ($el eq $_->[1]->owner_element) {
621 $self->{onerror}->(node => $_->[1],
622 type => 'fragment points itself',
623 level => $self->{must_level});
624 }
625
626 last F;
627 }
628 }
629 }
630 ## TODO: Should we raise a "fragment points nothing" error instead
631 ## if the fragment identifier identifies no element?
632
633 $self->{onerror}->(node => $_->[1], type => 'template:not template',
634 level => $self->{must_level});
635 } # F
636 }
637
638 for (@{$self->{ref}}) {
639 ## TOOD: If XML
640 ## NOTE: If it is an HTML document:
641 if ($_->[0] eq '') {
642 ## NOTE: It points the top of the document.
643 } elsif ($self->{id}->{$_->[0]}) {
644 if ($self->{id}->{$_->[0]}->[0]->owner_element
645 eq $_->[1]->owner_element) {
646 $self->{onerror}->(node => $_->[1], type => 'fragment points itself',
647 level => $self->{must_level});
648 }
649 } else {
650 $self->{onerror}->(node => $_->[1], type => 'fragment points nothing',
651 level => $self->{must_level});
652 }
653 }
654
655 ## TODO: Maybe we should have $document->manakai_get_by_fragment or something
656
657 for (@{$self->{usemap}}) {
658 unless ($self->{map}->{$_->[0]}) {
659 $self->{onerror}->(node => $_->[1], type => 'no referenced map');
660 }
661 }
662
663 for (@{$self->{contextmenu}}) {
664 unless ($self->{menu}->{$_->[0]}) {
665 $self->{onerror}->(node => $_->[1], type => 'no referenced menu');
666 }
667 }
668
669 delete $self->{plus_elements};
670 delete $self->{minus_elements};
671 delete $self->{onerror};
672 delete $self->{id};
673 delete $self->{usemap};
674 delete $self->{ref};
675 delete $self->{template};
676 delete $self->{map};
677 return $self->{return};
678 } # check_element
679
680 sub _add_minus_elements ($$@) {
681 my $self = shift;
682 my $element_state = shift;
683 for my $elements (@_) {
684 for my $nsuri (keys %$elements) {
685 for my $ln (keys %{$elements->{$nsuri}}) {
686 unless ($self->{minus_elements}->{$nsuri}->{$ln}) {
687 $element_state->{minus_elements_original}->{$nsuri}->{$ln} = 0;
688 $self->{minus_elements}->{$nsuri}->{$ln} = 1;
689 }
690 }
691 }
692 }
693 } # _add_minus_elements
694
695 sub _remove_minus_elements ($$) {
696 my $self = shift;
697 my $element_state = shift;
698 for my $nsuri (keys %{$element_state->{minus_elements_original}}) {
699 for my $ln (keys %{$element_state->{minus_elements_original}->{$nsuri}}) {
700 delete $self->{minus_elements}->{$nsuri}->{$ln};
701 }
702 }
703 } # _remove_minus_elements
704
705 sub _add_plus_elements ($$@) {
706 my $self = shift;
707 my $element_state = shift;
708 for my $elements (@_) {
709 for my $nsuri (keys %$elements) {
710 for my $ln (keys %{$elements->{$nsuri}}) {
711 unless ($self->{plus_elements}->{$nsuri}->{$ln}) {
712 $element_state->{plus_elements_original}->{$nsuri}->{$ln} = 0;
713 $self->{plus_elements}->{$nsuri}->{$ln} = 1;
714 }
715 }
716 }
717 }
718 } # _add_plus_elements
719
720 sub _remove_plus_elements ($$) {
721 my $self = shift;
722 my $element_state = shift;
723 for my $nsuri (keys %{$element_state->{plus_elements_original}}) {
724 for my $ln (keys %{$element_state->{plus_elements_original}->{$nsuri}}) {
725 delete $self->{plus_elements}->{$nsuri}->{$ln};
726 }
727 }
728 } # _remove_plus_elements
729
730 sub _attr_status_info ($$$) {
731 my ($self, $attr, $status_code) = @_;
732
733 if (not ($status_code & FEATURE_ALLOWED)) {
734 $self->{onerror}->(node => $attr,
735 type => 'attribute not defined',
736 level => $self->{must_level});
737 } elsif ($status_code & FEATURE_DEPRECATED_SHOULD) {
738 $self->{onerror}->(node => $attr,
739 type => 'deprecated:attr',
740 level => $self->{should_level});
741 } elsif ($status_code & FEATURE_DEPRECATED_INFO) {
742 $self->{onerror}->(node => $attr,
743 type => 'deprecated:attr',
744 level => $self->{info_level});
745 }
746
747 my $status;
748 if ($status_code & FEATURE_STATUS_REC) {
749 return;
750 } elsif ($status_code & FEATURE_STATUS_CR) {
751 $status = 'cr';
752 } elsif ($status_code & FEATURE_STATUS_LC) {
753 $status = 'lc';
754 } elsif ($status_code & FEATURE_STATUS_WD) {
755 $status = 'wd';
756 } else {
757 $status = 'non-standard';
758 }
759 $self->{onerror}->(node => $attr,
760 type => 'status:'.$status.':attr',
761 level => $self->{info_level});
762 } # _attr_status_info
763
764 sub _add_minuses ($@) {
765 my $self = shift;
766 my $r = {};
767 for my $list (@_) {
768 for my $ns (keys %$list) {
769 for my $ln (keys %{$list->{$ns}}) {
770 unless ($self->{minuses}->{$ns}->{$ln}) {
771 $self->{minuses}->{$ns}->{$ln} = 1;
772 $r->{$ns}->{$ln} = 1;
773 }
774 }
775 }
776 }
777 return {type => 'plus', list => $r};
778 } # _add_minuses
779
780 sub _add_pluses ($@) {
781 my $self = shift;
782 my $r = {};
783 for my $list (@_) {
784 for my $ns (keys %$list) {
785 for my $ln (keys %{$list->{$ns}}) {
786 unless ($self->{pluses}->{$ns}->{$ln}) {
787 $self->{pluses}->{$ns}->{$ln} = 1;
788 $r->{$ns}->{$ln} = 1;
789 }
790 }
791 }
792 }
793 return {type => 'minus', list => $r};
794 } # _add_pluses
795
796 sub _remove_minuses ($$) {
797 my ($self, $todo) = @_;
798 if ($todo->{type} eq 'minus') {
799 for my $ns (keys %{$todo->{list}}) {
800 for my $ln (keys %{$todo->{list}->{$ns}}) {
801 delete $self->{pluses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
802 }
803 }
804 } elsif ($todo->{type} eq 'plus') {
805 for my $ns (keys %{$todo->{list}}) {
806 for my $ln (keys %{$todo->{list}->{$ns}}) {
807 delete $self->{minuses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
808 }
809 }
810 } else {
811 die "$0: Unknown +- type: $todo->{type}";
812 }
813 1;
814 } # _remove_minuses
815
816 ## NOTE: Priority for "minuses" and "pluses" are currently left
817 ## undefined and implemented inconsistently; it is not a problem for
818 ## now, since no element belongs to both lists.
819
820 sub _check_get_children ($$$) {
821 my ($self, $node, $parent_todo) = @_;
822 my $new_todos = [];
823 my $sib = [];
824 TP: {
825 my $node_ns = $node->namespace_uri;
826 $node_ns = '' unless defined $node_ns;
827 my $node_ln = $node->manakai_local_name;
828 if ($HTMLTransparentElements->{$node_ns}->{$node_ln}) {
829 if ($node_ns eq $HTML_NS and $node_ln eq 'noscript') {
830 if ($parent_todo->{flag}->{in_head}) {
831 #
832 } else {
833 my $end = $self->_add_minuses ({$HTML_NS, {noscript => 1}});
834 push @$sib, $end;
835
836 unshift @$sib, @{$node->child_nodes};
837 push @$new_todos, {type => 'element-attributes', node => $node};
838 last TP;
839 }
840 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'del') {
841 my $sig_flag = $parent_todo->{flag}->{has_descendant}->{significant};
842 unshift @$sib, @{$node->child_nodes};
843 push @$new_todos, {type => 'element-attributes', node => $node};
844 push @$new_todos,
845 {type => 'code',
846 code => sub {
847 $parent_todo->{flag}->{has_descendant}->{significant} = 0
848 if not $sig_flag;
849 }};
850 last TP;
851 } else {
852 unshift @$sib, @{$node->child_nodes};
853 push @$new_todos, {type => 'element-attributes', node => $node};
854 last TP;
855 }
856 }
857 if ($node_ns eq $HTML_NS and ($node_ln eq 'video' or $node_ln eq 'audio')) {
858 if ($node->has_attribute_ns (undef, 'src')) {
859 unshift @$sib, @{$node->child_nodes};
860 push @$new_todos, {type => 'element-attributes', node => $node};
861 last TP;
862 } else {
863 my @cn = @{$node->child_nodes};
864 CN: while (@cn) {
865 my $cn = shift @cn;
866 my $cnt = $cn->node_type;
867 if ($cnt == 1) {
868 my $cn_nsuri = $cn->namespace_uri;
869 $cn_nsuri = '' unless defined $cn_nsuri;
870 if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'source') {
871 #
872 } else {
873 last CN;
874 }
875 } elsif ($cnt == 3 or $cnt == 4) {
876 if ($cn->data =~ /[^\x09-\x0D\x20]/) {
877 last CN;
878 }
879 }
880 } # CN
881 unshift @$sib, @cn;
882 }
883 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'object') {
884 my @cn = @{$node->child_nodes};
885 CN: while (@cn) {
886 my $cn = shift @cn;
887 my $cnt = $cn->node_type;
888 if ($cnt == 1) {
889 my $cn_nsuri = $cn->namespace_uri;
890 $cn_nsuri = '' unless defined $cn_nsuri;
891 if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'param') {
892 #
893 } else {
894 last CN;
895 }
896 } elsif ($cnt == 3 or $cnt == 4) {
897 if ($cn->data =~ /[^\x09-\x0D\x20]/) {
898 last CN;
899 }
900 }
901 } # CN
902 unshift @$sib, @cn;
903 }
904 push @$new_todos, {type => 'element', node => $node};
905 } # TP
906
907 for my $new_todo (@$new_todos) {
908 $new_todo->{flag} = {%{$parent_todo->{flag} or {}}};
909 }
910
911 return ($sib, $new_todos);
912 } # _check_get_children
913
914 =head1 LICENSE
915
916 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
917
918 This library is free software; you can redistribute it
919 and/or modify it under the same terms as Perl itself.
920
921 =cut
922
923 1;
924 # $Date: 2008/05/03 14:42:26 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24