/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.75 - (show annotations) (download)
Fri Mar 21 09:44:57 2008 UTC (16 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.74: +3 -2 lines
++ whatpm/Whatpm/ChangeLog	21 Mar 2008 09:44:45 -0000
	* RDFXML.pm: bnodeid implemented.  Relative references
	are now resolved.

2008-03-21  Wakaba  <wakaba@suika.fam.cx>

1 package Whatpm::ContentChecker;
2 use strict;
3 our $VERSION=do{my @r=(q$Revision: 1.74 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4
5 require Whatpm::URIChecker;
6
7 ## ISSUE: How XML and XML Namespaces conformance can (or cannot)
8 ## be applied to an in-memory representation (i.e. DOM)?
9
10 ## TODO: Conformance of an HTML document with non-html root element.
11
12 ## Stability
13 sub FEATURE_STATUS_REC () { 0b1 } ## Interoperable standard
14 sub FEATURE_STATUS_CR () { 0b10 } ## Call for implementation
15 sub FEATURE_STATUS_LC () { 0b100 } ## Last call for comments
16 sub FEATURE_STATUS_WD () { 0b1000 } ## Working or editor's draft
17
18 ## Deprecated
19 sub FEATURE_DEPRECATED_SHOULD () { 0b100000 } ## SHOULD-level
20 sub FEATURE_DEPRECATED_INFO () { 0b1000000 } ## Does not affect conformance
21
22 ## Conformance
23 sub FEATURE_ALLOWED () { 0b10000 }
24
25 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
26 my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
27 my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
28
29 my $Namespace = {
30 q<http://www.w3.org/2005/Atom> => {module => 'Whatpm::ContentChecker::Atom'},
31 q<http://purl.org/syndication/history/1.0>
32 => {module => 'Whatpm::ContentChecker::Atom'},
33 q<http://purl.org/syndication/threading/1.0>
34 => {module => 'Whatpm::ContentChecker::Atom'},
35 $HTML_NS => {module => 'Whatpm::ContentChecker::HTML'},
36 $XML_NS => {loaded => 1},
37 $XMLNS_NS => {loaded => 1},
38 q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {loaded => 1},
39 };
40
41 our $AttrChecker = {
42 $XML_NS => {
43 space => sub {
44 my ($self, $attr) = @_;
45 my $value = $attr->value;
46 if ($value eq 'default' or $value eq 'preserve') {
47 #
48 } else {
49 ## NOTE: An XML "error"
50 $self->{onerror}->(node => $attr, level => 'error',
51 type => 'invalid attribute value');
52 }
53 },
54 lang => sub {
55 my ($self, $attr) = @_;
56 my $value = $attr->value;
57 if ($value eq '') {
58 #
59 } else {
60 require Whatpm::LangTag;
61 Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
62 my %opt = @_;
63 my $type = 'LangTag:'.$opt{type};
64 $type .= ':' . $opt{subtag} if defined $opt{subtag};
65 $self->{onerror}->(node => $attr, type => $type,
66 value => $opt{value}, level => $opt{level});
67 });
68 }
69
70 ## NOTE: "The values of the attribute are language identifiers
71 ## as defined by [IETF RFC 3066], Tags for the Identification
72 ## of Languages, or its successor; in addition, the empty string
73 ## may be specified." ("may" in lower case)
74 ## NOTE: Is an RFC 3066-valid (but RFC 4647-invalid) language tag
75 ## allowed today?
76
77 ## TODO: test data
78
79 if ($attr->owner_document->manakai_is_html) { # MUST NOT
80 $self->{onerror}->(node => $attr, type => 'in HTML:xml:lang');
81 ## TODO: Test data...
82 }
83 },
84 base => sub {
85 my ($self, $attr) = @_;
86 my $value = $attr->value;
87 if ($value =~ /[^\x{0000}-\x{10FFFF}]/) { ## ISSUE: Should we disallow noncharacters?
88 $self->{onerror}->(node => $attr,
89 type => 'invalid attribute value');
90 }
91 ## NOTE: Conformance to URI standard is not checked since there is
92 ## no author requirement on conformance in the XML Base specification.
93 },
94 id => sub {
95 my ($self, $attr) = @_;
96 my $value = $attr->value;
97 $value =~ s/[\x09\x0A\x0D\x20]+/ /g;
98 $value =~ s/^\x20//;
99 $value =~ s/\x20$//;
100 ## TODO: NCName in XML 1.0 or 1.1
101 ## TODO: declared type is ID?
102 if ($self->{id}->{$value}) { ## NOTE: An xml:id error
103 $self->{onerror}->(node => $attr, level => 'error',
104 type => 'duplicate ID');
105 push @{$self->{id}->{$value}}, $attr;
106 } else {
107 $self->{id}->{$value} = [$attr];
108 }
109 },
110 },
111 $XMLNS_NS => {
112 '' => sub {
113 my ($self, $attr) = @_;
114 my $ln = $attr->manakai_local_name;
115 my $value = $attr->value;
116 if ($value eq $XML_NS and $ln ne 'xml') {
117 $self->{onerror}
118 ->(node => $attr, level => 'NC',
119 type => 'Reserved Prefixes and Namespace Names:=xml');
120 } elsif ($value eq $XMLNS_NS) {
121 $self->{onerror}
122 ->(node => $attr, level => 'NC',
123 type => 'Reserved Prefixes and Namespace Names:=xmlns');
124 }
125 if ($ln eq 'xml' and $value ne $XML_NS) {
126 $self->{onerror}
127 ->(node => $attr, level => 'NC',
128 type => 'Reserved Prefixes and Namespace Names:xmlns:xml=');
129 } elsif ($ln eq 'xmlns') {
130 $self->{onerror}
131 ->(node => $attr, level => 'NC',
132 type => 'Reserved Prefixes and Namespace Names:xmlns:xmlns=');
133 }
134 ## TODO: If XML 1.0 and empty
135 },
136 xmlns => sub {
137 my ($self, $attr) = @_;
138 ## TODO: In XML 1.0, URI reference [RFC 3986] or an empty string
139 ## TODO: In XML 1.1, IRI reference [RFC 3987] or an empty string
140 ## TODO: relative references are deprecated
141 my $value = $attr->value;
142 if ($value eq $XML_NS) {
143 $self->{onerror}
144 ->(node => $attr, level => 'NC',
145 type => 'Reserved Prefixes and Namespace Names:=xml');
146 } elsif ($value eq $XMLNS_NS) {
147 $self->{onerror}
148 ->(node => $attr, level => 'NC',
149 type => 'Reserved Prefixes and Namespace Names:=xmlns');
150 }
151 },
152 },
153 };
154
155 ## ISSUE: Should we really allow these attributes?
156 $AttrChecker->{''}->{'xml:space'} = $AttrChecker->{$XML_NS}->{space};
157 $AttrChecker->{''}->{'xml:lang'} = $AttrChecker->{$XML_NS}->{lang};
158 $AttrChecker->{''}->{'xml:base'} = $AttrChecker->{$XML_NS}->{base};
159 $AttrChecker->{''}->{'xml:id'} = $AttrChecker->{$XML_NS}->{id};
160
161 our %AnyChecker = (
162 check_start => sub { },
163 check_attrs => sub {
164 my ($self, $item, $element_state) = @_;
165 for my $attr (@{$item->{node}->attributes}) {
166 my $attr_ns = $attr->namespace_uri;
167 $attr_ns = '' unless defined $attr_ns;
168 my $attr_ln = $attr->manakai_local_name;
169 my $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
170 || $AttrChecker->{$attr_ns}->{''};
171 if ($checker) {
172 $checker->($self, $attr);
173 } else {
174 $self->{onerror}->(node => $attr, level => 'unsupported',
175 type => 'attribute');
176 }
177 }
178 },
179 check_child_element => sub {
180 my ($self, $item, $child_el, $child_nsuri, $child_ln,
181 $child_is_transparent, $element_state) = @_;
182 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
183 $self->{onerror}->(node => $child_el,
184 type => 'element not allowed:minus',
185 level => $self->{must_level});
186 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
187 #
188 } else {
189 #
190 }
191 },
192 check_child_text => sub { },
193 check_end => sub {
194 my ($self, $item, $element_state) = @_;
195 if ($element_state->{has_significant}) {
196 $item->{real_parent_state}->{has_significant} = 1;
197 }
198 },
199 );
200
201 our $ElementDefault = {
202 %AnyChecker,
203 status => FEATURE_ALLOWED,
204 ## NOTE: No "element not defined" error - it is not supported anyway.
205 check_start => sub {
206 my ($self, $item, $element_state) = @_;
207 $self->{onerror}->(node => $item->{node}, level => 'unsupported',
208 type => 'element');
209 },
210 };
211
212 our $HTMLEmbeddedContent = {
213 ## NOTE: All embedded content is also phrasing content.
214 $HTML_NS => {
215 img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
216 canvas => 1,
217 },
218 ## NOTE: MathML is mentioned in the HTML5 spec.
219 q<http://www.w3.org/1998/Math/MathML> => {math => 1},
220 ## NOTE: SVG is mentioned in the HTML5 spec.
221 q<http://www.w3.org/2000/svg> => {svg => 1},
222 ## NOTE: Foreign elements with content (but no metadata) are
223 ## embedded content.
224 };
225
226 my $HTMLTransparentElements = {
227 $HTML_NS => {qw/ins 1 del 1 font 1 noscript 1 canvas 1/},
228 ## NOTE: |html:noscript| is transparent if scripting is disabled
229 ## and not in |head|.
230 };
231
232 my $HTMLSemiTransparentElements = {
233 $HTML_NS => {object => 1, video => 1, audio => 1},
234 };
235
236 our $Element = {};
237
238 $Element->{q<http://www.w3.org/1999/02/22-rdf-syntax-ns#>}->{RDF} = {
239 %AnyChecker,
240 status => FEATURE_STATUS_REC | FEATURE_ALLOWED,
241 is_root => 1, ## ISSUE: Not explicitly allowed for non application/rdf+xml
242 check_start => sub {
243 my ($self, $item, $element_state) = @_;
244 my $triple = [];
245 push @{$self->{return}->{rdf}}, [$item->{node}, $triple];
246 require Whatpm::RDFXML;
247 my $rdf = Whatpm::RDFXML->new;
248 ## TODO: Should we make bnodeid unique in a document?
249 $rdf->{onerror} = $self->{onerror};
250 $rdf->{ontriple} = sub {
251 my %opt = @_;
252 push @$triple,
253 [$opt{node}, $opt{subject}, $opt{predicate}, $opt{object}];
254 if (defined $opt{id}) {
255 push @$triple,
256 [$opt{node},
257 $opt{id},
258 {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#subject>},
259 $opt{subject}];
260 push @$triple,
261 [$opt{node},
262 $opt{id},
263 {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate>},
264 $opt{predicate}];
265 push @$triple,
266 [$opt{node},
267 $opt{id},
268 {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#object>},
269 $opt{object}];
270 push @$triple,
271 [$opt{node},
272 $opt{id},
273 {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>},
274 {uri => q<http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement>}];
275 }
276 };
277 $rdf->convert_rdf_element ($item->{node});
278 },
279 };
280
281 sub check_document ($$$;$) {
282 my ($self, $doc, $onerror, $onsubdoc) = @_;
283 $self = bless {}, $self unless ref $self;
284 $self->{onerror} = $onerror;
285 $self->{onsubdoc} = $onsubdoc || sub {
286 warn "A subdocument is not conformance-checked";
287 };
288
289 $self->{must_level} = 'm';
290 $self->{fact_level} = 'f';
291 $self->{should_level} = 's';
292 $self->{good_level} = 'w';
293 $self->{info_level} = 'i';
294 $self->{unsupported_level} = 'u';
295
296 ## TODO: If application/rdf+xml, RDF/XML mode should be invoked.
297
298 my $docel = $doc->document_element;
299 unless (defined $docel) {
300 ## ISSUE: Should we check content of Document node?
301 $onerror->(node => $doc, type => 'no document element');
302 ## ISSUE: Is this non-conforming (to what spec)? Or just a warning?
303 return {
304 class => {},
305 id => {}, table => [], term => {},
306 };
307 }
308
309 ## ISSUE: Unexpanded entity references and HTML5 conformance
310
311 my $docel_nsuri = $docel->namespace_uri;
312 $docel_nsuri = '' unless defined $docel_nsuri;
313 unless ($Namespace->{$docel_nsuri}->{loaded}) {
314 if ($Namespace->{$docel_nsuri}->{module}) {
315 eval qq{ require $Namespace->{$docel_nsuri}->{module} } or die $@;
316 } else {
317 $Namespace->{$docel_nsuri}->{loaded} = 1;
318 }
319 }
320 my $docel_def = $Element->{$docel_nsuri}->{$docel->manakai_local_name} ||
321 $Element->{$docel_nsuri}->{''} ||
322 $ElementDefault;
323 if ($docel_def->{is_root}) {
324 #
325 } elsif ($docel_def->{is_xml_root}) {
326 unless ($doc->manakai_is_html) {
327 #
328 } else {
329 $onerror->(node => $docel, type => 'element not allowed:root:xml');
330 }
331 } else {
332 $onerror->(node => $docel, type => 'element not allowed:root');
333 }
334
335 ## TODO: Check for other items other than document element
336 ## (second (errorous) element, text nodes, PI nodes, doctype nodes)
337
338 my $return = $self->check_element ($docel, $onerror, $onsubdoc);
339
340 ## TODO: Test for these checks are necessary.
341 my $charset_name = $doc->input_encoding;
342 if (defined $charset_name) {
343 require Message::Charset::Info;
344 my $charset = $Message::Charset::Info::IANACharset->{$charset_name};
345
346 if ($doc->manakai_is_html) {
347 if (not $doc->manakai_has_bom and
348 not defined $doc->manakai_charset) {
349 unless ($charset->{is_html_ascii_superset}) {
350 $onerror->(node => $doc, level => $self->{must_level},
351 type => 'non ascii superset:'.$charset_name);
352 }
353
354 if (not $self->{has_charset} and ## TODO: This does not work now.
355 not $charset->{iana_names}->{'us-ascii'}) {
356 $onerror->(node => $doc, level => $self->{must_level},
357 type => 'no character encoding declaration:'.$charset_name);
358 }
359 }
360
361 if ($charset->{iana_names}->{'utf-8'}) {
362 #
363 } elsif ($charset->{iana_names}->{'jis_x0212-1990'} or
364 $charset->{iana_names}->{'x-jis0208'} or
365 $charset->{iana_names}->{'utf-32'} or ## ISSUE: UTF-32BE? UTF-32LE?
366 $charset->{is_ebcdic_based}) {
367 $onerror->(node => $doc,
368 type => 'character encoding:'.$charset_name,
369 level => $self->{should_level});
370 } elsif ($charset->{iana_names}->{'cesu-8'} or
371 $charset->{iana_names}->{'utf-8'} or ## ISSUE: UNICODE-1-1-UTF-7?
372 $charset->{iana_names}->{'bocu-1'} or
373 $charset->{iana_names}->{'scsu'}) {
374 $onerror->(node => $doc,
375 type => 'character encoding:'.$charset_name,
376 level => $self->{must_level});
377 } else {
378 $onerror->(node => $doc,
379 type => 'character encoding:'.$charset_name,
380 level => $self->{good_level});
381 }
382 }
383 } elsif ($doc->manakai_is_html) {
384 ## NOTE: MUST and SHOULD requirements above cannot be tested,
385 ## since the document has no input charset encoding information.
386 $onerror->(node => $doc,
387 type => 'character encoding:',
388 level => 'unsupported');
389 }
390
391 return $return;
392 } # check_document
393
394 sub check_element ($$$;$) {
395 my ($self, $el, $onerror, $onsubdoc) = @_;
396 $self = bless {}, $self unless ref $self;
397 $self->{onerror} = $onerror;
398 $self->{onsubdoc} = $onsubdoc || sub {
399 warn "A subdocument is not conformance-checked";
400 };
401
402 $self->{must_level} = 'm';
403 $self->{fact_level} = 'f';
404 $self->{should_level} = 's';
405 $self->{good_level} = 'w';
406 $self->{info_level} = 'i';
407 $self->{unsupported_level} = 'u';
408
409 $self->{plus_elements} = {};
410 $self->{minus_elements} = {};
411 $self->{id} = {};
412 $self->{term} = {};
413 $self->{usemap} = [];
414 $self->{contextmenu} = [];
415 $self->{map} = {};
416 $self->{menu} = {};
417 $self->{has_link_type} = {};
418 $self->{flag} = {};
419 #$self->{has_uri_attr};
420 #$self->{has_hyperlink_element};
421 #$self->{has_charset};
422 #$self->{has_base};
423 $self->{return} = {
424 class => {},
425 id => $self->{id}, table => [], term => $self->{term},
426 rdf => [],
427 };
428
429 my @item = ({type => 'element', node => $el, parent_state => {}});
430 $item[-1]->{real_parent_state} = $item[-1]->{parent_state};
431 while (@item) {
432 my $item = shift @item;
433 if (ref $item eq 'ARRAY') {
434 my $code = shift @$item;
435 next unless $code;## TODO: temp.
436 $code->(@$item);
437 } elsif ($item->{type} eq 'element') {
438 my $el_nsuri = $item->{node}->namespace_uri;
439 $el_nsuri = '' unless defined $el_nsuri;
440 my $el_ln = $item->{node}->manakai_local_name;
441
442 unless ($Namespace->{$el_nsuri}->{loaded}) {
443 if ($Namespace->{$el_nsuri}->{module}) {
444 eval qq{ require $Namespace->{$el_nsuri}->{module} } or die $@;
445 } else {
446 $Namespace->{$el_nsuri}->{loaded} = 1;
447 }
448 }
449
450 my $element_state = {};
451 my $eldef = $Element->{$el_nsuri}->{$el_ln} ||
452 $Element->{$el_nsuri}->{''} ||
453 $ElementDefault;
454 my $content_def = $item->{transparent}
455 ? $item->{parent_def} || $eldef : $eldef;
456 my $content_state = $item->{transparent}
457 ? $item->{parent_def}
458 ? $item->{parent_state} || $element_state : $element_state
459 : $element_state;
460
461 unless ($eldef->{status} & FEATURE_STATUS_REC) {
462 my $status = $eldef->{status} & FEATURE_STATUS_CR ? 'cr' :
463 $eldef->{status} & FEATURE_STATUS_LC ? 'lc' :
464 $eldef->{status} & FEATURE_STATUS_WD ? 'wd' : 'non-standard';
465 $self->{onerror}->(node => $item->{node},
466 type => 'status:'.$status.':element',
467 level => $self->{info_level});
468 }
469 if (not ($eldef->{status} & FEATURE_ALLOWED)) {
470 $self->{onerror}->(node => $item->{node},
471 type => 'element not defined',
472 level => $self->{must_level});
473 } elsif ($eldef->{status} & FEATURE_DEPRECATED_SHOULD) {
474 $self->{onerror}->(node => $item->{node},
475 type => 'deprecated:element',
476 level => $self->{should_level});
477 } elsif ($eldef->{status} & FEATURE_DEPRECATED_INFO) {
478 $self->{onerror}->(node => $item->{node},
479 type => 'deprecated:element',
480 level => $self->{info_level});
481 }
482
483 my @new_item;
484 push @new_item, [$eldef->{check_start}, $self, $item, $element_state];
485 push @new_item, [$eldef->{check_attrs}, $self, $item, $element_state];
486
487 my @child = @{$item->{node}->child_nodes};
488 while (@child) {
489 my $child = shift @child;
490 my $child_nt = $child->node_type;
491 if ($child_nt == 1) { # ELEMENT_NODE
492 my $child_nsuri = $child->namespace_uri;
493 $child_nsuri = '' unless defined $child_nsuri;
494 my $child_ln = $child->manakai_local_name;
495 if ($HTMLTransparentElements->{$child_nsuri}->{$child_ln} and
496 not (($self->{flag}->{in_head} or
497 ($el_nsuri eq $HTML_NS and $el_ln eq 'head')) and
498 $child_nsuri eq $HTML_NS and $child_ln eq 'noscript')) {
499 push @new_item, [$content_def->{check_child_element},
500 $self, $item, $child,
501 $child_nsuri, $child_ln, 1,
502 $content_state, $element_state];
503 push @new_item, {type => 'element', node => $child,
504 parent_state => $content_state,
505 parent_def => $content_def,
506 real_parent_state => $element_state,
507 transparent => 1};
508 } else {
509 if ($item->{parent_def} and # has parent
510 $el_nsuri eq $HTML_NS) { ## $HTMLSemiTransparentElements
511 if ($el_ln eq 'object') {
512 if ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
513 #
514 } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
515 #
516 } else {
517 $content_def = $item->{parent_def} || $content_def;
518 $content_state = $item->{parent_state} || $content_state;
519 }
520 } elsif ($el_ln eq 'video' or $el_ln eq 'audio') {
521 if ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
522 #
523 } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'source') {
524 $element_state->{has_source} = 1;
525 } else {
526 $content_def = $item->{parent_def} || $content_def;
527 $content_state = $item->{parent_state} || $content_state;
528 }
529 }
530 }
531
532 push @new_item, [$content_def->{check_child_element},
533 $self, $item, $child,
534 $child_nsuri, $child_ln,
535 $HTMLSemiTransparentElements
536 ->{$child_nsuri}->{$child_ln},
537 $content_state, $element_state];
538 push @new_item, {type => 'element', node => $child,
539 parent_def => $content_def,
540 real_parent_state => $element_state,
541 parent_state => $content_state};
542 }
543
544 if ($HTMLEmbeddedContent->{$child_nsuri}->{$child_ln}) {
545 $element_state->{has_significant} = 1;
546 }
547 } elsif ($child_nt == 3 or # TEXT_NODE
548 $child_nt == 4) { # CDATA_SECTION_NODE
549 my $has_significant = ($child->data =~ /[^\x09-\x0D\x20]/);
550 push @new_item, [$content_def->{check_child_text},
551 $self, $item, $child, $has_significant,
552 $content_state, $element_state];
553 $element_state->{has_significant} ||= $has_significant;
554 if ($has_significant and
555 $HTMLSemiTransparentElements->{$el_nsuri}->{$el_ln}) {
556 $content_def = $item->{parent_def} || $content_def;
557 }
558 } elsif ($child_nt == 5) { # ENTITY_REFERENCE_NODE
559 push @child, @{$child->child_nodes};
560 }
561 ## TODO: PI_NODE
562 ## TODO: Unknown node type
563 }
564
565 push @new_item, [$eldef->{check_end}, $self, $item, $element_state];
566
567 unshift @item, @new_item;
568 } else {
569 die "$0: Internal error: Unsupported checking action type |$item->{type}|";
570 }
571 }
572
573 for (@{$self->{usemap}}) {
574 unless ($self->{map}->{$_->[0]}) {
575 $self->{onerror}->(node => $_->[1], type => 'no referenced map');
576 }
577 }
578
579 for (@{$self->{contextmenu}}) {
580 unless ($self->{menu}->{$_->[0]}) {
581 $self->{onerror}->(node => $_->[1], type => 'no referenced menu');
582 }
583 }
584
585 delete $self->{plus_elements};
586 delete $self->{minus_elements};
587 delete $self->{onerror};
588 delete $self->{id};
589 delete $self->{usemap};
590 delete $self->{map};
591 return $self->{return};
592 } # check_element
593
594 sub _add_minus_elements ($$@) {
595 my $self = shift;
596 my $element_state = shift;
597 for my $elements (@_) {
598 for my $nsuri (keys %$elements) {
599 for my $ln (keys %{$elements->{$nsuri}}) {
600 unless ($self->{minus_elements}->{$nsuri}->{$ln}) {
601 $element_state->{minus_elements_original}->{$nsuri}->{$ln} = 0;
602 $self->{minus_elements}->{$nsuri}->{$ln} = 1;
603 }
604 }
605 }
606 }
607 } # _add_minus_elements
608
609 sub _remove_minus_elements ($$) {
610 my $self = shift;
611 my $element_state = shift;
612 for my $nsuri (keys %{$element_state->{minus_elements_original}}) {
613 for my $ln (keys %{$element_state->{minus_elements_original}->{$nsuri}}) {
614 delete $self->{minus_elements}->{$nsuri}->{$ln};
615 }
616 }
617 } # _remove_minus_elements
618
619 sub _add_plus_elements ($$@) {
620 my $self = shift;
621 my $element_state = shift;
622 for my $elements (@_) {
623 for my $nsuri (keys %$elements) {
624 for my $ln (keys %{$elements->{$nsuri}}) {
625 unless ($self->{plus_elements}->{$nsuri}->{$ln}) {
626 $element_state->{plus_elements_original}->{$nsuri}->{$ln} = 0;
627 $self->{plus_elements}->{$nsuri}->{$ln} = 1;
628 }
629 }
630 }
631 }
632 } # _add_plus_elements
633
634 sub _remove_plus_elements ($$) {
635 my $self = shift;
636 my $element_state = shift;
637 for my $nsuri (keys %{$element_state->{plus_elements_original}}) {
638 for my $ln (keys %{$element_state->{plus_elements_original}->{$nsuri}}) {
639 delete $self->{plus_elements}->{$nsuri}->{$ln};
640 }
641 }
642 } # _remove_plus_elements
643
644 sub _attr_status_info ($$$) {
645 my ($self, $attr, $status_code) = @_;
646
647 if (not ($status_code & FEATURE_ALLOWED)) {
648 $self->{onerror}->(node => $attr,
649 type => 'attribute not defined',
650 level => $self->{must_level});
651 } elsif ($status_code & FEATURE_DEPRECATED_SHOULD) {
652 $self->{onerror}->(node => $attr,
653 type => 'deprecated:attr',
654 level => $self->{should_level});
655 } elsif ($status_code & FEATURE_DEPRECATED_INFO) {
656 $self->{onerror}->(node => $attr,
657 type => 'deprecated:attr',
658 level => $self->{info_level});
659 }
660
661 my $status;
662 if ($status_code & FEATURE_STATUS_REC) {
663 return;
664 } elsif ($status_code & FEATURE_STATUS_CR) {
665 $status = 'cr';
666 } elsif ($status_code & FEATURE_STATUS_LC) {
667 $status = 'lc';
668 } elsif ($status_code & FEATURE_STATUS_WD) {
669 $status = 'wd';
670 } else {
671 $status = 'non-standard';
672 }
673 $self->{onerror}->(node => $attr,
674 type => 'status:'.$status.':attr',
675 level => $self->{info_level});
676 } # _attr_status_info
677
678 sub _add_minuses ($@) {
679 my $self = shift;
680 my $r = {};
681 for my $list (@_) {
682 for my $ns (keys %$list) {
683 for my $ln (keys %{$list->{$ns}}) {
684 unless ($self->{minuses}->{$ns}->{$ln}) {
685 $self->{minuses}->{$ns}->{$ln} = 1;
686 $r->{$ns}->{$ln} = 1;
687 }
688 }
689 }
690 }
691 return {type => 'plus', list => $r};
692 } # _add_minuses
693
694 sub _add_pluses ($@) {
695 my $self = shift;
696 my $r = {};
697 for my $list (@_) {
698 for my $ns (keys %$list) {
699 for my $ln (keys %{$list->{$ns}}) {
700 unless ($self->{pluses}->{$ns}->{$ln}) {
701 $self->{pluses}->{$ns}->{$ln} = 1;
702 $r->{$ns}->{$ln} = 1;
703 }
704 }
705 }
706 }
707 return {type => 'minus', list => $r};
708 } # _add_pluses
709
710 sub _remove_minuses ($$) {
711 my ($self, $todo) = @_;
712 if ($todo->{type} eq 'minus') {
713 for my $ns (keys %{$todo->{list}}) {
714 for my $ln (keys %{$todo->{list}->{$ns}}) {
715 delete $self->{pluses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
716 }
717 }
718 } elsif ($todo->{type} eq 'plus') {
719 for my $ns (keys %{$todo->{list}}) {
720 for my $ln (keys %{$todo->{list}->{$ns}}) {
721 delete $self->{minuses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
722 }
723 }
724 } else {
725 die "$0: Unknown +- type: $todo->{type}";
726 }
727 1;
728 } # _remove_minuses
729
730 ## NOTE: Priority for "minuses" and "pluses" are currently left
731 ## undefined and implemented inconsistently; it is not a problem for
732 ## now, since no element belongs to both lists.
733
734 sub _check_get_children ($$$) {
735 my ($self, $node, $parent_todo) = @_;
736 my $new_todos = [];
737 my $sib = [];
738 TP: {
739 my $node_ns = $node->namespace_uri;
740 $node_ns = '' unless defined $node_ns;
741 my $node_ln = $node->manakai_local_name;
742 if ($HTMLTransparentElements->{$node_ns}->{$node_ln}) {
743 if ($node_ns eq $HTML_NS and $node_ln eq 'noscript') {
744 if ($parent_todo->{flag}->{in_head}) {
745 #
746 } else {
747 my $end = $self->_add_minuses ({$HTML_NS, {noscript => 1}});
748 push @$sib, $end;
749
750 unshift @$sib, @{$node->child_nodes};
751 push @$new_todos, {type => 'element-attributes', node => $node};
752 last TP;
753 }
754 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'del') {
755 my $sig_flag = $parent_todo->{flag}->{has_descendant}->{significant};
756 unshift @$sib, @{$node->child_nodes};
757 push @$new_todos, {type => 'element-attributes', node => $node};
758 push @$new_todos,
759 {type => 'code',
760 code => sub {
761 $parent_todo->{flag}->{has_descendant}->{significant} = 0
762 if not $sig_flag;
763 }};
764 last TP;
765 } else {
766 unshift @$sib, @{$node->child_nodes};
767 push @$new_todos, {type => 'element-attributes', node => $node};
768 last TP;
769 }
770 }
771 if ($node_ns eq $HTML_NS and ($node_ln eq 'video' or $node_ln eq 'audio')) {
772 if ($node->has_attribute_ns (undef, 'src')) {
773 unshift @$sib, @{$node->child_nodes};
774 push @$new_todos, {type => 'element-attributes', node => $node};
775 last TP;
776 } else {
777 my @cn = @{$node->child_nodes};
778 CN: while (@cn) {
779 my $cn = shift @cn;
780 my $cnt = $cn->node_type;
781 if ($cnt == 1) {
782 my $cn_nsuri = $cn->namespace_uri;
783 $cn_nsuri = '' unless defined $cn_nsuri;
784 if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'source') {
785 #
786 } else {
787 last CN;
788 }
789 } elsif ($cnt == 3 or $cnt == 4) {
790 if ($cn->data =~ /[^\x09-\x0D\x20]/) {
791 last CN;
792 }
793 }
794 } # CN
795 unshift @$sib, @cn;
796 }
797 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'object') {
798 my @cn = @{$node->child_nodes};
799 CN: while (@cn) {
800 my $cn = shift @cn;
801 my $cnt = $cn->node_type;
802 if ($cnt == 1) {
803 my $cn_nsuri = $cn->namespace_uri;
804 $cn_nsuri = '' unless defined $cn_nsuri;
805 if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'param') {
806 #
807 } else {
808 last CN;
809 }
810 } elsif ($cnt == 3 or $cnt == 4) {
811 if ($cn->data =~ /[^\x09-\x0D\x20]/) {
812 last CN;
813 }
814 }
815 } # CN
816 unshift @$sib, @cn;
817 }
818 push @$new_todos, {type => 'element', node => $node};
819 } # TP
820
821 for my $new_todo (@$new_todos) {
822 $new_todo->{flag} = {%{$parent_todo->{flag} or {}}};
823 }
824
825 return ($sib, $new_todos);
826 } # _check_get_children
827
828 =head1 LICENSE
829
830 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
831
832 This library is free software; you can redistribute it
833 and/or modify it under the same terms as Perl itself.
834
835 =cut
836
837 1;
838 # $Date: 2008/03/21 09:18:40 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24