/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.63 - (show annotations) (download)
Sat Feb 23 15:24:49 2008 UTC (16 years, 8 months ago) by wakaba
Branch: MAIN
Changes since 1.62: +11 -6 lines
++ whatpm/t/ChangeLog	23 Feb 2008 15:24:46 -0000
	* content-model-1.dat: Some test results are updated.
	New tests for |details| are added.

2008-02-23  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	23 Feb 2008 15:22:12 -0000
	* ContentChecker.pm (check_element): In-element state
	was not properly managed for transparent cases.

2008-02-23  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ContentChecker/ChangeLog	23 Feb 2008 15:24:01 -0000
	* HTML.pm (%HTMLProseContentChecker, details, figure): Don't
	change state when a transparent element is encountered.
	(details check): Reimplemented.

2008-02-23  Wakaba  <wakaba@suika.fam.cx>

1 package Whatpm::ContentChecker;
2 use strict;
3 our $VERSION=do{my @r=(q$Revision: 1.62 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4
5 require Whatpm::URIChecker;
6
7 ## ISSUE: How XML and XML Namespaces conformance can (or cannot)
8 ## be applied to an in-memory representation (i.e. DOM)?
9
10 ## TODO: Conformance of an HTML document with non-html root element.
11
12 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
13 my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
14 my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
15
16 my $Namespace = {
17 q<http://www.w3.org/2005/Atom> => {module => 'Whatpm::ContentChecker::Atom'},
18 $HTML_NS => {module => 'Whatpm::ContentChecker::HTML'},
19 $XML_NS => {loaded => 1},
20 $XMLNS_NS => {loaded => 1},
21 };
22
23 our $AttrChecker = {
24 $XML_NS => {
25 space => sub {
26 my ($self, $attr) = @_;
27 my $value = $attr->value;
28 if ($value eq 'default' or $value eq 'preserve') {
29 #
30 } else {
31 ## NOTE: An XML "error"
32 $self->{onerror}->(node => $attr, level => 'error',
33 type => 'invalid attribute value');
34 }
35 },
36 lang => sub {
37 my ($self, $attr) = @_;
38 my $value = $attr->value;
39 if ($value eq '') {
40 #
41 } else {
42 require Whatpm::LangTag;
43 Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
44 my %opt = @_;
45 my $type = 'LangTag:'.$opt{type};
46 $type .= ':' . $opt{subtag} if defined $opt{subtag};
47 $self->{onerror}->(node => $attr, type => $type,
48 value => $opt{value}, level => $opt{level});
49 });
50 }
51
52 ## NOTE: "The values of the attribute are language identifiers
53 ## as defined by [IETF RFC 3066], Tags for the Identification
54 ## of Languages, or its successor; in addition, the empty string
55 ## may be specified." ("may" in lower case)
56 ## NOTE: Is an RFC 3066-valid (but RFC 4647-invalid) language tag
57 ## allowed today?
58
59 ## TODO: test data
60
61 if ($attr->owner_document->manakai_is_html) { # MUST NOT
62 $self->{onerror}->(node => $attr, type => 'in HTML:xml:lang');
63 ## TODO: Test data...
64 }
65 },
66 base => sub {
67 my ($self, $attr) = @_;
68 my $value = $attr->value;
69 if ($value =~ /[^\x{0000}-\x{10FFFF}]/) { ## ISSUE: Should we disallow noncharacters?
70 $self->{onerror}->(node => $attr,
71 type => 'invalid attribute value');
72 }
73 ## NOTE: Conformance to URI standard is not checked since there is
74 ## no author requirement on conformance in the XML Base specification.
75 },
76 id => sub {
77 my ($self, $attr) = @_;
78 my $value = $attr->value;
79 $value =~ s/[\x09\x0A\x0D\x20]+/ /g;
80 $value =~ s/^\x20//;
81 $value =~ s/\x20$//;
82 ## TODO: NCName in XML 1.0 or 1.1
83 ## TODO: declared type is ID?
84 if ($self->{id}->{$value}) { ## NOTE: An xml:id error
85 $self->{onerror}->(node => $attr, level => 'error',
86 type => 'duplicate ID');
87 push @{$self->{id}->{$value}}, $attr;
88 } else {
89 $self->{id}->{$value} = [$attr];
90 }
91 },
92 },
93 $XMLNS_NS => {
94 '' => sub {
95 my ($self, $attr) = @_;
96 my $ln = $attr->manakai_local_name;
97 my $value = $attr->value;
98 if ($value eq $XML_NS and $ln ne 'xml') {
99 $self->{onerror}
100 ->(node => $attr, level => 'NC',
101 type => 'Reserved Prefixes and Namespace Names:=xml');
102 } elsif ($value eq $XMLNS_NS) {
103 $self->{onerror}
104 ->(node => $attr, level => 'NC',
105 type => 'Reserved Prefixes and Namespace Names:=xmlns');
106 }
107 if ($ln eq 'xml' and $value ne $XML_NS) {
108 $self->{onerror}
109 ->(node => $attr, level => 'NC',
110 type => 'Reserved Prefixes and Namespace Names:xmlns:xml=');
111 } elsif ($ln eq 'xmlns') {
112 $self->{onerror}
113 ->(node => $attr, level => 'NC',
114 type => 'Reserved Prefixes and Namespace Names:xmlns:xmlns=');
115 }
116 ## TODO: If XML 1.0 and empty
117 },
118 xmlns => sub {
119 my ($self, $attr) = @_;
120 ## TODO: In XML 1.0, URI reference [RFC 3986] or an empty string
121 ## TODO: In XML 1.1, IRI reference [RFC 3987] or an empty string
122 ## TODO: relative references are deprecated
123 my $value = $attr->value;
124 if ($value eq $XML_NS) {
125 $self->{onerror}
126 ->(node => $attr, level => 'NC',
127 type => 'Reserved Prefixes and Namespace Names:=xml');
128 } elsif ($value eq $XMLNS_NS) {
129 $self->{onerror}
130 ->(node => $attr, level => 'NC',
131 type => 'Reserved Prefixes and Namespace Names:=xmlns');
132 }
133 },
134 },
135 };
136
137 ## ISSUE: Should we really allow these attributes?
138 $AttrChecker->{''}->{'xml:space'} = $AttrChecker->{$XML_NS}->{space};
139 $AttrChecker->{''}->{'xml:lang'} = $AttrChecker->{$XML_NS}->{lang};
140 $AttrChecker->{''}->{'xml:base'} = $AttrChecker->{$XML_NS}->{base};
141 $AttrChecker->{''}->{'xml:id'} = $AttrChecker->{$XML_NS}->{id};
142
143 our %AnyChecker = (
144 check_start => sub { },
145 check_attrs => sub {
146 my ($self, $item, $element_state) = @_;
147 for my $attr (@{$item->{node}->attributes}) {
148 my $attr_ns = $attr->namespace_uri;
149 $attr_ns = '' unless defined $attr_ns;
150 my $attr_ln = $attr->manakai_local_name;
151 my $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
152 || $AttrChecker->{$attr_ns}->{''};
153 if ($checker) {
154 $checker->($self, $attr);
155 } else {
156 $self->{onerror}->(node => $attr, level => 'unsupported',
157 type => 'attribute');
158 }
159 }
160 },
161 check_child_element => sub {
162 my ($self, $item, $child_el, $child_nsuri, $child_ln,
163 $child_is_transparent, $element_state) = @_;
164 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
165 $self->{onerror}->(node => $child_el,
166 type => 'element not allowed:minus',
167 level => $self->{must_level});
168 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
169 #
170 } else {
171 #
172 }
173 },
174 check_child_text => sub { },
175 check_end => sub {
176 my ($self, $item, $element_state) = @_;
177 if ($element_state->{has_significant}) {
178 $item->{parent_state}->{has_significant} = 1;
179 }
180 },
181 );
182
183 our $ElementDefault = {
184 %AnyChecker,
185 check_start => sub {
186 my ($self, $item, $element_state) = @_;
187 $self->{onerror}->(node => $item->{node}, level => 'unsupported',
188 type => 'element');
189 },
190 };
191
192 our $HTMLEmbeddedContent = {
193 ## NOTE: All embedded content is also phrasing content.
194 $HTML_NS => {
195 img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
196 canvas => 1,
197 },
198 ## NOTE: MathML is mentioned in the HTML5 spec.
199 q<http://www.w3.org/1998/Math/MathML> => {math => 1},
200 ## NOTE: SVG is mentioned in the HTML5 spec.
201 q<http://www.w3.org/2000/svg> => {svg => 1},
202 ## NOTE: Foreign elements with content (but no metadata) are
203 ## embedded content.
204 };
205
206 my $HTMLTransparentElements = {
207 $HTML_NS => {qw/ins 1 del 1 font 1 noscript 1 canvas 1/},
208 ## NOTE: |html:noscript| is transparent if scripting is disabled
209 ## and not in |head|.
210 };
211
212 my $HTMLSemiTransparentElements = {
213 $HTML_NS => {object => 1, video => 1, audio => 1},
214 };
215
216 our $Element = {};
217
218 sub check_document ($$$;$) {
219 my ($self, $doc, $onerror, $onsubdoc) = @_;
220 $self = bless {}, $self unless ref $self;
221 $self->{onerror} = $onerror;
222 $self->{onsubdoc} = $onsubdoc || sub {
223 warn "A subdocument is not conformance-checked";
224 };
225
226 $self->{must_level} = 'm';
227 $self->{fact_level} = 'f';
228 $self->{should_level} = 's';
229 $self->{good_level} = 'w';
230 $self->{unsupported_lavel} = 'u';
231
232 my $docel = $doc->document_element;
233 unless (defined $docel) {
234 ## ISSUE: Should we check content of Document node?
235 $onerror->(node => $doc, type => 'no document element');
236 ## ISSUE: Is this non-conforming (to what spec)? Or just a warning?
237 return {
238 class => {},
239 id => {}, table => [], term => {},
240 };
241 }
242
243 ## ISSUE: Unexpanded entity references and HTML5 conformance
244
245 my $docel_nsuri = $docel->namespace_uri;
246 $docel_nsuri = '' unless defined $docel_nsuri;
247 unless ($Namespace->{$docel_nsuri}->{loaded}) {
248 if ($Namespace->{$docel_nsuri}->{module}) {
249 eval qq{ require $Namespace->{$docel_nsuri}->{module} } or die $@;
250 } else {
251 $Namespace->{$docel_nsuri}->{loaded} = 1;
252 }
253 }
254 my $docel_def = $Element->{$docel_nsuri}->{$docel->manakai_local_name} ||
255 $Element->{$docel_nsuri}->{''} ||
256 $ElementDefault;
257 if ($docel_def->{is_root}) {
258 #
259 } elsif ($docel_def->{is_xml_root}) {
260 unless ($doc->manakai_is_html) {
261 #
262 } else {
263 $onerror->(node => $docel, type => 'element not allowed:root:xml');
264 }
265 } else {
266 $onerror->(node => $docel, type => 'element not allowed:root');
267 }
268
269 ## TODO: Check for other items other than document element
270 ## (second (errorous) element, text nodes, PI nodes, doctype nodes)
271
272 my $return = $self->check_element ($docel, $onerror, $onsubdoc);
273
274 ## TODO: Test for these checks are necessary.
275 my $charset_name = $doc->input_encoding;
276 if (defined $charset_name) {
277 require Message::Charset::Info;
278 my $charset = $Message::Charset::Info::IANACharset->{$charset_name};
279
280 if ($doc->manakai_is_html and
281 not $doc->manakai_has_bom and
282 not defined $doc->manakai_charset) {
283 unless ($charset->{is_html_ascii_superset}) {
284 $onerror->(node => $doc, level => $self->{must_level},
285 type => 'non ascii superset:'.$charset_name);
286 }
287
288 if (not $self->{has_charset} and
289 not $charset->{iana_names}->{'us-ascii'}) {
290 $onerror->(node => $doc, level => $self->{must_level},
291 type => 'no character encoding declaration:'.$charset_name);
292 }
293 }
294
295 if ($charset->{iana_names}->{'utf-8'}) {
296 #
297 } elsif ($charset->{iana_names}->{'jis_x0212-1990'} or
298 $charset->{iana_names}->{'x-jis0208'} or
299 $charset->{iana_names}->{'utf-32'} or ## ISSUE: UTF-32BE? UTF-32LE?
300 $charset->{is_ebcdic_based}) {
301 $onerror->(node => $doc,
302 type => 'character encoding:'.$charset_name,
303 level => $self->{should_level});
304 } elsif ($charset->{iana_names}->{'cesu-8'} or
305 $charset->{iana_names}->{'utf-8'} or ## ISSUE: UNICODE-1-1-UTF-7?
306 $charset->{iana_names}->{'bocu-1'} or
307 $charset->{iana_names}->{'scsu'}) {
308 $onerror->(node => $doc,
309 type => 'character encoding:'.$charset_name,
310 level => $self->{must_level});
311 } else {
312 $onerror->(node => $doc,
313 type => 'character encoding:'.$charset_name,
314 level => $self->{good_level});
315 }
316 } elsif ($doc->manakai_is_html) {
317 ## NOTE: MUST and SHOULD requirements above cannot be tested,
318 ## since the document has no input charset encoding information.
319 $onerror->(node => $doc,
320 type => 'character encoding:',
321 level => 'unsupported');
322 }
323
324 return $return;
325 } # check_document
326
327 sub check_element ($$$;$) {
328 my ($self, $el, $onerror, $onsubdoc) = @_;
329 $self = bless {}, $self unless ref $self;
330 $self->{onerror} = $onerror;
331 $self->{onsubdoc} = $onsubdoc || sub {
332 warn "A subdocument is not conformance-checked";
333 };
334
335 $self->{must_level} = 'm';
336 $self->{fact_level} = 'f';
337 $self->{should_level} = 's';
338 $self->{good_level} = 'w';
339 $self->{unsupported_lavel} = 'u';
340
341 $self->{plus_elements} = {};
342 $self->{minus_elements} = {};
343 $self->{id} = {};
344 $self->{term} = {};
345 $self->{usemap} = [];
346 $self->{contextmenu} = [];
347 $self->{map} = {};
348 $self->{menu} = {};
349 $self->{has_link_type} = {};
350 $self->{flag} = {};
351 #$self->{has_uri_attr};
352 #$self->{has_hyperlink_element};
353 #$self->{has_charset};
354 #$self->{has_base};
355 $self->{return} = {
356 class => {},
357 id => $self->{id}, table => [], term => $self->{term},
358 };
359
360 my @item = ({type => 'element', node => $el, parent_state => {}});
361 while (@item) {
362 my $item = shift @item;
363 if (ref $item eq 'ARRAY') {
364 my $code = shift @$item;
365 next unless $code;## TODO: temp.
366 $code->(@$item);
367 } elsif ($item->{type} eq 'element') {
368 my $el_nsuri = $item->{node}->namespace_uri;
369 $el_nsuri = '' unless defined $el_nsuri;
370 my $el_ln = $item->{node}->manakai_local_name;
371
372 unless ($Namespace->{$el_nsuri}->{loaded}) {
373 if ($Namespace->{$el_nsuri}->{module}) {
374 eval qq{ require $Namespace->{$el_nsuri}->{module} } or die $@;
375 } else {
376 $Namespace->{$el_nsuri}->{loaded} = 1;
377 }
378 }
379
380 my $element_state = {};
381 my $eldef = $Element->{$el_nsuri}->{$el_ln} ||
382 $Element->{$el_nsuri}->{''} ||
383 $ElementDefault;
384 my $content_def = $item->{transparent}
385 ? $item->{parent_def} || $eldef : $eldef;
386 my $content_state = $item->{transparent}
387 ? $item->{parent_state} || $element_state : $element_state;
388
389 my @new_item;
390 push @new_item, [$eldef->{check_start}, $self, $item, $element_state];
391 push @new_item, [$eldef->{check_attrs}, $self, $item, $element_state];
392
393 my @child = @{$item->{node}->child_nodes};
394 while (@child) {
395 my $child = shift @child;
396 my $child_nt = $child->node_type;
397 if ($child_nt == 1) { # ELEMENT_NODE
398 my $child_nsuri = $child->namespace_uri;
399 $child_nsuri = '' unless defined $child_nsuri;
400 my $child_ln = $child->manakai_local_name;
401 if ($HTMLTransparentElements->{$child_nsuri}->{$child_ln} and
402 not (($self->{flag}->{in_head} or
403 ($el_nsuri eq $HTML_NS and $el_ln eq 'head')) and
404 $child_nsuri eq $HTML_NS and $child_ln eq 'noscript')) {
405 push @new_item, [$content_def->{check_child_element},
406 $self, $item, $child,
407 $child_nsuri, $child_ln, 1, $content_state];
408 push @new_item, {type => 'element', node => $child,
409 parent_state => $element_state,
410 parent_def => $content_def,
411 transparent => 1};
412 } else {
413 if ($el_nsuri eq $HTML_NS) { ## $HTMLSemiTransparentElements
414 if ($el_ln eq 'object') {
415 if ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
416 #
417 } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
418 #
419 } else {
420 $content_def = $item->{parent_def} || $content_def;
421 $content_state = $item->{parent_state} || $content_state;
422 }
423 } elsif ($el_ln eq 'video' or $el_ln eq 'audio') {
424 if ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
425 #
426 } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'source') {
427 $element_state->{has_source} = 1;
428 } else {
429 $content_def = $item->{parent_def} || $content_def;
430 $content_state = $item->{parent_state} || $content_state;
431 }
432 }
433 }
434
435 push @new_item, [$content_def->{check_child_element},
436 $self, $item, $child,
437 $child_nsuri, $child_ln, 0, $content_state];
438 push @new_item, {type => 'element', node => $child,
439 parent_def => $eldef,
440 parent_state => $element_state};
441 }
442
443 if ($HTMLEmbeddedContent->{$child_nsuri}->{$child_ln}) {
444 $element_state->{has_significant} = 1;
445 }
446 } elsif ($child_nt == 3 or # TEXT_NODE
447 $child_nt == 4) { # CDATA_SECTION_NODE
448 my $has_significant = ($child->data =~ /[^\x09-\x0D\x20]/);
449 push @new_item, [$content_def->{check_child_text},
450 $self, $item, $child, $has_significant,
451 $content_state];
452 $element_state->{has_significant} ||= $has_significant;
453 if ($has_significant and
454 $HTMLSemiTransparentElements->{$el_nsuri}->{$el_ln}) {
455 $content_def = $item->{parent_def} || $content_def;
456 }
457 } elsif ($child_nt == 5) { # ENTITY_REFERENCE_NODE
458 push @child, @{$child->child_nodes};
459 }
460 ## TODO: PI_NODE
461 ## TODO: Unknown node type
462 }
463
464 push @new_item, [$eldef->{check_end}, $self, $item, $element_state];
465
466 unshift @item, @new_item;
467 } else {
468 die "$0: Internal error: Unsupported checking action type |$item->{type}|";
469 }
470 }
471
472 for (@{$self->{usemap}}) {
473 unless ($self->{map}->{$_->[0]}) {
474 $self->{onerror}->(node => $_->[1], type => 'no referenced map');
475 }
476 }
477
478 for (@{$self->{contextmenu}}) {
479 unless ($self->{menu}->{$_->[0]}) {
480 $self->{onerror}->(node => $_->[1], type => 'no referenced menu');
481 }
482 }
483
484 delete $self->{plus_elements};
485 delete $self->{minus_elements};
486 delete $self->{onerror};
487 delete $self->{id};
488 delete $self->{usemap};
489 delete $self->{map};
490 return $self->{return};
491 } # check_element
492
493 sub _add_minus_elements ($$@) {
494 my $self = shift;
495 my $element_state = shift;
496 for my $elements (@_) {
497 for my $nsuri (keys %$elements) {
498 for my $ln (keys %{$elements->{$nsuri}}) {
499 unless ($self->{minus_elements}->{$nsuri}->{$ln}) {
500 $element_state->{minus_elements_original}->{$nsuri}->{$ln} = 0;
501 $self->{minus_elements}->{$nsuri}->{$ln} = 1;
502 }
503 }
504 }
505 }
506 } # _add_minus_elements
507
508 sub _remove_minus_elements ($$) {
509 my $self = shift;
510 my $element_state = shift;
511 for my $nsuri (keys %{$element_state->{minus_elements_original}}) {
512 for my $ln (keys %{$element_state->{minus_elements_original}->{$nsuri}}) {
513 delete $self->{minus_elements}->{$nsuri}->{$ln};
514 }
515 }
516 } # _remove_minus_elements
517
518 sub _add_plus_elements ($$@) {
519 my $self = shift;
520 my $element_state = shift;
521 for my $elements (@_) {
522 for my $nsuri (keys %$elements) {
523 for my $ln (keys %{$elements->{$nsuri}}) {
524 unless ($self->{plus_elements}->{$nsuri}->{$ln}) {
525 $element_state->{plus_elements_original}->{$nsuri}->{$ln} = 0;
526 $self->{plus_elements}->{$nsuri}->{$ln} = 1;
527 }
528 }
529 }
530 }
531 } # _add_plus_elements
532
533 sub _remove_plus_elements ($$) {
534 my $self = shift;
535 my $element_state = shift;
536 for my $nsuri (keys %{$element_state->{plus_elements_original}}) {
537 for my $ln (keys %{$element_state->{plus_elements_original}->{$nsuri}}) {
538 delete $self->{plus_elements}->{$nsuri}->{$ln};
539 }
540 }
541 } # _remove_plus_elements
542
543 sub _add_minuses ($@) {
544 my $self = shift;
545 my $r = {};
546 for my $list (@_) {
547 for my $ns (keys %$list) {
548 for my $ln (keys %{$list->{$ns}}) {
549 unless ($self->{minuses}->{$ns}->{$ln}) {
550 $self->{minuses}->{$ns}->{$ln} = 1;
551 $r->{$ns}->{$ln} = 1;
552 }
553 }
554 }
555 }
556 return {type => 'plus', list => $r};
557 } # _add_minuses
558
559 sub _add_pluses ($@) {
560 my $self = shift;
561 my $r = {};
562 for my $list (@_) {
563 for my $ns (keys %$list) {
564 for my $ln (keys %{$list->{$ns}}) {
565 unless ($self->{pluses}->{$ns}->{$ln}) {
566 $self->{pluses}->{$ns}->{$ln} = 1;
567 $r->{$ns}->{$ln} = 1;
568 }
569 }
570 }
571 }
572 return {type => 'minus', list => $r};
573 } # _add_pluses
574
575 sub _remove_minuses ($$) {
576 my ($self, $todo) = @_;
577 if ($todo->{type} eq 'minus') {
578 for my $ns (keys %{$todo->{list}}) {
579 for my $ln (keys %{$todo->{list}->{$ns}}) {
580 delete $self->{pluses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
581 }
582 }
583 } elsif ($todo->{type} eq 'plus') {
584 for my $ns (keys %{$todo->{list}}) {
585 for my $ln (keys %{$todo->{list}->{$ns}}) {
586 delete $self->{minuses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
587 }
588 }
589 } else {
590 die "$0: Unknown +- type: $todo->{type}";
591 }
592 1;
593 } # _remove_minuses
594
595 ## NOTE: Priority for "minuses" and "pluses" are currently left
596 ## undefined and implemented inconsistently; it is not a problem for
597 ## now, since no element belongs to both lists.
598
599 sub _check_get_children ($$$) {
600 my ($self, $node, $parent_todo) = @_;
601 my $new_todos = [];
602 my $sib = [];
603 TP: {
604 my $node_ns = $node->namespace_uri;
605 $node_ns = '' unless defined $node_ns;
606 my $node_ln = $node->manakai_local_name;
607 if ($HTMLTransparentElements->{$node_ns}->{$node_ln}) {
608 if ($node_ns eq $HTML_NS and $node_ln eq 'noscript') {
609 if ($parent_todo->{flag}->{in_head}) {
610 #
611 } else {
612 my $end = $self->_add_minuses ({$HTML_NS, {noscript => 1}});
613 push @$sib, $end;
614
615 unshift @$sib, @{$node->child_nodes};
616 push @$new_todos, {type => 'element-attributes', node => $node};
617 last TP;
618 }
619 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'del') {
620 my $sig_flag = $parent_todo->{flag}->{has_descendant}->{significant};
621 unshift @$sib, @{$node->child_nodes};
622 push @$new_todos, {type => 'element-attributes', node => $node};
623 push @$new_todos,
624 {type => 'code',
625 code => sub {
626 $parent_todo->{flag}->{has_descendant}->{significant} = 0
627 if not $sig_flag;
628 }};
629 last TP;
630 } else {
631 unshift @$sib, @{$node->child_nodes};
632 push @$new_todos, {type => 'element-attributes', node => $node};
633 last TP;
634 }
635 }
636 if ($node_ns eq $HTML_NS and ($node_ln eq 'video' or $node_ln eq 'audio')) {
637 if ($node->has_attribute_ns (undef, 'src')) {
638 unshift @$sib, @{$node->child_nodes};
639 push @$new_todos, {type => 'element-attributes', node => $node};
640 last TP;
641 } else {
642 my @cn = @{$node->child_nodes};
643 CN: while (@cn) {
644 my $cn = shift @cn;
645 my $cnt = $cn->node_type;
646 if ($cnt == 1) {
647 my $cn_nsuri = $cn->namespace_uri;
648 $cn_nsuri = '' unless defined $cn_nsuri;
649 if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'source') {
650 #
651 } else {
652 last CN;
653 }
654 } elsif ($cnt == 3 or $cnt == 4) {
655 if ($cn->data =~ /[^\x09-\x0D\x20]/) {
656 last CN;
657 }
658 }
659 } # CN
660 unshift @$sib, @cn;
661 }
662 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'object') {
663 my @cn = @{$node->child_nodes};
664 CN: while (@cn) {
665 my $cn = shift @cn;
666 my $cnt = $cn->node_type;
667 if ($cnt == 1) {
668 my $cn_nsuri = $cn->namespace_uri;
669 $cn_nsuri = '' unless defined $cn_nsuri;
670 if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'param') {
671 #
672 } else {
673 last CN;
674 }
675 } elsif ($cnt == 3 or $cnt == 4) {
676 if ($cn->data =~ /[^\x09-\x0D\x20]/) {
677 last CN;
678 }
679 }
680 } # CN
681 unshift @$sib, @cn;
682 }
683 push @$new_todos, {type => 'element', node => $node};
684 } # TP
685
686 for my $new_todo (@$new_todos) {
687 $new_todo->{flag} = {%{$parent_todo->{flag} or {}}};
688 }
689
690 return ($sib, $new_todos);
691 } # _check_get_children
692
693 =head1 LICENSE
694
695 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
696
697 This library is free software; you can redistribute it
698 and/or modify it under the same terms as Perl itself.
699
700 =cut
701
702 1;
703 # $Date: 2008/02/23 14:37:09 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24