/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.59 - (hide annotations) (download)
Sun Feb 17 12:18:06 2008 UTC (16 years, 8 months ago) by wakaba
Branch: MAIN
Changes since 1.58: +4 -2 lines
++ whatpm/t/ChangeLog	17 Feb 2008 12:18:01 -0000
	* content-model-1.dat, content-model-2.dat: Updated.

2008-02-17  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	17 Feb 2008 12:12:56 -0000
	* ContentChecker.pm ({unsupported_level}): New value.

	* HTML.pm.src: Save whether |meta| |content| attribute
	contains character references or not.

2008-02-17  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ContentChecker/ChangeLog	17 Feb 2008 12:17:33 -0000
	* HTML.pm: |<meta http-equiv=Content-Type| support (HTML5 revision
	1180).

2008-02-17  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3 wakaba 1.59 our $VERSION=do{my @r=(q$Revision: 1.58 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1
5 wakaba 1.18 require Whatpm::URIChecker;
6    
7 wakaba 1.13 ## ISSUE: How XML and XML Namespaces conformance can (or cannot)
8     ## be applied to an in-memory representation (i.e. DOM)?
9    
10 wakaba 1.50 ## TODO: Conformance of an HTML document with non-html root element.
11    
12 wakaba 1.42 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
13 wakaba 1.9 my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
14     my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
15    
16 wakaba 1.42 my $Namespace = {
17 wakaba 1.43 q<http://www.w3.org/2005/Atom> => {module => 'Whatpm::ContentChecker::Atom'},
18 wakaba 1.42 $HTML_NS => {module => 'Whatpm::ContentChecker::HTML'},
19     $XML_NS => {loaded => 1},
20     $XMLNS_NS => {loaded => 1},
21     };
22    
23     our $AttrChecker = {
24 wakaba 1.9 $XML_NS => {
25 wakaba 1.13 space => sub {
26     my ($self, $attr) = @_;
27     my $value = $attr->value;
28     if ($value eq 'default' or $value eq 'preserve') {
29     #
30     } else {
31     ## NOTE: An XML "error"
32 wakaba 1.33 $self->{onerror}->(node => $attr, level => 'error',
33     type => 'invalid attribute value');
34 wakaba 1.13 }
35     },
36     lang => sub {
37 wakaba 1.35 my ($self, $attr) = @_;
38 wakaba 1.47 my $value = $attr->value;
39     if ($value eq '') {
40     #
41     } else {
42     require Whatpm::LangTag;
43     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
44     my %opt = @_;
45     my $type = 'LangTag:'.$opt{type};
46     $type .= ':' . $opt{subtag} if defined $opt{subtag};
47     $self->{onerror}->(node => $attr, type => $type,
48     value => $opt{value}, level => $opt{level});
49     });
50     }
51    
52 wakaba 1.13 ## NOTE: "The values of the attribute are language identifiers
53     ## as defined by [IETF RFC 3066], Tags for the Identification
54     ## of Languages, or its successor; in addition, the empty string
55     ## may be specified." ("may" in lower case)
56 wakaba 1.47 ## NOTE: Is an RFC 3066-valid (but RFC 4647-invalid) language tag
57     ## allowed today?
58    
59     ## TODO: test data
60    
61 wakaba 1.35 if ($attr->owner_document->manakai_is_html) { # MUST NOT
62 wakaba 1.36 $self->{onerror}->(node => $attr, type => 'in HTML:xml:lang');
63 wakaba 1.35 ## TODO: Test data...
64     }
65 wakaba 1.13 },
66     base => sub {
67     my ($self, $attr) = @_;
68     my $value = $attr->value;
69     if ($value =~ /[^\x{0000}-\x{10FFFF}]/) { ## ISSUE: Should we disallow noncharacters?
70     $self->{onerror}->(node => $attr,
71 wakaba 1.33 type => 'invalid attribute value');
72 wakaba 1.13 }
73 wakaba 1.18 ## NOTE: Conformance to URI standard is not checked since there is
74     ## no author requirement on conformance in the XML Base specification.
75 wakaba 1.13 },
76     id => sub {
77     my ($self, $attr) = @_;
78     my $value = $attr->value;
79     $value =~ s/[\x09\x0A\x0D\x20]+/ /g;
80     $value =~ s/^\x20//;
81     $value =~ s/\x20$//;
82     ## TODO: NCName in XML 1.0 or 1.1
83     ## TODO: declared type is ID?
84 wakaba 1.33 if ($self->{id}->{$value}) { ## NOTE: An xml:id error
85     $self->{onerror}->(node => $attr, level => 'error',
86     type => 'duplicate ID');
87 wakaba 1.37 push @{$self->{id}->{$value}}, $attr;
88 wakaba 1.13 } else {
89 wakaba 1.37 $self->{id}->{$value} = [$attr];
90 wakaba 1.13 }
91     },
92 wakaba 1.9 },
93     $XMLNS_NS => {
94 wakaba 1.13 '' => sub {
95     my ($self, $attr) = @_;
96     my $ln = $attr->manakai_local_name;
97     my $value = $attr->value;
98     if ($value eq $XML_NS and $ln ne 'xml') {
99     $self->{onerror}
100 wakaba 1.33 ->(node => $attr, level => 'NC',
101     type => 'Reserved Prefixes and Namespace Names:=xml');
102 wakaba 1.13 } elsif ($value eq $XMLNS_NS) {
103     $self->{onerror}
104 wakaba 1.33 ->(node => $attr, level => 'NC',
105     type => 'Reserved Prefixes and Namespace Names:=xmlns');
106 wakaba 1.13 }
107     if ($ln eq 'xml' and $value ne $XML_NS) {
108     $self->{onerror}
109 wakaba 1.33 ->(node => $attr, level => 'NC',
110     type => 'Reserved Prefixes and Namespace Names:xmlns:xml=');
111 wakaba 1.13 } elsif ($ln eq 'xmlns') {
112     $self->{onerror}
113 wakaba 1.33 ->(node => $attr, level => 'NC',
114     type => 'Reserved Prefixes and Namespace Names:xmlns:xmlns=');
115 wakaba 1.13 }
116     ## TODO: If XML 1.0 and empty
117     },
118     xmlns => sub {
119     my ($self, $attr) = @_;
120     ## TODO: In XML 1.0, URI reference [RFC 3986] or an empty string
121     ## TODO: In XML 1.1, IRI reference [RFC 3987] or an empty string
122 wakaba 1.18 ## TODO: relative references are deprecated
123 wakaba 1.13 my $value = $attr->value;
124     if ($value eq $XML_NS) {
125     $self->{onerror}
126 wakaba 1.33 ->(node => $attr, level => 'NC',
127     type => 'Reserved Prefixes and Namespace Names:=xml');
128 wakaba 1.13 } elsif ($value eq $XMLNS_NS) {
129     $self->{onerror}
130 wakaba 1.33 ->(node => $attr, level => 'NC',
131     type => 'Reserved Prefixes and Namespace Names:=xmlns');
132 wakaba 1.13 }
133     },
134 wakaba 1.9 },
135     };
136    
137 wakaba 1.14 ## ISSUE: Should we really allow these attributes?
138 wakaba 1.13 $AttrChecker->{''}->{'xml:space'} = $AttrChecker->{$XML_NS}->{space};
139     $AttrChecker->{''}->{'xml:lang'} = $AttrChecker->{$XML_NS}->{lang};
140     $AttrChecker->{''}->{'xml:base'} = $AttrChecker->{$XML_NS}->{base};
141     $AttrChecker->{''}->{'xml:id'} = $AttrChecker->{$XML_NS}->{id};
142    
143 wakaba 1.3 ## ANY
144 wakaba 1.42 our $AnyChecker = sub {
145 wakaba 1.4 my ($self, $todo) = @_;
146     my $el = $todo->{node};
147     my $new_todos = [];
148 wakaba 1.3 my @nodes = (@{$el->child_nodes});
149     while (@nodes) {
150     my $node = shift @nodes;
151     $self->_remove_minuses ($node) and next if ref $node eq 'HASH';
152    
153     my $nt = $node->node_type;
154     if ($nt == 1) {
155     my $node_ns = $node->namespace_uri;
156     $node_ns = '' unless defined $node_ns;
157     my $node_ln = $node->manakai_local_name;
158     if ($self->{minuses}->{$node_ns}->{$node_ln}) {
159     $self->{onerror}->(node => $node, type => 'element not allowed');
160     }
161 wakaba 1.54 my ($sib, $ch) = $self->_check_get_children ($node, $todo);
162     unshift @nodes, @$sib;
163     push @$new_todos, @$ch;
164     } elsif ($nt == 3 or $nt == 4) {
165     if ($node->data =~ /[^\x09-\x0D\x20]/) {
166     $todo->{flag}->{has_descendant}->{significant} = 1;
167     }
168 wakaba 1.3 } elsif ($nt == 5) {
169     unshift @nodes, @{$node->child_nodes};
170     }
171     }
172 wakaba 1.4 return ($new_todos);
173 wakaba 1.3 }; # $AnyChecker
174    
175 wakaba 1.42 our $ElementDefault = {
176 wakaba 1.1 checker => sub {
177 wakaba 1.4 my ($self, $todo) = @_;
178 wakaba 1.33 $self->{onerror}->(node => $todo->{node}, level => 'unsupported',
179     type => 'element');
180 wakaba 1.4 return $AnyChecker->($self, $todo);
181 wakaba 1.1 },
182 wakaba 1.9 attrs_checker => sub {
183     my ($self, $todo) = @_;
184     for my $attr (@{$todo->{node}->attributes}) {
185     my $attr_ns = $attr->namespace_uri;
186     $attr_ns = '' unless defined $attr_ns;
187     my $attr_ln = $attr->manakai_local_name;
188     my $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
189     || $AttrChecker->{$attr_ns}->{''};
190     if ($checker) {
191     $checker->($self, $attr);
192 wakaba 1.17 } else {
193 wakaba 1.33 $self->{onerror}->(node => $attr, level => 'unsupported',
194     type => 'attribute');
195 wakaba 1.9 }
196     }
197     },
198 wakaba 1.1 };
199    
200 wakaba 1.7 my $HTMLTransparentElements = {
201 wakaba 1.57 $HTML_NS => {qw/ins 1 del 1 font 1 noscript 1 canvas 1/},
202 wakaba 1.29 ## NOTE: |html:noscript| is transparent if scripting is disabled
203     ## and not in |head|.
204 wakaba 1.7 };
205    
206 wakaba 1.57 ## Semi-transparent: html:video, html:audio, html:object
207    
208 wakaba 1.42 our $Element = {};
209 wakaba 1.7
210 wakaba 1.56 sub check_document ($$$;$) {
211     my ($self, $doc, $onerror, $onsubdoc) = @_;
212 wakaba 1.42 $self = bless {}, $self unless ref $self;
213     $self->{onerror} = $onerror;
214 wakaba 1.56 $self->{onsubdoc} = $onsubdoc || sub {
215     warn "A subdocument is not conformance-checked";
216     };
217 wakaba 1.1
218 wakaba 1.48 $self->{must_level} = 'm';
219     $self->{fact_level} = 'f';
220     $self->{should_level} = 's';
221 wakaba 1.51 $self->{good_level} = 'w';
222 wakaba 1.59 $self->{unsupported_lavel} = 'u';
223 wakaba 1.48
224 wakaba 1.42 my $docel = $doc->document_element;
225     unless (defined $docel) {
226     ## ISSUE: Should we check content of Document node?
227     $onerror->(node => $doc, type => 'no document element');
228     ## ISSUE: Is this non-conforming (to what spec)? Or just a warning?
229     return {
230     class => {},
231     id => {}, table => [], term => {},
232     };
233 wakaba 1.1 }
234    
235 wakaba 1.42 ## ISSUE: Unexpanded entity references and HTML5 conformance
236 wakaba 1.1
237 wakaba 1.42 my $docel_nsuri = $docel->namespace_uri;
238     $docel_nsuri = '' unless defined $docel_nsuri;
239 wakaba 1.43 unless ($Namespace->{$docel_nsuri}->{loaded}) {
240     if ($Namespace->{$docel_nsuri}->{module}) {
241     eval qq{ require $Namespace->{$docel_nsuri}->{module} } or die $@;
242     } else {
243     $Namespace->{$docel_nsuri}->{loaded} = 1;
244     }
245     }
246 wakaba 1.42 my $docel_def = $Element->{$docel_nsuri}->{$docel->manakai_local_name} ||
247     $Element->{$docel_nsuri}->{''} ||
248     $ElementDefault;
249     if ($docel_def->{is_root}) {
250     #
251 wakaba 1.50 } elsif ($docel_def->{is_xml_root}) {
252     unless ($doc->manakai_is_html) {
253     #
254     } else {
255     $onerror->(node => $docel, type => 'element not allowed:root:xml');
256     }
257 wakaba 1.42 } else {
258 wakaba 1.49 $onerror->(node => $docel, type => 'element not allowed:root');
259 wakaba 1.1 }
260    
261 wakaba 1.42 ## TODO: Check for other items other than document element
262     ## (second (errorous) element, text nodes, PI nodes, doctype nodes)
263 wakaba 1.2
264 wakaba 1.56 my $return = $self->check_element ($docel, $onerror, $onsubdoc);
265 wakaba 1.51
266 wakaba 1.52 ## TODO: Test for these checks are necessary.
267 wakaba 1.51 my $charset_name = $doc->input_encoding;
268     if (defined $charset_name) {
269     require Message::Charset::Info;
270     my $charset = $Message::Charset::Info::IANACharset->{$charset_name};
271    
272     if ($doc->manakai_is_html and
273     not $doc->manakai_has_bom and
274     not defined $doc->manakai_charset) {
275     unless ($charset->{is_html_ascii_superset}) {
276     $onerror->(node => $doc, level => $self->{must_level},
277     type => 'non ascii superset:'.$charset_name);
278     }
279    
280     if (not $self->{has_charset} and
281     not $charset->{iana_names}->{'us-ascii'}) {
282     $onerror->(node => $doc, level => $self->{must_level},
283     type => 'no character encoding declaration:'.$charset_name);
284     }
285     }
286    
287     if ($charset->{iana_names}->{'utf-8'}) {
288     #
289     } elsif ($charset->{iana_names}->{'jis_x0212-1990'} or
290     $charset->{iana_names}->{'x-jis0208'} or
291     $charset->{iana_names}->{'utf-32'} or ## ISSUE: UTF-32BE? UTF-32LE?
292     $charset->{is_ebcdic_based}) {
293     $onerror->(node => $doc,
294     type => 'character encoding:'.$charset_name,
295     level => $self->{should_level});
296     } elsif ($charset->{iana_names}->{'cesu-8'} or
297     $charset->{iana_names}->{'utf-8'} or ## ISSUE: UNICODE-1-1-UTF-7?
298     $charset->{iana_names}->{'bocu-1'} or
299     $charset->{iana_names}->{'scsu'}) {
300     $onerror->(node => $doc,
301     type => 'character encoding:'.$charset_name,
302     level => $self->{must_level});
303     } else {
304     $onerror->(node => $doc,
305     type => 'character encoding:'.$charset_name,
306     level => $self->{good_level});
307     }
308 wakaba 1.52 } elsif ($doc->manakai_is_html) {
309     ## NOTE: MUST and SHOULD requirements above cannot be tested,
310     ## since the document has no input charset encoding information.
311     $onerror->(node => $doc,
312     type => 'character encoding:',
313     level => 'unsupported');
314 wakaba 1.51 }
315    
316     return $return;
317 wakaba 1.42 } # check_document
318 wakaba 1.1
319 wakaba 1.56 sub check_element ($$$;$) {
320     my ($self, $el, $onerror, $onsubdoc) = @_;
321 wakaba 1.42 $self = bless {}, $self unless ref $self;
322     $self->{onerror} = $onerror;
323 wakaba 1.56 $self->{onsubdoc} = $onsubdoc || sub {
324     warn "A subdocument is not conformance-checked";
325     };
326 wakaba 1.2
327 wakaba 1.48 $self->{must_level} = 'm';
328     $self->{fact_level} = 'f';
329     $self->{should_level} = 's';
330 wakaba 1.51 $self->{good_level} = 'w';
331 wakaba 1.59 $self->{unsupported_lavel} = 'u';
332 wakaba 1.48
333 wakaba 1.50 $self->{pluses} = {};
334 wakaba 1.42 $self->{minuses} = {};
335     $self->{id} = {};
336     $self->{term} = {};
337     $self->{usemap} = [];
338     $self->{contextmenu} = [];
339     $self->{map} = {};
340     $self->{menu} = {};
341     $self->{has_link_type} = {};
342 wakaba 1.46 #$self->{has_uri_attr};
343     #$self->{has_hyperlink_element};
344 wakaba 1.51 #$self->{has_charset};
345 wakaba 1.57 #$self->{has_base};
346 wakaba 1.42 $self->{return} = {
347     class => {},
348     id => $self->{id}, table => [], term => $self->{term},
349     };
350 wakaba 1.4
351 wakaba 1.42 my @todo = ({type => 'element', node => $el});
352     while (@todo) {
353     my $todo = shift @todo;
354     if ($todo->{type} eq 'element') {
355     my $prefix = $todo->{node}->prefix;
356     if (defined $prefix and $prefix eq 'xmlns') {
357     $self->{onerror}
358     ->(node => $todo->{node}, level => 'NC',
359     type => 'Reserved Prefixes and Namespace Names:<xmlns:>');
360 wakaba 1.7 }
361 wakaba 1.42 my $nsuri = $todo->{node}->namespace_uri;
362     $nsuri = '' unless defined $nsuri;
363     unless ($Namespace->{$nsuri}->{loaded}) {
364     if ($Namespace->{$nsuri}->{module}) {
365     eval qq{ require $Namespace->{$nsuri}->{module} } or die $@;
366     } else {
367     $Namespace->{$nsuri}->{loaded} = 1;
368 wakaba 1.1 }
369     }
370 wakaba 1.42 my $ln = $todo->{node}->manakai_local_name;
371     my $eldef = $Element->{$nsuri}->{$ln} ||
372     $Element->{$nsuri}->{''} ||
373     $ElementDefault;
374     $eldef->{attrs_checker}->($self, $todo);
375     my ($new_todos) = $eldef->{checker}->($self, $todo);
376     unshift @todo, @$new_todos;
377     } elsif ($todo->{type} eq 'element-attributes') {
378     my $prefix = $todo->{node}->prefix;
379     if (defined $prefix and $prefix eq 'xmlns') {
380     $self->{onerror}
381     ->(node => $todo->{node}, level => 'NC',
382     type => 'Reserved Prefixes and Namespace Names:<xmlns:>');
383     }
384     my $nsuri = $todo->{node}->namespace_uri;
385     $nsuri = '' unless defined $nsuri;
386     unless ($Namespace->{$nsuri}->{loaded}) {
387     if ($Namespace->{$nsuri}->{module}) {
388     eval qq{ require $Namespace->{$nsuri}->{module} } or die $@;
389 wakaba 1.1 } else {
390 wakaba 1.42 $Namespace->{$nsuri}->{loaded} = 1;
391 wakaba 1.1 }
392     }
393 wakaba 1.9 my $ln = $todo->{node}->manakai_local_name;
394     my $eldef = $Element->{$nsuri}->{$ln} ||
395     $Element->{$nsuri}->{''} ||
396     $ElementDefault;
397     $eldef->{attrs_checker}->($self, $todo);
398 wakaba 1.53 } elsif ($todo->{type} eq 'descendant') {
399     for my $key (keys %{$todo->{errors}}) {
400     unless ($todo->{flag}->{has_descendant}->{$key}) {
401     $todo->{errors}->{$key}->($self, $todo);
402     }
403     for my $key (keys %{$todo->{old_values}}) {
404     $todo->{flag}->{has_descendant}->{$key}
405     ||= $todo->{old_values}->{$key};
406     }
407     }
408 wakaba 1.50 } elsif ($todo->{type} eq 'plus' or $todo->{type} eq 'minus') {
409 wakaba 1.4 $self->_remove_minuses ($todo);
410 wakaba 1.30 } elsif ($todo->{type} eq 'code') {
411     $todo->{code}->();
412     } else {
413     die "$0: Internal error: Unsupported checking action type |$todo->{type}|";
414 wakaba 1.4 }
415 wakaba 1.1 }
416 wakaba 1.17
417     for (@{$self->{usemap}}) {
418     unless ($self->{map}->{$_->[0]}) {
419     $self->{onerror}->(node => $_->[1], type => 'no referenced map');
420     }
421     }
422    
423 wakaba 1.32 for (@{$self->{contextmenu}}) {
424     unless ($self->{menu}->{$_->[0]}) {
425     $self->{onerror}->(node => $_->[1], type => 'no referenced menu');
426     }
427     }
428    
429 wakaba 1.50 delete $self->{pluses};
430 wakaba 1.17 delete $self->{minuses};
431     delete $self->{onerror};
432     delete $self->{id};
433     delete $self->{usemap};
434     delete $self->{map};
435 wakaba 1.33 return $self->{return};
436 wakaba 1.1 } # check_element
437    
438 wakaba 1.2 sub _add_minuses ($@) {
439     my $self = shift;
440     my $r = {};
441     for my $list (@_) {
442     for my $ns (keys %$list) {
443     for my $ln (keys %{$list->{$ns}}) {
444     unless ($self->{minuses}->{$ns}->{$ln}) {
445     $self->{minuses}->{$ns}->{$ln} = 1;
446     $r->{$ns}->{$ln} = 1;
447     }
448     }
449     }
450     }
451 wakaba 1.4 return {type => 'plus', list => $r};
452 wakaba 1.2 } # _add_minuses
453    
454 wakaba 1.50 sub _add_pluses ($@) {
455     my $self = shift;
456     my $r = {};
457     for my $list (@_) {
458     for my $ns (keys %$list) {
459     for my $ln (keys %{$list->{$ns}}) {
460     unless ($self->{pluses}->{$ns}->{$ln}) {
461     $self->{pluses}->{$ns}->{$ln} = 1;
462     $r->{$ns}->{$ln} = 1;
463     }
464     }
465     }
466     }
467     return {type => 'minus', list => $r};
468     } # _add_pluses
469    
470 wakaba 1.2 sub _remove_minuses ($$) {
471 wakaba 1.4 my ($self, $todo) = @_;
472 wakaba 1.50 if ($todo->{type} eq 'minus') {
473     for my $ns (keys %{$todo->{list}}) {
474     for my $ln (keys %{$todo->{list}->{$ns}}) {
475     delete $self->{pluses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
476     }
477 wakaba 1.2 }
478 wakaba 1.50 } elsif ($todo->{type} eq 'plus') {
479     for my $ns (keys %{$todo->{list}}) {
480     for my $ln (keys %{$todo->{list}->{$ns}}) {
481     delete $self->{minuses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
482     }
483     }
484     } else {
485     die "$0: Unknown +- type: $todo->{type}";
486 wakaba 1.2 }
487     1;
488     } # _remove_minuses
489    
490 wakaba 1.50 ## NOTE: Priority for "minuses" and "pluses" are currently left
491     ## undefined and implemented inconsistently; it is not a problem for
492     ## now, since no element belongs to both lists.
493    
494 wakaba 1.30 sub _check_get_children ($$$) {
495     my ($self, $node, $parent_todo) = @_;
496 wakaba 1.4 my $new_todos = [];
497 wakaba 1.2 my $sib = [];
498     TP: {
499     my $node_ns = $node->namespace_uri;
500     $node_ns = '' unless defined $node_ns;
501     my $node_ln = $node->manakai_local_name;
502 wakaba 1.45 if ($HTMLTransparentElements->{$node_ns}->{$node_ln}) {
503     if ($node_ns eq $HTML_NS and $node_ln eq 'noscript') {
504     if ($parent_todo->{flag}->{in_head}) {
505     #
506     } else {
507     my $end = $self->_add_minuses ({$HTML_NS, {noscript => 1}});
508     push @$sib, $end;
509    
510     unshift @$sib, @{$node->child_nodes};
511     push @$new_todos, {type => 'element-attributes', node => $node};
512     last TP;
513     }
514 wakaba 1.58 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'del') {
515     my $sig_flag = $parent_todo->{flag}->{has_descendant}->{significant};
516     unshift @$sib, @{$node->child_nodes};
517     push @$new_todos, {type => 'element-attributes', node => $node};
518     push @$new_todos,
519     {type => 'code',
520     code => sub {
521     $parent_todo->{flag}->{has_descendant}->{significant} = 0
522     if not $sig_flag;
523     }};
524     last TP;
525 wakaba 1.45 } else {
526     unshift @$sib, @{$node->child_nodes};
527     push @$new_todos, {type => 'element-attributes', node => $node};
528     last TP;
529 wakaba 1.2 }
530     }
531 wakaba 1.8 if ($node_ns eq $HTML_NS and ($node_ln eq 'video' or $node_ln eq 'audio')) {
532 wakaba 1.2 if ($node->has_attribute_ns (undef, 'src')) {
533     unshift @$sib, @{$node->child_nodes};
534 wakaba 1.9 push @$new_todos, {type => 'element-attributes', node => $node};
535 wakaba 1.2 last TP;
536     } else {
537     my @cn = @{$node->child_nodes};
538     CN: while (@cn) {
539     my $cn = shift @cn;
540     my $cnt = $cn->node_type;
541     if ($cnt == 1) {
542 wakaba 1.8 my $cn_nsuri = $cn->namespace_uri;
543     $cn_nsuri = '' unless defined $cn_nsuri;
544     if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'source') {
545 wakaba 1.2 #
546     } else {
547     last CN;
548     }
549     } elsif ($cnt == 3 or $cnt == 4) {
550     if ($cn->data =~ /[^\x09-\x0D\x20]/) {
551     last CN;
552     }
553     }
554     } # CN
555     unshift @$sib, @cn;
556     }
557 wakaba 1.57 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'object') {
558     my @cn = @{$node->child_nodes};
559     CN: while (@cn) {
560     my $cn = shift @cn;
561     my $cnt = $cn->node_type;
562     if ($cnt == 1) {
563     my $cn_nsuri = $cn->namespace_uri;
564     $cn_nsuri = '' unless defined $cn_nsuri;
565     if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'param') {
566     #
567     } else {
568     last CN;
569     }
570     } elsif ($cnt == 3 or $cnt == 4) {
571     if ($cn->data =~ /[^\x09-\x0D\x20]/) {
572     last CN;
573     }
574     }
575     } # CN
576     unshift @$sib, @cn;
577 wakaba 1.2 }
578 wakaba 1.4 push @$new_todos, {type => 'element', node => $node};
579 wakaba 1.2 } # TP
580 wakaba 1.30
581     for my $new_todo (@$new_todos) {
582     $new_todo->{flag} = {%{$parent_todo->{flag} or {}}};
583     }
584    
585 wakaba 1.4 return ($sib, $new_todos);
586 wakaba 1.2 } # _check_get_children
587    
588 wakaba 1.44 =head1 LICENSE
589    
590 wakaba 1.56 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
591 wakaba 1.44
592     This library is free software; you can redistribute it
593     and/or modify it under the same terms as Perl itself.
594    
595     =cut
596    
597 wakaba 1.1 1;
598 wakaba 1.59 # $Date: 2008/02/17 11:04:08 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24