/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.66 - (hide annotations) (download)
Sun Feb 24 00:51:09 2008 UTC (16 years, 8 months ago) by wakaba
Branch: MAIN
Changes since 1.65: +11 -7 lines
++ whatpm/Whatpm/ChangeLog	24 Feb 2008 00:50:34 -0000
	* ContentChecker.pm (check_element): Fix |del|-and-significant
	problem by adding some more arguments.

2008-02-24  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ContentChecker/ChangeLog	24 Feb 2008 00:51:02 -0000
	* HTML.pm: Updated for |del|-and-significant problem.

2008-02-24  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3 wakaba 1.66 our $VERSION=do{my @r=(q$Revision: 1.65 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1
5 wakaba 1.18 require Whatpm::URIChecker;
6    
7 wakaba 1.13 ## ISSUE: How XML and XML Namespaces conformance can (or cannot)
8     ## be applied to an in-memory representation (i.e. DOM)?
9    
10 wakaba 1.50 ## TODO: Conformance of an HTML document with non-html root element.
11    
12 wakaba 1.42 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
13 wakaba 1.9 my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
14     my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
15    
16 wakaba 1.42 my $Namespace = {
17 wakaba 1.43 q<http://www.w3.org/2005/Atom> => {module => 'Whatpm::ContentChecker::Atom'},
18 wakaba 1.42 $HTML_NS => {module => 'Whatpm::ContentChecker::HTML'},
19     $XML_NS => {loaded => 1},
20     $XMLNS_NS => {loaded => 1},
21     };
22    
23     our $AttrChecker = {
24 wakaba 1.9 $XML_NS => {
25 wakaba 1.13 space => sub {
26     my ($self, $attr) = @_;
27     my $value = $attr->value;
28     if ($value eq 'default' or $value eq 'preserve') {
29     #
30     } else {
31     ## NOTE: An XML "error"
32 wakaba 1.33 $self->{onerror}->(node => $attr, level => 'error',
33     type => 'invalid attribute value');
34 wakaba 1.13 }
35     },
36     lang => sub {
37 wakaba 1.35 my ($self, $attr) = @_;
38 wakaba 1.47 my $value = $attr->value;
39     if ($value eq '') {
40     #
41     } else {
42     require Whatpm::LangTag;
43     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
44     my %opt = @_;
45     my $type = 'LangTag:'.$opt{type};
46     $type .= ':' . $opt{subtag} if defined $opt{subtag};
47     $self->{onerror}->(node => $attr, type => $type,
48     value => $opt{value}, level => $opt{level});
49     });
50     }
51    
52 wakaba 1.13 ## NOTE: "The values of the attribute are language identifiers
53     ## as defined by [IETF RFC 3066], Tags for the Identification
54     ## of Languages, or its successor; in addition, the empty string
55     ## may be specified." ("may" in lower case)
56 wakaba 1.47 ## NOTE: Is an RFC 3066-valid (but RFC 4647-invalid) language tag
57     ## allowed today?
58    
59     ## TODO: test data
60    
61 wakaba 1.35 if ($attr->owner_document->manakai_is_html) { # MUST NOT
62 wakaba 1.36 $self->{onerror}->(node => $attr, type => 'in HTML:xml:lang');
63 wakaba 1.35 ## TODO: Test data...
64     }
65 wakaba 1.13 },
66     base => sub {
67     my ($self, $attr) = @_;
68     my $value = $attr->value;
69     if ($value =~ /[^\x{0000}-\x{10FFFF}]/) { ## ISSUE: Should we disallow noncharacters?
70     $self->{onerror}->(node => $attr,
71 wakaba 1.33 type => 'invalid attribute value');
72 wakaba 1.13 }
73 wakaba 1.18 ## NOTE: Conformance to URI standard is not checked since there is
74     ## no author requirement on conformance in the XML Base specification.
75 wakaba 1.13 },
76     id => sub {
77     my ($self, $attr) = @_;
78     my $value = $attr->value;
79     $value =~ s/[\x09\x0A\x0D\x20]+/ /g;
80     $value =~ s/^\x20//;
81     $value =~ s/\x20$//;
82     ## TODO: NCName in XML 1.0 or 1.1
83     ## TODO: declared type is ID?
84 wakaba 1.33 if ($self->{id}->{$value}) { ## NOTE: An xml:id error
85     $self->{onerror}->(node => $attr, level => 'error',
86     type => 'duplicate ID');
87 wakaba 1.37 push @{$self->{id}->{$value}}, $attr;
88 wakaba 1.13 } else {
89 wakaba 1.37 $self->{id}->{$value} = [$attr];
90 wakaba 1.13 }
91     },
92 wakaba 1.9 },
93     $XMLNS_NS => {
94 wakaba 1.13 '' => sub {
95     my ($self, $attr) = @_;
96     my $ln = $attr->manakai_local_name;
97     my $value = $attr->value;
98     if ($value eq $XML_NS and $ln ne 'xml') {
99     $self->{onerror}
100 wakaba 1.33 ->(node => $attr, level => 'NC',
101     type => 'Reserved Prefixes and Namespace Names:=xml');
102 wakaba 1.13 } elsif ($value eq $XMLNS_NS) {
103     $self->{onerror}
104 wakaba 1.33 ->(node => $attr, level => 'NC',
105     type => 'Reserved Prefixes and Namespace Names:=xmlns');
106 wakaba 1.13 }
107     if ($ln eq 'xml' and $value ne $XML_NS) {
108     $self->{onerror}
109 wakaba 1.33 ->(node => $attr, level => 'NC',
110     type => 'Reserved Prefixes and Namespace Names:xmlns:xml=');
111 wakaba 1.13 } elsif ($ln eq 'xmlns') {
112     $self->{onerror}
113 wakaba 1.33 ->(node => $attr, level => 'NC',
114     type => 'Reserved Prefixes and Namespace Names:xmlns:xmlns=');
115 wakaba 1.13 }
116     ## TODO: If XML 1.0 and empty
117     },
118     xmlns => sub {
119     my ($self, $attr) = @_;
120     ## TODO: In XML 1.0, URI reference [RFC 3986] or an empty string
121     ## TODO: In XML 1.1, IRI reference [RFC 3987] or an empty string
122 wakaba 1.18 ## TODO: relative references are deprecated
123 wakaba 1.13 my $value = $attr->value;
124     if ($value eq $XML_NS) {
125     $self->{onerror}
126 wakaba 1.33 ->(node => $attr, level => 'NC',
127     type => 'Reserved Prefixes and Namespace Names:=xml');
128 wakaba 1.13 } elsif ($value eq $XMLNS_NS) {
129     $self->{onerror}
130 wakaba 1.33 ->(node => $attr, level => 'NC',
131     type => 'Reserved Prefixes and Namespace Names:=xmlns');
132 wakaba 1.13 }
133     },
134 wakaba 1.9 },
135     };
136    
137 wakaba 1.14 ## ISSUE: Should we really allow these attributes?
138 wakaba 1.13 $AttrChecker->{''}->{'xml:space'} = $AttrChecker->{$XML_NS}->{space};
139     $AttrChecker->{''}->{'xml:lang'} = $AttrChecker->{$XML_NS}->{lang};
140     $AttrChecker->{''}->{'xml:base'} = $AttrChecker->{$XML_NS}->{base};
141     $AttrChecker->{''}->{'xml:id'} = $AttrChecker->{$XML_NS}->{id};
142    
143 wakaba 1.60 our %AnyChecker = (
144     check_start => sub { },
145     check_attrs => sub {
146     my ($self, $item, $element_state) = @_;
147     for my $attr (@{$item->{node}->attributes}) {
148 wakaba 1.9 my $attr_ns = $attr->namespace_uri;
149     $attr_ns = '' unless defined $attr_ns;
150     my $attr_ln = $attr->manakai_local_name;
151     my $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
152 wakaba 1.60 || $AttrChecker->{$attr_ns}->{''};
153 wakaba 1.9 if ($checker) {
154     $checker->($self, $attr);
155 wakaba 1.17 } else {
156 wakaba 1.33 $self->{onerror}->(node => $attr, level => 'unsupported',
157     type => 'attribute');
158 wakaba 1.9 }
159     }
160     },
161 wakaba 1.60 check_child_element => sub {
162     my ($self, $item, $child_el, $child_nsuri, $child_ln,
163     $child_is_transparent, $element_state) = @_;
164     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
165     $self->{onerror}->(node => $child_el,
166     type => 'element not allowed:minus',
167     level => $self->{must_level});
168     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
169     #
170     } else {
171     #
172     }
173     },
174     check_child_text => sub { },
175     check_end => sub {
176     my ($self, $item, $element_state) = @_;
177     if ($element_state->{has_significant}) {
178 wakaba 1.66 $item->{real_parent_state}->{has_significant} = 1;
179 wakaba 1.60 }
180     },
181     );
182    
183     our $ElementDefault = {
184     %AnyChecker,
185     check_start => sub {
186     my ($self, $item, $element_state) = @_;
187     $self->{onerror}->(node => $item->{node}, level => 'unsupported',
188     type => 'element');
189     },
190 wakaba 1.1 };
191    
192 wakaba 1.60 our $HTMLEmbeddedContent = {
193     ## NOTE: All embedded content is also phrasing content.
194     $HTML_NS => {
195     img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
196     canvas => 1,
197     },
198     ## NOTE: MathML is mentioned in the HTML5 spec.
199     q<http://www.w3.org/1998/Math/MathML> => {math => 1},
200     ## NOTE: SVG is mentioned in the HTML5 spec.
201     q<http://www.w3.org/2000/svg> => {svg => 1},
202     ## NOTE: Foreign elements with content (but no metadata) are
203     ## embedded content.
204     };
205    
206 wakaba 1.7 my $HTMLTransparentElements = {
207 wakaba 1.57 $HTML_NS => {qw/ins 1 del 1 font 1 noscript 1 canvas 1/},
208 wakaba 1.29 ## NOTE: |html:noscript| is transparent if scripting is disabled
209     ## and not in |head|.
210 wakaba 1.7 };
211    
212 wakaba 1.61 my $HTMLSemiTransparentElements = {
213     $HTML_NS => {object => 1, video => 1, audio => 1},
214     };
215 wakaba 1.57
216 wakaba 1.42 our $Element = {};
217 wakaba 1.7
218 wakaba 1.56 sub check_document ($$$;$) {
219     my ($self, $doc, $onerror, $onsubdoc) = @_;
220 wakaba 1.42 $self = bless {}, $self unless ref $self;
221     $self->{onerror} = $onerror;
222 wakaba 1.56 $self->{onsubdoc} = $onsubdoc || sub {
223     warn "A subdocument is not conformance-checked";
224     };
225 wakaba 1.1
226 wakaba 1.48 $self->{must_level} = 'm';
227     $self->{fact_level} = 'f';
228     $self->{should_level} = 's';
229 wakaba 1.51 $self->{good_level} = 'w';
230 wakaba 1.59 $self->{unsupported_lavel} = 'u';
231 wakaba 1.48
232 wakaba 1.42 my $docel = $doc->document_element;
233     unless (defined $docel) {
234     ## ISSUE: Should we check content of Document node?
235     $onerror->(node => $doc, type => 'no document element');
236     ## ISSUE: Is this non-conforming (to what spec)? Or just a warning?
237     return {
238     class => {},
239     id => {}, table => [], term => {},
240     };
241 wakaba 1.1 }
242    
243 wakaba 1.42 ## ISSUE: Unexpanded entity references and HTML5 conformance
244 wakaba 1.1
245 wakaba 1.42 my $docel_nsuri = $docel->namespace_uri;
246     $docel_nsuri = '' unless defined $docel_nsuri;
247 wakaba 1.43 unless ($Namespace->{$docel_nsuri}->{loaded}) {
248     if ($Namespace->{$docel_nsuri}->{module}) {
249     eval qq{ require $Namespace->{$docel_nsuri}->{module} } or die $@;
250     } else {
251     $Namespace->{$docel_nsuri}->{loaded} = 1;
252     }
253     }
254 wakaba 1.42 my $docel_def = $Element->{$docel_nsuri}->{$docel->manakai_local_name} ||
255     $Element->{$docel_nsuri}->{''} ||
256     $ElementDefault;
257     if ($docel_def->{is_root}) {
258     #
259 wakaba 1.50 } elsif ($docel_def->{is_xml_root}) {
260     unless ($doc->manakai_is_html) {
261     #
262     } else {
263     $onerror->(node => $docel, type => 'element not allowed:root:xml');
264     }
265 wakaba 1.42 } else {
266 wakaba 1.49 $onerror->(node => $docel, type => 'element not allowed:root');
267 wakaba 1.1 }
268    
269 wakaba 1.42 ## TODO: Check for other items other than document element
270     ## (second (errorous) element, text nodes, PI nodes, doctype nodes)
271 wakaba 1.2
272 wakaba 1.56 my $return = $self->check_element ($docel, $onerror, $onsubdoc);
273 wakaba 1.51
274 wakaba 1.52 ## TODO: Test for these checks are necessary.
275 wakaba 1.51 my $charset_name = $doc->input_encoding;
276     if (defined $charset_name) {
277     require Message::Charset::Info;
278     my $charset = $Message::Charset::Info::IANACharset->{$charset_name};
279    
280     if ($doc->manakai_is_html and
281     not $doc->manakai_has_bom and
282     not defined $doc->manakai_charset) {
283     unless ($charset->{is_html_ascii_superset}) {
284     $onerror->(node => $doc, level => $self->{must_level},
285     type => 'non ascii superset:'.$charset_name);
286     }
287    
288     if (not $self->{has_charset} and
289     not $charset->{iana_names}->{'us-ascii'}) {
290     $onerror->(node => $doc, level => $self->{must_level},
291     type => 'no character encoding declaration:'.$charset_name);
292     }
293     }
294    
295     if ($charset->{iana_names}->{'utf-8'}) {
296     #
297     } elsif ($charset->{iana_names}->{'jis_x0212-1990'} or
298     $charset->{iana_names}->{'x-jis0208'} or
299     $charset->{iana_names}->{'utf-32'} or ## ISSUE: UTF-32BE? UTF-32LE?
300     $charset->{is_ebcdic_based}) {
301     $onerror->(node => $doc,
302     type => 'character encoding:'.$charset_name,
303     level => $self->{should_level});
304     } elsif ($charset->{iana_names}->{'cesu-8'} or
305     $charset->{iana_names}->{'utf-8'} or ## ISSUE: UNICODE-1-1-UTF-7?
306     $charset->{iana_names}->{'bocu-1'} or
307     $charset->{iana_names}->{'scsu'}) {
308     $onerror->(node => $doc,
309     type => 'character encoding:'.$charset_name,
310     level => $self->{must_level});
311     } else {
312     $onerror->(node => $doc,
313     type => 'character encoding:'.$charset_name,
314     level => $self->{good_level});
315     }
316 wakaba 1.52 } elsif ($doc->manakai_is_html) {
317     ## NOTE: MUST and SHOULD requirements above cannot be tested,
318     ## since the document has no input charset encoding information.
319     $onerror->(node => $doc,
320     type => 'character encoding:',
321     level => 'unsupported');
322 wakaba 1.51 }
323    
324     return $return;
325 wakaba 1.42 } # check_document
326 wakaba 1.1
327 wakaba 1.56 sub check_element ($$$;$) {
328     my ($self, $el, $onerror, $onsubdoc) = @_;
329 wakaba 1.42 $self = bless {}, $self unless ref $self;
330     $self->{onerror} = $onerror;
331 wakaba 1.56 $self->{onsubdoc} = $onsubdoc || sub {
332     warn "A subdocument is not conformance-checked";
333     };
334 wakaba 1.2
335 wakaba 1.48 $self->{must_level} = 'm';
336     $self->{fact_level} = 'f';
337     $self->{should_level} = 's';
338 wakaba 1.51 $self->{good_level} = 'w';
339 wakaba 1.59 $self->{unsupported_lavel} = 'u';
340 wakaba 1.48
341 wakaba 1.61 $self->{plus_elements} = {};
342     $self->{minus_elements} = {};
343 wakaba 1.42 $self->{id} = {};
344     $self->{term} = {};
345     $self->{usemap} = [];
346     $self->{contextmenu} = [];
347     $self->{map} = {};
348     $self->{menu} = {};
349     $self->{has_link_type} = {};
350 wakaba 1.60 $self->{flag} = {};
351 wakaba 1.46 #$self->{has_uri_attr};
352     #$self->{has_hyperlink_element};
353 wakaba 1.51 #$self->{has_charset};
354 wakaba 1.57 #$self->{has_base};
355 wakaba 1.42 $self->{return} = {
356     class => {},
357     id => $self->{id}, table => [], term => $self->{term},
358     };
359 wakaba 1.4
360 wakaba 1.60 my @item = ({type => 'element', node => $el, parent_state => {}});
361 wakaba 1.66 $item[-1]->{real_parent_state} = $item[-1]->{parent_state};
362 wakaba 1.60 while (@item) {
363     my $item = shift @item;
364     if (ref $item eq 'ARRAY') {
365     my $code = shift @$item;
366     next unless $code;## TODO: temp.
367     $code->(@$item);
368     } elsif ($item->{type} eq 'element') {
369     my $el_nsuri = $item->{node}->namespace_uri;
370     $el_nsuri = '' unless defined $el_nsuri;
371     my $el_ln = $item->{node}->manakai_local_name;
372    
373     unless ($Namespace->{$el_nsuri}->{loaded}) {
374     if ($Namespace->{$el_nsuri}->{module}) {
375     eval qq{ require $Namespace->{$el_nsuri}->{module} } or die $@;
376 wakaba 1.42 } else {
377 wakaba 1.60 $Namespace->{$el_nsuri}->{loaded} = 1;
378 wakaba 1.1 }
379     }
380 wakaba 1.63
381     my $element_state = {};
382 wakaba 1.60 my $eldef = $Element->{$el_nsuri}->{$el_ln} ||
383     $Element->{$el_nsuri}->{''} ||
384 wakaba 1.42 $ElementDefault;
385 wakaba 1.61 my $content_def = $item->{transparent}
386     ? $item->{parent_def} || $eldef : $eldef;
387 wakaba 1.63 my $content_state = $item->{transparent}
388 wakaba 1.65 ? $item->{parent_def}
389     ? $item->{parent_state} || $element_state : $element_state
390     : $element_state;
391 wakaba 1.60
392     my @new_item;
393     push @new_item, [$eldef->{check_start}, $self, $item, $element_state];
394     push @new_item, [$eldef->{check_attrs}, $self, $item, $element_state];
395 wakaba 1.61
396 wakaba 1.60 my @child = @{$item->{node}->child_nodes};
397     while (@child) {
398     my $child = shift @child;
399     my $child_nt = $child->node_type;
400     if ($child_nt == 1) { # ELEMENT_NODE
401     my $child_nsuri = $child->namespace_uri;
402     $child_nsuri = '' unless defined $child_nsuri;
403     my $child_ln = $child->manakai_local_name;
404     if ($HTMLTransparentElements->{$child_nsuri}->{$child_ln} and
405     not (($self->{flag}->{in_head} or
406 wakaba 1.61 ($el_nsuri eq $HTML_NS and $el_ln eq 'head')) and
407     $child_nsuri eq $HTML_NS and $child_ln eq 'noscript')) {
408 wakaba 1.60 push @new_item, [$content_def->{check_child_element},
409     $self, $item, $child,
410 wakaba 1.66 $child_nsuri, $child_ln, 1,
411     $content_state, $element_state];
412 wakaba 1.60 push @new_item, {type => 'element', node => $child,
413 wakaba 1.65 parent_state => $content_state,
414 wakaba 1.61 parent_def => $content_def,
415 wakaba 1.66 real_parent_state => $element_state,
416 wakaba 1.60 transparent => 1};
417     } else {
418 wakaba 1.65 if ($item->{parent_def} and # has parent
419     $el_nsuri eq $HTML_NS) { ## $HTMLSemiTransparentElements
420 wakaba 1.61 if ($el_ln eq 'object') {
421     if ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
422     #
423     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
424     #
425     } else {
426 wakaba 1.62 $content_def = $item->{parent_def} || $content_def;
427 wakaba 1.63 $content_state = $item->{parent_state} || $content_state;
428 wakaba 1.62 }
429     } elsif ($el_ln eq 'video' or $el_ln eq 'audio') {
430     if ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
431     #
432     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'source') {
433     $element_state->{has_source} = 1;
434     } else {
435     $content_def = $item->{parent_def} || $content_def;
436 wakaba 1.63 $content_state = $item->{parent_state} || $content_state;
437 wakaba 1.61 }
438     }
439     }
440    
441 wakaba 1.60 push @new_item, [$content_def->{check_child_element},
442     $self, $item, $child,
443 wakaba 1.64 $child_nsuri, $child_ln,
444     $HTMLSemiTransparentElements
445     ->{$child_nsuri}->{$child_ln},
446 wakaba 1.66 $content_state, $element_state];
447 wakaba 1.60 push @new_item, {type => 'element', node => $child,
448 wakaba 1.65 parent_def => $content_def,
449 wakaba 1.66 real_parent_state => $element_state,
450 wakaba 1.65 parent_state => $content_state};
451 wakaba 1.60 }
452    
453     if ($HTMLEmbeddedContent->{$child_nsuri}->{$child_ln}) {
454     $element_state->{has_significant} = 1;
455     }
456     } elsif ($child_nt == 3 or # TEXT_NODE
457     $child_nt == 4) { # CDATA_SECTION_NODE
458     my $has_significant = ($child->data =~ /[^\x09-\x0D\x20]/);
459     push @new_item, [$content_def->{check_child_text},
460     $self, $item, $child, $has_significant,
461 wakaba 1.66 $content_state, $element_state];
462     $element_state->{has_significant} ||= $has_significant;
463 wakaba 1.61 if ($has_significant and
464     $HTMLSemiTransparentElements->{$el_nsuri}->{$el_ln}) {
465     $content_def = $item->{parent_def} || $content_def;
466     }
467 wakaba 1.60 } elsif ($child_nt == 5) { # ENTITY_REFERENCE_NODE
468     push @child, @{$child->child_nodes};
469 wakaba 1.1 }
470 wakaba 1.60 ## TODO: PI_NODE
471     ## TODO: Unknown node type
472 wakaba 1.1 }
473 wakaba 1.60
474     push @new_item, [$eldef->{check_end}, $self, $item, $element_state];
475    
476     unshift @item, @new_item;
477 wakaba 1.30 } else {
478 wakaba 1.60 die "$0: Internal error: Unsupported checking action type |$item->{type}|";
479 wakaba 1.4 }
480 wakaba 1.1 }
481 wakaba 1.17
482     for (@{$self->{usemap}}) {
483     unless ($self->{map}->{$_->[0]}) {
484     $self->{onerror}->(node => $_->[1], type => 'no referenced map');
485     }
486     }
487    
488 wakaba 1.32 for (@{$self->{contextmenu}}) {
489     unless ($self->{menu}->{$_->[0]}) {
490     $self->{onerror}->(node => $_->[1], type => 'no referenced menu');
491     }
492     }
493    
494 wakaba 1.61 delete $self->{plus_elements};
495     delete $self->{minus_elements};
496 wakaba 1.17 delete $self->{onerror};
497     delete $self->{id};
498     delete $self->{usemap};
499     delete $self->{map};
500 wakaba 1.33 return $self->{return};
501 wakaba 1.1 } # check_element
502    
503 wakaba 1.60 sub _add_minus_elements ($$@) {
504     my $self = shift;
505     my $element_state = shift;
506     for my $elements (@_) {
507     for my $nsuri (keys %$elements) {
508     for my $ln (keys %{$elements->{$nsuri}}) {
509     unless ($self->{minus_elements}->{$nsuri}->{$ln}) {
510     $element_state->{minus_elements_original}->{$nsuri}->{$ln} = 0;
511     $self->{minus_elements}->{$nsuri}->{$ln} = 1;
512     }
513     }
514     }
515     }
516     } # _add_minus_elements
517    
518     sub _remove_minus_elements ($$) {
519     my $self = shift;
520     my $element_state = shift;
521     for my $nsuri (keys %{$element_state->{minus_elements_original}}) {
522     for my $ln (keys %{$element_state->{minus_elements_original}->{$nsuri}}) {
523     delete $self->{minus_elements}->{$nsuri}->{$ln};
524     }
525     }
526     } # _remove_minus_elements
527    
528     sub _add_plus_elements ($$@) {
529     my $self = shift;
530     my $element_state = shift;
531     for my $elements (@_) {
532     for my $nsuri (keys %$elements) {
533     for my $ln (keys %{$elements->{$nsuri}}) {
534     unless ($self->{plus_elements}->{$nsuri}->{$ln}) {
535     $element_state->{plus_elements_original}->{$nsuri}->{$ln} = 0;
536     $self->{plus_elements}->{$nsuri}->{$ln} = 1;
537     }
538     }
539     }
540     }
541     } # _add_plus_elements
542    
543     sub _remove_plus_elements ($$) {
544     my $self = shift;
545     my $element_state = shift;
546     for my $nsuri (keys %{$element_state->{plus_elements_original}}) {
547     for my $ln (keys %{$element_state->{plus_elements_original}->{$nsuri}}) {
548     delete $self->{plus_elements}->{$nsuri}->{$ln};
549     }
550     }
551     } # _remove_plus_elements
552    
553 wakaba 1.2 sub _add_minuses ($@) {
554     my $self = shift;
555     my $r = {};
556     for my $list (@_) {
557     for my $ns (keys %$list) {
558     for my $ln (keys %{$list->{$ns}}) {
559     unless ($self->{minuses}->{$ns}->{$ln}) {
560     $self->{minuses}->{$ns}->{$ln} = 1;
561     $r->{$ns}->{$ln} = 1;
562     }
563     }
564     }
565     }
566 wakaba 1.4 return {type => 'plus', list => $r};
567 wakaba 1.2 } # _add_minuses
568    
569 wakaba 1.50 sub _add_pluses ($@) {
570     my $self = shift;
571     my $r = {};
572     for my $list (@_) {
573     for my $ns (keys %$list) {
574     for my $ln (keys %{$list->{$ns}}) {
575     unless ($self->{pluses}->{$ns}->{$ln}) {
576     $self->{pluses}->{$ns}->{$ln} = 1;
577     $r->{$ns}->{$ln} = 1;
578     }
579     }
580     }
581     }
582     return {type => 'minus', list => $r};
583     } # _add_pluses
584    
585 wakaba 1.2 sub _remove_minuses ($$) {
586 wakaba 1.4 my ($self, $todo) = @_;
587 wakaba 1.50 if ($todo->{type} eq 'minus') {
588     for my $ns (keys %{$todo->{list}}) {
589     for my $ln (keys %{$todo->{list}->{$ns}}) {
590     delete $self->{pluses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
591     }
592 wakaba 1.2 }
593 wakaba 1.50 } elsif ($todo->{type} eq 'plus') {
594     for my $ns (keys %{$todo->{list}}) {
595     for my $ln (keys %{$todo->{list}->{$ns}}) {
596     delete $self->{minuses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
597     }
598     }
599     } else {
600     die "$0: Unknown +- type: $todo->{type}";
601 wakaba 1.2 }
602     1;
603     } # _remove_minuses
604    
605 wakaba 1.50 ## NOTE: Priority for "minuses" and "pluses" are currently left
606     ## undefined and implemented inconsistently; it is not a problem for
607     ## now, since no element belongs to both lists.
608    
609 wakaba 1.30 sub _check_get_children ($$$) {
610     my ($self, $node, $parent_todo) = @_;
611 wakaba 1.4 my $new_todos = [];
612 wakaba 1.2 my $sib = [];
613     TP: {
614     my $node_ns = $node->namespace_uri;
615     $node_ns = '' unless defined $node_ns;
616     my $node_ln = $node->manakai_local_name;
617 wakaba 1.45 if ($HTMLTransparentElements->{$node_ns}->{$node_ln}) {
618     if ($node_ns eq $HTML_NS and $node_ln eq 'noscript') {
619     if ($parent_todo->{flag}->{in_head}) {
620     #
621     } else {
622     my $end = $self->_add_minuses ({$HTML_NS, {noscript => 1}});
623     push @$sib, $end;
624    
625     unshift @$sib, @{$node->child_nodes};
626     push @$new_todos, {type => 'element-attributes', node => $node};
627     last TP;
628     }
629 wakaba 1.58 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'del') {
630     my $sig_flag = $parent_todo->{flag}->{has_descendant}->{significant};
631     unshift @$sib, @{$node->child_nodes};
632     push @$new_todos, {type => 'element-attributes', node => $node};
633     push @$new_todos,
634     {type => 'code',
635     code => sub {
636     $parent_todo->{flag}->{has_descendant}->{significant} = 0
637     if not $sig_flag;
638     }};
639     last TP;
640 wakaba 1.45 } else {
641     unshift @$sib, @{$node->child_nodes};
642     push @$new_todos, {type => 'element-attributes', node => $node};
643     last TP;
644 wakaba 1.2 }
645     }
646 wakaba 1.8 if ($node_ns eq $HTML_NS and ($node_ln eq 'video' or $node_ln eq 'audio')) {
647 wakaba 1.2 if ($node->has_attribute_ns (undef, 'src')) {
648     unshift @$sib, @{$node->child_nodes};
649 wakaba 1.9 push @$new_todos, {type => 'element-attributes', node => $node};
650 wakaba 1.2 last TP;
651     } else {
652     my @cn = @{$node->child_nodes};
653     CN: while (@cn) {
654     my $cn = shift @cn;
655     my $cnt = $cn->node_type;
656     if ($cnt == 1) {
657 wakaba 1.8 my $cn_nsuri = $cn->namespace_uri;
658     $cn_nsuri = '' unless defined $cn_nsuri;
659     if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'source') {
660 wakaba 1.2 #
661     } else {
662     last CN;
663     }
664     } elsif ($cnt == 3 or $cnt == 4) {
665     if ($cn->data =~ /[^\x09-\x0D\x20]/) {
666     last CN;
667     }
668     }
669     } # CN
670     unshift @$sib, @cn;
671     }
672 wakaba 1.57 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'object') {
673     my @cn = @{$node->child_nodes};
674     CN: while (@cn) {
675     my $cn = shift @cn;
676     my $cnt = $cn->node_type;
677     if ($cnt == 1) {
678     my $cn_nsuri = $cn->namespace_uri;
679     $cn_nsuri = '' unless defined $cn_nsuri;
680     if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'param') {
681     #
682     } else {
683     last CN;
684     }
685     } elsif ($cnt == 3 or $cnt == 4) {
686     if ($cn->data =~ /[^\x09-\x0D\x20]/) {
687     last CN;
688     }
689     }
690     } # CN
691     unshift @$sib, @cn;
692 wakaba 1.2 }
693 wakaba 1.4 push @$new_todos, {type => 'element', node => $node};
694 wakaba 1.2 } # TP
695 wakaba 1.30
696     for my $new_todo (@$new_todos) {
697     $new_todo->{flag} = {%{$parent_todo->{flag} or {}}};
698     }
699    
700 wakaba 1.4 return ($sib, $new_todos);
701 wakaba 1.2 } # _check_get_children
702    
703 wakaba 1.44 =head1 LICENSE
704    
705 wakaba 1.56 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
706 wakaba 1.44
707     This library is free software; you can redistribute it
708     and/or modify it under the same terms as Perl itself.
709    
710     =cut
711    
712 wakaba 1.1 1;
713 wakaba 1.66 # $Date: 2008/02/23 16:09:20 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24