/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.68 - (hide annotations) (download)
Sun Feb 24 07:51:19 2008 UTC (16 years, 8 months ago) by wakaba
Branch: MAIN
Changes since 1.67: +21 -2 lines
++ whatpm/t/ChangeLog	24 Feb 2008 07:51:13 -0000
	* content-model-1.dat, content-model-2.dat: Some test
	results are updated.  New test data on non-standard
	attributes are added.  Note that some test results do
	still need to be updated.

2008-02-24  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	24 Feb 2008 07:47:32 -0000
	* ContentChecker.pm (_attr_status_info): New internal method.

2008-02-24  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ContentChecker/ChangeLog	24 Feb 2008 07:50:04 -0000
	* HTML.pm: Standardized status for attributes are added (HTML5
	info are not added yet).  Make element or attribute not defined
	by HTML5 spec as a conformance error (though it is not clearly
	defined by HTML5 spec as non-conforming).  Note that more work
	is needed for non-standard attribute thing, since the current
	implementation prevents non-standard attribute from being
	syntactically checked (otherwise it is treated as if a
	standard attribute).

2008-02-24  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3 wakaba 1.68 our $VERSION=do{my @r=(q$Revision: 1.67 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1
5 wakaba 1.18 require Whatpm::URIChecker;
6    
7 wakaba 1.13 ## ISSUE: How XML and XML Namespaces conformance can (or cannot)
8     ## be applied to an in-memory representation (i.e. DOM)?
9    
10 wakaba 1.50 ## TODO: Conformance of an HTML document with non-html root element.
11    
12 wakaba 1.67 sub FEATURE_STATUS_REC () { 0b1 } ## Interoperable standard
13     sub FEATURE_STATUS_CR () { 0b10 } ## Call for implementation
14     sub FEATURE_STATUS_LC () { 0b100 } ## Last call for comments
15     sub FEATURE_STATUS_WD () { 0b1000 } ## Working or editor's draft
16    
17 wakaba 1.42 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
18 wakaba 1.9 my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
19     my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
20    
21 wakaba 1.42 my $Namespace = {
22 wakaba 1.43 q<http://www.w3.org/2005/Atom> => {module => 'Whatpm::ContentChecker::Atom'},
23 wakaba 1.42 $HTML_NS => {module => 'Whatpm::ContentChecker::HTML'},
24     $XML_NS => {loaded => 1},
25     $XMLNS_NS => {loaded => 1},
26     };
27    
28     our $AttrChecker = {
29 wakaba 1.9 $XML_NS => {
30 wakaba 1.13 space => sub {
31     my ($self, $attr) = @_;
32     my $value = $attr->value;
33     if ($value eq 'default' or $value eq 'preserve') {
34     #
35     } else {
36     ## NOTE: An XML "error"
37 wakaba 1.33 $self->{onerror}->(node => $attr, level => 'error',
38     type => 'invalid attribute value');
39 wakaba 1.13 }
40     },
41     lang => sub {
42 wakaba 1.35 my ($self, $attr) = @_;
43 wakaba 1.47 my $value = $attr->value;
44     if ($value eq '') {
45     #
46     } else {
47     require Whatpm::LangTag;
48     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
49     my %opt = @_;
50     my $type = 'LangTag:'.$opt{type};
51     $type .= ':' . $opt{subtag} if defined $opt{subtag};
52     $self->{onerror}->(node => $attr, type => $type,
53     value => $opt{value}, level => $opt{level});
54     });
55     }
56    
57 wakaba 1.13 ## NOTE: "The values of the attribute are language identifiers
58     ## as defined by [IETF RFC 3066], Tags for the Identification
59     ## of Languages, or its successor; in addition, the empty string
60     ## may be specified." ("may" in lower case)
61 wakaba 1.47 ## NOTE: Is an RFC 3066-valid (but RFC 4647-invalid) language tag
62     ## allowed today?
63    
64     ## TODO: test data
65    
66 wakaba 1.35 if ($attr->owner_document->manakai_is_html) { # MUST NOT
67 wakaba 1.36 $self->{onerror}->(node => $attr, type => 'in HTML:xml:lang');
68 wakaba 1.35 ## TODO: Test data...
69     }
70 wakaba 1.13 },
71     base => sub {
72     my ($self, $attr) = @_;
73     my $value = $attr->value;
74     if ($value =~ /[^\x{0000}-\x{10FFFF}]/) { ## ISSUE: Should we disallow noncharacters?
75     $self->{onerror}->(node => $attr,
76 wakaba 1.33 type => 'invalid attribute value');
77 wakaba 1.13 }
78 wakaba 1.18 ## NOTE: Conformance to URI standard is not checked since there is
79     ## no author requirement on conformance in the XML Base specification.
80 wakaba 1.13 },
81     id => sub {
82     my ($self, $attr) = @_;
83     my $value = $attr->value;
84     $value =~ s/[\x09\x0A\x0D\x20]+/ /g;
85     $value =~ s/^\x20//;
86     $value =~ s/\x20$//;
87     ## TODO: NCName in XML 1.0 or 1.1
88     ## TODO: declared type is ID?
89 wakaba 1.33 if ($self->{id}->{$value}) { ## NOTE: An xml:id error
90     $self->{onerror}->(node => $attr, level => 'error',
91     type => 'duplicate ID');
92 wakaba 1.37 push @{$self->{id}->{$value}}, $attr;
93 wakaba 1.13 } else {
94 wakaba 1.37 $self->{id}->{$value} = [$attr];
95 wakaba 1.13 }
96     },
97 wakaba 1.9 },
98     $XMLNS_NS => {
99 wakaba 1.13 '' => sub {
100     my ($self, $attr) = @_;
101     my $ln = $attr->manakai_local_name;
102     my $value = $attr->value;
103     if ($value eq $XML_NS and $ln ne 'xml') {
104     $self->{onerror}
105 wakaba 1.33 ->(node => $attr, level => 'NC',
106     type => 'Reserved Prefixes and Namespace Names:=xml');
107 wakaba 1.13 } elsif ($value eq $XMLNS_NS) {
108     $self->{onerror}
109 wakaba 1.33 ->(node => $attr, level => 'NC',
110     type => 'Reserved Prefixes and Namespace Names:=xmlns');
111 wakaba 1.13 }
112     if ($ln eq 'xml' and $value ne $XML_NS) {
113     $self->{onerror}
114 wakaba 1.33 ->(node => $attr, level => 'NC',
115     type => 'Reserved Prefixes and Namespace Names:xmlns:xml=');
116 wakaba 1.13 } elsif ($ln eq 'xmlns') {
117     $self->{onerror}
118 wakaba 1.33 ->(node => $attr, level => 'NC',
119     type => 'Reserved Prefixes and Namespace Names:xmlns:xmlns=');
120 wakaba 1.13 }
121     ## TODO: If XML 1.0 and empty
122     },
123     xmlns => sub {
124     my ($self, $attr) = @_;
125     ## TODO: In XML 1.0, URI reference [RFC 3986] or an empty string
126     ## TODO: In XML 1.1, IRI reference [RFC 3987] or an empty string
127 wakaba 1.18 ## TODO: relative references are deprecated
128 wakaba 1.13 my $value = $attr->value;
129     if ($value eq $XML_NS) {
130     $self->{onerror}
131 wakaba 1.33 ->(node => $attr, level => 'NC',
132     type => 'Reserved Prefixes and Namespace Names:=xml');
133 wakaba 1.13 } elsif ($value eq $XMLNS_NS) {
134     $self->{onerror}
135 wakaba 1.33 ->(node => $attr, level => 'NC',
136     type => 'Reserved Prefixes and Namespace Names:=xmlns');
137 wakaba 1.13 }
138     },
139 wakaba 1.9 },
140     };
141    
142 wakaba 1.14 ## ISSUE: Should we really allow these attributes?
143 wakaba 1.13 $AttrChecker->{''}->{'xml:space'} = $AttrChecker->{$XML_NS}->{space};
144     $AttrChecker->{''}->{'xml:lang'} = $AttrChecker->{$XML_NS}->{lang};
145     $AttrChecker->{''}->{'xml:base'} = $AttrChecker->{$XML_NS}->{base};
146     $AttrChecker->{''}->{'xml:id'} = $AttrChecker->{$XML_NS}->{id};
147    
148 wakaba 1.60 our %AnyChecker = (
149     check_start => sub { },
150     check_attrs => sub {
151     my ($self, $item, $element_state) = @_;
152     for my $attr (@{$item->{node}->attributes}) {
153 wakaba 1.9 my $attr_ns = $attr->namespace_uri;
154     $attr_ns = '' unless defined $attr_ns;
155     my $attr_ln = $attr->manakai_local_name;
156     my $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
157 wakaba 1.60 || $AttrChecker->{$attr_ns}->{''};
158 wakaba 1.9 if ($checker) {
159     $checker->($self, $attr);
160 wakaba 1.17 } else {
161 wakaba 1.33 $self->{onerror}->(node => $attr, level => 'unsupported',
162     type => 'attribute');
163 wakaba 1.9 }
164     }
165     },
166 wakaba 1.60 check_child_element => sub {
167     my ($self, $item, $child_el, $child_nsuri, $child_ln,
168     $child_is_transparent, $element_state) = @_;
169     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
170     $self->{onerror}->(node => $child_el,
171     type => 'element not allowed:minus',
172     level => $self->{must_level});
173     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
174     #
175     } else {
176     #
177     }
178     },
179     check_child_text => sub { },
180     check_end => sub {
181     my ($self, $item, $element_state) = @_;
182     if ($element_state->{has_significant}) {
183 wakaba 1.66 $item->{real_parent_state}->{has_significant} = 1;
184 wakaba 1.60 }
185     },
186     );
187    
188     our $ElementDefault = {
189     %AnyChecker,
190     check_start => sub {
191     my ($self, $item, $element_state) = @_;
192     $self->{onerror}->(node => $item->{node}, level => 'unsupported',
193     type => 'element');
194     },
195 wakaba 1.1 };
196    
197 wakaba 1.60 our $HTMLEmbeddedContent = {
198     ## NOTE: All embedded content is also phrasing content.
199     $HTML_NS => {
200     img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
201     canvas => 1,
202     },
203     ## NOTE: MathML is mentioned in the HTML5 spec.
204     q<http://www.w3.org/1998/Math/MathML> => {math => 1},
205     ## NOTE: SVG is mentioned in the HTML5 spec.
206     q<http://www.w3.org/2000/svg> => {svg => 1},
207     ## NOTE: Foreign elements with content (but no metadata) are
208     ## embedded content.
209     };
210    
211 wakaba 1.7 my $HTMLTransparentElements = {
212 wakaba 1.57 $HTML_NS => {qw/ins 1 del 1 font 1 noscript 1 canvas 1/},
213 wakaba 1.29 ## NOTE: |html:noscript| is transparent if scripting is disabled
214     ## and not in |head|.
215 wakaba 1.7 };
216    
217 wakaba 1.61 my $HTMLSemiTransparentElements = {
218     $HTML_NS => {object => 1, video => 1, audio => 1},
219     };
220 wakaba 1.57
221 wakaba 1.42 our $Element = {};
222 wakaba 1.7
223 wakaba 1.56 sub check_document ($$$;$) {
224     my ($self, $doc, $onerror, $onsubdoc) = @_;
225 wakaba 1.42 $self = bless {}, $self unless ref $self;
226     $self->{onerror} = $onerror;
227 wakaba 1.56 $self->{onsubdoc} = $onsubdoc || sub {
228     warn "A subdocument is not conformance-checked";
229     };
230 wakaba 1.1
231 wakaba 1.48 $self->{must_level} = 'm';
232     $self->{fact_level} = 'f';
233     $self->{should_level} = 's';
234 wakaba 1.51 $self->{good_level} = 'w';
235 wakaba 1.67 $self->{info_level} = 'i';
236 wakaba 1.59 $self->{unsupported_lavel} = 'u';
237 wakaba 1.48
238 wakaba 1.42 my $docel = $doc->document_element;
239     unless (defined $docel) {
240     ## ISSUE: Should we check content of Document node?
241     $onerror->(node => $doc, type => 'no document element');
242     ## ISSUE: Is this non-conforming (to what spec)? Or just a warning?
243     return {
244     class => {},
245     id => {}, table => [], term => {},
246     };
247 wakaba 1.1 }
248    
249 wakaba 1.42 ## ISSUE: Unexpanded entity references and HTML5 conformance
250 wakaba 1.1
251 wakaba 1.42 my $docel_nsuri = $docel->namespace_uri;
252     $docel_nsuri = '' unless defined $docel_nsuri;
253 wakaba 1.43 unless ($Namespace->{$docel_nsuri}->{loaded}) {
254     if ($Namespace->{$docel_nsuri}->{module}) {
255     eval qq{ require $Namespace->{$docel_nsuri}->{module} } or die $@;
256     } else {
257     $Namespace->{$docel_nsuri}->{loaded} = 1;
258     }
259     }
260 wakaba 1.42 my $docel_def = $Element->{$docel_nsuri}->{$docel->manakai_local_name} ||
261     $Element->{$docel_nsuri}->{''} ||
262     $ElementDefault;
263     if ($docel_def->{is_root}) {
264     #
265 wakaba 1.50 } elsif ($docel_def->{is_xml_root}) {
266     unless ($doc->manakai_is_html) {
267     #
268     } else {
269     $onerror->(node => $docel, type => 'element not allowed:root:xml');
270     }
271 wakaba 1.42 } else {
272 wakaba 1.49 $onerror->(node => $docel, type => 'element not allowed:root');
273 wakaba 1.1 }
274    
275 wakaba 1.42 ## TODO: Check for other items other than document element
276     ## (second (errorous) element, text nodes, PI nodes, doctype nodes)
277 wakaba 1.2
278 wakaba 1.56 my $return = $self->check_element ($docel, $onerror, $onsubdoc);
279 wakaba 1.51
280 wakaba 1.52 ## TODO: Test for these checks are necessary.
281 wakaba 1.51 my $charset_name = $doc->input_encoding;
282     if (defined $charset_name) {
283     require Message::Charset::Info;
284     my $charset = $Message::Charset::Info::IANACharset->{$charset_name};
285    
286     if ($doc->manakai_is_html and
287     not $doc->manakai_has_bom and
288     not defined $doc->manakai_charset) {
289     unless ($charset->{is_html_ascii_superset}) {
290     $onerror->(node => $doc, level => $self->{must_level},
291     type => 'non ascii superset:'.$charset_name);
292     }
293    
294     if (not $self->{has_charset} and
295     not $charset->{iana_names}->{'us-ascii'}) {
296     $onerror->(node => $doc, level => $self->{must_level},
297     type => 'no character encoding declaration:'.$charset_name);
298     }
299     }
300    
301     if ($charset->{iana_names}->{'utf-8'}) {
302     #
303     } elsif ($charset->{iana_names}->{'jis_x0212-1990'} or
304     $charset->{iana_names}->{'x-jis0208'} or
305     $charset->{iana_names}->{'utf-32'} or ## ISSUE: UTF-32BE? UTF-32LE?
306     $charset->{is_ebcdic_based}) {
307     $onerror->(node => $doc,
308     type => 'character encoding:'.$charset_name,
309     level => $self->{should_level});
310     } elsif ($charset->{iana_names}->{'cesu-8'} or
311     $charset->{iana_names}->{'utf-8'} or ## ISSUE: UNICODE-1-1-UTF-7?
312     $charset->{iana_names}->{'bocu-1'} or
313     $charset->{iana_names}->{'scsu'}) {
314     $onerror->(node => $doc,
315     type => 'character encoding:'.$charset_name,
316     level => $self->{must_level});
317     } else {
318     $onerror->(node => $doc,
319     type => 'character encoding:'.$charset_name,
320     level => $self->{good_level});
321     }
322 wakaba 1.52 } elsif ($doc->manakai_is_html) {
323     ## NOTE: MUST and SHOULD requirements above cannot be tested,
324     ## since the document has no input charset encoding information.
325     $onerror->(node => $doc,
326     type => 'character encoding:',
327     level => 'unsupported');
328 wakaba 1.51 }
329    
330     return $return;
331 wakaba 1.42 } # check_document
332 wakaba 1.1
333 wakaba 1.56 sub check_element ($$$;$) {
334     my ($self, $el, $onerror, $onsubdoc) = @_;
335 wakaba 1.42 $self = bless {}, $self unless ref $self;
336     $self->{onerror} = $onerror;
337 wakaba 1.56 $self->{onsubdoc} = $onsubdoc || sub {
338     warn "A subdocument is not conformance-checked";
339     };
340 wakaba 1.2
341 wakaba 1.48 $self->{must_level} = 'm';
342     $self->{fact_level} = 'f';
343     $self->{should_level} = 's';
344 wakaba 1.51 $self->{good_level} = 'w';
345 wakaba 1.67 $self->{info_level} = 'i';
346 wakaba 1.59 $self->{unsupported_lavel} = 'u';
347 wakaba 1.48
348 wakaba 1.61 $self->{plus_elements} = {};
349     $self->{minus_elements} = {};
350 wakaba 1.42 $self->{id} = {};
351     $self->{term} = {};
352     $self->{usemap} = [];
353     $self->{contextmenu} = [];
354     $self->{map} = {};
355     $self->{menu} = {};
356     $self->{has_link_type} = {};
357 wakaba 1.60 $self->{flag} = {};
358 wakaba 1.46 #$self->{has_uri_attr};
359     #$self->{has_hyperlink_element};
360 wakaba 1.51 #$self->{has_charset};
361 wakaba 1.57 #$self->{has_base};
362 wakaba 1.42 $self->{return} = {
363     class => {},
364     id => $self->{id}, table => [], term => $self->{term},
365     };
366 wakaba 1.4
367 wakaba 1.60 my @item = ({type => 'element', node => $el, parent_state => {}});
368 wakaba 1.66 $item[-1]->{real_parent_state} = $item[-1]->{parent_state};
369 wakaba 1.60 while (@item) {
370     my $item = shift @item;
371     if (ref $item eq 'ARRAY') {
372     my $code = shift @$item;
373     next unless $code;## TODO: temp.
374     $code->(@$item);
375     } elsif ($item->{type} eq 'element') {
376     my $el_nsuri = $item->{node}->namespace_uri;
377     $el_nsuri = '' unless defined $el_nsuri;
378     my $el_ln = $item->{node}->manakai_local_name;
379    
380     unless ($Namespace->{$el_nsuri}->{loaded}) {
381     if ($Namespace->{$el_nsuri}->{module}) {
382     eval qq{ require $Namespace->{$el_nsuri}->{module} } or die $@;
383 wakaba 1.42 } else {
384 wakaba 1.60 $Namespace->{$el_nsuri}->{loaded} = 1;
385 wakaba 1.1 }
386     }
387 wakaba 1.63
388     my $element_state = {};
389 wakaba 1.60 my $eldef = $Element->{$el_nsuri}->{$el_ln} ||
390     $Element->{$el_nsuri}->{''} ||
391 wakaba 1.42 $ElementDefault;
392 wakaba 1.61 my $content_def = $item->{transparent}
393     ? $item->{parent_def} || $eldef : $eldef;
394 wakaba 1.63 my $content_state = $item->{transparent}
395 wakaba 1.65 ? $item->{parent_def}
396     ? $item->{parent_state} || $element_state : $element_state
397     : $element_state;
398 wakaba 1.60
399 wakaba 1.67 unless ($eldef->{status} & FEATURE_STATUS_REC) {
400     my $status = $eldef->{status} & FEATURE_STATUS_CR ? 'cr' :
401     $eldef->{status} & FEATURE_STATUS_LC ? 'lc' :
402     $eldef->{status} & FEATURE_STATUS_WD ? 'wd' : 'non-standard';
403     $self->{onerror}->(node => $item->{node},
404     type => 'status:'.$status.':element',
405     level => $self->{info_level});
406     }
407    
408 wakaba 1.60 my @new_item;
409     push @new_item, [$eldef->{check_start}, $self, $item, $element_state];
410     push @new_item, [$eldef->{check_attrs}, $self, $item, $element_state];
411 wakaba 1.61
412 wakaba 1.60 my @child = @{$item->{node}->child_nodes};
413     while (@child) {
414     my $child = shift @child;
415     my $child_nt = $child->node_type;
416     if ($child_nt == 1) { # ELEMENT_NODE
417     my $child_nsuri = $child->namespace_uri;
418     $child_nsuri = '' unless defined $child_nsuri;
419     my $child_ln = $child->manakai_local_name;
420     if ($HTMLTransparentElements->{$child_nsuri}->{$child_ln} and
421     not (($self->{flag}->{in_head} or
422 wakaba 1.61 ($el_nsuri eq $HTML_NS and $el_ln eq 'head')) and
423     $child_nsuri eq $HTML_NS and $child_ln eq 'noscript')) {
424 wakaba 1.60 push @new_item, [$content_def->{check_child_element},
425     $self, $item, $child,
426 wakaba 1.66 $child_nsuri, $child_ln, 1,
427     $content_state, $element_state];
428 wakaba 1.60 push @new_item, {type => 'element', node => $child,
429 wakaba 1.65 parent_state => $content_state,
430 wakaba 1.61 parent_def => $content_def,
431 wakaba 1.66 real_parent_state => $element_state,
432 wakaba 1.60 transparent => 1};
433     } else {
434 wakaba 1.65 if ($item->{parent_def} and # has parent
435     $el_nsuri eq $HTML_NS) { ## $HTMLSemiTransparentElements
436 wakaba 1.61 if ($el_ln eq 'object') {
437     if ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
438     #
439     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
440     #
441     } else {
442 wakaba 1.62 $content_def = $item->{parent_def} || $content_def;
443 wakaba 1.63 $content_state = $item->{parent_state} || $content_state;
444 wakaba 1.62 }
445     } elsif ($el_ln eq 'video' or $el_ln eq 'audio') {
446     if ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
447     #
448     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'source') {
449     $element_state->{has_source} = 1;
450     } else {
451     $content_def = $item->{parent_def} || $content_def;
452 wakaba 1.63 $content_state = $item->{parent_state} || $content_state;
453 wakaba 1.61 }
454     }
455     }
456    
457 wakaba 1.60 push @new_item, [$content_def->{check_child_element},
458     $self, $item, $child,
459 wakaba 1.64 $child_nsuri, $child_ln,
460     $HTMLSemiTransparentElements
461     ->{$child_nsuri}->{$child_ln},
462 wakaba 1.66 $content_state, $element_state];
463 wakaba 1.60 push @new_item, {type => 'element', node => $child,
464 wakaba 1.65 parent_def => $content_def,
465 wakaba 1.66 real_parent_state => $element_state,
466 wakaba 1.65 parent_state => $content_state};
467 wakaba 1.60 }
468    
469     if ($HTMLEmbeddedContent->{$child_nsuri}->{$child_ln}) {
470     $element_state->{has_significant} = 1;
471     }
472     } elsif ($child_nt == 3 or # TEXT_NODE
473     $child_nt == 4) { # CDATA_SECTION_NODE
474     my $has_significant = ($child->data =~ /[^\x09-\x0D\x20]/);
475     push @new_item, [$content_def->{check_child_text},
476     $self, $item, $child, $has_significant,
477 wakaba 1.66 $content_state, $element_state];
478     $element_state->{has_significant} ||= $has_significant;
479 wakaba 1.61 if ($has_significant and
480     $HTMLSemiTransparentElements->{$el_nsuri}->{$el_ln}) {
481     $content_def = $item->{parent_def} || $content_def;
482     }
483 wakaba 1.60 } elsif ($child_nt == 5) { # ENTITY_REFERENCE_NODE
484     push @child, @{$child->child_nodes};
485 wakaba 1.1 }
486 wakaba 1.60 ## TODO: PI_NODE
487     ## TODO: Unknown node type
488 wakaba 1.1 }
489 wakaba 1.60
490     push @new_item, [$eldef->{check_end}, $self, $item, $element_state];
491    
492     unshift @item, @new_item;
493 wakaba 1.30 } else {
494 wakaba 1.60 die "$0: Internal error: Unsupported checking action type |$item->{type}|";
495 wakaba 1.4 }
496 wakaba 1.1 }
497 wakaba 1.17
498     for (@{$self->{usemap}}) {
499     unless ($self->{map}->{$_->[0]}) {
500     $self->{onerror}->(node => $_->[1], type => 'no referenced map');
501     }
502     }
503    
504 wakaba 1.32 for (@{$self->{contextmenu}}) {
505     unless ($self->{menu}->{$_->[0]}) {
506     $self->{onerror}->(node => $_->[1], type => 'no referenced menu');
507     }
508     }
509    
510 wakaba 1.61 delete $self->{plus_elements};
511     delete $self->{minus_elements};
512 wakaba 1.17 delete $self->{onerror};
513     delete $self->{id};
514     delete $self->{usemap};
515     delete $self->{map};
516 wakaba 1.33 return $self->{return};
517 wakaba 1.1 } # check_element
518    
519 wakaba 1.60 sub _add_minus_elements ($$@) {
520     my $self = shift;
521     my $element_state = shift;
522     for my $elements (@_) {
523     for my $nsuri (keys %$elements) {
524     for my $ln (keys %{$elements->{$nsuri}}) {
525     unless ($self->{minus_elements}->{$nsuri}->{$ln}) {
526     $element_state->{minus_elements_original}->{$nsuri}->{$ln} = 0;
527     $self->{minus_elements}->{$nsuri}->{$ln} = 1;
528     }
529     }
530     }
531     }
532     } # _add_minus_elements
533    
534     sub _remove_minus_elements ($$) {
535     my $self = shift;
536     my $element_state = shift;
537     for my $nsuri (keys %{$element_state->{minus_elements_original}}) {
538     for my $ln (keys %{$element_state->{minus_elements_original}->{$nsuri}}) {
539     delete $self->{minus_elements}->{$nsuri}->{$ln};
540     }
541     }
542     } # _remove_minus_elements
543    
544     sub _add_plus_elements ($$@) {
545     my $self = shift;
546     my $element_state = shift;
547     for my $elements (@_) {
548     for my $nsuri (keys %$elements) {
549     for my $ln (keys %{$elements->{$nsuri}}) {
550     unless ($self->{plus_elements}->{$nsuri}->{$ln}) {
551     $element_state->{plus_elements_original}->{$nsuri}->{$ln} = 0;
552     $self->{plus_elements}->{$nsuri}->{$ln} = 1;
553     }
554     }
555     }
556     }
557     } # _add_plus_elements
558    
559     sub _remove_plus_elements ($$) {
560     my $self = shift;
561     my $element_state = shift;
562     for my $nsuri (keys %{$element_state->{plus_elements_original}}) {
563     for my $ln (keys %{$element_state->{plus_elements_original}->{$nsuri}}) {
564     delete $self->{plus_elements}->{$nsuri}->{$ln};
565     }
566     }
567     } # _remove_plus_elements
568    
569 wakaba 1.68 sub _attr_status_info ($$$) {
570     my ($self, $attr, $status_code) = @_;
571     my $status;
572     if ($status_code & FEATURE_STATUS_REC) {
573     return;
574     } elsif ($status_code & FEATURE_STATUS_CR) {
575     $status = 'cr';
576     } elsif ($status_code & FEATURE_STATUS_LC) {
577     $status = 'lc';
578     } elsif ($status_code & FEATURE_STATUS_WD) {
579     $status = 'wd';
580     } else {
581     $status = 'non-standard';
582     }
583     $self->{onerror}->(node => $attr,
584     type => 'status:'.$status.':attr',
585     level => $self->{info_level});
586     } # _attr_status_info
587    
588 wakaba 1.2 sub _add_minuses ($@) {
589     my $self = shift;
590     my $r = {};
591     for my $list (@_) {
592     for my $ns (keys %$list) {
593     for my $ln (keys %{$list->{$ns}}) {
594     unless ($self->{minuses}->{$ns}->{$ln}) {
595     $self->{minuses}->{$ns}->{$ln} = 1;
596     $r->{$ns}->{$ln} = 1;
597     }
598     }
599     }
600     }
601 wakaba 1.4 return {type => 'plus', list => $r};
602 wakaba 1.2 } # _add_minuses
603    
604 wakaba 1.50 sub _add_pluses ($@) {
605     my $self = shift;
606     my $r = {};
607     for my $list (@_) {
608     for my $ns (keys %$list) {
609     for my $ln (keys %{$list->{$ns}}) {
610     unless ($self->{pluses}->{$ns}->{$ln}) {
611     $self->{pluses}->{$ns}->{$ln} = 1;
612     $r->{$ns}->{$ln} = 1;
613     }
614     }
615     }
616     }
617     return {type => 'minus', list => $r};
618     } # _add_pluses
619    
620 wakaba 1.2 sub _remove_minuses ($$) {
621 wakaba 1.4 my ($self, $todo) = @_;
622 wakaba 1.50 if ($todo->{type} eq 'minus') {
623     for my $ns (keys %{$todo->{list}}) {
624     for my $ln (keys %{$todo->{list}->{$ns}}) {
625     delete $self->{pluses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
626     }
627 wakaba 1.2 }
628 wakaba 1.50 } elsif ($todo->{type} eq 'plus') {
629     for my $ns (keys %{$todo->{list}}) {
630     for my $ln (keys %{$todo->{list}->{$ns}}) {
631     delete $self->{minuses}->{$ns}->{$ln} if $todo->{list}->{$ns}->{$ln};
632     }
633     }
634     } else {
635     die "$0: Unknown +- type: $todo->{type}";
636 wakaba 1.2 }
637     1;
638     } # _remove_minuses
639    
640 wakaba 1.50 ## NOTE: Priority for "minuses" and "pluses" are currently left
641     ## undefined and implemented inconsistently; it is not a problem for
642     ## now, since no element belongs to both lists.
643    
644 wakaba 1.30 sub _check_get_children ($$$) {
645     my ($self, $node, $parent_todo) = @_;
646 wakaba 1.4 my $new_todos = [];
647 wakaba 1.2 my $sib = [];
648     TP: {
649     my $node_ns = $node->namespace_uri;
650     $node_ns = '' unless defined $node_ns;
651     my $node_ln = $node->manakai_local_name;
652 wakaba 1.45 if ($HTMLTransparentElements->{$node_ns}->{$node_ln}) {
653     if ($node_ns eq $HTML_NS and $node_ln eq 'noscript') {
654     if ($parent_todo->{flag}->{in_head}) {
655     #
656     } else {
657     my $end = $self->_add_minuses ({$HTML_NS, {noscript => 1}});
658     push @$sib, $end;
659    
660     unshift @$sib, @{$node->child_nodes};
661     push @$new_todos, {type => 'element-attributes', node => $node};
662     last TP;
663     }
664 wakaba 1.58 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'del') {
665     my $sig_flag = $parent_todo->{flag}->{has_descendant}->{significant};
666     unshift @$sib, @{$node->child_nodes};
667     push @$new_todos, {type => 'element-attributes', node => $node};
668     push @$new_todos,
669     {type => 'code',
670     code => sub {
671     $parent_todo->{flag}->{has_descendant}->{significant} = 0
672     if not $sig_flag;
673     }};
674     last TP;
675 wakaba 1.45 } else {
676     unshift @$sib, @{$node->child_nodes};
677     push @$new_todos, {type => 'element-attributes', node => $node};
678     last TP;
679 wakaba 1.2 }
680     }
681 wakaba 1.8 if ($node_ns eq $HTML_NS and ($node_ln eq 'video' or $node_ln eq 'audio')) {
682 wakaba 1.2 if ($node->has_attribute_ns (undef, 'src')) {
683     unshift @$sib, @{$node->child_nodes};
684 wakaba 1.9 push @$new_todos, {type => 'element-attributes', node => $node};
685 wakaba 1.2 last TP;
686     } else {
687     my @cn = @{$node->child_nodes};
688     CN: while (@cn) {
689     my $cn = shift @cn;
690     my $cnt = $cn->node_type;
691     if ($cnt == 1) {
692 wakaba 1.8 my $cn_nsuri = $cn->namespace_uri;
693     $cn_nsuri = '' unless defined $cn_nsuri;
694     if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'source') {
695 wakaba 1.2 #
696     } else {
697     last CN;
698     }
699     } elsif ($cnt == 3 or $cnt == 4) {
700     if ($cn->data =~ /[^\x09-\x0D\x20]/) {
701     last CN;
702     }
703     }
704     } # CN
705     unshift @$sib, @cn;
706     }
707 wakaba 1.57 } elsif ($node_ns eq $HTML_NS and $node_ln eq 'object') {
708     my @cn = @{$node->child_nodes};
709     CN: while (@cn) {
710     my $cn = shift @cn;
711     my $cnt = $cn->node_type;
712     if ($cnt == 1) {
713     my $cn_nsuri = $cn->namespace_uri;
714     $cn_nsuri = '' unless defined $cn_nsuri;
715     if ($cn_nsuri eq $HTML_NS and $cn->manakai_local_name eq 'param') {
716     #
717     } else {
718     last CN;
719     }
720     } elsif ($cnt == 3 or $cnt == 4) {
721     if ($cn->data =~ /[^\x09-\x0D\x20]/) {
722     last CN;
723     }
724     }
725     } # CN
726     unshift @$sib, @cn;
727 wakaba 1.2 }
728 wakaba 1.4 push @$new_todos, {type => 'element', node => $node};
729 wakaba 1.2 } # TP
730 wakaba 1.30
731     for my $new_todo (@$new_todos) {
732     $new_todo->{flag} = {%{$parent_todo->{flag} or {}}};
733     }
734    
735 wakaba 1.4 return ($sib, $new_todos);
736 wakaba 1.2 } # _check_get_children
737    
738 wakaba 1.44 =head1 LICENSE
739    
740 wakaba 1.56 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
741 wakaba 1.44
742     This library is free software; you can redistribute it
743     and/or modify it under the same terms as Perl itself.
744    
745     =cut
746    
747 wakaba 1.1 1;
748 wakaba 1.68 # $Date: 2008/02/24 01:38:36 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24